diff options
-rwxr-xr-x | bin/shyaml | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/bin/shyaml b/bin/shyaml new file mode 100755 index 00000000..83171394 --- /dev/null +++ b/bin/shyaml @@ -0,0 +1,251 @@ +#!/usr/bin/env python + +## Note: to launch test, you can use: +## python -m doctest -d shyaml.py +## or +## nosetests + +from __future__ import print_function + +import sys +import yaml +import os.path +import re + +EXNAME = os.path.basename(sys.argv[0]) + + +def tokenize(s): + r"""Returns an iterable in all subpart of a '.' separated string + + So: + + >>> list(tokenize('foo.bar.wiz')) + ['foo', 'bar', 'wiz'] + + this function has to deal with any type of data in the string. So it + actually interprets the string. Characters with meaning are '.' and '\'. + Both of these can be included in a token by quoting them with '\'. + + So dot of slashes can be contained in token: + + >>> print('\n'.join(tokenize(r'foo.dot<\.>.slash<\\>'))) + foo + dot<.> + slash<\> + + Notice that empty keys are also supported: + + >>> list(tokenize(r'foo..bar')) + ['foo', '', 'bar'] + + Given an empty string: + + >>> list(tokenize(r'')) + [''] + + And a None value: + + >>> list(tokenize(None)) + [] + + """ + if s is None: + raise StopIteration + tokens = (re.sub(r'\\(\\|\.)', r'\1', m.group(0)) + for m in re.finditer(r'((\\.|[^.\\])*)', s)) + ## an empty string superfluous token is added after all non-empty string token: + for token in tokens: + if len(token) != 0: + next(tokens) + yield token + + +def mget(dct, key, default=None): + r"""Allow to get values deep in a dict with doted keys + + Accessing leaf values is quite straightforward: + + >>> dct = {'a': {'x': 1, 'b': {'c': 2}}} + >>> mget(dct, 'a.x') + 1 + >>> mget(dct, 'a.b.c') + 2 + + But you can also get subdict if your key is not targeting a + leaf value: + + >>> mget(dct, 'a.b') + {'c': 2} + + As a special feature, list access is also supported by providing a + (possibily signed) integer, it'll be interpreted as usual python + sequence access using bracket notation: + + >>> mget({'a': {'x': [1, 5], 'b': {'c': 2}}}, 'a.x.-1') + 5 + >>> mget({'a': {'x': 1, 'b': [{'c': 2}]}}, 'a.b.0.c') + 2 + + Keys that contains '.' can be accessed by escaping them: + + >>> dct = {'a': {'x': 1}, 'a.x': 3, 'a.y': 4} + >>> mget(dct, 'a.x') + 1 + >>> mget(dct, r'a\.x') + 3 + >>> mget(dct, r'a.y') + >>> mget(dct, r'a\.y') + 4 + + As a consequence, if your key contains a '\', you should also escape it: + + >>> dct = {r'a\x': 3, r'a\.x': 4, 'a.x': 5, 'a\\': {'x': 6}} + >>> mget(dct, r'a\\x') + 3 + >>> mget(dct, r'a\\\.x') + 4 + >>> mget(dct, r'a\\.x') + 6 + >>> mget({'a\\': {'b': 1}}, r'a\\.b') + 1 + >>> mget({r'a.b\.c': 1}, r'a\.b\\\.c') + 1 + + And even empty strings key are supported: + + >>> dct = {r'a': {'': {'y': 3}, 'y': 4}, 'b': {'': {'': 1}}, '': 2} + >>> mget(dct, r'a..y') + 3 + >>> mget(dct, r'a.y') + 4 + >>> mget(dct, r'') + 2 + >>> mget(dct, r'b..') + 1 + + mget support also default value if the key is not found: + + >>> mget({'a': 1}, 'b.y', default='N/A') + 'N/A' + + but will complain if you are trying to get into a leaf: + + >>> mget({'a': 1}, 'a.y', default='N/A') # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + TypeError: 'int' object ... + + if the key is None, the whole dct should be sent back: + + >>> mget({'a': 1}, None) + {'a': 1} + + """ + return aget(dct, tokenize(key), default) + + +def aget(dct, key, default=None): + r"""Allow to get values deep in a dict with iterable keys + + Accessing leaf values is quite straightforward: + + >>> dct = {'a': {'x': 1, 'b': {'c': 2}}} + >>> aget(dct, ('a', 'x')) + 1 + >>> aget(dct, ('a', 'b', 'c')) + 2 + + If key is empty, it returns unchanged the ``dct`` value. + + >>> aget({'x': 1}, ()) + {'x': 1} + + """ + key = iter(key) + try: + head = next(key) + except StopIteration: + return dct + try: + value = dct[int(head)] if isinstance(dct, list) else dct[head] + except KeyError: + return default + return aget(value, key, default) + + +def stderr(msg): + sys.stderr.write(msg + "\n") + + +def die(msg, errlvl=1, prefix="Error: "): + stderr("%s%s" % (prefix, msg)) + sys.exit(errlvl) + +SIMPLE_TYPES = (str, int, float) +COMPLEX_TYPES = (list, dict) + + +def dump(value): + return value if isinstance(value, SIMPLE_TYPES) \ + else yaml.dump(value, default_flow_style=False) + +def type_name(value): + """Returns pseudo-YAML type name of given value.""" + return "struct" if isinstance(value, dict) else \ + "sequence" if isinstance(value, (tuple, list)) else \ + type(value).__name__ + +def stdout(value): + sys.stdout.write(value) + +def main(args): + usage = """usage: + %(exname)s {get-value{,-0},get-type,keys{,-0},values{,-0}} KEY DEFAULT + """ % {"exname": EXNAME} + if len(args) == 0: + die(usage, errlvl=0, prefix="") + action = args[0] + key_value = None if len(args) == 1 else args[1] + default = args[2] if len(args) > 2 else "" + contents = yaml.load(sys.stdin) + try: + value = mget(contents, key_value, default) + except IndexError: + die("list index error in path %r." % key_value) + except (KeyError, TypeError): + die("invalid path %r." % key_value) + + tvalue = type_name(value) + termination = "\0" if action.endswith("-0") else "\n" + + if action == "get-value": + print(dump(value), end='') + elif action in ("get-values", "get-values-0"): + if isinstance(value, dict): + for k, v in value.iteritems(): + stdout("%s%s%s%s" % (dump(k), termination, + dump(v), termination)) + elif isinstance(value, list): + for l in value: + stdout("%s%s" % (dump(l), termination)) + else: + die("%s does not support %r type. " + "Please provide or select a sequence or struct." + % (action, tvalue)) + elif action == "get-type": + print(tvalue) + elif action in ("keys", "keys-0", "values", "values-0"): + if isinstance(value, dict): + method = value.keys if action.startswith("keys") else value.values + for k in method(): + stdout("%s%s" % (dump(k), termination)) + else: + die("%s does not support %r type. " + "Please provide or select a struct." % (action, tvalue)) + else: + die("Invalid argument.\n%s" % usage) + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) |