summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xbin/shyaml251
1 files changed, 251 insertions, 0 deletions
diff --git a/bin/shyaml b/bin/shyaml
new file mode 100755
index 00000000..83171394
--- /dev/null
+++ b/bin/shyaml
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+
+## Note: to launch test, you can use:
+## python -m doctest -d shyaml.py
+## or
+## nosetests
+
+from __future__ import print_function
+
+import sys
+import yaml
+import os.path
+import re
+
## Name under which the program was invoked (directory part stripped);
## interpolated into the usage message.
EXNAME = os.path.split(sys.argv[0])[1]
+
+
def tokenize(s):
    r"""Yield every sub-part of a '.'-separated string.

    So:

        >>> list(tokenize('foo.bar.wiz'))
        ['foo', 'bar', 'wiz']

    This function has to deal with any type of data in the string, so it
    actually interprets the string. Characters with meaning are '.' and '\'.
    Both of these can be included in a token by quoting them with '\'.

    So dots or backslashes can be contained in a token:

        >>> print('\n'.join(tokenize(r'foo.dot<\.>.slash<\\>')))
        foo
        dot<.>
        slash<\>

    Notice that empty keys are also supported:

        >>> list(tokenize(r'foo..bar'))
        ['foo', '', 'bar']

    Given an empty string:

        >>> list(tokenize(r''))
        ['']

    And a None value:

        >>> list(tokenize(None))
        []

    This is a generator: tokens are produced lazily, in order.

    """
    if s is None:
        ## A plain ``return`` ends the generator. ``raise StopIteration``
        ## inside a generator body is converted to RuntimeError by PEP 479.
        return
    ## Each match is a maximal run of escaped characters ('\' followed by
    ## any character) and characters that are neither '.' nor '\'.
    matches = re.finditer(r'((\\.|[^.\\])*)', s)
    ## Unescape '\\' and '\.' inside each raw token.
    tokens = (re.sub(r'\\(\\|\.)', r'\1', m.group(0)) for m in matches)
    ## The regex yields a superfluous empty match right after every
    ## non-empty one; drop it so only real tokens come out.
    for token in tokens:
        if token:
            ## The default keeps StopIteration from escaping the generator
            ## should the stream ever end here.
            next(tokens, None)
        yield token
+
+
def mget(dct, key, default=None):
    r"""Fetch a value nested deep inside ``dct`` using a dotted key.

    The ``key`` string is split with ``tokenize`` and the resulting
    components are looked up one after the other by ``aget``.

    Accessing leaf values is quite straightforward:

        >>> dct = {'a': {'x': 1, 'b': {'c': 2}}}
        >>> mget(dct, 'a.x')
        1
        >>> mget(dct, 'a.b.c')
        2

    But you can also get a sub-dict if your key is not targeting a
    leaf value:

        >>> mget(dct, 'a.b')
        {'c': 2}

    As a special feature, list access is also supported by providing a
    (possibly signed) integer; it will be interpreted as the usual python
    sequence access using bracket notation:

        >>> mget({'a': {'x': [1, 5], 'b': {'c': 2}}}, 'a.x.-1')
        5
        >>> mget({'a': {'x': 1, 'b': [{'c': 2}]}}, 'a.b.0.c')
        2

    Keys that contain '.' can be accessed by escaping them:

        >>> dct = {'a': {'x': 1}, 'a.x': 3, 'a.y': 4}
        >>> mget(dct, 'a.x')
        1
        >>> mget(dct, r'a\.x')
        3
        >>> mget(dct, r'a.y')
        >>> mget(dct, r'a\.y')
        4

    As a consequence, if your key contains a '\', you should also escape it:

        >>> dct = {r'a\x': 3, r'a\.x': 4, 'a.x': 5, 'a\\': {'x': 6}}
        >>> mget(dct, r'a\\x')
        3
        >>> mget(dct, r'a\\\.x')
        4
        >>> mget(dct, r'a\\.x')
        6
        >>> mget({'a\\': {'b': 1}}, r'a\\.b')
        1
        >>> mget({r'a.b\.c': 1}, r'a\.b\\\.c')
        1

    And even empty string keys are supported:

        >>> dct = {r'a': {'': {'y': 3}, 'y': 4}, 'b': {'': {'': 1}}, '': 2}
        >>> mget(dct, r'a..y')
        3
        >>> mget(dct, r'a.y')
        4
        >>> mget(dct, r'')
        2
        >>> mget(dct, r'b..')
        1

    mget also supports a default value if the key is not found:

        >>> mget({'a': 1}, 'b.y', default='N/A')
        'N/A'

    but will complain if you are trying to get into a leaf:

        >>> mget({'a': 1}, 'a.y', default='N/A')  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        TypeError: 'int' object ...

    If the key is None, the whole dct is sent back:

        >>> mget({'a': 1}, None)
        {'a': 1}

    """
    path = tokenize(key)
    return aget(dct, path, default)
+
+
def aget(dct, key, default=None):
    r"""Allow to get values deep in a dict with iterable keys

    ``key`` is any iterable of path components; they are applied one after
    the other, starting from ``dct``.

    Accessing leaf values is quite straightforward:

        >>> dct = {'a': {'x': 1, 'b': {'c': 2}}}
        >>> aget(dct, ('a', 'x'))
        1
        >>> aget(dct, ('a', 'b', 'c'))
        2

    If key is empty, it returns unchanged the ``dct`` value.

        >>> aget({'x': 1}, ())
        {'x': 1}

    When the current value is a list, the component is converted with
    ``int()`` and used as a sequence index:

        >>> aget({'a': [1, 5]}, ('a', '-1'))
        5

    A missing mapping key makes the whole lookup return ``default``:

        >>> aget({'a': 1}, ('b', 'y'), 'N/A')
        'N/A'

    Only ``KeyError`` is turned into ``default``. An out-of-range list
    index (``IndexError``), a non-integer component applied to a list
    (``ValueError``) or subscripting a non-container (``TypeError``) are
    propagated to the caller.

    The walk is iterative, so the length of ``key`` is not bounded by the
    interpreter's recursion limit.

    """
    value = dct
    for part in key:
        try:
            if isinstance(value, list):
                value = value[int(part)]
            else:
                value = value[part]
        except KeyError:
            return default
    return value
+
+
def stderr(msg):
    """Write ``msg`` followed by a newline to the standard error stream."""
    line = msg + "\n"
    sys.stderr.write(line)
+
+
def die(msg, errlvl=1, prefix="Error: "):
    """Report ``msg`` on standard error and terminate the program.

    The message is written through ``stderr`` with ``prefix`` prepended,
    then ``sys.exit`` is called with ``errlvl`` as the exit status.
    """
    text = "%s%s" % (prefix, msg)
    stderr(text)
    sys.exit(errlvl)
+
## Scalar types that dump() hands back untouched.
SIMPLE_TYPES = (str, int, float)
## Container types (not referenced by the code visible in this file).
COMPLEX_TYPES = (list, dict)


def dump(value):
    """Return ``value`` in the form used for output.

    Instances of ``SIMPLE_TYPES`` are returned unchanged; anything else is
    serialized with ``yaml.dump`` in block style
    (``default_flow_style=False``).
    """
    if isinstance(value, SIMPLE_TYPES):
        return value
    return yaml.dump(value, default_flow_style=False)
+
def type_name(value):
    """Returns pseudo-YAML type name of given value.

    Dicts are reported as ``"struct"``, tuples and lists as ``"sequence"``;
    any other value is reported under the name of its Python type.
    """
    if isinstance(value, dict):
        return "struct"
    if isinstance(value, (tuple, list)):
        return "sequence"
    return type(value).__name__
+
def stdout(value):
    """Write ``value`` to standard output as is (no newline is appended)."""
    stream = sys.stdout
    stream.write(value)
+
def main(args):
    """Run the command line: read YAML from stdin and print the selection.

    ``args`` is the argument list without the program name, in the form
    ``ACTION [KEY [DEFAULT]]``:

    - ``ACTION`` is one of ``get-value``, ``get-values``, ``get-values-0``,
      ``get-type``, ``keys``, ``keys-0``, ``values`` or ``values-0``.
      Actions ending in ``-0`` terminate each record with a NUL character
      instead of a newline.
    - ``KEY`` is a dotted path handed to ``mget``; when it is omitted the
      whole document is selected.
    - ``DEFAULT`` is used when the path is not found (empty string when
      omitted).

    Without any argument the usage message is printed and the program
    exits with status 0. Invalid paths, unsupported value types and
    unknown actions are reported through ``die``, which exits with a
    non-zero status.
    """
    usage = """usage:
    %(exname)s {get-value,get-values{,-0},get-type,keys{,-0},values{,-0}} KEY DEFAULT
    """ % {"exname": EXNAME}
    if not args:
        die(usage, errlvl=0, prefix="")
    action = args[0]
    key_value = args[1] if len(args) > 1 else None
    default = args[2] if len(args) > 2 else ""
    ## NOTE(review): this used to be ``yaml.load(sys.stdin)``. That call can
    ## build arbitrary Python objects from untrusted input, and PyYAML >= 6
    ## rejects it without an explicit Loader. ``safe_load`` only builds
    ## standard YAML types; documents relying on python-specific tags will
    ## now be refused.
    contents = yaml.safe_load(sys.stdin)
    try:
        value = mget(contents, key_value, default)
    except IndexError:
        die("list index error in path %r." % key_value)
    except (KeyError, TypeError, ValueError):
        ## ValueError: a non-integer path component was applied to a list.
        die("invalid path %r." % key_value)

    tvalue = type_name(value)
    termination = "\0" if action.endswith("-0") else "\n"

    if action == "get-value":
        print(dump(value), end='')
    elif action in ("get-values", "get-values-0"):
        if isinstance(value, dict):
            ## ``items()`` works on both Python 2 and 3, unlike
            ## ``iteritems()``.
            for k, v in value.items():
                stdout("%s%s%s%s" % (dump(k), termination,
                                     dump(v), termination))
        elif isinstance(value, list):
            for elt in value:
                stdout("%s%s" % (dump(elt), termination))
        else:
            die("%s does not support %r type. "
                "Please provide or select a sequence or struct."
                % (action, tvalue))
    elif action == "get-type":
        print(tvalue)
    elif action in ("keys", "keys-0", "values", "values-0"):
        if isinstance(value, dict):
            method = value.keys if action.startswith("keys") else value.values
            for k in method():
                stdout("%s%s" % (dump(k), termination))
        else:
            die("%s does not support %r type. "
                "Please provide or select a struct." % (action, tvalue))
    else:
        die("Invalid argument.\n%s" % usage)
+
+
## Script entry point: hand the command-line arguments (program name
## excluded) to main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)