# HG changeset patch # User Stefano Rivera # Date 1378125918 -7200 # Node ID 16d9f3a0dc291c9a39ddd187b0449b4da6162cf1 # Parent 844073eaef6957d9040ba7fd15ad6812eb748c5d Refactored as classes for readability and to make it easier to support more types diff -r 844073eaef69 -r 16d9f3a0dc29 nagslang/yamlish.py --- a/nagslang/yamlish.py Mon Sep 02 14:25:16 2013 +0200 +++ b/nagslang/yamlish.py Mon Sep 02 14:45:18 2013 +0200 @@ -8,92 +8,150 @@ def dump(data, file_object): - file_object.write('\n'.join(_dump(data, 0))) + file_object.write(dump_s(data)) -def _dump(obj, indent): - if isinstance(obj, list): - for item in obj: - yield '%s- %s' % (' ' * indent, - '\n'.join(_dump(item, indent + 2)).strip()) - elif isinstance(obj, dict): - for k, v in obj.iteritems(): - if isinstance(v, basestring): - yield '%s%s: %s' % (' ' * indent, k, v) - else: - extra = 0 if isinstance(v, list) else 2 - yield '%s%s:\n%s' % (' ' * indent, k, - '\n'.join(_dump(v, indent + extra))) - elif isinstance(obj, basestring): - yield '%s%s' % (' ' * indent, obj) - - -spaces_re = re.compile(r'^(\s*)(.*)') -list_re = re.compile(r'^(-\s+)(.*)') -dict_re = re.compile(r'^([^-:]+):\s?(.*)') +def dump_s(data): + return Dumper().dump(data) def load(file_object): - # Stack of (indent level, container object) - stack = [(0, {})] - indent = lambda: stack[-1][0] - obj = lambda: stack[-1][1] + yaml = file_object.read() + return load_s(yaml) + + +def load_s(yaml): + return Parser().parse(yaml) + + + +class Dumper(object): + def dump(self, data): + return '\n'.join(self._dump(data)) + + def _dump(self, data, indent=0): + for type_ in (list, dict, basestring): + if isinstance(data, type_): + f = getattr(self, '_dump_%s' % type_.__name__) + return f(data, indent) + raise NotImplementedError() + + def _dump_list(self, data, indent): + output = [] + for item in data: + dumped = self._dump(item, indent + 2) + dumped[0] = '%s- %s' % (' ' * indent, dumped[0][indent + 2:]) + output += dumped + return output - # When a dict's value is a nested block, remember the key - parent_key = None + def _dump_dict(self, data, indent): + output = [] + for k, v in data.iteritems(): + output.append('%s%s:' % (' ' * indent, k)) + if isinstance(v, dict): + output += self._dump(v, indent + 2) + elif isinstance(v, list): + output += self._dump(v, indent) + else: + value = self._dump(v) + assert len(value) == 1 + output[-1] += ' %s' % value[0] + return output + + def _dump_basestring(self, data, indent): + return [' ' * indent + data] + - for line in file_object: - spaces, line = spaces_re.match(line).groups() +class Parser(object): + _spaces_re = re.compile(r'^(\s*)(.*)') + _list_re = re.compile(r'^(-\s+)(.*)') + _dict_re = re.compile(r'^([^-:]+):\s?(.*)') - while len(spaces) < indent(): - stack.pop() + def __init__(self): + # Stack of (indent level, container object) + self._stack = [(0, {})] + # When a dict's value is a nested block, remember the key + self._parent_key = None + + @property + def _indent(self): + return self._stack[-1][0] + + @property + def _container(self): + return self._stack[-1][1] - lm = list_re.match(line) - dm = dict_re.match(line) - if len(spaces) == indent(): - if lm: - assert isinstance(obj(), list) or parent_key - if isinstance(obj(), dict) and parent_key: + @property + def _in_list(self): + return isinstance(self._container, list) + + @property + def _in_dict(self): + return isinstance(self._container, dict) + + def _push(self, container, indent=None): + in_list = self._in_list + assert in_list or self._parent_key + + if indent is None: + indent = self._indent + self._stack.append((indent, container())) + if in_list: + self._stack[-2][1].append(self._container) + else: + self._stack[-2][1][self._parent_key] = self._container + self._parent_key = None + + def parse(self, yaml): + for line in yaml.splitlines(): + spaces, line = self._spaces_re.match(line).groups() + + while len(spaces) < self._indent: + self._stack.pop() + + lm = self._list_re.match(line) + dm = self._dict_re.match(line) + if len(spaces) == self._indent: + if lm and self._in_dict: # Starting a list in a dict - stack.append((indent(), [])) - stack[-2][1][parent_key] = obj() - parent_key = None - elif dm and isinstance(obj(), list): - # Left an embedded list - stack.pop() + self._push(list) + elif dm and self._in_list: + # Left an embedded list + self._stack.pop() + + if len(spaces) > self._indent: + assert self._parent_key + if dm: + # Nested dict + self._push(dict, len(spaces)) + elif lm: + # Over-indented list in a dict + self._push(list, len(spaces)) - if len(spaces) > indent(): - assert parent_key + indent = self._indent + while lm and lm.group(2).startswith('- '): + # Nested lists + prefix, line = lm.groups() + indent += len(prefix) + self._push(list, indent) + lm = self._list_re.match(line) + del indent + + if lm: + prefix, line = lm.groups() + dm = self._dict_re.match(line) + if dm: + self._push(dict, self._indent + len(prefix)) + else: + assert self._in_list + self._container.append(line) + if dm: - # Nested dict - stack.append((len(spaces), {})) - elif lm: - # Over-indented list in a dict - stack.append((len(spaces), [])) - stack[-2][1][parent_key] = obj() - parent_key = None - - while lm and lm.group(2).startswith('- '): - # Nested lists - prefix, line = lm.groups() - stack.append((indent() + len(prefix), [])) - stack[-2][1].append(obj()) - lm = list_re.match(line) + key, value = dm.groups() + if value: + assert self._in_dict + self._container[key] = value + else: + self._parent_key = key - if lm: - prefix, line = lm.groups() - dm = dict_re.match(line) - if dm: - stack.append((indent() + len(prefix), {})) - stack[-2][1].append(obj()) - else: - obj().append(line) - - if dm: - key, value = dm.groups() - if value: - obj()[key] = value - else: - parent_key = key - - return stack[0][1] + return self._stack[0][1]