source: nagslang/yamlish.py@ 130:67f18e72024d

Last change on this file since 130:67f18e72024d was 130:67f18e72024d, checked in by Stefano Rivera <stefano@…>, 7 years ago

Handle corner cases around quotes in strings

File size: 6.8 KB
Line 
1'''
2Parser and dumper for a simple, YAMLish format (actually a YAML subset).
3The top level object is a dict or list.
4lists and dicts can contain:
5 * lists, dicts,
6 * single line strings,
7 * ints, floats,
8 * True, False, and None
9'''
10
11import re
12
13
def dump(data, file_object):
    '''Serialize *data* with dump_s() and write the text to *file_object*.'''
    serialized = dump_s(data)
    file_object.write(serialized)
16
17
def dump_s(data):
    '''Return *data* serialized to a YAMLish string by a fresh Dumper.'''
    dumper = Dumper()
    return dumper.dump(data)
20
21
def load(file_object):
    '''Read everything from *file_object* and parse it with load_s().'''
    return load_s(file_object.read())
25
26
def load_s(yaml):
    '''Parse the YAMLish string *yaml* and return the resulting object.

    Surrounding whitespace is stripped before the text reaches the Parser.
    '''
    parser = Parser()
    stripped = yaml.strip()
    return parser.parse(stripped)
29
30
class Dumper(object):
    '''Serialize nested lists and dicts of scalars into YAMLish text.

    Supported values are lists, dicts, single line strings, ints, floats,
    True, False and None. Every ``_dump_*`` helper returns a list of output
    lines, already indented by ``indent`` spaces.
    '''

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        _string_types = (basestring,)  # noqa: F821
    except NameError:
        _string_types = (str,)

    def dump(self, data):
        '''Return *data* serialized as one newline-joined string.'''
        return '\n'.join(self._dump(data))

    def _dump(self, data, indent=0):
        '''Return the output lines for *data*, indented by *indent* spaces.

        Raises NotImplementedError if *data* is of an unsupported type.
        '''
        # bool is a subclass of int, so the literals have to be recognised
        # before the numeric types; otherwise True/False are written as 1/0.
        if data is None or isinstance(data, bool):
            return self._dump_literal(data, indent)
        if isinstance(data, list):
            return self._dump_list(data, indent)
        if isinstance(data, dict):
            return self._dump_dict(data, indent)
        if isinstance(data, self._string_types):
            return self._dump_basestring(data, indent)
        if isinstance(data, int):
            return self._dump_int(data, indent)
        if isinstance(data, float):
            return self._dump_float(data, indent)
        raise NotImplementedError(
            'Cannot dump objects of type %s' % type(data).__name__)

    def _dump_list(self, data, indent):
        '''Dump a list as one "- item" entry per element.

        An empty list is written in inline form (``[]``), because a block
        list without items produces no lines at all.
        '''
        if not data:
            return [' ' * indent + '[]']
        output = []
        for item in data:
            dumped = self._dump(item, indent + 2)
            # Swap the first line's two extra indent spaces for the "- "
            # marker; continuation lines keep the deeper indent.
            dumped[0] = '%s- %s' % (' ' * indent, dumped[0][indent + 2:])
            output += dumped
        return output

    def _dump_dict(self, data, indent):
        '''Dump a dict as "key: value" lines, sorted by key.

        Non-empty dicts and lists are written as nested blocks below the
        key; scalars and empty containers go on the key's own line.
        '''
        if not data:
            return [' ' * indent + '{}']
        output = []
        for k, v in sorted(data.items()):
            output.append('%s%s:' % (' ' * indent, k))
            if isinstance(v, dict) and v:
                output += self._dump(v, indent + 2)
            elif isinstance(v, list) and v:
                # Lists sit at the same indent as their key
                output += self._dump(v, indent)
            else:
                value = self._dump(v)
                assert len(value) == 1
                output[-1] += ' %s' % value[0]
        return output

    @staticmethod
    def _looks_numeric(text):
        '''Return True if int() or float() accepts *text*.'''
        for type_ in (int, float):
            try:
                type_(text)
            except ValueError:
                continue
            return True
        return False

    def _dump_basestring(self, data, indent):
        '''Dump a string, single-quoting it when it would be ambiguous.

        Quoted: the literal names true/false/null, strings that int() or
        float() would accept, strings containing a single quote (which is
        doubled), and the empty string.
        '''
        if data in ('true', 'false', 'null') or self._looks_numeric(data):
            data = "'%s'" % data
        elif "'" in data:
            data = "'%s'" % data.replace("'", "''")
        elif data == '':
            data = "''"
        return [' ' * indent + data]

    def _dump_int(self, data, indent):
        '''Dump an integer in decimal notation.'''
        return ['%s%i' % (' ' * indent, data)]

    def _dump_float(self, data, indent):
        '''Dump a float using repr(), which does not cut off decimals.

        ('%f' keeps only six decimal places, turning e.g. 1e-7 into
        0.000000.)
        '''
        return ['%s%s' % (' ' * indent, repr(float(data)))]

    def _dump_literal(self, data, indent):
        '''Dump True, False or None as true, false or null.'''
        string = {
            True: 'true',
            False: 'false',
            None: 'null',
        }[data]
        return [' ' * indent + string]
88
89
class Parser(object):
    '''Line-based parser for the YAMLish format.

    The parser keeps a stack of (indent level, container) pairs; the top of
    the stack is the list or dict that the current line adds to. A Parser
    instance accumulates state in that stack, so use a fresh instance for
    every document.
    '''

    # Leading whitespace, then the rest of the line
    _spaces_re = re.compile(r'^(\s*)(.*)')
    # "- " list marker, then the item
    _list_re = re.compile(r'^(-\s+)(.*)')
    # "key: value"; the key may not contain '-' or ':' nor start with { or [
    _dict_re = re.compile(r'^((?![{[])[^-:]+):\s?(.*)')
    # First element of an inline list, then (optionally) the remainder
    _inline_list_re = re.compile(r"^([^',]+|(?:'')+|'.+?[^'](?:'')*')(?:, (.*))?$")

    def __init__(self):
        # Stack of (indent level, container object)
        self._stack = []
        # When a dict's value is a nested block, remember the key
        self._parent_key = None

    @property
    def _indent(self):
        '''Indent level of the innermost open container.'''
        return self._stack[-1][0]

    @property
    def _container(self):
        '''The innermost open container.'''
        return self._stack[-1][1]

    @property
    def _in_list(self):
        '''True if the innermost open container is a list.'''
        return isinstance(self._container, list)

    @property
    def _in_dict(self):
        '''True if the innermost open container is a dict.'''
        return isinstance(self._container, dict)

    def _push(self, container, indent=None):
        '''Open a new nested container and attach it to the current one.

        *container* is the type to instantiate (list or dict). *indent* is
        the new container's indent level and defaults to the current one.
        Inside a list the new container is appended; inside a dict it is
        stored under the pending ``_parent_key``, which is then cleared.
        '''
        in_list = self._in_list
        assert in_list or self._parent_key

        if indent is None:
            indent = self._indent
        self._stack.append((indent, container()))
        if in_list:
            self._stack[-2][1].append(self._container)
        else:
            self._stack[-2][1][self._parent_key] = self._container
            self._parent_key = None

    def parse(self, yaml):
        '''Parse the YAMLish document *yaml* and return a list or dict.

        A document starting with '[' or '{' is parsed as one inline value.
        Otherwise the top level container is a list if the text starts with
        '-' and a dict if not, and the text is processed line by line.
        '''
        if yaml.startswith(('[', '{')):
            return self._parse_value(yaml)

        if yaml.startswith('-'):
            self._stack.append((0, []))
        else:
            self._stack.append((0, {}))

        for line in yaml.splitlines():
            spaces, line = self._spaces_re.match(line).groups()
            if not line:
                # Blank and whitespace-only lines carry no structure; their
                # (lack of) indentation must not close open containers.
                continue

            while len(spaces) < self._indent:
                self._stack.pop()

            lm = self._list_re.match(line)
            dm = self._dict_re.match(line)
            if len(spaces) == self._indent:
                if lm and self._in_dict:
                    # Starting a list in a dict
                    self._push(list)
                elif dm and self._in_list:
                    # Left an embedded list
                    self._stack.pop()

            if len(spaces) > self._indent:
                assert self._parent_key
                if dm:
                    # Nested dict
                    self._push(dict, len(spaces))
                elif lm:
                    # Over-indented list in a dict
                    self._push(list, len(spaces))

            indent = self._indent
            while lm and lm.group(2).startswith('- '):
                # Nested lists
                prefix, line = lm.groups()
                indent += len(prefix)
                self._push(list, indent)
                lm = self._list_re.match(line)
            del indent

            if lm:
                prefix, line = lm.groups()
                dm = self._dict_re.match(line)
                if dm:
                    # A dict as list item; its first key is handled below
                    self._push(dict, self._indent + len(prefix))
                else:
                    assert self._in_list
                    self._container.append(self._parse_value(line))

            if dm:
                key, value = dm.groups()
                assert self._in_dict
                if value:
                    self._container[key] = self._parse_value(value)
                else:
                    # The value is a nested block on the following lines
                    self._parent_key = key

        return self._stack[0][1]

    def _parse_value(self, value):
        '''Convert the text of a single value into a Python object.

        Handles single-quoted strings ('' is an escaped quote), inline
        lists and dicts, true/false/null, ints and floats. Anything else is
        returned unchanged as a string.

        Raises NotImplementedError for values starting with '!!'.
        '''
        if value.startswith("'") and value.endswith("'"):
            return value[1:-1].replace("''", "'")
        if value.startswith('[') and value.endswith(']'):
            value = value[1:-1]
            output = []
            while value:
                m = self._inline_list_re.match(value)
                assert m, value
                output.append(self._parse_value(m.group(1)))
                value = m.group(2)
            return output
        if value.startswith('{') and value.endswith('}'):
            value = value[1:-1]
            output = {}
            while value:
                key, value = value.split(': ', 1)
                m = self._inline_list_re.match(value)
                assert m
                output[key] = self._parse_value(m.group(1))
                value = m.group(2)
            return output
        if value.startswith('!!'):
            # NotImplemented is a constant, not an exception class; calling
            # it would raise a confusing TypeError instead.
            raise NotImplementedError(
                'YAML tags are not supported: %s' % value)
        if value == 'true':
            return True
        if value == 'false':
            return False
        if value == 'null':
            return None
        for type_ in (int, float):
            try:
                return type_(value)
            except ValueError:
                pass
        return value
Note: See TracBrowser for help on using the repository browser.