ia64/xen-unstable

view tools/python/xen/xend/sxp.py @ 12070:0d120850741a

[XEND] Fix backslash escaping so it ignores escapes if they don't make sense.

Signed-off-by: Alastair Tse <atse@xensource.com>
author acnt2@huggins.lce.cl.cam.ac.uk
date Thu Oct 05 17:29:19 2006 +0100 (2006-10-05)
parents 688012fc0e79
children aab988d497ea
line source
1 #!/usr/bin/env python
2 #============================================================================
3 # This library is free software; you can redistribute it and/or
4 # modify it under the terms of version 2.1 of the GNU Lesser General Public
5 # License as published by the Free Software Foundation.
6 #
7 # This library is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 # Lesser General Public License for more details.
11 #
12 # You should have received a copy of the GNU Lesser General Public
13 # License along with this library; if not, write to the Free Software
14 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 #============================================================================
16 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
17 #============================================================================
19 """
20 Input-driven parsing for s-expression (sxp) format.
21 Create a parser: pin = Parser();
22 Then call pin.input(buf) with your input.
23 Call pin.input_eof() when done.
24 Use pin.ready() to see if a value has been parsed, pin.get_val()
25 to get a parsed value. You can call ready and get_val at any time -
26 you don't have to wait until after calling input_eof.
28 """
29 from __future__ import generators
31 import sys
32 import types
33 import errno
34 import string
35 from StringIO import StringIO
# Public names exported by "from ... import *".
__all__ = [
    "mime_type",
    "ParseError",
    "Parser",
    "atomp",
    "show",
    "show_xml",
    "elementp",
    "name",
    "attributes",
    "attribute",
    "children",
    "child",
    "child_at",
    "child0",
    "child1",
    "child2",
    "child3",
    "child4",
    "child_value",
    "has_id",
    "with_id",
    "child_with_id",
    "elements",
    "merge",
    "to_string",
    "from_string",
    "all_from_string",
    "parse",
    ]

# MIME type string for sxp content.
mime_type = "application/sxp"

# Maps the character that follows a backslash inside a quoted string to the
# character it stands for (looked up by Parser.state_escape).
escapes = {
    'a': '\a',
    'b': '\b',
    't': '\t',
    'n': '\n',
    'v': '\v',
    'f': '\f',
    'r': '\r',
    '\\': '\\',
    '\'': '\'',
    '\"': '\"'}

# Single-character syntax tokens.  k_list_open/k_list_close delimit lists;
# k_attr_open is the head of an attribute list (see attributes()).
# k_eval is not referenced anywhere else in this file.
k_list_open = "("
k_list_close = ")"
k_attr_open = "@"
k_eval = "!"

# Inverse of escapes: maps each special character back to its escape letter.
escapes_rev = {}
for k in escapes:
    escapes_rev[escapes[k]] = k
91 class ParseError(StandardError):
93 def __init__(self, parser, value):
94 self.parser = parser
95 self.value = value
97 def __str__(self):
98 return self.value
class ParserState:
    """One frame on the parser's state stack.

    fn      handler called with each input character while this frame
            is the current one
    parent  the enclosing frame (None for the outermost frame)
    buf     text collected so far, initially ''
    val     values collected so far, initially []
    delim   delimiter associated with the frame, initially None
    """

    def __init__(self, fn, parent=None):
        self.fn = fn
        self.parent = parent
        self.delim = None
        self.val = []
        self.buf = ''

    def push(self, fn):
        """Create and return a new frame for fn nested inside this one."""
        nested = ParserState(fn, parent=self)
        return nested
class Parser:
    """Incremental, character-driven parser for sxp text.

    Feed text with input() (or one character at a time with input_char()),
    call input_eof() when there is no more input, and collect completed
    top-level values with ready()/get_val() or get_all().

    Parsing state is a stack of ParserState frames linked by their parent
    attribute; the handler of the current frame (self.state.fn) is called
    with every character.  Malformed input raises ParseError.
    """

    def __init__(self):
        self.error = sys.stderr
        self.reset()

    def reset(self):
        """Forget all parsed values, counters and parsing state."""
        self.val = []
        self.eof = 0
        self.err = 0
        self.line_no = 0
        self.char_no = 0
        self.state = None

    def push_state(self, fn):
        """Enter a new frame handled by fn, nested in the current one."""
        self.state = self.state.push(fn)

    def pop_state(self):
        """Leave the current frame and return it.

        When this returns control to a start frame, the values that frame
        has collected are moved to self.val so they become available
        through ready()/get_val().
        """
        val = self.state
        self.state = self.state.parent
        if self.state and self.state.fn == self.state_start:
            # Return to start state - produce the value.
            self.val += self.state.val
            self.state.val = []
        return val

    def _redispatch(self, c):
        """Hand c to the current frame without counting it again.

        Used when a frame ends on a character that belongs to its parent.
        Going through input_char() instead would bump line_no/char_no a
        second time for the same character.
        """
        if self.state is None:
            self.begin_start(None)
        self.state.fn(c)

    def in_class(self, c, s):
        """Return true if character c occurs in the string s."""
        return s.find(c) >= 0

    def in_space_class(self, c):
        return self.in_class(c, ' \t\n\v\f\r')

    def is_separator(self, c):
        return self.in_class(c, '{}()<>[]!;')

    def in_comment_class(self, c):
        return self.in_class(c, '#')

    def in_string_quote_class(self, c):
        return self.in_class(c, '"\'')

    def in_printable_class(self, c):
        return self.in_class(c, string.printable)

    def set_error_stream(self, error):
        self.error = error

    def has_error(self):
        return self.err > 0

    def at_eof(self):
        return self.eof

    def input_eof(self):
        """Signal end of input to the current frame."""
        self.eof = 1
        # -1 is a placeholder; handlers test at_eof() before looking at it.
        self.input_char(-1)

    def input(self, buf):
        """Feed a string to the parser; an empty buf means end of input."""
        if not buf:
            self.input_eof()
        else:
            for c in buf:
                self.input_char(c)

    def input_char(self, c):
        """Feed one character, updating the line/column counters."""
        if self.at_eof():
            pass
        elif c == '\n':
            self.line_no += 1
            self.char_no = 0
        else:
            self.char_no += 1

        if self.state is None:
            self.begin_start(None)
        self.state.fn(c)

    def ready(self):
        """Return true if at least one complete value is available."""
        return len(self.val) > 0

    def get_val(self):
        """Remove and return the oldest available value."""
        v = self.val[0]
        self.val = self.val[1:]
        return v

    def get_all(self):
        """Return the list of available values (without removing them)."""
        return self.val

    def begin_start(self, c):
        self.state = ParserState(self.state_start)

    def end_start(self):
        self.val += self.state.val
        self.pop_state()

    def state_start(self, c):
        """Top-level handler: decide what kind of token c begins."""
        if self.at_eof():
            self.end_start()
        elif self.in_space_class(c):
            pass
        elif self.in_comment_class(c):
            self.begin_comment(c)
        elif c == k_list_open:
            self.begin_list(c)
        elif c == k_list_close:
            raise ParseError(self, "syntax error: "+c)
        elif self.in_string_quote_class(c):
            self.begin_string(c)
        elif self.in_printable_class(c):
            self.begin_atom(c)
        elif c == chr(4):
            # ctrl-D, EOT: end-of-text.
            self.input_eof()
        else:
            raise ParseError(self, "invalid character: code %d" % ord(c))

    def begin_comment(self, c):
        self.push_state(self.state_comment)
        self.state.buf += c

    def end_comment(self):
        self.pop_state()

    def state_comment(self, c):
        """Collect comment text up to the end of the line (or EOF)."""
        if c == '\n' or self.at_eof():
            self.end_comment()
        else:
            self.state.buf += c

    def begin_string(self, c):
        self.push_state(self.state_string)
        # Remember which quote opened the string; the same one closes it.
        self.state.delim = c

    def end_string(self):
        val = self.state.buf
        self.state.parent.val.append(val)
        self.pop_state()

    def state_string(self, c):
        """Collect the body of a quoted string."""
        if self.at_eof():
            raise ParseError(self, "unexpected EOF")
        elif c == self.state.delim:
            self.end_string()
        elif c == '\\':
            self.push_state(self.state_escape)
        else:
            self.state.buf += c

    def state_escape(self, c):
        """Handle the character that follows a backslash in a string."""
        if self.at_eof():
            raise ParseError(self, "unexpected EOF")
        d = escapes.get(c)
        if d:
            self.state.parent.buf += d
            self.pop_state()
        elif c == 'x':
            # Reuse this frame for the hex digits that follow.
            self.state.fn = self.state_hex
            self.state.val = 0
        elif c == '0':
            # Reuse this frame; the '0' itself is the first octal digit.
            self.state.fn = self.state_octal
            self.state.val = 0
            self._redispatch(c)
        else:
            # ignore escape if it doesn't match anything we know
            self.pop_state()

    def _end_numeric_escape(self):
        """Finish a hex or octal escape and return to the enclosing string.

        The decoded character is appended to the string being built.  If
        no digits were collected the escape is dropped, in the same way
        state_escape drops escapes it does not recognise.
        """
        frame = self.state
        if frame.buf:
            frame.parent.buf += chr(frame.val)
        self.pop_state()

    def state_octal(self, c):
        """Collect up to three octal digits of an octal escape."""
        if self.at_eof():
            raise ParseError(self, "unexpected EOF")
        elif '0' <= c <= '7':
            frame = self.state
            frame.val = frame.val * 8 + (ord(c) - ord('0'))
            frame.buf += c
            if frame.val < 0 or frame.val > 0xff:
                raise ParseError(self, "invalid octal escape: out of range " + frame.buf)
            if len(frame.buf) == 3:
                self._end_numeric_escape()
        else:
            # c is not part of the escape: finish it, then let the
            # enclosing string handle c.
            self._end_numeric_escape()
            self._redispatch(c)

    def state_hex(self, c):
        """Collect up to two hex digits of a hex escape."""
        if self.at_eof():
            raise ParseError(self, "unexpected EOF")
        elif c in string.hexdigits:
            frame = self.state
            # int(c, 16) maps 'a'-'f'/'A'-'F' to 10-15.
            frame.val = frame.val * 16 + int(c, 16)
            frame.buf += c
            if frame.val < 0 or frame.val > 0xff:
                raise ParseError(self, "invalid hex escape: out of range " + frame.buf)
            if len(frame.buf) == 2:
                self._end_numeric_escape()
        else:
            # c is not part of the escape: finish it, then let the
            # enclosing string handle c (it may be the closing quote).
            self._end_numeric_escape()
            self._redispatch(c)

    def begin_atom(self, c):
        self.push_state(self.state_atom)
        self.state.buf = c

    def end_atom(self):
        val = self.state.buf
        self.state.parent.val.append(val)
        self.pop_state()

    def state_atom(self, c):
        """Collect an unquoted atom up to a separator, space or comment."""
        if self.at_eof():
            self.end_atom()
        elif (self.is_separator(c) or
              self.in_space_class(c) or
              self.in_comment_class(c)):
            self.end_atom()
            # The terminating character belongs to the enclosing frame.
            self._redispatch(c)
        else:
            self.state.buf += c

    def begin_list(self, c):
        self.push_state(self.state_list)

    def end_list(self):
        val = self.state.val
        self.state.parent.val.append(val)
        self.pop_state()

    def state_list(self, c):
        """Inside a list: close it on ')', otherwise parse as at top level."""
        if self.at_eof():
            raise ParseError(self, "unexpected EOF")
        elif c == k_list_close:
            self.end_list()
        else:
            self.state_start(c)
def atomp(sxpr):
    """Check if a string can be written as a bare (unquoted) atom.

    sxpr  string to test

    returns 1 if sxpr is '@' or consists only of letters, digits and the
    characters . - _ : / ~ ; 0 otherwise.  The empty string is not an atom:
    it has no unquoted spelling, so it must be written quoted.
    """
    if not sxpr:
        return 0
    if sxpr.isalnum() or sxpr == '@':
        return 1
    for c in sxpr:
        if c in string.ascii_letters: continue
        if c in string.digits: continue
        if c in '.-_:/~': continue
        # Anything else (whitespace, quotes, brackets, ...) needs quoting.
        return 0
    return 1
def show(sxpr, out=sys.stdout):
    """Write an sxpr to out in bracketed (lisp-style) syntax.

    Lists are written as their items separated by single spaces and
    wrapped in parentheses.  Ints and floats are written with str(),
    strings that pass atomp() are written bare, and everything else is
    written as the repr() of its str().
    """
    if isinstance(sxpr, list):
        out.write(k_list_open)
        first = True
        for item in sxpr:
            if not first:
                out.write(' ')
            first = False
            show(item, out)
        out.write(k_list_close)
        return
    if isinstance(sxpr, (int, float)):
        text = str(sxpr)
    elif isinstance(sxpr, str) and atomp(sxpr):
        text = sxpr
    else:
        text = repr(str(sxpr))
    out.write(text)
def show_xml(sxpr, out=sys.stdout):
    """Write an sxpr to out in XML-like syntax.

    A list becomes <name attr=value ...>children</name>, with the children
    separated by single spaces.  Anything else is written as text.
    """
    if not isinstance(sxpr, list):
        if isinstance(sxpr, str) and atomp(sxpr):
            out.write(sxpr)
        else:
            out.write(str(sxpr))
        return
    tag = name(sxpr)
    out.write('<%s' % tag)
    for attr in attributes(sxpr):
        out.write(' %s=%s' % (attr[0], attr[1]))
    out.write('>')
    for (pos, kid) in enumerate(children(sxpr)):
        if pos:
            out.write(' ')
        show_xml(kid, out)
    out.write('</%s>' % tag)
def elementp(sxpr, elt=None):
    """Check if an sxpr is an element, optionally of a given type.

    sxpr  sxpr
    elt   element type (None accepts any type)

    The result is true only for a non-empty list whose first item equals
    elt (or for any non-empty list when elt is None).
    """
    if not isinstance(sxpr, list):
        return False
    count = len(sxpr)
    if not count:
        return count
    return None == elt or sxpr[0] == elt
def name(sxpr):
    """Get the element name of an sxpr.

    sxpr

    returns the string itself when sxpr is a string, the first item when
    sxpr is a non-empty list, and None for anything else.
    """
    if isinstance(sxpr, str):
        return sxpr
    if isinstance(sxpr, list) and len(sxpr):
        return sxpr[0]
    return None
def attributes(sxpr):
    """Get the attribute list of an sxpr.

    sxpr

    returns the items following '@' when the second item of sxpr is an
    element whose name is '@'; otherwise an empty list.
    """
    if not isinstance(sxpr, list) or len(sxpr) <= 1:
        return []
    candidate = sxpr[1]
    if elementp(candidate, k_attr_open):
        return candidate[1:]
    return []
def attribute(sxpr, key, val=None):
    """Look up one attribute of an sxpr.

    sxpr  sxpr
    key   attribute key
    val   value returned when the key is absent (default None)

    returns the value of the first attribute whose key equals key
    """
    for pair in attributes(sxpr):
        if pair[0] == key:
            return pair[1]
    return val
def children(sxpr, elt=None):
    """Get the children of an sxpr.

    sxpr  sxpr
    elt   optional element type to filter by

    returns the items after the name (and after the attribute list, if
    there is one); when elt is given, only the elements of that type.
    """
    if isinstance(sxpr, list) and len(sxpr) > 1:
        start = 1
        if elementp(sxpr[start], k_attr_open):
            # Skip the attribute list.
            start += 1
        kids = sxpr[start:]
    else:
        kids = []
    if elt:
        kids = [kid for kid in kids if elementp(kid, elt)]
    return kids
def child(sxpr, elt, val=None):
    """Find the first child that is an element of the given type.

    sxpr  sxpr
    elt   element type
    val   value returned when there is no such child
    """
    for kid in children(sxpr):
        if elementp(kid, elt):
            return kid
    return val
def child_at(sxpr, index, val=None):
    """Get the child at a zero-based position.

    sxpr   sxpr
    index  index
    val    value returned when sxpr has no child at that index
    """
    kids = children(sxpr)
    if index < len(kids):
        return kids[index]
    return val
def child0(sxpr, val=None):
    """Get the child at index 0, or val if there is none.
    """
    return child_at(sxpr, index=0, val=val)
def child1(sxpr, val=None):
    """Get the child at index 1, or val if there is none.
    """
    return child_at(sxpr, index=1, val=val)
def child2(sxpr, val=None):
    """Get the child at index 2, or val if there is none.
    """
    return child_at(sxpr, index=2, val=val)
def child3(sxpr, val=None):
    """Get the child at index 3, or val if there is none.
    """
    return child_at(sxpr, index=3, val=val)
def child4(sxpr, val=None):
    """Get the child at index 4, or val if there is none.
    """
    return child_at(sxpr, index=4, val=val)
def child_value(sxpr, elt, val=None):
    """Get the first value held by the first child of the given type.

    sxpr  sxpr
    elt   element type
    val   value returned when the child, or its value, is missing
    """
    kid = child(sxpr, elt)
    if not kid:
        return val
    return child_at(kid, 0, val)
def has_id(sxpr, id):
    """Check whether the 'id' attribute of an s-expression equals id.
    """
    found = attribute(sxpr, 'id')
    return found == id
def with_id(sxpr, id, val=None):
    """Search an s-expression, depth first, for an item with a given id.

    sxpr  s-exp or list
    id    id
    val   value returned if nothing matches (default None)

    return the first matching s-exp, or val
    """
    if not isinstance(sxpr, list):
        return val
    for node in sxpr:
        if has_id(node, id):
            return node
        # Not this node itself; look inside it.
        found = with_id(node, id)
        if found is not None:
            return found
    return val
def child_with_id(sxpr, id, val=None):
    """Search the items of an s-expression (one level only) for an id.

    sxpr  s-exp or list
    id    id
    val   value returned if nothing matches (default None)

    return the first matching s-exp, or val
    """
    if not isinstance(sxpr, list):
        return val
    for node in sxpr:
        if has_id(node, id):
            return node
    return val
def elements(sxpr, ctxt=None):
    """Generate sxpr and every list nested in its children, in pre-order.

    Each generated value is a pair (node, context).  The context passed in
    is used for sxpr itself; for a nested node it is (index, parent,
    context), where index is the node's position among its parent's
    children and context is the parent's own context.

    sxpr  s-exp
    ctxt  context of sxpr (default None)

    returns generator
    """
    yield (sxpr, ctxt)
    for (pos, kid) in enumerate(children(sxpr)):
        if not isinstance(kid, list):
            continue
        # A recursive call only creates a generator object, so its values
        # have to be re-yielded one by one.
        for pair in elements(kid, (pos, sxpr, ctxt)):
            yield pair
def merge(s1, s2):
    """Merge sxprs s1 and s2, with s1 taking precedence.

    If either argument is None the other is returned.  If s1 is not an
    element it is returned as it is.  Otherwise the result is a new sxpr
    named after s1 that holds: the child elements of s1 merged by name
    with those of s2, then the child elements found only in s2, then the
    non-element children of s1.

    @param s1 sxpr
    @param s2 sxpr
    @return merged sxpr
    """
    if s1 is None:
        return s2
    if s2 is None:
        return s1
    if not elementp(s1):
        return s1
    merged = [name(s1)]
    (own_elts, own_vals) = child_map(s1)
    # The non-element children of s2 are not carried into the result.
    (other_elts, other_vals) = child_map(s2)
    for (key, elts) in own_elts.items():
        merge_list(merged, elts, other_elts.get(key, []))
    for (key, elts) in other_elts.items():
        if key not in own_elts:
            merged.extend(elts)
    merged.extend(own_vals)
    return merged
def merge_list(sxpr, l1, l2):
    """Merge two lists of same-named elements, appending to sxpr.

    Elements are paired up by position and each pair is combined with
    merge().  Whatever is left over in the longer list is appended
    unchanged, l1's leftovers first.

    @param sxpr to merge into
    @param l1 sxpr list
    @param l2 sxpr list
    @return modified sxpr
    """
    shared = min(len(l1), len(l2))
    pairs = zip(l1[:shared], l2[:shared])
    rest1 = l1[shared:]
    rest2 = l2[shared:]
    for (e1, e2) in pairs:
        sxpr.append(merge(e1, e2))
    sxpr.extend(rest1)
    sxpr.extend(rest2)
    return sxpr
def child_map(sxpr):
    """Split the children of sxpr into elements and plain values.

    @param sxpr
    @return (dict, list): the dict maps each element name to the list of
            child elements with that name; the list holds the children
            that are not elements.
    """
    by_name = {}
    plain = []
    for kid in children(sxpr):
        if elementp(kid):
            by_name.setdefault(name(kid), []).append(kid)
        else:
            plain.append(kid)
    return (by_name, plain)
def to_string(sxpr):
    """Render an sxpr as text, using show().

    sxpr  sxpr
    returns string
    """
    sink = StringIO()
    show(sxpr, sink)
    text = sink.getvalue()
    sink.close()
    return text
def from_string(s):
    """Create an sxpr by parsing a string.

    s  string
    returns the first sxpr found in s; [] if s is the empty string;
            None if s contains no complete value (for example only
            whitespace or a comment)
    raises ParseError on parse error
    """
    if s == '':
        return []

    vals = parse(StringIO(s))
    # parse() returns None when nothing was parsed, so test for "no
    # values" by truth value rather than by comparing with a list.
    if not vals:
        return None
    return vals[0]
def all_from_string(s):
    """Parse a string and return every sxpr found in it.

    s  string
    returns whatever parse() returns for the text of s
    """
    return parse(StringIO(s))
def parse(io):
    """Read 'io' line by line to its end and parse everything in it.

    io  input file object
    returns list of values, None if no value was completed
    raises ParseError on parse error
    """
    parser = Parser()
    line = io.readline()
    while len(line) != 0:
        parser.input(line)
        line = io.readline()
    # Pass the final empty read on as well: it tells the parser that the
    # input has ended.
    parser.input(line)
    if not parser.ready():
        return None
    return parser.get_all()
749 if __name__ == '__main__':
750 print ">main"
751 pin = Parser()
752 while 1:
753 buf = sys.stdin.read(1024)
754 #buf = sys.stdin.readline()
755 pin.input(buf)
756 while pin.ready():
757 val = pin.get_val()
758 print
759 print '****** val=', val
760 if len(buf) == 0:
761 break