 e109e2e3e8
			
		
	
	
		e109e2e3e8
		
			
		
	
	
	
	
		
			
			* Debug: remove lxml dependency, pickle SVD tree for faster processing. * Debug: remove unused import in svd.py
		
			
				
	
	
		
			587 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			587 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | |
| "Makes working with XML feel like you are working with JSON"
 | |
| 
 | |
| try:
 | |
|     from defusedexpat import pyexpat as expat
 | |
| except ImportError:
 | |
|     from xml.parsers import expat
 | |
| 
 | |
| from xml.sax.saxutils import XMLGenerator
 | |
| from xml.sax.xmlreader import AttributesImpl
 | |
| 
 | |
| try:  # pragma no cover
 | |
|     from cStringIO import StringIO
 | |
| except ImportError:  # pragma no cover
 | |
|     try:
 | |
|         from StringIO import StringIO
 | |
|     except ImportError:
 | |
|         from io import StringIO
 | |
| 
 | |
| from inspect import isgenerator
 | |
| 
 | |
| 
 | |
class ObjectDict(dict):
    """Dictionary whose keys can also be read as attributes.

    ``d.key`` returns ``d["key"]`` for any key present in the mapping.
    Reading a missing key through attribute access raises
    ``AttributeError`` (not ``KeyError``).
    """

    def __getattr__(self, name):
        # __getattr__ only runs after normal attribute lookup has failed,
        # so genuine dict attributes and methods always win over keys.
        if name not in self:
            raise AttributeError("No such attribute: " + name)
        return self[name]
 | |
| 
 | |
| 
 | |
| try:  # pragma no cover
 | |
|     _basestring = basestring
 | |
| except NameError:  # pragma no cover
 | |
|     _basestring = str
 | |
| try:  # pragma no cover
 | |
|     _unicode = unicode
 | |
| except NameError:  # pragma no cover
 | |
|     _unicode = str
 | |
| 
 | |
| __author__ = "Martin Blech"
 | |
| __version__ = "0.12.0"
 | |
| __license__ = "MIT"
 | |
| 
 | |
| 
 | |
class ParsingInterrupted(Exception):
    """Signals that parsing was stopped before the end of the document."""
 | |
| 
 | |
| 
 | |
class _DictSAXHandler(object):
    """Parser callback handler that assembles XML events into nested dicts.

    The handler keeps a stack of partially built items. Each opened element
    deeper than `item_depth` pushes the current state; each closed element
    pops it and merges the finished child into its parent.

    Constructor arguments:

    item_depth -- depth at which `item_callback` is invoked. With the default
        `0` the whole document is accumulated and left in `self.item`.
    item_callback -- called as `item_callback(path, item)` when an element at
        `item_depth` is closed. A false-ish return value raises
        `ParsingInterrupted`.
    xml_attribs -- when true, element attributes are stored in the item under
        `attr_prefix + name`; when false they are dropped.
    attr_prefix -- prefix for attribute keys.
    cdata_key -- key used for an element's text when the element is
        represented as a dictionary.
    force_cdata -- when true, an element that has text but no attributes or
        children is still represented as a dictionary holding `cdata_key`.
    cdata_separator -- string used to join the text chunks of an element.
    postprocessor -- optional `postprocessor(path, key, value)` returning a
        `(key, value)` pair, or `None` to drop the entry.
    dict_constructor -- factory for every mapping the handler creates.
    strip_whitespace -- when true, element text and comments are stripped.
    namespace_separator -- separator between namespace and local name.
    namespaces -- optional mapping from namespace to the short name that
        should replace it (a false-ish short name removes the namespace).
    force_list -- `True`, a container of keys, or a callable
        `force_list(path, key, value)`; decides whether the first value
        stored under a key is wrapped in a list.
    comment_key -- key under which comments are stored.
    """

    def __init__(
        self,
        item_depth=0,
        item_callback=lambda *args: True,
        xml_attribs=True,
        attr_prefix="@",
        cdata_key="#text",
        force_cdata=False,
        cdata_separator="",
        postprocessor=None,
        dict_constructor=ObjectDict,
        strip_whitespace=True,
        namespace_separator=":",
        namespaces=None,
        force_list=None,
        comment_key="#comment",
    ):
        self.path = []
        self.stack = []
        self.data = []
        self.item = None
        self.item_depth = item_depth
        self.xml_attribs = xml_attribs
        self.item_callback = item_callback
        self.attr_prefix = attr_prefix
        self.cdata_key = cdata_key
        self.force_cdata = force_cdata
        self.cdata_separator = cdata_separator
        self.postprocessor = postprocessor
        self.dict_constructor = dict_constructor
        self.strip_whitespace = strip_whitespace
        self.namespace_separator = namespace_separator
        self.namespaces = namespaces
        # Built with the caller's constructor so that a custom
        # `dict_constructor` is honoured for the "xmlns" value as well.
        self.namespace_declarations = dict_constructor()
        self.force_list = force_list
        self.comment_key = comment_key

    def _build_name(self, full_name):
        """Return `full_name` with its namespace replaced by the short name.

        Names are returned unchanged when no `namespaces` mapping was given
        or when they contain no separator. Namespaces missing from the
        mapping are kept as they are.
        """
        if self.namespaces is None:
            return full_name
        separator = self.namespace_separator
        i = full_name.rfind(separator)
        if i == -1:
            return full_name
        # Skip the whole separator, not just one character.
        namespace, name = full_name[:i], full_name[i + len(separator):]
        try:
            short_namespace = self.namespaces[namespace]
        except KeyError:
            short_namespace = namespace
        if not short_namespace:
            return name
        return separator.join((short_namespace, name))

    def _attrs_to_dict(self, attrs):
        """Convert a flat `[name, value, name, value, ...]` list to a mapping.

        A dictionary is returned as-is.
        """
        if isinstance(attrs, dict):
            return attrs
        return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))

    def startNamespaceDecl(self, prefix, uri):
        """Record a namespace declaration (a missing prefix is stored as "")."""
        self.namespace_declarations[prefix or ""] = uri

    def startElement(self, full_name, attrs):
        """Handle an opening tag: extend the path and start a new item."""
        name = self._build_name(full_name)
        attrs = self._attrs_to_dict(attrs)
        if attrs and self.namespace_declarations:
            # Pending declarations are attached to this element's attributes
            # and the pending set starts over.
            attrs["xmlns"] = self.namespace_declarations
            self.namespace_declarations = self.dict_constructor()
        self.path.append((name, attrs or None))
        if len(self.path) > self.item_depth:
            # Save the parent's state; it is restored in endElement.
            self.stack.append((self.item, self.data))
            if self.xml_attribs:
                attr_entries = []
                for key, value in attrs.items():
                    key = self.attr_prefix + self._build_name(key)
                    if self.postprocessor:
                        entry = self.postprocessor(self.path, key, value)
                    else:
                        entry = (key, value)
                    if entry:
                        attr_entries.append(entry)
                attrs = self.dict_constructor(attr_entries)
            else:
                attrs = None
            self.item = attrs or None
            self.data = []

    def endElement(self, full_name):
        """Handle a closing tag: emit or merge the finished item.

        Raises `ParsingInterrupted` when the element sits at `item_depth`
        and `item_callback` returns a false-ish value.
        """
        name = self._build_name(full_name)
        if len(self.path) == self.item_depth:
            item = self.item
            if item is None:
                item = None if not self.data else self.cdata_separator.join(self.data)

            should_continue = self.item_callback(self.path, item)
            if not should_continue:
                raise ParsingInterrupted()
        if self.stack:
            data = None if not self.data else self.cdata_separator.join(self.data)
            item = self.item
            self.item, self.data = self.stack.pop()
            if self.strip_whitespace and data:
                data = data.strip() or None
            if data and self.force_cdata and item is None:
                item = self.dict_constructor()
            if item is not None:
                # Element has attributes/children: its text goes under
                # cdata_key inside the element's own dictionary.
                if data:
                    self.push_data(item, self.cdata_key, data)
                self.item = self.push_data(self.item, name, item)
            else:
                # Text-only (or empty) element: store the text directly.
                self.item = self.push_data(self.item, name, data)
        else:
            self.item = None
            self.data = []
        self.path.pop()

    def characters(self, data):
        """Collect a chunk of character data for the current element."""
        if not self.data:
            self.data = [data]
        else:
            self.data.append(data)

    def comments(self, data):
        """Store a comment in the current item under `comment_key`."""
        if self.strip_whitespace:
            data = data.strip()
        self.item = self.push_data(self.item, self.comment_key, data)

    def push_data(self, item, key, data):
        """Store `data` under `key` in `item` and return the item.

        `item` is created with `dict_constructor` when it is `None`. A key
        that already exists is turned into (or extended as) a list. When a
        postprocessor is set it may rewrite the pair, or return `None` to
        leave `item` untouched.
        """
        if self.postprocessor is not None:
            result = self.postprocessor(self.path, key, data)
            if result is None:
                return item
            key, data = result
        if item is None:
            item = self.dict_constructor()
        try:
            value = item[key]
            if isinstance(value, list):
                value.append(data)
            else:
                item[key] = [value, data]
        except KeyError:
            if self._should_force_list(key, data):
                item[key] = [data]
            else:
                item[key] = data
        return item

    def _should_force_list(self, key, value):
        """Tell whether the first value stored under `key` must be a list."""
        if not self.force_list:
            return False
        if isinstance(self.force_list, bool):
            return self.force_list
        try:
            return key in self.force_list
        except TypeError:
            # Not a container: treat force_list as a callable. The current
            # element itself is excluded from the path it receives.
            return self.force_list(self.path[:-1], key, value)
 | |
| 
 | |
| 
 | |
def parse(
    xml_input,
    encoding=None,
    expat=expat,
    process_namespaces=False,
    namespace_separator=":",
    disable_entities=True,
    process_comments=False,
    **kwargs
):
    """Parse the given XML input and convert it into a dictionary.

    `xml_input` can be a string, a file-like object (anything with a `read`
    attribute) or a generator of strings. Text input is encoded with
    `encoding` (UTF-8 when not given) before it is handed to the parser.

    Any extra keyword argument is forwarded to the internal handler:
    `item_depth`, `item_callback`, `xml_attribs`, `attr_prefix`, `cdata_key`,
    `force_cdata`, `cdata_separator`, `postprocessor`, `dict_constructor`,
    `strip_whitespace`, `namespaces`, `force_list` and `comment_key`.

    If `xml_attribs` is `True`, element attributes are put in the dictionary
    among regular child elements, using `@` as a prefix to avoid collisions.
    If set to `False`, they are ignored.

    Simple example::

        >>> doc = parse('<a prop="x"><b>1</b><b>2</b></a>')
        >>> doc['a']['@prop']
        'x'
        >>> doc['a']['b']
        ['1', '2']

    If `item_depth` is `0` (the default) the function returns a dictionary
    for the root element. Otherwise it calls `item_callback` every time an
    item at the specified depth is found and returns `None` in the end
    (streaming mode). The callback receives the `path` from the document
    root to the item (name/attributes pairs) and the `item` itself. If the
    callback's return value is false-ish, parsing is stopped with the
    :class:`ParsingInterrupted` exception.

    Streaming example::

        >>> def handle(path, item):
        ...     print('path:%s item:%s' % (path, item))
        ...     return True
        ...
        >>> parse('<a prop="x"><b>1</b><b>2</b></a>',
        ...       item_depth=2, item_callback=handle)
        path:[('a', {'prop': 'x'}), ('b', None)] item:1
        path:[('a', {'prop': 'x'}), ('b', None)] item:2

    The optional `postprocessor` is a function that takes `path`, `key` and
    `value` as positional arguments and returns a new `(key, value)` pair in
    which both may have changed::

        >>> def postprocessor(path, key, value):
        ...     try:
        ...         return key + ':int', int(value)
        ...     except (ValueError, TypeError):
        ...         return key, value
        >>> doc = parse('<a><b>1</b><b>2</b><b>x</b></a>',
        ...             postprocessor=postprocessor)
        >>> doc['a']['b:int'], doc['a']['b']
        ([1, 2], 'x')

    An alternate `expat` implementation (such as `defusedexpat.pyexpat`) can
    be supplied through the `expat` parameter.

    `force_list` forces a list to be created even when a key has a single
    child. It can be `True`, a collection of keys, or a callable that
    receives `path`, `key` and `value` for more complex decisions. It is
    applied after any user-supplied postprocessor has run. For example,
    with `force_list=('interface',)` the input::

        <servers>
          <server>
            <name>host1</name>
            <interfaces>
              <interface>
                <name>em0</name>
              </interface>
            </interfaces>
          </server>
        </servers>

    produces::

        {'servers':
          {'server':
            {'name': 'host1',
             'interfaces':
              {'interface': [{'name': 'em0'}]}}}}

    If `process_comments` is `True`, comments are added under `comment_key`
    (default `'#comment'`) to the element that contains them. Given::

        <a>
          <b>
            <!-- b comment -->
            <c>
                <!-- c comment -->
                1
            </c>
            <d>2</d>
          </b>
        </a>

    the result is::

        {'a': {
            'b': {
                '#comment': 'b comment',
                'c': {
                    '#comment': 'c comment',
                    '#text': '1',
                },
                'd': '2',
            },
        }}

    When `disable_entities` is true (the default), DTD/entity handling is
    neutralised so that entities are not expanded.
    """
    # The handler always gets the caller's separator, whether or not the
    # parser itself performs namespace processing.
    sax_handler = _DictSAXHandler(namespace_separator=namespace_separator, **kwargs)

    if isinstance(xml_input, _unicode):
        encoding = encoding or "utf-8"
        xml_input = xml_input.encode(encoding)

    # A None separator leaves expat's namespace processing switched off.
    parser_separator = namespace_separator if process_namespaces else None
    xml_parser = expat.ParserCreate(encoding, parser_separator)

    try:
        xml_parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass

    for hook_name, callback in (
        ("StartNamespaceDeclHandler", sax_handler.startNamespaceDecl),
        ("StartElementHandler", sax_handler.startElement),
        ("EndElementHandler", sax_handler.endElement),
        ("CharacterDataHandler", sax_handler.characters),
    ):
        setattr(xml_parser, hook_name, callback)
    if process_comments:
        xml_parser.CommentHandler = sax_handler.comments
    xml_parser.buffer_text = True

    if disable_entities:
        try:
            # Attempt to disable DTD in Jython's expat parser (Xerces-J).
            xml_parser._reader.setFeature(
                "http://apache.org/xml/features/disallow-doctype-decl", True
            )
        except AttributeError:
            # For CPython / expat parser.
            # Anything not handled ends up here and entities aren't expanded.
            xml_parser.DefaultHandler = lambda x: None
            # Expects an integer return; zero means failure -> expat.ExpatError.
            xml_parser.ExternalEntityRefHandler = lambda *x: 1

    if hasattr(xml_input, "read"):
        xml_parser.ParseFile(xml_input)
    elif isgenerator(xml_input):
        for chunk in xml_input:
            xml_parser.Parse(chunk, False)
        # An empty final chunk tells the parser the input is complete.
        xml_parser.Parse(b"", True)
    else:
        xml_parser.Parse(xml_input, True)
    return sax_handler.item
 | |
| 
 | |
| 
 | |
| def _process_namespace(name, namespaces, ns_sep=":", attr_prefix="@"):
 | |
|     if not namespaces:
 | |
|         return name
 | |
|     try:
 | |
|         ns, name = name.rsplit(ns_sep, 1)
 | |
|     except ValueError:
 | |
|         pass
 | |
|     else:
 | |
|         ns_res = namespaces.get(ns.strip(attr_prefix))
 | |
|         name = (
 | |
|             "{}{}{}{}".format(
 | |
|                 attr_prefix if ns.startswith(attr_prefix) else "", ns_res, ns_sep, name
 | |
|             )
 | |
|             if ns_res
 | |
|             else name
 | |
|         )
 | |
|     return name
 | |
| 
 | |
| 
 | |
def _emit(
    key,
    value,
    content_handler,
    attr_prefix="@",
    cdata_key="#text",
    depth=0,
    preprocessor=None,
    pretty=False,
    newl="\n",
    indent="\t",
    namespace_separator=":",
    namespaces=None,
    full_document=True,
    expand_iter=None,
):
    """Emit the XML element(s) for one `key`/`value` pair.

    Events are sent to `content_handler`; child entries are emitted
    recursively with `depth + 1`.

    * `preprocessor(key, value)`, when given, may rewrite the pair or return
      `None` to skip it.
    * A non-string, non-dict iterable `value` produces one element per item.
    * `None` becomes an empty element, booleans become `"true"`/`"false"`,
      any other non-dict item is converted to text.
    * With `expand_iter` set, an iterable item is wrapped as
      `{expand_iter: item}` instead of being converted to text.
    * Inside a dict item, `cdata_key` holds the element text, keys starting
      with `attr_prefix` become attributes, and everything else is a child.
      A dict stored under `attr_prefix + "xmlns"` is expanded into
      `xmlns` / `xmlns:<prefix>` attributes.
    * `pretty`, `newl` and `indent` control the whitespace that is emitted
      around elements.

    Raises `ValueError` when `full_document` is true and more than one
    element would be emitted at depth 0.
    """
    key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    if (
        not hasattr(value, "__iter__")
        or isinstance(value, _basestring)
        or isinstance(value, dict)
    ):
        value = [value]
    # Honour a custom attribute prefix instead of assuming "@".
    xmlns_key = attr_prefix + "xmlns"
    for index, v in enumerate(value):
        if full_document and depth == 0 and index > 0:
            raise ValueError("document with multiple roots")
        if v is None:
            v = ObjectDict()
        elif isinstance(v, bool):
            if v:
                v = _unicode("true")
            else:
                v = _unicode("false")
        elif not isinstance(v, dict):
            if (
                expand_iter
                and hasattr(v, "__iter__")
                and not isinstance(v, _basestring)
            ):
                v = ObjectDict(((expand_iter, v),))
            else:
                v = _unicode(v)
        if isinstance(v, _basestring):
            v = ObjectDict(((cdata_key, v),))
        cdata = None
        attrs = ObjectDict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                ik = _process_namespace(
                    ik, namespaces, namespace_separator, attr_prefix
                )
                if ik == xmlns_key and isinstance(iv, dict):
                    # Distinct loop names: the enclosing loop's `v` must not
                    # be rebound while its items are being iterated.
                    for ns_prefix, ns_uri in iv.items():
                        attr = "xmlns{}".format(
                            ":{}".format(ns_prefix) if ns_prefix else ""
                        )
                        attrs[attr] = _unicode(ns_uri)
                    continue
                if not isinstance(iv, _unicode):
                    iv = _unicode(iv)
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        for child_key, child_value in children:
            _emit(
                child_key,
                child_value,
                content_handler,
                attr_prefix,
                cdata_key,
                depth + 1,
                preprocessor,
                pretty,
                newl,
                indent,
                namespaces=namespaces,
                namespace_separator=namespace_separator,
                expand_iter=expand_iter,
            )
        if cdata is not None:
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
 | |
| 
 | |
| 
 | |
def unparse(
    input_dict,
    output=None,
    encoding="utf-8",
    full_document=True,
    short_empty_elements=False,
    **kwargs
):
    """Emit an XML document for the given `input_dict` (reverse of `parse`).

    The resulting XML is returned as a string. If `output` (a file-like
    object) is given, the XML is written there instead and `None` is
    returned.

    Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are
    interpreted as XML node attributes, whereas keys equal to `cdata_key`
    (default=`'#text'`) are treated as character data.

    With `full_document` true (the default) the XML declaration is written
    and `input_dict` must contain exactly one root key, otherwise
    `ValueError` is raised.

    The `pretty` parameter (default=`False`) enables pretty-printing. In
    this mode, lines are terminated with `'\\n'` and indented with `'\\t'`,
    but this can be customized with the `newl` and `indent` parameters.
    These and any other extra keyword arguments are passed on to `_emit`.
    """
    if full_document and len(input_dict) != 1:
        raise ValueError("Document must have exactly one root.")

    return_text = output is None
    if return_text:
        output = StringIO()

    # The third positional argument is supplied only when requested, so the
    # default call keeps the two-argument form.
    generator_args = (output, encoding)
    if short_empty_elements:
        generator_args += (True,)
    content_handler = XMLGenerator(*generator_args)

    if full_document:
        content_handler.startDocument()
    for key, value in input_dict.items():
        _emit(key, value, content_handler, full_document=full_document, **kwargs)
    if full_document:
        content_handler.endDocument()

    if not return_text:
        return None
    text = output.getvalue()
    try:  # pragma no cover
        # Byte buffers need decoding; text buffers have no decode().
        text = text.decode(encoding)
    except AttributeError:  # pragma no cover
        pass
    return text
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":  # pragma: no cover
    import marshal
    import sys

    # Use the binary streams when the interpreter exposes them; otherwise
    # fall back to the plain stream objects.
    try:
        stdin, stdout = sys.stdin.buffer, sys.stdout.buffer
    except AttributeError:
        stdin, stdout = sys.stdin, sys.stdout

    # Exactly one command-line argument is expected: the item depth.
    (depth_arg,) = sys.argv[1:]
    item_depth = int(depth_arg)

    def handle_item(path, item):
        # Each (path, item) pair is written to stdout as a marshal record.
        marshal.dump((path, item), stdout)
        return True

    try:
        root = parse(
            stdin,
            item_depth=item_depth,
            item_callback=handle_item,
            dict_constructor=dict,
        )
        # Outside streaming mode the callback never fires with the parsed
        # result, so the root is dumped explicitly.
        if item_depth == 0:
            handle_item([], root)
    except KeyboardInterrupt:
        pass
 |