Source code for obspy.core.util.xmlwrapper

# -*- coding: utf-8 -*-
import StringIO
import warnings
# Select the XML backend: lxml when importable, otherwise the standard library.
# Either way the names ``etree``, ``register_namespace`` and ``LXML_ETREE``
# are defined afterwards.
try:
    # try using lxml as it is faster
    from lxml import etree
    from lxml.etree import register_namespace
    LXML_ETREE = True
except ImportError:
    LXML_ETREE = False
    from xml.etree import ElementTree as etree  # @UnusedImport
    try:
        # register_namespace is defined in the ElementTree module itself, not
        # in the ``xml.etree`` package, so it has to be imported from there
        from xml.etree.ElementTree import register_namespace  # @UnusedImport
    except ImportError:
        # fallback for ElementTree versions without register_namespace
        def register_namespace(prefix, uri):
            """
            Registers a namespace prefix for serialization.

            :type prefix: str
            :param prefix: Namespace prefix.
            :type uri: str
            :param uri: Namespace URI.
            """
            etree._namespace_map[uri] = prefix
import re


def tostring(element, xml_declaration=True, encoding="utf-8",
             pretty_print=False, __etree=etree):
    """
    Generates a string representation of an XML element, including all
    subelements.

    :param element: Element instance.
    :type xml_declaration: bool, optional
    :param xml_declaration: Adds a XML declaration. Defaults to ``True``.
    :type encoding: str, optional
    :param encoding: output encoding. Defaults to ``"utf-8"``. Note that
        changing the encoding to a non UTF-8 compatible encoding will enable
        a declaration by default.
    :type pretty_print: bool, optional
    :param pretty_print: Enables formatted XML. Defaults to ``False``. It is
        ignored when the fallback serializer call has to be used.
    :param __etree: ElementTree-compatible module whose ``tostring`` function
        is used. Defaults to the ``etree`` module imported by this file.
    :return: Encoded string containing the XML data.
    """
    try:
        # use lxml - it accepts the extended set of keyword arguments
        return __etree.tostring(element, xml_declaration=xml_declaration,
                                method="xml", encoding=encoding,
                                pretty_print=pretty_print)
    except TypeError:
        # the serializer rejected one of the keywords above; retry below with
        # the minimal call signature
        pass
    # use xml
    out = __etree.tostring(element, encoding=encoding)
    if not xml_declaration:
        return out
    declaration = "<?xml version='1.0' encoding='%s'?>\n" % encoding
    if isinstance(out, bytes) and not isinstance(declaration, bytes):
        # serializer returned bytes: the declaration must be bytes as well,
        # otherwise the result would contain the repr of the payload
        declaration = declaration.encode(encoding)
    if out.startswith(declaration):
        # the serializer already emitted the declaration - do not repeat it
        return out
    return declaration + out
class XMLParser(object):
    """
    Unified wrapper around Python's default xml module and the lxml module.
    """
    # matches a trailing positional selector, e.g. the ``[2]`` in ``a/b[2]``
    _INDEX_SELECTOR = re.compile(r'(.*)\[(\d+)\]$')
    # path steps which must never get a namespace prepended
    _UNQUALIFIED_STEPS = ('', '.', '..', '*')

    def __init__(self, xml_doc, namespace=None):
        """
        Initializes a XMLParser object.

        :type xml_doc: str, filename, file-like object, parsed XML document
        :param xml_doc: XML document
        :type namespace: str, optional
        :param namespace: Document-wide default namespace. Defaults to the
            namespace of the root element.
        """
        if isinstance(xml_doc, basestring):
            # some string - if it starts with <?xml it is the document
            # itself, otherwise it is handed to the parser unchanged
            if xml_doc.lstrip()[:5].upper() == '<?XML':
                xml_doc = StringIO.StringIO(xml_doc)
            # parse XML file
            self.xml_doc = etree.parse(xml_doc)
        elif hasattr(xml_doc, 'seek'):
            # some file-based content - always read from the beginning
            xml_doc.seek(0)
            self.xml_doc = etree.parse(xml_doc)
        else:
            # anything else is used as an already parsed document
            self.xml_doc = xml_doc
        self.xml_root = self.xml_doc.getroot()
        self.namespace = namespace or self._getRootNamespace()

    def xpath2obj(self, xpath, xml_doc=None, convert_to=str, namespace=None):
        """
        Converts XPath-like query into an object given by convert_to.

        Only the first element will be converted if multiple elements are
        returned from the XPath query.

        :type xpath: str
        :param xpath: XPath string, e.g. ``*/event``.
        :type xml_doc: Element or ElementTree, optional
        :param xml_doc: XML document to query. Defaults to parsed XML
            document.
        :type convert_to: any type
        :param convert_to: Type to convert to. Defaults to ``str``.
        :type namespace: str, optional
        :param namespace: Namespace used by query. Defaults to document-wide
            namespace set at root.
        :return: Converted text of the first matching element. ``None`` if
            nothing matches, the element has no text or the conversion
            fails (a warning is issued in the latter case).
        """
        try:
            text = self.xpath(xpath, xml_doc, namespace)[0].text
        except IndexError:
            # query returned no element at all
            return None
        # handle empty nodes
        if not text:
            return None
        # handle bool extra - bool(text) would be True for any non-empty text
        if convert_to is bool:
            if text in ("true", "1"):
                return True
            if text in ("false", "0"):
                return False
            return None
        # try to convert into requested type
        try:
            return convert_to(text)
        except Exception:
            msg = "Could not convert %s to type %s. Returning None."
            warnings.warn(msg % (text, convert_to))
            return None

    def xpath(self, xpath, xml_doc=None, namespace=None):
        """
        Very limited XPath-like query.

        .. note:: This method does not support the full XPath syntax!

        If the document object cannot evaluate the expression via its own
        ``xpath`` method, ``findall`` is used instead and a trailing
        positional selector such as ``[2]`` (counting from 1) is emulated.

        :type xpath: str
        :param xpath: XPath string, e.g. ``*/event``.
        :type xml_doc: Element or ElementTree, optional
        :param xml_doc: XML document to query. Defaults to parsed XML
            document.
        :type namespace: str, optional
        :param namespace: Namespace used by query. Defaults to document-wide
            namespace set at root.
        :return: List of elements.
        """
        if xml_doc is None:
            xml_doc = self.xml_doc
        if namespace is None:
            namespace = self.namespace
        # namespace handling in lxml as well xml is very limited
        # preserve prefix
        if xpath.startswith('//'):
            prefix = '//'
            # drop both slashes, the prefix is restored further down
            xpath = xpath[2:]
        elif xpath.startswith('/'):
            prefix = ''
            xpath = xpath[1:]
        else:
            prefix = ''
        # add namespace to each node
        if namespace:
            parts = []
            for part in xpath.split('/'):
                if part not in self._UNQUALIFIED_STEPS:
                    part = "{%s}%s" % (namespace, part)
                parts.append(part)
            xpath = '/'.join(parts)
        # restore prefix
        xpath = prefix + xpath
        # lxml
        try:
            return xml_doc.xpath(xpath)
        except Exception:
            # best effort: the object either has no xpath() method or could
            # not evaluate the expression - continue with findall() below
            pass
        # emulate supports for index selectors (only last element)!
        selector = self._INDEX_SELECTOR.search(xpath)
        if not selector:
            return xml_doc.findall(xpath)
        path, position = selector.groups()
        list_of_elements = xml_doc.findall(path)
        # selectors count from 1; position 0 must not wrap around to the
        # last element via a negative list index
        index = int(position) - 1
        if 0 <= index < len(list_of_elements):
            return [list_of_elements[index]]
        return []

    def _getRootNamespace(self):
        """
        Returns the namespace of the root element.
        """
        return self._getElementNamespace()

    def _getElementNamespace(self, element=None):
        """
        Returns the namespace of the given element.

        :param element: Element to inspect. Defaults to the root element.
        :return: Namespace taken from a ``{namespace}name`` tag or ``''`` if
            the tag has no such part.
        """
        if element is None:
            element = self.xml_root
        tag = element.tag
        if tag.startswith('{') and '}' in tag:
            return tag[1:].split('}')[0]
        return ''

    def _getFirstChildNamespace(self, element=None):
        """
        Returns the namespace of the first child of the given element.

        :param element: Parent element. Defaults to the root element.
        :return: Namespace of the first child or ``None`` if there is no
            child.
        """
        if element is None:
            element = self.xml_root
        try:
            element = element[0]
        except (IndexError, TypeError):
            # no children (or not a container at all)
            return None
        return self._getElementNamespace(element)
if __name__ == '__main__':
    # executed as a script: run the doctests found in this module
    import doctest
    doctest.testmod(exclude_empty=True)