2011-11-22 23:25:02 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
2010-08-06 00:26:41 +00:00
|
|
|
"""
|
2011-11-22 23:25:02 +00:00
|
|
|
    sleekxmpp.xmlstream.tostring
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    This module converts XML objects into Unicode strings and
    intelligently includes namespaces only when necessary to
    keep the output readable.

    Part of SleekXMPP: The Sleek XMPP Library

    :copyright: (c) 2011 Nathanael C. Fritz
    :license: MIT, see LICENSE for more details
|
2010-08-06 00:26:41 +00:00
|
|
|
"""
|
|
|
|
|
2012-06-05 23:54:26 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2011-08-04 18:41:36 +00:00
|
|
|
import sys
|
|
|
|
|
|
|
|
if sys.version_info < (3, 0):
|
|
|
|
import types
|
|
|
|
|
2010-08-06 00:26:41 +00:00
|
|
|
|
2012-06-05 23:54:26 +00:00
|
|
|
XML_NS = 'http://www.w3.org/XML/1998/namespace'
|
|
|
|
|
|
|
|
|
2013-01-24 10:43:46 +00:00
|
|
|
def tostring(xml=None, xmlns='', stream=None, outbuffer='',
             top_level=False, open_only=False, namespaces=None):
    """Serialize an XML object to a Unicode string.

    If an outer xmlns is provided using ``xmlns``, then the current element's
    namespace will not be included if it matches the outer namespace. An
    exception is made for elements that have an attached stream, and appear
    at the stream root.

    :param XML xml: The XML object to serialize.
    :param string xmlns: Optional namespace of an element wrapping the XML
                         object.
    :param stream: The XML stream that generated the XML object. When
                   given, its ``default_ns``, ``stream_ns``, ``use_cdata``
                   and ``namespace_map`` attributes are consulted.
    :param string outbuffer: Optional buffer for storing serializations
                             during recursive calls; it is emitted before
                             the element itself.
    :param bool top_level: Indicates that the element is the outermost
                           element.
    :param bool open_only: Only emit the opening tag (with its
                           attributes), ignoring text, children and tail.
    :param set namespaces: Track which namespaces are in active use so
                           that new ones can be declared when needed.

    :type xml: :py:class:`~xml.etree.ElementTree.Element`
    :type stream: :class:`~sleekxmpp.xmlstream.xmlstream.XMLStream`

    :rtype: Unicode string
    """
    # Add previous results to the start of the output.
    output = [outbuffer]

    # Split the "{namespace}name" tag into its namespace and local name.
    if '}' in xml.tag:
        braced_ns, tag_name = xml.tag.split('}', 1)
        tag_xmlns = braced_ns[1:]
    else:
        tag_xmlns = ''
        tag_name = xml.tag

    default_ns = ''
    stream_ns = ''
    use_cdata = False

    if stream:
        default_ns = stream.default_ns
        stream_ns = stream.stream_ns
        use_cdata = stream.use_cdata

    # Output the tag name and derived namespace of the element. At the
    # top level the stream's own namespaces are implied as well as the
    # wrapping one; below it only the parent's namespace is implied.
    namespace = ''
    if tag_xmlns:
        if top_level:
            implied = [default_ns, xmlns, stream_ns]
        else:
            implied = [xmlns]
        if tag_xmlns not in implied:
            namespace = ' xmlns="%s"' % tag_xmlns
    if stream and tag_xmlns in stream.namespace_map:
        mapped_namespace = stream.namespace_map[tag_xmlns]
        if mapped_namespace:
            tag_name = "%s:%s" % (mapped_namespace, tag_name)
    output.append("<%s" % tag_name)
    output.append(namespace)

    # Output escaped attribute values.
    new_namespaces = set()
    for attrib, value in xml.attrib.items():
        value = escape(value, use_cdata)
        if '}' not in attrib:
            output.append(' %s="%s"' % (attrib, value))
            continue

        # Split only on the first "}" so the handling matches the tag
        # parsing above.
        braced_ns, attrib = attrib.split('}', 1)
        attrib_ns = braced_ns[1:]
        if attrib_ns == XML_NS:
            output.append(' xml:%s="%s"' % (attrib, value))
        elif stream and attrib_ns in stream.namespace_map:
            mapped_ns = stream.namespace_map[attrib_ns]
            if mapped_ns:
                if namespaces is None:
                    namespaces = set()
                if attrib_ns not in namespaces:
                    # First use of this prefix in the current scope:
                    # declare it on this element.
                    namespaces.add(attrib_ns)
                    new_namespaces.add(attrib_ns)
                    output.append(' xmlns:%s="%s"' % (
                        mapped_ns, attrib_ns))
                output.append(' %s:%s="%s"' % (
                    mapped_ns, attrib, value))
        # NOTE(review): attributes in a namespace that is neither the XML
        # namespace nor mapped to a non-empty prefix by the stream are
        # omitted from the output.

    if open_only:
        # Only output the opening tag, regardless of content. Prefixes
        # declared above intentionally stay registered in ``namespaces``.
        output.append(">")
        return ''.join(output)

    if len(xml) or xml.text:
        # The element has text and/or child elements to serialize.
        output.append(">")
        if xml.text:
            output.append(escape(xml.text, use_cdata))
        for child in xml:
            output.append(tostring(child, tag_xmlns, stream,
                                   namespaces=namespaces))
        output.append("</%s>" % tag_name)
    else:
        # Empty element.
        output.append(" />")

    if xml.tail:
        # If there is additional text after the element.
        output.append(escape(xml.tail, use_cdata))

    for ns in new_namespaces:
        # Remove namespaces introduced in this context. This is necessary
        # because the namespaces object continues to be shared with other
        # contexts.
        namespaces.remove(ns)

    return ''.join(output)
|
|
|
|
|
|
|
|
|
2012-07-24 10:25:55 +00:00
|
|
|
def escape(text, use_cdata=False):
    """Convert special characters in XML text to escape sequences.

    By default the rules for attribute values are applied: ``&``, ``<``,
    ``>``, both quote characters and newlines are replaced. When
    ``use_cdata`` is true only ``&``, ``<`` and ``>`` are replaced, which
    is what character data requires.

    :param string text: The text to escape.
    :param bool use_cdata: Apply the reduced character-data rules instead
                           of the attribute rules.

    :rtype: Unicode string
    :raises TypeError: If ``text`` is not a text string.
    """
    # "&" must come first, otherwise the ampersands introduced by the
    # other entities would themselves be escaped again.
    replacements = [("&", "&amp;"),
                    ("<", "&lt;"),
                    (">", "&gt;")]
    if not use_cdata:
        replacements += [("\"", "&quot;"),
                         ("'", "&apos;"),
                         ("\n", "&#10;")]

    try:
        # Calling replace() unconditionally makes objects without that
        # method, or with an incompatible string type, fail here instead
        # of slipping through unescaped.
        for char, entity in replacements:
            text = text.replace(char, entity)
    except (TypeError, AttributeError):
        raise TypeError("cannot serialize %r (type %s)" % (
            text, type(text).__name__))

    # The result stays a text string: callers interpolate it into Unicode
    # format strings, so an encoded byte string would corrupt the output.
    return text
|