From db0ab9a0b377d33d2975b45f2368ea0197b4ed1c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Caruana Date: Mon, 29 Jul 2013 12:22:10 +0200 Subject: [PATCH 1/4] .gitignore: idea --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 602416e8..7c2b5bce 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ sleekxmpp.egg-info/ *~ .baboon/ .DS_STORE +*.iml From c02adbb8e1a272196f891e3487e5ce079868cb0d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Caruana Date: Tue, 30 Jul 2013 18:51:23 +0200 Subject: [PATCH 2/4] tostring.escape : optimization use of xml.etree.ElementTree._escape_attrib and xml.etree.ElementTree._escape_cdata --- sleekxmpp/xmlstream/tostring.py | 49 ++++++++++++++------------------- tests/test_tostring.py | 3 +- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/sleekxmpp/xmlstream/tostring.py b/sleekxmpp/xmlstream/tostring.py index c49abd3e..771f8dab 100644 --- a/sleekxmpp/xmlstream/tostring.py +++ b/sleekxmpp/xmlstream/tostring.py @@ -140,33 +140,26 @@ def tostring(xml=None, xmlns='', stream=None, outbuffer='', def escape(text, use_cdata=False): - """Convert special characters in XML to escape sequences. + encoding = 'utf-8' + from xml.etree.ElementTree import _escape_cdata, _raise_serialization_error - :param string text: The XML text to convert. 
- :rtype: Unicode string - """ - if sys.version_info < (3, 0): - if type(text) != types.UnicodeType: - text = unicode(text, 'utf-8', 'ignore') + if use_cdata: + return _escape_cdata(text, encoding) - escapes = {'&': '&amp;', - '<': '&lt;', - '>': '&gt;', - "'": '&apos;', - '"': '&quot;'} - - if not use_cdata: - text = list(text) - for i, c in enumerate(text): - text[i] = escapes.get(c, c) - return ''.join(text) - else: - escape_needed = False - for c in text: - if c in escapes: - escape_needed = True - break - if escape_needed: - escaped = map(lambda x : "<![CDATA[%s]]>" % x, text.split("]]>")) - return "<![CDATA[]]]><![CDATA[]>]]>".join(escaped) - return text + # copied from xml.etree.ElementTree._escape_attrib with "'" case + try: + if "&" in text: + text = text.replace("&", "&amp;") + if "<" in text: + text = text.replace("<", "&lt;") + if ">" in text: + text = text.replace(">", "&gt;") + if "\"" in text: + text = text.replace("\"", "&quot;") + if "'" in text: + text = text.replace("'", "&apos;") + if "\n" in text: + text = text.replace("\n", "&#10;") + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) diff --git a/tests/test_tostring.py b/tests/test_tostring.py index e6148533..be11ab03 100644 --- a/tests/test_tostring.py +++ b/tests/test_tostring.py @@ -34,8 +34,7 @@ class TestToString(SleekTest): desired = """&lt;foo bar=&quot;baz&quot;&gt;&apos;Hi""" desired += """ &amp; welcome!&apos;&lt;/foo&gt;""" - self.failUnless(escaped == desired, - "XML escaping did not work: %s." 
% escaped) + self.assertEqual(escaped, desired) def testEmptyElement(self): """Test converting an empty element to a string.""" From 1c3bfd949bc7903ee945694a8c807250d0e893ab Mon Sep 17 00:00:00 2001 From: Jean-Philippe Caruana Date: Wed, 31 Jul 2013 10:50:39 +0200 Subject: [PATCH 3/4] escape: imports at the top --- sleekxmpp/xmlstream/tostring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sleekxmpp/xmlstream/tostring.py b/sleekxmpp/xmlstream/tostring.py index 771f8dab..addd6989 100644 --- a/sleekxmpp/xmlstream/tostring.py +++ b/sleekxmpp/xmlstream/tostring.py @@ -16,6 +16,7 @@ from __future__ import unicode_literals import sys +from xml.etree.ElementTree import _escape_cdata, _raise_serialization_error if sys.version_info < (3, 0): import types @@ -141,7 +142,6 @@ def tostring(xml=None, xmlns='', stream=None, outbuffer='', def escape(text, use_cdata=False): encoding = 'utf-8' - from xml.etree.ElementTree import _escape_cdata, _raise_serialization_error if use_cdata: return _escape_cdata(text, encoding) From b6e53c7b1be3691514cbf25e3de09d6742a4a587 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Caruana Date: Wed, 31 Jul 2013 11:01:34 +0200 Subject: [PATCH 4/4] escape: use xml.etree.ElementTree._escape_attrib to avoid duplication --- sleekxmpp/xmlstream/tostring.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/sleekxmpp/xmlstream/tostring.py b/sleekxmpp/xmlstream/tostring.py index addd6989..4d7976b1 100644 --- a/sleekxmpp/xmlstream/tostring.py +++ b/sleekxmpp/xmlstream/tostring.py @@ -16,7 +16,7 @@ from __future__ import unicode_literals import sys -from xml.etree.ElementTree import _escape_cdata, _raise_serialization_error +from xml.etree.ElementTree import _escape_cdata, _escape_attrib if sys.version_info < (3, 0): import types @@ -146,20 +146,7 @@ def escape(text, use_cdata=False): if use_cdata: return _escape_cdata(text, encoding) - # copied from xml.etree.ElementTree._escape_attrib with "'" case - 
try: - if "&" in text: - text = text.replace("&", "&amp;") - if "<" in text: - text = text.replace("<", "&lt;") - if ">" in text: - text = text.replace(">", "&gt;") - if "\"" in text: - text = text.replace("\"", "&quot;") - if "'" in text: - text = text.replace("'", "&apos;") - if "\n" in text: - text = text.replace("\n", "&#10;") - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) + text = _escape_attrib(text, encoding) + if "'" in text: + text = text.replace("'", "&apos;") + return text