The following patch adds a safe_string module and a SafeStr class that should (and, judging by the tests, seems to) resolve the problems with unicode strings in all forms, binary data, and whatever else may show up in python-meh's processing.
I know it is a hack, but I'm really tired of adding 'if type(obj) == types.UnicodeType:' checks, and adding a check for binary data to all these places would drive me crazy.
Vratislav Podzimek (1): Add safe_string module and tests
meh/dump.py | 13 ++++------ meh/safe_string.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ tests/handle_binary.py | 24 +++++++++++++++++++ tests/safe_string_test.py | 38 ++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 meh/safe_string.py create mode 100644 tests/handle_binary.py create mode 100755 tests/safe_string_test.py
The safe_string module provides the 'SafeStr' class, which inherits from Python's 'str' class and overrides the __add__ method to allow safe appending of ascii strings, utf-8 encoded unicode strings, unicode strings, binary data represented by a string, and objects with or without a __str__ method.
With this class we no longer have to check values for being of type UnicodeType, and we can handle binary data correctly.
Related: rhbz#886983
Signed-off-by: Vratislav Podzimek vpodzime@redhat.com --- meh/dump.py | 13 ++++------ meh/safe_string.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ tests/handle_binary.py | 24 +++++++++++++++++++ tests/safe_string_test.py | 38 ++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 meh/safe_string.py create mode 100644 tests/handle_binary.py create mode 100755 tests/safe_string_test.py
diff --git a/meh/dump.py b/meh/dump.py index 81751d2..3e3a2a0 100644 --- a/meh/dump.py +++ b/meh/dump.py @@ -29,6 +29,7 @@ import traceback import types import sys import codecs +from meh.safe_string import SafeStr
class ExceptionDump(object): """This class represents a traceback and contains several useful methods @@ -284,7 +285,7 @@ class ExceptionDump(object): not hasattr(instance, "__class__") or \ not hasattr(instance, "__dict__")
- ret = "" + ret = SafeStr()
# protect from loops try: @@ -352,21 +353,15 @@ class ExceptionDump(object): first = 0 if type(k) == types.StringType: ret += "'%s': " % (k,) - elif type(k) == types.UnicodeType: - ret += "'%s': " % k.encode("utf-8") else: ret += "%s: " % (k,)
if __isSimpleType(v): - if type(v) == types.UnicodeType: - v = v.encode("utf-8") ret += "%s" % (v,) else: ret += self._dumpClass(v, level + 1, parentkey = curkey, skipList=skipList) ret += "}\n" elif __isSimpleType(value): - if type(value) == types.UnicodeType: - value = value.encode("utf-8") ret += "%s%s: %s\n" % (pad, curkey, value) else: ret += "%s%s: " % (pad, curkey) @@ -384,7 +379,7 @@ class ExceptionDump(object): written out, except for those mentioned in the attrSkipList. """ idSkipList = [] - ret = "" + ret = SafeStr()
# We need to augment the environment eval() will run in with the # bindings that were local when the traceback happened so that the @@ -471,7 +466,7 @@ class ExceptionDump(object):
"""
- ret = str(self) + ret = SafeStr(str(self)) ret += self.dump(obj)
return ret diff --git a/meh/safe_string.py b/meh/safe_string.py new file mode 100644 index 0000000..e0e7749 --- /dev/null +++ b/meh/safe_string.py @@ -0,0 +1,60 @@ +# +# Copyright (C) 2012 Red Hat, Inc. +# All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. +# +# Author: Vratislav Podzimek vpodzime@redhat.com +# +# XXX: should be in a separate module??? +# + +""" +This module provides a SafeStr class. + +@see: SafeStr + +""" + +class SafeStr(str): + """ + String class that has a modified __add__ method so that ascii strings, + binary data represented as a byte string and unicode objects can be + safely appended to it (not causing traceback). BINARY DATA IS OMITTED. 
+ + """ + + def __add__(self, other): + old_self = self + + if not (isinstance(other, str) or isinstance(other, unicode)): + if hasattr(other, "__str__"): + other = other.__str__() + else: + other = "OMITTED OBJECT WITHOUT __str__ METHOD" + + if isinstance(other, unicode): + self = SafeStr(str.__add__(self, other.encode("utf-8"))) + else: + try: + # doesn't cause traceback for utf-8 encoded non-ascii string + # and ascii string + other.decode("utf-8") + self = SafeStr(str.__add__(self, other)) + except UnicodeDecodeError: + # binary data + self = SafeStr(str.__add__(self, "OMITTED BINARY DATA")) + + del(old_self) + return self diff --git a/tests/handle_binary.py b/tests/handle_binary.py new file mode 100644 index 0000000..62d973c --- /dev/null +++ b/tests/handle_binary.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +import tempfile + +from tests.baseclass import BaseTestCase +from meh import Config + +BINARY_DATA = "\xff\xfe\xdd" + +class BinaryExample(object): + def __init__(self): + self.bin_data = BINARY_DATA + +class HandleUnicode_TestCase(BaseTestCase): + def runTest(self): + binary_example = BinaryExample() + + conf = Config(programName="UnicodeTest", + programVersion="1.0") + + # should not raise exception + dump = self.dump(conf, binary_example) + + self.assertIn("OMITTED BINARY DATA", dump) + diff --git a/tests/safe_string_test.py b/tests/safe_string_test.py new file mode 100755 index 0000000..a41e289 --- /dev/null +++ b/tests/safe_string_test.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +from tests.baseclass import BaseTestCase +from meh.safe_string import SafeStr + +class TestClass(object): + def __str__(self): + return "string representation of TestClass instance" + +class TestClass2(): + pass + +class SafeStr_TestCase(BaseTestCase): + def setUp(self): + self.safestr = SafeStr() + self.unistr = u"ááááá" + self.enc_unistr = self.unistr.encode("utf-8") + self.asciistr = "aaaa" + self.bindata = '\xff\xff\xfe' + self.test_object = TestClass() + 
self.test_object2 = TestClass2() + + def runTest(self): + self.safestr += self.asciistr + self.safestr += self.enc_unistr + self.safestr += self.unistr + self.safestr += self.bindata + self.safestr += self.test_object + self.safestr += self.test_object2 + + self.assertIn(self.asciistr, self.safestr) + + # should be included twice -- appended enc_unistr and unistr + self.assertIn(2*self.enc_unistr, self.safestr) + self.assertIn("OMITTED BINARY DATA", self.safestr) + self.assertIn(str(self.test_object), self.safestr) + self.assertIn("OMITTED OBJECT WITHOUT __str__ METHOD", self.safestr) +
On Fri, 2012-12-14 at 12:09 +0100, Vratislav Podzimek wrote:
diff --git a/tests/handle_binary.py b/tests/handle_binary.py new file mode 100644 index 0000000..62d973c --- /dev/null +++ b/tests/handle_binary.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +import tempfile
tempfile isn't needed here. Fixed locally.
anaconda-patches@lists.fedorahosted.org