No character shall be left forgotten (no more '?' in cases where a character could not be properly decoded by the used charset)

This commit is contained in:
Miroslav Stampar 2012-12-20 12:23:37 +01:00
parent 728e061c53
commit 63d9b7a1f8

View File

@ -1815,7 +1815,11 @@ def getUnicode(value, encoding=None, system=False, noneToNull=False):
if isinstance(value, unicode): if isinstance(value, unicode):
return value return value
elif isinstance(value, basestring): elif isinstance(value, basestring):
return unicode(value, encoding or kb.pageEncoding or UNICODE_ENCODING, "replace") while True:
try:
return unicode(value, encoding or kb.pageEncoding or UNICODE_ENCODING)
except UnicodeDecodeError, ex:
value = value[:ex.start] + "".join("\\x%02x" % ord(_) for _ in value[ex.start:ex.end]) + value[ex.end:]
else: else:
return unicode(value) # encoding ignored for non-basestring instances return unicode(value) # encoding ignored for non-basestring instances
else: else: