Improve handling of invalid UTF-8 strings (#68)

This commit is contained in:
Gholamhossein Tavasoli 2017-05-03 15:32:44 +04:30 committed by Lonami
parent 3d66062eb4
commit 8ffec6b8be

View File

@@ -89,19 +89,7 @@ class BinaryReader:
def tgread_string(self):
    """Reads a Telegram-encoded string.

    The raw bytes come from ``self.tgread_bytes()`` and are decoded as
    UTF-8. Byte sequences that are not valid UTF-8 do not raise; each one
    is replaced with U+FFFD (the Unicode replacement character).

    :return: the decoded ``str``.
    """
    # errors='replace' keeps decoding past malformed input instead of
    # raising UnicodeDecodeError on it.
    return str(self.tgread_bytes(), encoding='utf-8', errors='replace')
@staticmethod
def _decode_string(encoded):
    """Decodes bytes as UTF-8, dropping any invalid sequences.

    Workaround for issues #49 and #67: sometimes an invalid UTF-8 string
    is received. There aren't many other options besides crashing, so the
    offending bytes are removed and the rest is decoded normally.

    :param encoded: the bytes-like object to decode.
    :return: the decoded ``str`` with undecodable bytes left out.
    """
    # errors='ignore' skips each undecodable run and resumes right after
    # it, which is what the previous "cut e.start:e.end and retry" loop
    # produced. Doing it in one pass avoids recursing once per bad run
    # (RecursionError on heavily corrupted input) and re-copying the
    # buffer on every retry.
    return str(encoded, encoding='utf-8', errors='ignore')
def tgread_bool(self): def tgread_bool(self):
"""Reads a Telegram boolean value""" """Reads a Telegram boolean value"""