Some more DREI stuff

This commit is contained in:
Miroslav Stampar 2019-04-19 11:24:34 +02:00
parent da15701a55
commit bb7bd51d94
15 changed files with 94 additions and 71 deletions

View File

@ -20,6 +20,9 @@ from optparse import OptionParser
if sys.version_info >= (3, 0):
xrange = range
text_type = str
else:
text_type = unicode
# Regex used for recognition of hex encoded characters
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})"
@ -52,14 +55,14 @@ def safecharencode(value):
retVal = value
if isinstance(value, basestring):
if any([_ not in SAFE_CHARS for _ in value]):
if any(_ not in SAFE_CHARS for _ in value):
retVal = retVal.replace(HEX_ENCODED_PREFIX, HEX_ENCODED_PREFIX_MARKER)
retVal = retVal.replace('\\', SLASH_MARKER)
for char in SAFE_ENCODE_SLASH_REPLACEMENTS:
retVal = retVal.replace(char, repr(char).strip('\''))
retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, unicode) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, (unicode if isinstance(value, unicode) else str)())
retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, text_type) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, type(value)())
retVal = retVal.replace(SLASH_MARKER, "\\\\")
retVal = retVal.replace(HEX_ENCODED_PREFIX_MARKER, HEX_ENCODED_PREFIX)
@ -81,7 +84,7 @@ def safechardecode(value, binary=False):
while True:
match = re.search(HEX_ENCODED_CHAR_REGEX, retVal)
if match:
retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x")))))
retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, text_type) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x")))))
else:
break
@ -91,7 +94,7 @@ def safechardecode(value, binary=False):
retVal = retVal.replace(SLASH_MARKER, '\\')
if binary:
if isinstance(retVal, unicode):
if isinstance(retVal, text_type):
retVal = retVal.encode("utf8")
elif isinstance(value, (list, tuple)):

View File

@ -4,4 +4,4 @@
# See the file 'LICENSE' for copying permission
# Runs pyflakes on all python files (prerequisite: apt-get install pyflakes)
find . -wholename "./thirdparty" -prune -o -type f -iname "*.py" -exec pyflakes '{}' \;
find . -wholename "./thirdparty" -prune -o -type f -iname "*.py" -exec pyflakes '{}' \; | grep -v "redefines '_'"

View File

@ -333,7 +333,7 @@ def start():
testSqlInj = False
if PLACE.GET in conf.parameters and not any([conf.data, conf.testParameter]):
if PLACE.GET in conf.parameters and not any((conf.data, conf.testParameter)):
for parameter in re.findall(r"([^=]+)=([^%s]+%s?|\Z)" % (re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER, re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER), conf.parameters[PLACE.GET]):
paramKey = (conf.hostname, conf.path, PLACE.GET, parameter[0])

View File

@ -882,6 +882,16 @@ def singleTimeLogMessage(message, level=logging.INFO, flag=None):
logger.log(level, message)
def boldifyMessage(message):
"""
Sets ANSI bold marking on entire message if parts found in predefined BOLD_PATTERNS
>>> boldifyMessage("Hello World")
'Hello World'
>>> boldifyMessage("GET parameter id is not injectable")
'\\x1b[1mGET parameter id is not injectable\\x1b[0m'
"""
retVal = message
if any(_ in message for _ in BOLD_PATTERNS):
@ -890,6 +900,13 @@ def boldifyMessage(message):
return retVal
def setColor(message, color=None, bold=False, level=None):
"""
Sets ANSI color codes
>>> setColor("Hello World", "red")
'\\x1b[31mHello World\\x1b[0m'
"""
retVal = message
level = level or extractRegexResult(r"\[(?P<result>%s)\]" % '|'.join(_[0] for _ in getPublicTypeMembers(LOGGING_LEVELS)), message)
@ -933,7 +950,7 @@ def dataToStdout(data, forceOutput=False, bold=False, content_type=None, status=
if multiThreadMode:
logging._acquireLock()
if isinstance(data, unicode):
if isinstance(data, six.text_type):
message = stdoutencode(data)
else:
message = data
@ -1840,7 +1857,7 @@ def safeFilepathEncode(filepath):
retVal = filepath
if filepath and isinstance(filepath, unicode):
if filepath and isinstance(filepath, six.text_type):
retVal = filepath.encode(sys.getfilesystemencoding() or UNICODE_ENCODING)
return retVal
@ -1927,7 +1944,7 @@ def getFilteredPageContent(page, onlyText=True, split=" "):
retVal = page
# only if the page's charset has been successfully identified
if isinstance(page, unicode):
if isinstance(page, six.text_type):
retVal = re.sub(r"(?si)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), split, page)
retVal = re.sub(r"%s{2,}" % split, split, retVal)
retVal = htmlunescape(retVal.strip().strip(split))
@ -1945,7 +1962,7 @@ def getPageWordSet(page):
retVal = set()
# only if the page's charset has been successfully identified
if isinstance(page, unicode):
if isinstance(page, six.text_type):
retVal = set(_.group(0) for _ in re.finditer(r"\w+", getFilteredPageContent(page)))
return retVal
@ -2430,7 +2447,7 @@ def getUnicode(value, encoding=None, noneToNull=False):
except UnicodeDecodeError:
return six.text_type(str(value), errors="ignore") # encoding ignored for non-basestring instances
def getBytes(value, encoding=UNICODE_ENCODING):
def getBytes(value, encoding=UNICODE_ENCODING, errors="strict"):
"""
Returns byte representation of provided Unicode value
@ -2445,11 +2462,11 @@ def getBytes(value, encoding=UNICODE_ENCODING):
for char in xrange(0xF0000, 0xF00FF + 1):
value = value.replace(unichr(char), "%s%02x" % (SAFE_HEX_MARKER, char - 0xF0000))
retVal = value.encode(encoding)
retVal = value.encode(encoding, errors)
retVal = re.sub(r"%s([0-9a-f]{2})" % SAFE_HEX_MARKER, lambda _: _.group(1).decode("hex"), retVal)
else:
retVal = value.encode(encoding)
retVal = value.encode(encoding, errors)
retVal = re.sub(r"\\x([0-9a-f]{2})", lambda _: _.group(1).decode("hex"), retVal)
return retVal
@ -3694,7 +3711,7 @@ def removeReflectiveValues(content, payload, suppressWarning=False):
retVal = content
try:
if all((content, payload)) and isinstance(content, unicode) and kb.reflectiveMechanism and not kb.heuristicMode:
if all((content, payload)) and isinstance(content, six.text_type) and kb.reflectiveMechanism and not kb.heuristicMode:
def _(value):
while 2 * REFLECTED_REPLACEMENT_REGEX in value:
value = value.replace(2 * REFLECTED_REPLACEMENT_REGEX, REFLECTED_REPLACEMENT_REGEX)
@ -3786,7 +3803,7 @@ def normalizeUnicode(value):
'sucuraj'
"""
return unicodedata.normalize("NFKD", value).encode("ascii", "ignore") if isinstance(value, unicode) else value
return unicodedata.normalize("NFKD", value).encode("ascii", "ignore") if isinstance(value, six.text_type) else value
def safeSQLIdentificatorNaming(name, isTable=False):
"""
@ -4105,7 +4122,7 @@ def asciifyUrl(url, forceQuote=False):
# _urllib.parse.quote(s.replace('%', '')) != s.replace('%', '')
# which would trigger on all %-characters, e.g. "&".
if getUnicode(s).encode("ascii", "replace") != s or forceQuote:
return _urllib.parse.quote(s.encode(UNICODE_ENCODING) if isinstance(s, unicode) else s, safe=safe)
return _urllib.parse.quote(s.encode(UNICODE_ENCODING) if isinstance(s, six.text_type) else s, safe=safe)
return s
username = quote(parts.username, '')
@ -4459,8 +4476,8 @@ def decodeHexValue(value, raw=False):
retVal = retVal.decode("utf-16-be")
except UnicodeDecodeError:
pass
if not isinstance(retVal, unicode):
retVal = getUnicode(retVal, conf.encoding or "utf8")
if not isinstance(retVal, six.text_type):
retVal = getUnicode(retVal, conf.encoding or UNICODE_ENCODING)
return retVal

View File

@ -242,7 +242,7 @@ class Dump(object):
if table and isListLike(table):
table = table[0]
maxlength = max(maxlength, len(unsafeSQLIdentificatorNaming(normalizeUnicode(table) or unicode(table))))
maxlength = max(maxlength, len(unsafeSQLIdentificatorNaming(normalizeUnicode(table) or getUnicode(table))))
lines = "-" * (int(maxlength) + 2)
@ -263,7 +263,7 @@ class Dump(object):
table = table[0]
table = unsafeSQLIdentificatorNaming(table)
blank = " " * (maxlength - len(normalizeUnicode(table) or unicode(table)))
blank = " " * (maxlength - len(normalizeUnicode(table) or getUnicode(table)))
self._write("| %s%s |" % (table, blank))
self._write("+%s+\n" % lines)
@ -358,7 +358,7 @@ class Dump(object):
for ctables in dbTables.values():
for tables in ctables.values():
for table in tables:
maxlength1 = max(maxlength1, len(normalizeUnicode(table) or unicode(table)))
maxlength1 = max(maxlength1, len(normalizeUnicode(table) or getUnicode(table)))
for db, counts in dbTables.items():
self._write("Database: %s" % unsafeSQLIdentificatorNaming(db) if db else "Current database")
@ -384,7 +384,7 @@ class Dump(object):
tables.sort(key=lambda _: _.lower() if hasattr(_, "lower") else _)
for table in tables:
blank1 = " " * (maxlength1 - len(normalizeUnicode(table) or unicode(table)))
blank1 = " " * (maxlength1 - len(normalizeUnicode(table) or getUnicode(table)))
blank2 = " " * (maxlength2 - len(str(count)))
self._write("| %s%s | %d%s |" % (table, blank1, count, blank2))

View File

@ -1716,7 +1716,7 @@ def _cleanupOptions():
except re.error:
conf.csrfToken = re.escape(conf.csrfToken)
finally:
class _(unicode):
class _(six.text_type):
pass
conf.csrfToken = _(conf.csrfToken)
conf.csrfToken._original = original

View File

@ -17,7 +17,7 @@ from lib.core.enums import DBMS_DIRECTORY_NAME
from lib.core.enums import OS
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.3.4.28"
VERSION = "1.3.4.29"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)

View File

@ -73,6 +73,7 @@ from lib.core.settings import URI_INJECTABLE_REGEX
from lib.core.settings import USER_AGENT_ALIASES
from lib.core.settings import XML_RECOGNITION_REGEX
from lib.utils.hashdb import HashDB
from thirdparty import six
from thirdparty.odict import OrderedDict
from thirdparty.six.moves import urllib as _urllib
@ -409,7 +410,7 @@ def _setRequestParams():
message += "Do you want sqlmap to automatically update it in further requests? [y/N] "
if readInput(message, default='N', boolean=True):
class _(unicode):
class _(six.text_type):
pass
conf.csrfToken = _(re.escape(getUnicode(parameter)))
conf.csrfToken._original = getUnicode(parameter)
@ -712,7 +713,7 @@ def initTargetEnv():
_setDBMS()
if conf.data:
class _(unicode):
class _(six.text_type):
pass
kb.postUrlEncode = True

View File

@ -17,6 +17,7 @@ from lib.core.common import Backend
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import filterNone
from lib.core.common import getBytes
from lib.core.common import getPublicTypeMembers
from lib.core.common import getSafeExString
from lib.core.common import getUnicode
@ -42,11 +43,11 @@ from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import PARSE_HEADERS_LIMIT
from lib.core.settings import SELECT_FROM_TABLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import VIEWSTATE_REGEX
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
from lib.utils.htmlentities import htmlEntities
from thirdparty import six
from thirdparty.chardet import detect
from thirdparty.odict import OrderedDict
@ -219,13 +220,13 @@ def checkCharEncoding(encoding, warn=True):
# Reference: http://www.iana.org/assignments/character-sets
# Reference: http://docs.python.org/library/codecs.html
try:
codecs.lookup(encoding.encode(UNICODE_ENCODING) if isinstance(encoding, unicode) else encoding)
except (LookupError, ValueError):
codecs.lookup(encoding)
except:
encoding = None
if encoding:
try:
unicode(randomStr(), encoding)
six.text_type(getBytes(randomStr()), encoding)
except:
if warn:
warnMsg = "invalid web page charset '%s'" % encoding
@ -313,7 +314,7 @@ def decodePage(page, contentEncoding, contentType):
kb.pageEncoding = conf.encoding
# can't do for all responses because we need to support binary files too
if not isinstance(page, unicode) and "text/" in contentType:
if isinstance(page, six.binary_type) and "text/" in contentType:
# e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
if "&#" in page:
page = re.sub(r"&#x([0-9a-f]{1,2});", lambda _: (_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)).decode("hex"), page)

View File

@ -8,6 +8,7 @@ See the file 'LICENSE' for copying permission
import re
from lib.core.common import extractRegexResult
from lib.core.common import getBytes
from lib.core.common import getFilteredPageContent
from lib.core.common import listToStrValue
from lib.core.common import removeDynamicContent
@ -28,6 +29,7 @@ from lib.core.settings import LOWER_RATIO_BOUND
from lib.core.settings import UPPER_RATIO_BOUND
from lib.core.settings import URI_HTTP_HEADER
from lib.core.threads import getCurrentThreadData
from thirdparty import six
def comparison(page, headers, code=None, getRatioValue=False, pageLength=None):
_ = _adjust(_comparison(page, headers, code, getRatioValue, pageLength), getRatioValue)
@ -105,10 +107,10 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
else:
# Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
# (e.g. if one page is PDF and the other is HTML)
if isinstance(seqMatcher.a, str) and isinstance(page, unicode):
page = page.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
elif isinstance(seqMatcher.a, unicode) and isinstance(page, str):
seqMatcher.a = seqMatcher.a.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type):
page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type):
seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
if any(_ is None for _ in (page, seqMatcher.a)):
return None

View File

@ -486,7 +486,7 @@ def getValue(expression, blind=True, union=True, error=True, time=True, fromUser
singleTimeWarnMessage(warnMsg)
# Dirty patch (safe-encoded unicode characters)
if isinstance(value, unicode) and "\\x" in value:
if isinstance(value, six.text_type) and "\\x" in value:
try:
candidate = eval(repr(value).replace("\\\\x", "\\x").replace("u'", "'", 1)).decode(conf.encoding or UNICODE_ENCODING)
if "\\x" not in candidate:

View File

@ -32,6 +32,7 @@ from lib.core.threads import getCurrentThreadData
from lib.core.threads import runThreads
from lib.parse.sitemap import parseSitemap
from lib.request.connect import Connect as Request
from thirdparty import six
from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
from thirdparty.six.moves import http_client as _http_client
from thirdparty.six.moves import urllib as _urllib
@ -79,7 +80,7 @@ def crawl(target):
if not kb.threadContinue:
break
if isinstance(content, unicode):
if isinstance(content, six.text_type):
try:
match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
if match:

View File

@ -7,8 +7,8 @@ See the file 'LICENSE' for copying permission
import binascii
from lib.core.common import getBytes
from lib.core.common import isDBMSVersionAtLeast
from lib.core.settings import UNICODE_ENCODING
from plugins.generic.syntax import Syntax as GenericSyntax
class Syntax(GenericSyntax):
@ -28,7 +28,7 @@ class Syntax(GenericSyntax):
def escaper(value):
# Reference: http://stackoverflow.com/questions/3444335/how-do-i-quote-a-utf-8-string-literal-in-sqlite3
return "CAST(X'%s' AS TEXT)" % binascii.hexlify(value.encode(UNICODE_ENCODING) if isinstance(value, unicode) else value)
return "CAST(X'%s' AS TEXT)" % binascii.hexlify(getBytes(value))
retVal = expression

View File

@ -91,6 +91,11 @@ import sys
if sys.version_info >= (3, 0):
xrange = range
text_type = str
binary_type = bytes
else:
text_type = unicode
binary_type = str
try:
from htmlentitydefs import name2codepoint
@ -434,19 +439,13 @@ class PageElement(object):
def toEncoding(self, s, encoding=None):
"""Encodes an object to a string in some encoding, or to Unicode.
."""
if isinstance(s, unicode):
if isinstance(s, text_type):
if encoding:
s = s.encode(encoding)
elif isinstance(s, str):
if encoding:
s = s.encode(encoding)
else:
s = unicode(s)
elif isinstance(s, binary_type):
s = s.encode(encoding or "utf8")
else:
if encoding:
s = self.toEncoding(str(s), encoding)
else:
s = unicode(s)
s = self.toEncoding(str(s), encoding or "utf8")
return s
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
@ -459,7 +458,7 @@ class PageElement(object):
return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
class NavigableString(unicode, PageElement):
class NavigableString(text_type, PageElement):
def __new__(cls, value):
"""Create a new NavigableString.
@ -469,9 +468,9 @@ class NavigableString(unicode, PageElement):
passed in to the superclass's __new__ or the superclass won't know
how to handle non-ASCII characters.
"""
if isinstance(value, unicode):
return unicode.__new__(cls, value)
return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
if isinstance(value, text_type):
return text_type.__new__(cls, value)
return text_type.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
def __getnewargs__(self):
return (NavigableString.__str__(self),)
@ -1006,7 +1005,7 @@ class SoupStrainer:
if isinstance(markup, Tag):
markup = markup.name
if markup and not isinstance(markup, basestring):
markup = unicode(markup)
markup = text_type(markup)
#Now we know that chunk is either a string, or None.
if hasattr(matchAgainst, 'match'):
# It's a regexp object.
@ -1016,8 +1015,8 @@ class SoupStrainer:
elif hasattr(matchAgainst, 'items'):
result = markup.has_key(matchAgainst)
elif matchAgainst and isinstance(markup, basestring):
if isinstance(markup, unicode):
matchAgainst = unicode(matchAgainst)
if isinstance(markup, text_type):
matchAgainst = text_type(matchAgainst)
else:
matchAgainst = str(matchAgainst)
@ -1181,7 +1180,7 @@ class BeautifulStoneSoup(Tag, sgmllib.SGMLParser):
def _feed(self, inDocumentEncoding=None, isHTML=False):
# Convert the document to Unicode.
markup = self.markup
if isinstance(markup, unicode):
if isinstance(markup, text_type):
if not hasattr(self, 'originalEncoding'):
self.originalEncoding = None
else:
@ -1792,9 +1791,9 @@ class UnicodeDammit:
self._detectEncoding(markup, isHTML)
self.smartQuotesTo = smartQuotesTo
self.triedEncodings = []
if markup == '' or isinstance(markup, unicode):
if markup == '' or isinstance(markup, text_type):
self.originalEncoding = None
self.unicode = unicode(markup)
self.unicode = text_type(markup)
return
u = None
@ -1807,7 +1806,7 @@ class UnicodeDammit:
if u: break
# If no luck and we have auto-detection library, try that:
if not u and chardet and not isinstance(self.markup, unicode):
if not u and chardet and not isinstance(self.markup, text_type):
u = self._convertFrom(chardet.detect(self.markup)['encoding'])
# As a last resort, try utf-8 and windows-1252:
@ -1880,7 +1879,7 @@ class UnicodeDammit:
elif data[:4] == '\xff\xfe\x00\x00':
encoding = 'utf-32le'
data = data[4:]
newdata = unicode(data, encoding)
newdata = text_type(data, encoding)
return newdata
def _detectEncoding(self, xml_data, isHTML=False):
@ -1893,41 +1892,41 @@ class UnicodeDammit:
elif xml_data[:4] == '\x00\x3c\x00\x3f':
# UTF-16BE
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
xml_data = text_type(xml_data, 'utf-16be').encode('utf-8')
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
and (xml_data[2:4] != '\x00\x00'):
# UTF-16BE with BOM
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
xml_data = text_type(xml_data[2:], 'utf-16be').encode('utf-8')
elif xml_data[:4] == '\x3c\x00\x3f\x00':
# UTF-16LE
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
xml_data = text_type(xml_data, 'utf-16le').encode('utf-8')
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
(xml_data[2:4] != '\x00\x00'):
# UTF-16LE with BOM
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
xml_data = text_type(xml_data[2:], 'utf-16le').encode('utf-8')
elif xml_data[:4] == '\x00\x00\x00\x3c':
# UTF-32BE
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
xml_data = text_type(xml_data, 'utf-32be').encode('utf-8')
elif xml_data[:4] == '\x3c\x00\x00\x00':
# UTF-32LE
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
xml_data = text_type(xml_data, 'utf-32le').encode('utf-8')
elif xml_data[:4] == '\x00\x00\xfe\xff':
# UTF-32BE with BOM
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
xml_data = text_type(xml_data[4:], 'utf-32be').encode('utf-8')
elif xml_data[:4] == '\xff\xfe\x00\x00':
# UTF-32LE with BOM
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
xml_data = text_type(xml_data[4:], 'utf-32le').encode('utf-8')
elif xml_data[:3] == '\xef\xbb\xbf':
# UTF-8 with BOM
sniffed_xml_encoding = 'utf-8'
xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
xml_data = text_type(xml_data[3:], 'utf-8').encode('utf-8')
else:
sniffed_xml_encoding = 'ascii'
pass

View File

@ -21,7 +21,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import io
import mimetools
import mimetypes
import os
import stat