mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-06-08 07:03:10 +03:00
Some more DREI stuff
This commit is contained in:
parent
da15701a55
commit
bb7bd51d94
|
@ -20,6 +20,9 @@ from optparse import OptionParser
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
xrange = range
|
xrange = range
|
||||||
|
text_type = str
|
||||||
|
else:
|
||||||
|
text_type = unicode
|
||||||
|
|
||||||
# Regex used for recognition of hex encoded characters
|
# Regex used for recognition of hex encoded characters
|
||||||
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})"
|
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})"
|
||||||
|
@ -52,14 +55,14 @@ def safecharencode(value):
|
||||||
retVal = value
|
retVal = value
|
||||||
|
|
||||||
if isinstance(value, basestring):
|
if isinstance(value, basestring):
|
||||||
if any([_ not in SAFE_CHARS for _ in value]):
|
if any(_ not in SAFE_CHARS for _ in value):
|
||||||
retVal = retVal.replace(HEX_ENCODED_PREFIX, HEX_ENCODED_PREFIX_MARKER)
|
retVal = retVal.replace(HEX_ENCODED_PREFIX, HEX_ENCODED_PREFIX_MARKER)
|
||||||
retVal = retVal.replace('\\', SLASH_MARKER)
|
retVal = retVal.replace('\\', SLASH_MARKER)
|
||||||
|
|
||||||
for char in SAFE_ENCODE_SLASH_REPLACEMENTS:
|
for char in SAFE_ENCODE_SLASH_REPLACEMENTS:
|
||||||
retVal = retVal.replace(char, repr(char).strip('\''))
|
retVal = retVal.replace(char, repr(char).strip('\''))
|
||||||
|
|
||||||
retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, unicode) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, (unicode if isinstance(value, unicode) else str)())
|
retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, text_type) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, type(value)())
|
||||||
|
|
||||||
retVal = retVal.replace(SLASH_MARKER, "\\\\")
|
retVal = retVal.replace(SLASH_MARKER, "\\\\")
|
||||||
retVal = retVal.replace(HEX_ENCODED_PREFIX_MARKER, HEX_ENCODED_PREFIX)
|
retVal = retVal.replace(HEX_ENCODED_PREFIX_MARKER, HEX_ENCODED_PREFIX)
|
||||||
|
@ -81,7 +84,7 @@ def safechardecode(value, binary=False):
|
||||||
while True:
|
while True:
|
||||||
match = re.search(HEX_ENCODED_CHAR_REGEX, retVal)
|
match = re.search(HEX_ENCODED_CHAR_REGEX, retVal)
|
||||||
if match:
|
if match:
|
||||||
retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, unicode) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x")))))
|
retVal = retVal.replace(match.group("result"), (unichr if isinstance(value, text_type) else chr)(ord(binascii.unhexlify(match.group("result").lstrip("\\x")))))
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -91,7 +94,7 @@ def safechardecode(value, binary=False):
|
||||||
retVal = retVal.replace(SLASH_MARKER, '\\')
|
retVal = retVal.replace(SLASH_MARKER, '\\')
|
||||||
|
|
||||||
if binary:
|
if binary:
|
||||||
if isinstance(retVal, unicode):
|
if isinstance(retVal, text_type):
|
||||||
retVal = retVal.encode("utf8")
|
retVal = retVal.encode("utf8")
|
||||||
|
|
||||||
elif isinstance(value, (list, tuple)):
|
elif isinstance(value, (list, tuple)):
|
||||||
|
|
|
@ -4,4 +4,4 @@
|
||||||
# See the file 'LICENSE' for copying permission
|
# See the file 'LICENSE' for copying permission
|
||||||
|
|
||||||
# Runs pyflakes on all python files (prerequisite: apt-get install pyflakes)
|
# Runs pyflakes on all python files (prerequisite: apt-get install pyflakes)
|
||||||
find . -wholename "./thirdparty" -prune -o -type f -iname "*.py" -exec pyflakes '{}' \;
|
find . -wholename "./thirdparty" -prune -o -type f -iname "*.py" -exec pyflakes '{}' \; | grep -v "redefines '_'"
|
||||||
|
|
|
@ -333,7 +333,7 @@ def start():
|
||||||
|
|
||||||
testSqlInj = False
|
testSqlInj = False
|
||||||
|
|
||||||
if PLACE.GET in conf.parameters and not any([conf.data, conf.testParameter]):
|
if PLACE.GET in conf.parameters and not any((conf.data, conf.testParameter)):
|
||||||
for parameter in re.findall(r"([^=]+)=([^%s]+%s?|\Z)" % (re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER, re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER), conf.parameters[PLACE.GET]):
|
for parameter in re.findall(r"([^=]+)=([^%s]+%s?|\Z)" % (re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER, re.escape(conf.paramDel or "") or DEFAULT_GET_POST_DELIMITER), conf.parameters[PLACE.GET]):
|
||||||
paramKey = (conf.hostname, conf.path, PLACE.GET, parameter[0])
|
paramKey = (conf.hostname, conf.path, PLACE.GET, parameter[0])
|
||||||
|
|
||||||
|
|
|
@ -882,6 +882,16 @@ def singleTimeLogMessage(message, level=logging.INFO, flag=None):
|
||||||
logger.log(level, message)
|
logger.log(level, message)
|
||||||
|
|
||||||
def boldifyMessage(message):
|
def boldifyMessage(message):
|
||||||
|
"""
|
||||||
|
Sets ANSI bold marking on entire message if parts found in predefined BOLD_PATTERNS
|
||||||
|
|
||||||
|
>>> boldifyMessage("Hello World")
|
||||||
|
'Hello World'
|
||||||
|
|
||||||
|
>>> boldifyMessage("GET parameter id is not injectable")
|
||||||
|
'\\x1b[1mGET parameter id is not injectable\\x1b[0m'
|
||||||
|
"""
|
||||||
|
|
||||||
retVal = message
|
retVal = message
|
||||||
|
|
||||||
if any(_ in message for _ in BOLD_PATTERNS):
|
if any(_ in message for _ in BOLD_PATTERNS):
|
||||||
|
@ -890,6 +900,13 @@ def boldifyMessage(message):
|
||||||
return retVal
|
return retVal
|
||||||
|
|
||||||
def setColor(message, color=None, bold=False, level=None):
|
def setColor(message, color=None, bold=False, level=None):
|
||||||
|
"""
|
||||||
|
Sets ANSI color codes
|
||||||
|
|
||||||
|
>>> setColor("Hello World", "red")
|
||||||
|
'\\x1b[31mHello World\\x1b[0m'
|
||||||
|
"""
|
||||||
|
|
||||||
retVal = message
|
retVal = message
|
||||||
level = level or extractRegexResult(r"\[(?P<result>%s)\]" % '|'.join(_[0] for _ in getPublicTypeMembers(LOGGING_LEVELS)), message)
|
level = level or extractRegexResult(r"\[(?P<result>%s)\]" % '|'.join(_[0] for _ in getPublicTypeMembers(LOGGING_LEVELS)), message)
|
||||||
|
|
||||||
|
@ -933,7 +950,7 @@ def dataToStdout(data, forceOutput=False, bold=False, content_type=None, status=
|
||||||
if multiThreadMode:
|
if multiThreadMode:
|
||||||
logging._acquireLock()
|
logging._acquireLock()
|
||||||
|
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, six.text_type):
|
||||||
message = stdoutencode(data)
|
message = stdoutencode(data)
|
||||||
else:
|
else:
|
||||||
message = data
|
message = data
|
||||||
|
@ -1840,7 +1857,7 @@ def safeFilepathEncode(filepath):
|
||||||
|
|
||||||
retVal = filepath
|
retVal = filepath
|
||||||
|
|
||||||
if filepath and isinstance(filepath, unicode):
|
if filepath and isinstance(filepath, six.text_type):
|
||||||
retVal = filepath.encode(sys.getfilesystemencoding() or UNICODE_ENCODING)
|
retVal = filepath.encode(sys.getfilesystemencoding() or UNICODE_ENCODING)
|
||||||
|
|
||||||
return retVal
|
return retVal
|
||||||
|
@ -1927,7 +1944,7 @@ def getFilteredPageContent(page, onlyText=True, split=" "):
|
||||||
retVal = page
|
retVal = page
|
||||||
|
|
||||||
# only if the page's charset has been successfully identified
|
# only if the page's charset has been successfully identified
|
||||||
if isinstance(page, unicode):
|
if isinstance(page, six.text_type):
|
||||||
retVal = re.sub(r"(?si)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), split, page)
|
retVal = re.sub(r"(?si)<script.+?</script>|<!--.+?-->|<style.+?</style>%s" % (r"|<[^>]+>|\t|\n|\r" if onlyText else ""), split, page)
|
||||||
retVal = re.sub(r"%s{2,}" % split, split, retVal)
|
retVal = re.sub(r"%s{2,}" % split, split, retVal)
|
||||||
retVal = htmlunescape(retVal.strip().strip(split))
|
retVal = htmlunescape(retVal.strip().strip(split))
|
||||||
|
@ -1945,7 +1962,7 @@ def getPageWordSet(page):
|
||||||
retVal = set()
|
retVal = set()
|
||||||
|
|
||||||
# only if the page's charset has been successfully identified
|
# only if the page's charset has been successfully identified
|
||||||
if isinstance(page, unicode):
|
if isinstance(page, six.text_type):
|
||||||
retVal = set(_.group(0) for _ in re.finditer(r"\w+", getFilteredPageContent(page)))
|
retVal = set(_.group(0) for _ in re.finditer(r"\w+", getFilteredPageContent(page)))
|
||||||
|
|
||||||
return retVal
|
return retVal
|
||||||
|
@ -2430,7 +2447,7 @@ def getUnicode(value, encoding=None, noneToNull=False):
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
return six.text_type(str(value), errors="ignore") # encoding ignored for non-basestring instances
|
return six.text_type(str(value), errors="ignore") # encoding ignored for non-basestring instances
|
||||||
|
|
||||||
def getBytes(value, encoding=UNICODE_ENCODING):
|
def getBytes(value, encoding=UNICODE_ENCODING, errors="strict"):
|
||||||
"""
|
"""
|
||||||
Returns byte representation of provided Unicode value
|
Returns byte representation of provided Unicode value
|
||||||
|
|
||||||
|
@ -2445,11 +2462,11 @@ def getBytes(value, encoding=UNICODE_ENCODING):
|
||||||
for char in xrange(0xF0000, 0xF00FF + 1):
|
for char in xrange(0xF0000, 0xF00FF + 1):
|
||||||
value = value.replace(unichr(char), "%s%02x" % (SAFE_HEX_MARKER, char - 0xF0000))
|
value = value.replace(unichr(char), "%s%02x" % (SAFE_HEX_MARKER, char - 0xF0000))
|
||||||
|
|
||||||
retVal = value.encode(encoding)
|
retVal = value.encode(encoding, errors)
|
||||||
|
|
||||||
retVal = re.sub(r"%s([0-9a-f]{2})" % SAFE_HEX_MARKER, lambda _: _.group(1).decode("hex"), retVal)
|
retVal = re.sub(r"%s([0-9a-f]{2})" % SAFE_HEX_MARKER, lambda _: _.group(1).decode("hex"), retVal)
|
||||||
else:
|
else:
|
||||||
retVal = value.encode(encoding)
|
retVal = value.encode(encoding, errors)
|
||||||
retVal = re.sub(r"\\x([0-9a-f]{2})", lambda _: _.group(1).decode("hex"), retVal)
|
retVal = re.sub(r"\\x([0-9a-f]{2})", lambda _: _.group(1).decode("hex"), retVal)
|
||||||
|
|
||||||
return retVal
|
return retVal
|
||||||
|
@ -3694,7 +3711,7 @@ def removeReflectiveValues(content, payload, suppressWarning=False):
|
||||||
retVal = content
|
retVal = content
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if all((content, payload)) and isinstance(content, unicode) and kb.reflectiveMechanism and not kb.heuristicMode:
|
if all((content, payload)) and isinstance(content, six.text_type) and kb.reflectiveMechanism and not kb.heuristicMode:
|
||||||
def _(value):
|
def _(value):
|
||||||
while 2 * REFLECTED_REPLACEMENT_REGEX in value:
|
while 2 * REFLECTED_REPLACEMENT_REGEX in value:
|
||||||
value = value.replace(2 * REFLECTED_REPLACEMENT_REGEX, REFLECTED_REPLACEMENT_REGEX)
|
value = value.replace(2 * REFLECTED_REPLACEMENT_REGEX, REFLECTED_REPLACEMENT_REGEX)
|
||||||
|
@ -3786,7 +3803,7 @@ def normalizeUnicode(value):
|
||||||
'sucuraj'
|
'sucuraj'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return unicodedata.normalize("NFKD", value).encode("ascii", "ignore") if isinstance(value, unicode) else value
|
return unicodedata.normalize("NFKD", value).encode("ascii", "ignore") if isinstance(value, six.text_type) else value
|
||||||
|
|
||||||
def safeSQLIdentificatorNaming(name, isTable=False):
|
def safeSQLIdentificatorNaming(name, isTable=False):
|
||||||
"""
|
"""
|
||||||
|
@ -4105,7 +4122,7 @@ def asciifyUrl(url, forceQuote=False):
|
||||||
# _urllib.parse.quote(s.replace('%', '')) != s.replace('%', '')
|
# _urllib.parse.quote(s.replace('%', '')) != s.replace('%', '')
|
||||||
# which would trigger on all %-characters, e.g. "&".
|
# which would trigger on all %-characters, e.g. "&".
|
||||||
if getUnicode(s).encode("ascii", "replace") != s or forceQuote:
|
if getUnicode(s).encode("ascii", "replace") != s or forceQuote:
|
||||||
return _urllib.parse.quote(s.encode(UNICODE_ENCODING) if isinstance(s, unicode) else s, safe=safe)
|
return _urllib.parse.quote(s.encode(UNICODE_ENCODING) if isinstance(s, six.text_type) else s, safe=safe)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
username = quote(parts.username, '')
|
username = quote(parts.username, '')
|
||||||
|
@ -4459,8 +4476,8 @@ def decodeHexValue(value, raw=False):
|
||||||
retVal = retVal.decode("utf-16-be")
|
retVal = retVal.decode("utf-16-be")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
if not isinstance(retVal, unicode):
|
if not isinstance(retVal, six.text_type):
|
||||||
retVal = getUnicode(retVal, conf.encoding or "utf8")
|
retVal = getUnicode(retVal, conf.encoding or UNICODE_ENCODING)
|
||||||
|
|
||||||
return retVal
|
return retVal
|
||||||
|
|
||||||
|
|
|
@ -242,7 +242,7 @@ class Dump(object):
|
||||||
if table and isListLike(table):
|
if table and isListLike(table):
|
||||||
table = table[0]
|
table = table[0]
|
||||||
|
|
||||||
maxlength = max(maxlength, len(unsafeSQLIdentificatorNaming(normalizeUnicode(table) or unicode(table))))
|
maxlength = max(maxlength, len(unsafeSQLIdentificatorNaming(normalizeUnicode(table) or getUnicode(table))))
|
||||||
|
|
||||||
lines = "-" * (int(maxlength) + 2)
|
lines = "-" * (int(maxlength) + 2)
|
||||||
|
|
||||||
|
@ -263,7 +263,7 @@ class Dump(object):
|
||||||
table = table[0]
|
table = table[0]
|
||||||
|
|
||||||
table = unsafeSQLIdentificatorNaming(table)
|
table = unsafeSQLIdentificatorNaming(table)
|
||||||
blank = " " * (maxlength - len(normalizeUnicode(table) or unicode(table)))
|
blank = " " * (maxlength - len(normalizeUnicode(table) or getUnicode(table)))
|
||||||
self._write("| %s%s |" % (table, blank))
|
self._write("| %s%s |" % (table, blank))
|
||||||
|
|
||||||
self._write("+%s+\n" % lines)
|
self._write("+%s+\n" % lines)
|
||||||
|
@ -358,7 +358,7 @@ class Dump(object):
|
||||||
for ctables in dbTables.values():
|
for ctables in dbTables.values():
|
||||||
for tables in ctables.values():
|
for tables in ctables.values():
|
||||||
for table in tables:
|
for table in tables:
|
||||||
maxlength1 = max(maxlength1, len(normalizeUnicode(table) or unicode(table)))
|
maxlength1 = max(maxlength1, len(normalizeUnicode(table) or getUnicode(table)))
|
||||||
|
|
||||||
for db, counts in dbTables.items():
|
for db, counts in dbTables.items():
|
||||||
self._write("Database: %s" % unsafeSQLIdentificatorNaming(db) if db else "Current database")
|
self._write("Database: %s" % unsafeSQLIdentificatorNaming(db) if db else "Current database")
|
||||||
|
@ -384,7 +384,7 @@ class Dump(object):
|
||||||
tables.sort(key=lambda _: _.lower() if hasattr(_, "lower") else _)
|
tables.sort(key=lambda _: _.lower() if hasattr(_, "lower") else _)
|
||||||
|
|
||||||
for table in tables:
|
for table in tables:
|
||||||
blank1 = " " * (maxlength1 - len(normalizeUnicode(table) or unicode(table)))
|
blank1 = " " * (maxlength1 - len(normalizeUnicode(table) or getUnicode(table)))
|
||||||
blank2 = " " * (maxlength2 - len(str(count)))
|
blank2 = " " * (maxlength2 - len(str(count)))
|
||||||
self._write("| %s%s | %d%s |" % (table, blank1, count, blank2))
|
self._write("| %s%s | %d%s |" % (table, blank1, count, blank2))
|
||||||
|
|
||||||
|
|
|
@ -1716,7 +1716,7 @@ def _cleanupOptions():
|
||||||
except re.error:
|
except re.error:
|
||||||
conf.csrfToken = re.escape(conf.csrfToken)
|
conf.csrfToken = re.escape(conf.csrfToken)
|
||||||
finally:
|
finally:
|
||||||
class _(unicode):
|
class _(six.text_type):
|
||||||
pass
|
pass
|
||||||
conf.csrfToken = _(conf.csrfToken)
|
conf.csrfToken = _(conf.csrfToken)
|
||||||
conf.csrfToken._original = original
|
conf.csrfToken._original = original
|
||||||
|
|
|
@ -17,7 +17,7 @@ from lib.core.enums import DBMS_DIRECTORY_NAME
|
||||||
from lib.core.enums import OS
|
from lib.core.enums import OS
|
||||||
|
|
||||||
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
|
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
|
||||||
VERSION = "1.3.4.28"
|
VERSION = "1.3.4.29"
|
||||||
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
|
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
|
||||||
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
|
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
|
||||||
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
|
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
|
||||||
|
|
|
@ -73,6 +73,7 @@ from lib.core.settings import URI_INJECTABLE_REGEX
|
||||||
from lib.core.settings import USER_AGENT_ALIASES
|
from lib.core.settings import USER_AGENT_ALIASES
|
||||||
from lib.core.settings import XML_RECOGNITION_REGEX
|
from lib.core.settings import XML_RECOGNITION_REGEX
|
||||||
from lib.utils.hashdb import HashDB
|
from lib.utils.hashdb import HashDB
|
||||||
|
from thirdparty import six
|
||||||
from thirdparty.odict import OrderedDict
|
from thirdparty.odict import OrderedDict
|
||||||
from thirdparty.six.moves import urllib as _urllib
|
from thirdparty.six.moves import urllib as _urllib
|
||||||
|
|
||||||
|
@ -409,7 +410,7 @@ def _setRequestParams():
|
||||||
message += "Do you want sqlmap to automatically update it in further requests? [y/N] "
|
message += "Do you want sqlmap to automatically update it in further requests? [y/N] "
|
||||||
|
|
||||||
if readInput(message, default='N', boolean=True):
|
if readInput(message, default='N', boolean=True):
|
||||||
class _(unicode):
|
class _(six.text_type):
|
||||||
pass
|
pass
|
||||||
conf.csrfToken = _(re.escape(getUnicode(parameter)))
|
conf.csrfToken = _(re.escape(getUnicode(parameter)))
|
||||||
conf.csrfToken._original = getUnicode(parameter)
|
conf.csrfToken._original = getUnicode(parameter)
|
||||||
|
@ -712,7 +713,7 @@ def initTargetEnv():
|
||||||
_setDBMS()
|
_setDBMS()
|
||||||
|
|
||||||
if conf.data:
|
if conf.data:
|
||||||
class _(unicode):
|
class _(six.text_type):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
kb.postUrlEncode = True
|
kb.postUrlEncode = True
|
||||||
|
|
|
@ -17,6 +17,7 @@ from lib.core.common import Backend
|
||||||
from lib.core.common import extractErrorMessage
|
from lib.core.common import extractErrorMessage
|
||||||
from lib.core.common import extractRegexResult
|
from lib.core.common import extractRegexResult
|
||||||
from lib.core.common import filterNone
|
from lib.core.common import filterNone
|
||||||
|
from lib.core.common import getBytes
|
||||||
from lib.core.common import getPublicTypeMembers
|
from lib.core.common import getPublicTypeMembers
|
||||||
from lib.core.common import getSafeExString
|
from lib.core.common import getSafeExString
|
||||||
from lib.core.common import getUnicode
|
from lib.core.common import getUnicode
|
||||||
|
@ -42,11 +43,11 @@ from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
|
||||||
from lib.core.settings import META_CHARSET_REGEX
|
from lib.core.settings import META_CHARSET_REGEX
|
||||||
from lib.core.settings import PARSE_HEADERS_LIMIT
|
from lib.core.settings import PARSE_HEADERS_LIMIT
|
||||||
from lib.core.settings import SELECT_FROM_TABLE_REGEX
|
from lib.core.settings import SELECT_FROM_TABLE_REGEX
|
||||||
from lib.core.settings import UNICODE_ENCODING
|
|
||||||
from lib.core.settings import VIEWSTATE_REGEX
|
from lib.core.settings import VIEWSTATE_REGEX
|
||||||
from lib.parse.headers import headersParser
|
from lib.parse.headers import headersParser
|
||||||
from lib.parse.html import htmlParser
|
from lib.parse.html import htmlParser
|
||||||
from lib.utils.htmlentities import htmlEntities
|
from lib.utils.htmlentities import htmlEntities
|
||||||
|
from thirdparty import six
|
||||||
from thirdparty.chardet import detect
|
from thirdparty.chardet import detect
|
||||||
from thirdparty.odict import OrderedDict
|
from thirdparty.odict import OrderedDict
|
||||||
|
|
||||||
|
@ -219,13 +220,13 @@ def checkCharEncoding(encoding, warn=True):
|
||||||
# Reference: http://www.iana.org/assignments/character-sets
|
# Reference: http://www.iana.org/assignments/character-sets
|
||||||
# Reference: http://docs.python.org/library/codecs.html
|
# Reference: http://docs.python.org/library/codecs.html
|
||||||
try:
|
try:
|
||||||
codecs.lookup(encoding.encode(UNICODE_ENCODING) if isinstance(encoding, unicode) else encoding)
|
codecs.lookup(encoding)
|
||||||
except (LookupError, ValueError):
|
except:
|
||||||
encoding = None
|
encoding = None
|
||||||
|
|
||||||
if encoding:
|
if encoding:
|
||||||
try:
|
try:
|
||||||
unicode(randomStr(), encoding)
|
six.text_type(getBytes(randomStr()), encoding)
|
||||||
except:
|
except:
|
||||||
if warn:
|
if warn:
|
||||||
warnMsg = "invalid web page charset '%s'" % encoding
|
warnMsg = "invalid web page charset '%s'" % encoding
|
||||||
|
@ -313,7 +314,7 @@ def decodePage(page, contentEncoding, contentType):
|
||||||
kb.pageEncoding = conf.encoding
|
kb.pageEncoding = conf.encoding
|
||||||
|
|
||||||
# can't do for all responses because we need to support binary files too
|
# can't do for all responses because we need to support binary files too
|
||||||
if not isinstance(page, unicode) and "text/" in contentType:
|
if isinstance(page, six.binary_type) and "text/" in contentType:
|
||||||
# e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
|
# e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
|
||||||
if "&#" in page:
|
if "&#" in page:
|
||||||
page = re.sub(r"&#x([0-9a-f]{1,2});", lambda _: (_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)).decode("hex"), page)
|
page = re.sub(r"&#x([0-9a-f]{1,2});", lambda _: (_.group(1) if len(_.group(1)) == 2 else "0%s" % _.group(1)).decode("hex"), page)
|
||||||
|
|
|
@ -8,6 +8,7 @@ See the file 'LICENSE' for copying permission
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from lib.core.common import extractRegexResult
|
from lib.core.common import extractRegexResult
|
||||||
|
from lib.core.common import getBytes
|
||||||
from lib.core.common import getFilteredPageContent
|
from lib.core.common import getFilteredPageContent
|
||||||
from lib.core.common import listToStrValue
|
from lib.core.common import listToStrValue
|
||||||
from lib.core.common import removeDynamicContent
|
from lib.core.common import removeDynamicContent
|
||||||
|
@ -28,6 +29,7 @@ from lib.core.settings import LOWER_RATIO_BOUND
|
||||||
from lib.core.settings import UPPER_RATIO_BOUND
|
from lib.core.settings import UPPER_RATIO_BOUND
|
||||||
from lib.core.settings import URI_HTTP_HEADER
|
from lib.core.settings import URI_HTTP_HEADER
|
||||||
from lib.core.threads import getCurrentThreadData
|
from lib.core.threads import getCurrentThreadData
|
||||||
|
from thirdparty import six
|
||||||
|
|
||||||
def comparison(page, headers, code=None, getRatioValue=False, pageLength=None):
|
def comparison(page, headers, code=None, getRatioValue=False, pageLength=None):
|
||||||
_ = _adjust(_comparison(page, headers, code, getRatioValue, pageLength), getRatioValue)
|
_ = _adjust(_comparison(page, headers, code, getRatioValue, pageLength), getRatioValue)
|
||||||
|
@ -105,10 +107,10 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
|
||||||
else:
|
else:
|
||||||
# Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
|
# Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
|
||||||
# (e.g. if one page is PDF and the other is HTML)
|
# (e.g. if one page is PDF and the other is HTML)
|
||||||
if isinstance(seqMatcher.a, str) and isinstance(page, unicode):
|
if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type):
|
||||||
page = page.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
|
page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
|
||||||
elif isinstance(seqMatcher.a, unicode) and isinstance(page, str):
|
elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type):
|
||||||
seqMatcher.a = seqMatcher.a.encode(kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
|
seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
|
||||||
|
|
||||||
if any(_ is None for _ in (page, seqMatcher.a)):
|
if any(_ is None for _ in (page, seqMatcher.a)):
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -486,7 +486,7 @@ def getValue(expression, blind=True, union=True, error=True, time=True, fromUser
|
||||||
singleTimeWarnMessage(warnMsg)
|
singleTimeWarnMessage(warnMsg)
|
||||||
|
|
||||||
# Dirty patch (safe-encoded unicode characters)
|
# Dirty patch (safe-encoded unicode characters)
|
||||||
if isinstance(value, unicode) and "\\x" in value:
|
if isinstance(value, six.text_type) and "\\x" in value:
|
||||||
try:
|
try:
|
||||||
candidate = eval(repr(value).replace("\\\\x", "\\x").replace("u'", "'", 1)).decode(conf.encoding or UNICODE_ENCODING)
|
candidate = eval(repr(value).replace("\\\\x", "\\x").replace("u'", "'", 1)).decode(conf.encoding or UNICODE_ENCODING)
|
||||||
if "\\x" not in candidate:
|
if "\\x" not in candidate:
|
||||||
|
|
|
@ -32,6 +32,7 @@ from lib.core.threads import getCurrentThreadData
|
||||||
from lib.core.threads import runThreads
|
from lib.core.threads import runThreads
|
||||||
from lib.parse.sitemap import parseSitemap
|
from lib.parse.sitemap import parseSitemap
|
||||||
from lib.request.connect import Connect as Request
|
from lib.request.connect import Connect as Request
|
||||||
|
from thirdparty import six
|
||||||
from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
|
from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
|
||||||
from thirdparty.six.moves import http_client as _http_client
|
from thirdparty.six.moves import http_client as _http_client
|
||||||
from thirdparty.six.moves import urllib as _urllib
|
from thirdparty.six.moves import urllib as _urllib
|
||||||
|
@ -79,7 +80,7 @@ def crawl(target):
|
||||||
if not kb.threadContinue:
|
if not kb.threadContinue:
|
||||||
break
|
break
|
||||||
|
|
||||||
if isinstance(content, unicode):
|
if isinstance(content, six.text_type):
|
||||||
try:
|
try:
|
||||||
match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
|
match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
|
||||||
if match:
|
if match:
|
||||||
|
|
|
@ -7,8 +7,8 @@ See the file 'LICENSE' for copying permission
|
||||||
|
|
||||||
import binascii
|
import binascii
|
||||||
|
|
||||||
|
from lib.core.common import getBytes
|
||||||
from lib.core.common import isDBMSVersionAtLeast
|
from lib.core.common import isDBMSVersionAtLeast
|
||||||
from lib.core.settings import UNICODE_ENCODING
|
|
||||||
from plugins.generic.syntax import Syntax as GenericSyntax
|
from plugins.generic.syntax import Syntax as GenericSyntax
|
||||||
|
|
||||||
class Syntax(GenericSyntax):
|
class Syntax(GenericSyntax):
|
||||||
|
@ -28,7 +28,7 @@ class Syntax(GenericSyntax):
|
||||||
|
|
||||||
def escaper(value):
|
def escaper(value):
|
||||||
# Reference: http://stackoverflow.com/questions/3444335/how-do-i-quote-a-utf-8-string-literal-in-sqlite3
|
# Reference: http://stackoverflow.com/questions/3444335/how-do-i-quote-a-utf-8-string-literal-in-sqlite3
|
||||||
return "CAST(X'%s' AS TEXT)" % binascii.hexlify(value.encode(UNICODE_ENCODING) if isinstance(value, unicode) else value)
|
return "CAST(X'%s' AS TEXT)" % binascii.hexlify(getBytes(value))
|
||||||
|
|
||||||
retVal = expression
|
retVal = expression
|
||||||
|
|
||||||
|
|
61
thirdparty/beautifulsoup/beautifulsoup.py
vendored
61
thirdparty/beautifulsoup/beautifulsoup.py
vendored
|
@ -91,6 +91,11 @@ import sys
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
xrange = range
|
xrange = range
|
||||||
|
text_type = str
|
||||||
|
binary_type = bytes
|
||||||
|
else:
|
||||||
|
text_type = unicode
|
||||||
|
binary_type = str
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from htmlentitydefs import name2codepoint
|
from htmlentitydefs import name2codepoint
|
||||||
|
@ -434,19 +439,13 @@ class PageElement(object):
|
||||||
def toEncoding(self, s, encoding=None):
|
def toEncoding(self, s, encoding=None):
|
||||||
"""Encodes an object to a string in some encoding, or to Unicode.
|
"""Encodes an object to a string in some encoding, or to Unicode.
|
||||||
."""
|
."""
|
||||||
if isinstance(s, unicode):
|
if isinstance(s, text_type):
|
||||||
if encoding:
|
if encoding:
|
||||||
s = s.encode(encoding)
|
s = s.encode(encoding)
|
||||||
elif isinstance(s, str):
|
elif isinstance(s, binary_type):
|
||||||
if encoding:
|
s = s.encode(encoding or "utf8")
|
||||||
s = s.encode(encoding)
|
|
||||||
else:
|
|
||||||
s = unicode(s)
|
|
||||||
else:
|
else:
|
||||||
if encoding:
|
s = self.toEncoding(str(s), encoding or "utf8")
|
||||||
s = self.toEncoding(str(s), encoding)
|
|
||||||
else:
|
|
||||||
s = unicode(s)
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
||||||
|
@ -459,7 +458,7 @@ class PageElement(object):
|
||||||
return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
|
return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
|
||||||
|
|
||||||
|
|
||||||
class NavigableString(unicode, PageElement):
|
class NavigableString(text_type, PageElement):
|
||||||
|
|
||||||
def __new__(cls, value):
|
def __new__(cls, value):
|
||||||
"""Create a new NavigableString.
|
"""Create a new NavigableString.
|
||||||
|
@ -469,9 +468,9 @@ class NavigableString(unicode, PageElement):
|
||||||
passed in to the superclass's __new__ or the superclass won't know
|
passed in to the superclass's __new__ or the superclass won't know
|
||||||
how to handle non-ASCII characters.
|
how to handle non-ASCII characters.
|
||||||
"""
|
"""
|
||||||
if isinstance(value, unicode):
|
if isinstance(value, text_type):
|
||||||
return unicode.__new__(cls, value)
|
return text_type.__new__(cls, value)
|
||||||
return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
|
return text_type.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
|
||||||
|
|
||||||
def __getnewargs__(self):
|
def __getnewargs__(self):
|
||||||
return (NavigableString.__str__(self),)
|
return (NavigableString.__str__(self),)
|
||||||
|
@ -1006,7 +1005,7 @@ class SoupStrainer:
|
||||||
if isinstance(markup, Tag):
|
if isinstance(markup, Tag):
|
||||||
markup = markup.name
|
markup = markup.name
|
||||||
if markup and not isinstance(markup, basestring):
|
if markup and not isinstance(markup, basestring):
|
||||||
markup = unicode(markup)
|
markup = text_type(markup)
|
||||||
#Now we know that chunk is either a string, or None.
|
#Now we know that chunk is either a string, or None.
|
||||||
if hasattr(matchAgainst, 'match'):
|
if hasattr(matchAgainst, 'match'):
|
||||||
# It's a regexp object.
|
# It's a regexp object.
|
||||||
|
@ -1016,8 +1015,8 @@ class SoupStrainer:
|
||||||
elif hasattr(matchAgainst, 'items'):
|
elif hasattr(matchAgainst, 'items'):
|
||||||
result = markup.has_key(matchAgainst)
|
result = markup.has_key(matchAgainst)
|
||||||
elif matchAgainst and isinstance(markup, basestring):
|
elif matchAgainst and isinstance(markup, basestring):
|
||||||
if isinstance(markup, unicode):
|
if isinstance(markup, text_type):
|
||||||
matchAgainst = unicode(matchAgainst)
|
matchAgainst = text_type(matchAgainst)
|
||||||
else:
|
else:
|
||||||
matchAgainst = str(matchAgainst)
|
matchAgainst = str(matchAgainst)
|
||||||
|
|
||||||
|
@ -1181,7 +1180,7 @@ class BeautifulStoneSoup(Tag, sgmllib.SGMLParser):
|
||||||
def _feed(self, inDocumentEncoding=None, isHTML=False):
|
def _feed(self, inDocumentEncoding=None, isHTML=False):
|
||||||
# Convert the document to Unicode.
|
# Convert the document to Unicode.
|
||||||
markup = self.markup
|
markup = self.markup
|
||||||
if isinstance(markup, unicode):
|
if isinstance(markup, text_type):
|
||||||
if not hasattr(self, 'originalEncoding'):
|
if not hasattr(self, 'originalEncoding'):
|
||||||
self.originalEncoding = None
|
self.originalEncoding = None
|
||||||
else:
|
else:
|
||||||
|
@ -1792,9 +1791,9 @@ class UnicodeDammit:
|
||||||
self._detectEncoding(markup, isHTML)
|
self._detectEncoding(markup, isHTML)
|
||||||
self.smartQuotesTo = smartQuotesTo
|
self.smartQuotesTo = smartQuotesTo
|
||||||
self.triedEncodings = []
|
self.triedEncodings = []
|
||||||
if markup == '' or isinstance(markup, unicode):
|
if markup == '' or isinstance(markup, text_type):
|
||||||
self.originalEncoding = None
|
self.originalEncoding = None
|
||||||
self.unicode = unicode(markup)
|
self.unicode = text_type(markup)
|
||||||
return
|
return
|
||||||
|
|
||||||
u = None
|
u = None
|
||||||
|
@ -1807,7 +1806,7 @@ class UnicodeDammit:
|
||||||
if u: break
|
if u: break
|
||||||
|
|
||||||
# If no luck and we have auto-detection library, try that:
|
# If no luck and we have auto-detection library, try that:
|
||||||
if not u and chardet and not isinstance(self.markup, unicode):
|
if not u and chardet and not isinstance(self.markup, text_type):
|
||||||
u = self._convertFrom(chardet.detect(self.markup)['encoding'])
|
u = self._convertFrom(chardet.detect(self.markup)['encoding'])
|
||||||
|
|
||||||
# As a last resort, try utf-8 and windows-1252:
|
# As a last resort, try utf-8 and windows-1252:
|
||||||
|
@ -1880,7 +1879,7 @@ class UnicodeDammit:
|
||||||
elif data[:4] == '\xff\xfe\x00\x00':
|
elif data[:4] == '\xff\xfe\x00\x00':
|
||||||
encoding = 'utf-32le'
|
encoding = 'utf-32le'
|
||||||
data = data[4:]
|
data = data[4:]
|
||||||
newdata = unicode(data, encoding)
|
newdata = text_type(data, encoding)
|
||||||
return newdata
|
return newdata
|
||||||
|
|
||||||
def _detectEncoding(self, xml_data, isHTML=False):
|
def _detectEncoding(self, xml_data, isHTML=False):
|
||||||
|
@ -1893,41 +1892,41 @@ class UnicodeDammit:
|
||||||
elif xml_data[:4] == '\x00\x3c\x00\x3f':
|
elif xml_data[:4] == '\x00\x3c\x00\x3f':
|
||||||
# UTF-16BE
|
# UTF-16BE
|
||||||
sniffed_xml_encoding = 'utf-16be'
|
sniffed_xml_encoding = 'utf-16be'
|
||||||
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
|
xml_data = text_type(xml_data, 'utf-16be').encode('utf-8')
|
||||||
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
|
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
|
||||||
and (xml_data[2:4] != '\x00\x00'):
|
and (xml_data[2:4] != '\x00\x00'):
|
||||||
# UTF-16BE with BOM
|
# UTF-16BE with BOM
|
||||||
sniffed_xml_encoding = 'utf-16be'
|
sniffed_xml_encoding = 'utf-16be'
|
||||||
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
|
xml_data = text_type(xml_data[2:], 'utf-16be').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x3c\x00\x3f\x00':
|
elif xml_data[:4] == '\x3c\x00\x3f\x00':
|
||||||
# UTF-16LE
|
# UTF-16LE
|
||||||
sniffed_xml_encoding = 'utf-16le'
|
sniffed_xml_encoding = 'utf-16le'
|
||||||
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
|
xml_data = text_type(xml_data, 'utf-16le').encode('utf-8')
|
||||||
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
|
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
|
||||||
(xml_data[2:4] != '\x00\x00'):
|
(xml_data[2:4] != '\x00\x00'):
|
||||||
# UTF-16LE with BOM
|
# UTF-16LE with BOM
|
||||||
sniffed_xml_encoding = 'utf-16le'
|
sniffed_xml_encoding = 'utf-16le'
|
||||||
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
|
xml_data = text_type(xml_data[2:], 'utf-16le').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x00\x00\x00\x3c':
|
elif xml_data[:4] == '\x00\x00\x00\x3c':
|
||||||
# UTF-32BE
|
# UTF-32BE
|
||||||
sniffed_xml_encoding = 'utf-32be'
|
sniffed_xml_encoding = 'utf-32be'
|
||||||
xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
|
xml_data = text_type(xml_data, 'utf-32be').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x3c\x00\x00\x00':
|
elif xml_data[:4] == '\x3c\x00\x00\x00':
|
||||||
# UTF-32LE
|
# UTF-32LE
|
||||||
sniffed_xml_encoding = 'utf-32le'
|
sniffed_xml_encoding = 'utf-32le'
|
||||||
xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
|
xml_data = text_type(xml_data, 'utf-32le').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x00\x00\xfe\xff':
|
elif xml_data[:4] == '\x00\x00\xfe\xff':
|
||||||
# UTF-32BE with BOM
|
# UTF-32BE with BOM
|
||||||
sniffed_xml_encoding = 'utf-32be'
|
sniffed_xml_encoding = 'utf-32be'
|
||||||
xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
|
xml_data = text_type(xml_data[4:], 'utf-32be').encode('utf-8')
|
||||||
elif xml_data[:4] == '\xff\xfe\x00\x00':
|
elif xml_data[:4] == '\xff\xfe\x00\x00':
|
||||||
# UTF-32LE with BOM
|
# UTF-32LE with BOM
|
||||||
sniffed_xml_encoding = 'utf-32le'
|
sniffed_xml_encoding = 'utf-32le'
|
||||||
xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
|
xml_data = text_type(xml_data[4:], 'utf-32le').encode('utf-8')
|
||||||
elif xml_data[:3] == '\xef\xbb\xbf':
|
elif xml_data[:3] == '\xef\xbb\xbf':
|
||||||
# UTF-8 with BOM
|
# UTF-8 with BOM
|
||||||
sniffed_xml_encoding = 'utf-8'
|
sniffed_xml_encoding = 'utf-8'
|
||||||
xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
|
xml_data = text_type(xml_data[3:], 'utf-8').encode('utf-8')
|
||||||
else:
|
else:
|
||||||
sniffed_xml_encoding = 'ascii'
|
sniffed_xml_encoding = 'ascii'
|
||||||
pass
|
pass
|
||||||
|
|
1
thirdparty/multipart/multipartpost.py
vendored
1
thirdparty/multipart/multipartpost.py
vendored
|
@ -21,7 +21,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import mimetools
|
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
import stat
|
import stat
|
||||||
|
|
Loading…
Reference in New Issue
Block a user