sqlmap/lib/core/convert.py

341 lines
9.2 KiB
Python
Raw Normal View History

2019-05-08 13:47:52 +03:00
#!/usr/bin/env python
2008-10-15 19:38:22 +04:00
"""
2019-01-05 23:38:52 +03:00
Copyright (c) 2006-2019 sqlmap developers (http://sqlmap.org/)
2017-10-11 15:50:46 +03:00
See the file 'LICENSE' for copying permission
2008-10-15 19:38:22 +04:00
"""
2016-10-26 23:33:04 +03:00
try:
2016-10-29 01:13:04 +03:00
import cPickle as pickle
2016-10-26 23:33:04 +03:00
except:
2016-10-29 01:13:04 +03:00
import pickle
2016-10-26 23:33:04 +03:00
2015-01-15 19:32:07 +03:00
import base64
2019-05-08 13:28:50 +03:00
import binascii
2019-05-03 14:20:15 +03:00
import codecs
import json
2016-01-08 13:47:12 +03:00
import re
import sys
2008-10-15 19:38:22 +04:00
2019-05-06 01:54:21 +03:00
from lib.core.data import conf
from lib.core.data import kb
2019-05-03 14:20:15 +03:00
from lib.core.settings import INVALID_UNICODE_PRIVATE_AREA
2012-07-31 13:03:44 +04:00
from lib.core.settings import IS_WIN
2019-05-06 01:54:21 +03:00
from lib.core.settings import NULL
from lib.core.settings import PICKLE_PROTOCOL
2019-05-03 14:20:15 +03:00
from lib.core.settings import SAFE_HEX_MARKER
2011-03-03 13:39:04 +03:00
from lib.core.settings import UNICODE_ENCODING
from thirdparty import six
2019-05-15 11:57:22 +03:00
from thirdparty.six import unichr as _unichr
def base64pickle(value):
    """
    Serializes (with pickle) and encodes to Base64 format supplied (binary) value

    Serialization is attempted in three steps, each one used only if the
    previous one failed:
        1) pickle with PICKLE_PROTOCOL
        2) pickle with the default protocol (after emitting a warning)
        3) pickle of the string representation of the value

    >>> base64unpickle(base64pickle([1, 2, 3])) == [1, 2, 3]
    True
    """

    retVal = None

    try:
        retVal = encodeBase64(pickle.dumps(value, PICKLE_PROTOCOL))
    except Exception:  # Note: not a bare except, so that KeyboardInterrupt/SystemExit still propagate
        warnMsg = "problem occurred while serializing "
        warnMsg += "instance of a type '%s'" % type(value)
        singleTimeWarnMessage(warnMsg)

        try:
            retVal = encodeBase64(pickle.dumps(value))
        except Exception:
            # Last resort: give up on the object itself and store its textual form
            retVal = encodeBase64(pickle.dumps(str(value), PICKLE_PROTOCOL))

    return retVal
2019-03-27 03:28:34 +03:00
def base64unpickle(value):
    """
    Decodes value from Base64 to plain format and deserializes (with pickle) its content

    Note: unpickling can execute arbitrary code, hence only trusted data
    should ever be passed to this function

    >>> type(base64unpickle('gAJjX19idWlsdGluX18Kb2JqZWN0CnEBKYFxAi4=')) == object
    True
    """

    try:
        return pickle.loads(decodeBase64(value))
    except TypeError:
        # Second attempt with the value explicitly converted to bytes
        return pickle.loads(decodeBase64(bytes(value)))
def htmlunescape(value):
    """
    Returns (basic conversion) HTML unescaped value

    Handles the named entities lt, gt, quot, nbsp, amp and apos together with
    hexadecimal character references (&#x..;). Everything is decoded in a
    single pass, so that already decoded output is never decoded again
    (e.g. '&amp;lt;' results in '&lt;', not in '<'). Non-string and empty
    values are returned as they are.

    >>> htmlunescape('a&lt;b')
    'a<b'
    >>> htmlunescape('&amp;lt;')
    '&lt;'
    """

    if not (value and isinstance(value, six.string_types)):
        return value

    entities = {"lt": '<', "gt": '>', "quot": '"', "nbsp": ' ', "amp": '&', "apos": "'"}

    def _named(match):
        return entities[match.group(1)]

    def _any(match):
        if match.group(1):
            return entities[match.group(1)]

        try:
            return _unichr(int(match.group(2), 16))
        except (ValueError, OverflowError):
            # Code point outside of the supported range - leave that single reference untouched
            return match.group(0)

    try:
        return re.sub(r"&(?:(lt|gt|quot|nbsp|amp|apos)|#x([0-9a-fA-F]+));", _any, value)
    except ValueError:
        # Note: presumably reachable only on Python 2, where joining a non-ASCII byte string with a
        # decoded Unicode character raises UnicodeDecodeError (subclass of ValueError). In that case
        # only the named entities are converted
        return re.sub(r"&(lt|gt|quot|nbsp|amp|apos);", _named, value)
2012-07-31 13:03:44 +04:00
2018-03-21 16:29:54 +03:00
def singleTimeWarnMessage(message): # Cross-referenced function
    """
    Writes the given message followed by a newline to the standard output and flushes it

    Note: this version performs no bookkeeping of already shown messages
    """

    stream = sys.stdout

    for chunk in (message, "\n"):
        stream.write(chunk)

    stream.flush()
2012-07-31 13:03:44 +04:00
2019-05-06 01:54:21 +03:00
def filterNone(values): # Cross-referenced function
    # Placeholder: always raises NotImplementedError. The working implementation is
    # presumably provided elsewhere in the project (see the "cross-referenced" marker) - TODO confirm
    raise NotImplementedError
def isListLike(value): # Cross-referenced function
    # Placeholder: always raises NotImplementedError. The working implementation is
    # presumably provided elsewhere in the project (see the "cross-referenced" marker) - TODO confirm
    raise NotImplementedError
2012-07-31 13:03:44 +04:00
def stdoutencode(data):
    """
    Returns byte representation of a given value suitable for the standard output

    On Python 2 the value is encoded with the encoding of sys.stdout and, if
    that fails for any reason, with the default encoding of getBytes(). On
    Python 3 the value is returned as it is.
    """

    retVal = data

    if six.PY2:
        text = data or ""

        try:
            retVal = getBytes(text, sys.stdout.encoding, unsafe=False)

            # Reference: http://bugs.python.org/issue1602
            if IS_WIN:
                # Warn only if '?' has been introduced by the encoding itself (i.e. it is not part of the original text)
                # NOTE(review): getBytes() is called here with strict error handling, hence unencodable
                # characters probably end up in the fallback below rather than as '?' - confirm
                # whether errors="replace" has been intended
                if '?' in retVal and '?' not in text:
                    warnMsg = "cannot properly display Unicode characters "
                    warnMsg += "inside Windows OS command prompt "
                    warnMsg += "(http://bugs.python.org/issue1602). All "
                    warnMsg += "unhandled occurrences will result in "
                    warnMsg += "replacement with '?' character. Please, find "
                    warnMsg += "proper character representation inside "
                    warnMsg += "corresponding output files. "
                    singleTimeWarnMessage(warnMsg)

        except Exception:  # Note: not a bare except, so that KeyboardInterrupt/SystemExit still propagate
            retVal = getBytes(text, unsafe=False)

    return retVal
def jsonize(data):
    """
    Returns JSON serialized data (keys in original order, indentation of 4 spaces)

    >>> jsonize({'foo':'bar'})
    '{\\n    "foo": "bar"\\n}'
    """

    options = {"sort_keys": False, "indent": 4}

    return json.dumps(data, **options)
def dejsonize(data):
    """
    Returns data deserialized from its JSON representation

    >>> dejsonize('{\\n    "foo": "bar"\\n}') == {u'foo': u'bar'}
    True
    """

    retVal = json.loads(data)

    return retVal
2019-05-03 14:20:15 +03:00
def decodeHex(value, binary=True):
    """
    Returns a decoded representation of provided hexadecimal value

    Optional "0x" prefix (case insensitive) is ignored. Result is left in
    binary form unless binary=False is used, in which case it is converted
    with getText()

    >>> decodeHex("313233") == b"123"
    True
    >>> decodeHex("313233", binary=False) == u"123"
    True
    """

    digits = getText(value) if isinstance(value, six.binary_type) else value

    if digits.lower().startswith("0x"):
        digits = digits[2:]

    try:
        decoded = codecs.decode(digits, "hex")
    except LookupError:
        # "hex" codec is not available - fall back to binascii
        decoded = binascii.unhexlify(digits)

    return decoded if binary else getText(decoded)
def encodeHex(value, binary=True):
    """
    Returns an encoded (hexadecimal) representation of provided string value

    Unicode input is first encoded with UNICODE_ENCODING. Result is left in
    binary form unless binary=False is used, in which case it is converted
    with getText()

    >>> encodeHex(b"123") == b"313233"
    True
    >>> encodeHex("123", binary=False)
    '313233'
    """

    raw = value.encode(UNICODE_ENCODING) if isinstance(value, six.text_type) else value

    try:
        encoded = codecs.encode(raw, "hex")
    except LookupError:
        # "hex" codec is not available - fall back to binascii
        encoded = binascii.hexlify(raw)

    return encoded if binary else getText(encoded)
def decodeBase64(value, binary=True):
    """
    Returns a decoded representation of provided Base64 value

    Result is left in binary form unless binary=False is used, in which case
    it is converted with getText()

    >>> decodeBase64("MTIz") == b"123"
    True
    >>> decodeBase64("MTIz", binary=False)
    '123'
    """

    decoded = base64.b64decode(value)

    return decoded if binary else getText(decoded)
def encodeBase64(value, binary=True):
    """
    Returns a Base64 encoded representation of provided value

    Unicode input is first encoded with UNICODE_ENCODING. Result is left in
    binary form unless binary=False is used, in which case it is converted
    with getText()

    >>> encodeBase64(b"123") == b"MTIz"
    True
    >>> encodeBase64(u"123", binary=False)
    'MTIz'
    """

    raw = value.encode(UNICODE_ENCODING) if isinstance(value, six.text_type) else value
    encoded = base64.b64encode(raw)

    return encoded if binary else getText(encoded)
def getBytes(value, encoding=UNICODE_ENCODING, errors="strict", unsafe=True):
    """
    Returns byte representation of provided Unicode value

    Values that are not Unicode strings are returned as they are. With
    unsafe=True, escaped bytes carried inside of the text (code points
    0xF0000-0xF00FF if INVALID_UNICODE_PRIVATE_AREA is set, literal \\\\xNN
    sequences otherwise) are turned back into the raw bytes they stand for.

    >>> getBytes(u"foo\\\\x01\\\\x83\\\\xffbar") == b"foo\\x01\\x83\\xffbar"
    True
    """

    retVal = value

    if isinstance(value, six.text_type):
        if INVALID_UNICODE_PRIVATE_AREA:
            if unsafe:
                # Note: range() (not xrange()) as only 256 values are involved and it exists on both Python 2 and 3
                for char in range(0xF0000, 0xF00FF + 1):
                    value = value.replace(_unichr(char), "%s%02x" % (SAFE_HEX_MARKER, char - 0xF0000))

            retVal = value.encode(encoding, errors)

            if unsafe:
                pattern = r"%s([0-9a-f]{2})" % SAFE_HEX_MARKER

                # Python 3 refuses to apply a text pattern to a bytes object
                # NOTE(review): assumes that the marker is encoded identically by UNICODE_ENCODING and the used encoding - confirm
                if isinstance(pattern, six.text_type) and isinstance(retVal, six.binary_type):
                    pattern = pattern.encode(UNICODE_ENCODING)

                retVal = re.sub(pattern, lambda _: decodeHex(_.group(1)), retVal)
        else:
            retVal = value.encode(encoding, errors)

            if unsafe:
                retVal = re.sub(b"\\\\x([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), retVal)

    return retVal
def getOrds(value):
    """
    Returns ORD(...) representation of provided string value

    Items that are already integers (e.g. while iterating over bytes on
    Python 3) are taken as they are

    >>> getOrds(u'fo\\xf6bar')
    [102, 111, 246, 98, 97, 114]
    >>> getOrds(b"fo\\xc3\\xb6bar")
    [102, 111, 195, 182, 98, 97, 114]
    """

    retVal = []

    for item in value:
        if isinstance(item, int):
            retVal.append(item)
        else:
            retVal.append(ord(item))

    return retVal
2019-05-06 01:54:21 +03:00
def getUnicode(value, encoding=None, noneToNull=False):
    """
    Return the unicode representation of the supplied value:

    Unicode values are returned as they are, binary values are decoded by
    trying a list of candidate encodings, list-like values are converted item
    by item (resulting in a list), while everything else goes through its
    text representation. With noneToNull=True, None is turned into NULL.

    >>> getUnicode('test') == u'test'
    True
    >>> getUnicode(1) == u'1'
    True
    """

    if noneToNull and value is None:
        return NULL

    if isinstance(value, six.text_type):
        return value
    elif isinstance(value, six.binary_type):
        pageEncoding = kb.get("pageEncoding") if kb.get("originalPage") else None
        confEncoding = conf.get("encoding")
        fsEncoding = sys.getfilesystemencoding()

        # Heuristics (if encoding not explicitly specified)
        if all(_ in value for _ in (b'<', b'>')):
            # Contains both '<' and '>' - default order
            order = (encoding, pageEncoding, confEncoding, UNICODE_ENCODING, fsEncoding)
        elif any(_ in value for _ in (b":\\", b'/', b'.')) and b'\n' not in value:
            # Single line value with path-like characters - filesystem encoding gets tried earlier
            order = (encoding, fsEncoding, pageEncoding, UNICODE_ENCODING, confEncoding)
        elif confEncoding and b'\n' not in value:
            order = (encoding, confEncoding, pageEncoding, fsEncoding, UNICODE_ENCODING)
        else:
            order = (encoding, pageEncoding, confEncoding, UNICODE_ENCODING, fsEncoding)

        for candidate in filterNone(order):
            try:
                return six.text_type(value, candidate)
            except (UnicodeDecodeError, LookupError):
                # Note: LookupError stands for an unknown codec name, which should not abort the whole conversion
                pass

        try:
            return six.text_type(value, encoding or pageEncoding or UNICODE_ENCODING)
        except (UnicodeDecodeError, LookupError):
            # NOTE(review): "reversible" is not a built-in error handler - presumably registered elsewhere with codecs.register_error(); confirm
            return six.text_type(value, UNICODE_ENCODING, errors="reversible")
    elif isListLike(value):
        return [getUnicode(_, encoding, noneToNull) for _ in value]
    else:
        try:
            return six.text_type(value)
        except UnicodeDecodeError:
            return six.text_type(str(value), errors="ignore")  # encoding ignored for non-basestring instances
2019-05-03 14:20:15 +03:00
def getText(value):
    """
    Returns textual value of a given value (Note: not necessary Unicode on Python2)

    Binary values are decoded with getUnicode(). On Python 2 the result is
    additionally converted to the native str type whenever that is possible
    (otherwise it is left as it is).

    >>> getText(b"foobar")
    'foobar'
    >>> isinstance(getText(u"fo\\u2299bar"), six.text_type)
    True
    """

    retVal = value

    if isinstance(value, six.binary_type):
        retVal = getUnicode(value)

    if six.PY2:
        try:
            retVal = str(retVal)
        except Exception:  # Note: best effort (e.g. non-ASCII characters); not a bare except, so that KeyboardInterrupt/SystemExit still propagate
            pass

    return retVal