From cc07e5dc971c2d93ccebd7a2bae83437ff58f9b0 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Tue, 17 May 2011 22:55:22 +0000 Subject: [PATCH] =?UTF-8?q?added=20--charset=20option=20to=20force=20chars?= =?UTF-8?q?et=20encoding=20of=20the=20retrieved=20data=20(e.g.=20when=20th?= =?UTF-8?q?e=20backend=20collation=20is=20different=20than=20the=20current?= =?UTF-8?q?=20web=20page=20charset)=20as=20requested=20by=20devon.mitchell?= =?UTF-8?q?1988@y=E2=80=8Bahoo.com?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/core/option.py | 9 +++++++++ lib/core/optiondict.py | 13 +++++++------ lib/parse/cmdline.py | 31 +++++++++++++++++-------------- lib/request/basic.py | 22 +++++++++++++--------- sqlmap.conf | 25 ++++++++++++++----------- 5 files changed, 60 insertions(+), 40 deletions(-) diff --git a/lib/core/option.py b/lib/core/option.py index d78dd75f4..7726f056e 100644 --- a/lib/core/option.py +++ b/lib/core/option.py @@ -1636,6 +1636,15 @@ def __basicOptionValidation(): errMsg = "value for --union-cols must be a range with hyphon (e.g. 1-10)" raise sqlmapSyntaxException, errMsg + if conf.charset: + try: + codecs.lookup(conf.charset) + except LookupError: + errMsg = "unknown charset '%s'. please visit page " % conf.charset + errMsg += "'http://docs.python.org/library/codecs.html#standard-encodings' " + errMsg += "to get the full list of supported charsets" + raise sqlmapSyntaxException, errMsg + def init(inputOptions=advancedDict(), overrideOptions=False): """ Set attributes into both configuration and knowledge base singletons diff --git a/lib/core/optiondict.py b/lib/core/optiondict.py index d0196e2c1..6d1259808 100644 --- a/lib/core/optiondict.py +++ b/lib/core/optiondict.py @@ -150,14 +150,15 @@ optDict = { "General": { #"xmlFile": "string", - "trafficFile": "string", "sessionFile": "string", - "flushSession": "boolean", - "freshQueries": "boolean", - "forms": "boolean", + "trafficFile": "string", + "batch": "boolean", + "charset": "string", "eta": "boolean", - "updateAll": "boolean", - "batch": "boolean" + "flushSession": "boolean", + "forms": "boolean", + "freshQueries": "boolean", + "updateAll": "boolean" }, "Miscellaneous": { diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index e37b3f6e0..6722e9a9a 100644 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -447,13 +447,25 @@ def cmdLineParser(): #general.add_option("-x", dest="xmlFile", # help="Dump the data into an XML file") + general.add_option("-s", dest="sessionFile", + help="Save and resume all data retrieved " + "on a session file") + general.add_option("-t", dest="trafficFile", help="Log all HTTP traffic into a " "textual file") - general.add_option("-s", dest="sessionFile", - help="Save and resume all data retrieved " - "on a session file") + general.add_option("--batch", dest="batch", + action="store_true", default=False, + help="Never ask for user input, use the default behaviour") + + general.add_option("--charset", dest="charset", + help="Force character encoding used for data retrieval") + + general.add_option("--eta", dest="eta", + action="store_true", default=False, + help="Display for each output the " + "estimated time of arrival") general.add_option("--flush-session", dest="flushSession", action="store_true", default=False, @@ -463,22 +475,13 @@ def cmdLineParser(): action="store_true", default=False, help="Ignores query results stored in session file") - general.add_option("--eta", dest="eta", - action="store_true", default=False, - help="Display for each output the " - "estimated time of arrival") - - general.add_option("--update", dest="updateAll", - action="store_true", default=False, - help="Update sqlmap") - general.add_option("--save", dest="saveCmdline", action="store_true", default=False, help="Save options on a configuration INI file") - general.add_option("--batch", dest="batch", + general.add_option("--update", dest="updateAll", action="store_true", default=False, - help="Never ask for user input, use the default behaviour") + help="Update sqlmap") # Miscellaneous options miscellaneous = OptionGroup(parser, "Miscellaneous") diff --git a/lib/request/basic.py b/lib/request/basic.py index 842cda39a..2910c0a3f 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -134,6 +134,7 @@ def checkCharEncoding(encoding): return None # http://www.iana.org/assignments/character-sets + # http://docs.python.org/library/codecs.html try: codecs.lookup(encoding) except LookupError: @@ -173,19 +174,22 @@ def decodePage(page, contentEncoding, contentType): page = data.read() - httpCharset, metaCharset = None, None + if not conf.charset: + httpCharset, metaCharset = None, None - # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode - if contentType and (contentType.find('charset=') != -1): - httpCharset = checkCharEncoding(contentType.split('charset=')[-1]) + # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode + if contentType and (contentType.find('charset=') != -1): + httpCharset = checkCharEncoding(contentType.split('charset=')[-1]) - metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE)) + metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE)) - if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset]))\ - or (httpCharset == metaCharset and all([httpCharset, metaCharset])): - kb.pageEncoding = httpCharset or metaCharset + if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset]))\ + or (httpCharset == metaCharset and all([httpCharset, metaCharset])): + kb.pageEncoding = httpCharset or metaCharset + else: + kb.pageEncoding = None else: - kb.pageEncoding = None + kb.pageEncoding = conf.charset if contentType and any(map(lambda x: x in contentType.lower(), ('text/txt', 'text/raw', 'text/html', 'text/xml'))): # can't do for all responses because we need to support binary files too diff --git a/sqlmap.conf b/sqlmap.conf index 0aee4dc03..c000acf58 100644 --- a/sqlmap.conf +++ b/sqlmap.conf @@ -493,11 +493,23 @@ regType = # These options can be used to set some general working parameters. [General] +# Save and resume all data retrieved on a session file. +sessionFile = + # Log all HTTP traffic into a textual file. trafficFile = -# Save and resume all data retrieved on a session file. -sessionFile = +# Never ask for user input, use the default behaviour. +# Valid: True or False +batch = False + +# Force character encoding used for data retrieval. +charset = + +# Retrieve each query output length and calculate the estimated time of +# arrival in real time. +# Valid: True or False +eta = False # Flush session file for current target. # Valid: True or False @@ -507,19 +519,10 @@ flushSession = False # Valid: True or False freshQueries = False -# Retrieve each query output length and calculate the estimated time of -# arrival in real time. -# Valid: True or False -eta = False - # Update sqlmap. # Valid: True or False updateAll = False -# Never ask for user input, use the default behaviour. -# Valid: True or False -batch = False - [Miscellaneous]