Added --charset option to force the character encoding of the retrieved data (e.g. when the backend collation differs from the current web page charset), as requested by devon.mitchell1988@yahoo.com

This commit is contained in:
Miroslav Stampar 2011-05-17 22:55:22 +00:00
parent dfe81cc66f
commit cc07e5dc97
5 changed files with 60 additions and 40 deletions

View File

@ -1636,6 +1636,15 @@ def __basicOptionValidation():
errMsg = "value for --union-cols must be a range with hyphon (e.g. 1-10)" errMsg = "value for --union-cols must be a range with hyphon (e.g. 1-10)"
raise sqlmapSyntaxException, errMsg raise sqlmapSyntaxException, errMsg
if conf.charset:
try:
codecs.lookup(conf.charset)
except LookupError:
errMsg = "unknown charset '%s'. please visit page " % conf.charset
errMsg += "'http://docs.python.org/library/codecs.html#standard-encodings' "
errMsg += "to get the full list of supported charsets"
raise sqlmapSyntaxException, errMsg
def init(inputOptions=advancedDict(), overrideOptions=False): def init(inputOptions=advancedDict(), overrideOptions=False):
""" """
Set attributes into both configuration and knowledge base singletons Set attributes into both configuration and knowledge base singletons

View File

@ -150,14 +150,15 @@ optDict = {
"General": { "General": {
#"xmlFile": "string", #"xmlFile": "string",
"trafficFile": "string",
"sessionFile": "string", "sessionFile": "string",
"flushSession": "boolean", "trafficFile": "string",
"freshQueries": "boolean", "batch": "boolean",
"forms": "boolean", "charset": "string",
"eta": "boolean", "eta": "boolean",
"updateAll": "boolean", "flushSession": "boolean",
"batch": "boolean" "forms": "boolean",
"freshQueries": "boolean",
"updateAll": "boolean"
}, },
"Miscellaneous": { "Miscellaneous": {

View File

@ -447,13 +447,25 @@ def cmdLineParser():
#general.add_option("-x", dest="xmlFile", #general.add_option("-x", dest="xmlFile",
# help="Dump the data into an XML file") # help="Dump the data into an XML file")
general.add_option("-s", dest="sessionFile",
help="Save and resume all data retrieved "
"on a session file")
general.add_option("-t", dest="trafficFile", general.add_option("-t", dest="trafficFile",
help="Log all HTTP traffic into a " help="Log all HTTP traffic into a "
"textual file") "textual file")
general.add_option("-s", dest="sessionFile", general.add_option("--batch", dest="batch",
help="Save and resume all data retrieved " action="store_true", default=False,
"on a session file") help="Never ask for user input, use the default behaviour")
general.add_option("--charset", dest="charset",
help="Force character encoding used for data retrieval")
general.add_option("--eta", dest="eta",
action="store_true", default=False,
help="Display for each output the "
"estimated time of arrival")
general.add_option("--flush-session", dest="flushSession", general.add_option("--flush-session", dest="flushSession",
action="store_true", default=False, action="store_true", default=False,
@ -463,22 +475,13 @@ def cmdLineParser():
action="store_true", default=False, action="store_true", default=False,
help="Ignores query results stored in session file") help="Ignores query results stored in session file")
general.add_option("--eta", dest="eta",
action="store_true", default=False,
help="Display for each output the "
"estimated time of arrival")
general.add_option("--update", dest="updateAll",
action="store_true", default=False,
help="Update sqlmap")
general.add_option("--save", dest="saveCmdline", general.add_option("--save", dest="saveCmdline",
action="store_true", default=False, action="store_true", default=False,
help="Save options on a configuration INI file") help="Save options on a configuration INI file")
general.add_option("--batch", dest="batch", general.add_option("--update", dest="updateAll",
action="store_true", default=False, action="store_true", default=False,
help="Never ask for user input, use the default behaviour") help="Update sqlmap")
# Miscellaneous options # Miscellaneous options
miscellaneous = OptionGroup(parser, "Miscellaneous") miscellaneous = OptionGroup(parser, "Miscellaneous")

View File

@ -134,6 +134,7 @@ def checkCharEncoding(encoding):
return None return None
# http://www.iana.org/assignments/character-sets # http://www.iana.org/assignments/character-sets
# http://docs.python.org/library/codecs.html
try: try:
codecs.lookup(encoding) codecs.lookup(encoding)
except LookupError: except LookupError:
@ -173,19 +174,22 @@ def decodePage(page, contentEncoding, contentType):
page = data.read() page = data.read()
httpCharset, metaCharset = None, None if not conf.charset:
httpCharset, metaCharset = None, None
# http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
if contentType and (contentType.find('charset=') != -1): if contentType and (contentType.find('charset=') != -1):
httpCharset = checkCharEncoding(contentType.split('charset=')[-1]) httpCharset = checkCharEncoding(contentType.split('charset=')[-1])
metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE)) metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))
if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset]))\ if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset]))\
or (httpCharset == metaCharset and all([httpCharset, metaCharset])): or (httpCharset == metaCharset and all([httpCharset, metaCharset])):
kb.pageEncoding = httpCharset or metaCharset kb.pageEncoding = httpCharset or metaCharset
else:
kb.pageEncoding = None
else: else:
kb.pageEncoding = None kb.pageEncoding = conf.charset
if contentType and any(map(lambda x: x in contentType.lower(), ('text/txt', 'text/raw', 'text/html', 'text/xml'))): if contentType and any(map(lambda x: x in contentType.lower(), ('text/txt', 'text/raw', 'text/html', 'text/xml'))):
# can't do for all responses because we need to support binary files too # can't do for all responses because we need to support binary files too

View File

@ -493,11 +493,23 @@ regType =
# These options can be used to set some general working parameters. # These options can be used to set some general working parameters.
[General] [General]
# Save and resume all data retrieved on a session file.
sessionFile =
# Log all HTTP traffic into a textual file. # Log all HTTP traffic into a textual file.
trafficFile = trafficFile =
# Save and resume all data retrieved on a session file. # Never ask for user input, use the default behaviour.
sessionFile = # Valid: True or False
batch = False
# Force character encoding used for data retrieval.
charset =
# Retrieve each query output length and calculate the estimated time of
# arrival in real time.
# Valid: True or False
eta = False
# Flush session file for current target. # Flush session file for current target.
# Valid: True or False # Valid: True or False
@ -507,19 +519,10 @@ flushSession = False
# Valid: True or False # Valid: True or False
freshQueries = False freshQueries = False
# Retrieve each query output length and calculate the estimated time of
# arrival in real time.
# Valid: True or False
eta = False
# Update sqlmap. # Update sqlmap.
# Valid: True or False # Valid: True or False
updateAll = False updateAll = False
# Never ask for user input, use the default behaviour.
# Valid: True or False
batch = False
[Miscellaneous] [Miscellaneous]