#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2011 sqlmap developers (http://sqlmap.sourceforge.net/)
See the file 'doc/COPYING' for copying permission
"""

import codecs
import gzip
import logging
import os
import re
import StringIO
import zlib

from extra.chardet import detect
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import getCompiledRegex
from lib.core.common import getUnicode
from lib.core.common import isWindowsDriveLetterPath
from lib.core.common import posixToNtSlashes
from lib.core.common import sanitizeAsciiString
from lib.core.common import singleTimeLogMessage
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.settings import ML
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser

def forgeHeaders(cookie, ua, referer):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests
    """

    headers = {}

    for header, value in conf.httpHeaders:
        if cookie and header == "Cookie":
            headers[header] = cookie
        elif ua and header == "User-Agent":
            headers[header] = ua
        elif referer and header == "Referer":
            headers[header] = referer
        else:
            headers[header] = value

    if kb.redirectSetCookie:
        if "Cookie" in headers:
            headers["Cookie"] = "%s; %s" % (headers["Cookie"], kb.redirectSetCookie)
        else:
            headers["Cookie"] = kb.redirectSetCookie

    return headers
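
# A minimal usage sketch (hypothetical values; conf.httpHeaders is assumed to
# have been populated during option initialization). Note that only header
# names already present in conf.httpHeaders end up in the result:
#
#   conf.httpHeaders = [("User-Agent", "sqlmap"), ("Cookie", ""), ("Referer", "")]
#   forgeHeaders("PHPSESSID=abc123", "Mozilla/5.0", "http://www.example.com/")
#   # -> {'User-Agent': 'Mozilla/5.0', 'Cookie': 'PHPSESSID=abc123',
#   #     'Referer': 'http://www.example.com/'}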

def parseResponse(page, headers):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages returned
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page)

    # Detect injectable page absolute system path
    # NOTE: this regular expression works if the remote web
    # application is written in PHP and debug/error messages are
    # enabled
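    # e.g. (hypothetical samples) " in <b>/var/www/html/get.php</b> on line 42"
    # yields "/var/www/html/get.php" via the first pattern below, while bare
    # "C:\inetpub\wwwroot\page.asp" and "/var/log/apache2/error.log" tokens
    # are caught by the second (Windows path) and third (POSIX path) patterns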
    for regex in (r" in <b>(?P<result>.*?)</b> on line", r"(?:>|\s)(?P<result>[A-Za-z]:[\\/][\w.\\/]*)", r"(?:>|\s)(?P<result>/\w[/\w.]+)"):
        regObj = getCompiledRegex(regex)

        for match in regObj.finditer(page):
            absFilePath = match.group("result").strip()
            page = page.replace(absFilePath, "")

            if isWindowsDriveLetterPath(absFilePath):
                absFilePath = posixToNtSlashes(absFilePath)

            if absFilePath not in kb.absFilePaths:
                kb.absFilePaths.add(absFilePath)
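
# Hypothetical effect on the knowledge base: after parsing a PHP error page
# containing "Warning: ... in <b>/var/www/html/get.php</b> on line 42",
# kb.absFilePaths would contain "/var/www/html/get.php" (Windows drive letter
# paths are additionally normalized to NT-style backslashes)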

def checkCharEncoding(encoding):
    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # http://www.destructor.de/charsets/index.htm
    translate = {'windows-874': 'iso-8859-11', 'en_us': 'utf8', 'macintosh': 'iso-8859-1'}

    for delimiter in (';', ','):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)]

    # popular typos/errors
    if '8858' in encoding:
        encoding = encoding.replace('8858', '8859') # iso-8858 -> iso-8859
    elif '5889' in encoding:
        encoding = encoding.replace('5889', '8859') # iso-5889 -> iso-8859
    elif '2313' in encoding:
        encoding = encoding.replace('2313', '2312') # gb2313 -> gb2312

    # name adjustment for compatibility
    if encoding.startswith('8859'):
        encoding = 'iso-%s' % encoding
    elif encoding.startswith('cp-'):
        encoding = 'cp%s' % encoding[3:]
    elif encoding.startswith('windows') and not encoding.startswith('windows-'):
        encoding = 'windows-%s' % encoding[7:]
    elif encoding.find('iso-88') > 0:
        encoding = encoding[encoding.find('iso-88'):]

    # http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding == 'null':
        return None

    # http://www.iana.org/assignments/character-sets
    try:
        codecs.lookup(encoding)
    except LookupError:
        warnMsg = "unknown web page charset '%s'. " % encoding
        warnMsg += "Please report by e-mail to %s." % ML
        singleTimeLogMessage(warnMsg, logging.WARN, encoding)
        encoding = None

    return encoding
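
# Illustrative (hypothetical) inputs and their normalized return values:
#   checkCharEncoding("UTF-8;q=0.9") -> "utf-8"       (trailing parameter stripped)
#   checkCharEncoding("8859-1")      -> "iso-8859-1"  (missing "iso-" prefix added)
#   checkCharEncoding("cp-1251")     -> "cp1251"
#   checkCharEncoding("windows1251") -> "windows-1251"
#   checkCharEncoding("foobar")      -> None          (unknown to codecs; warned once)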

def getHeuristicCharEncoding(page):
    """
    Returns the web page charset detected by heuristics
    Reference: http://chardet.feedparser.org/docs/
    """

    retVal = detect(page)['encoding']

    infoMsg = "heuristics detected web page charset '%s'" % retVal
    singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal
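
# A minimal sketch of the detect() call above (values are hypothetical; the
# result is a dictionary of which only the 'encoding' key is used here):
#
#   detect('\xc3\xa9\xc3\xa8\xc3\xaa' * 20)
#   # -> {'encoding': 'utf-8', 'confidence': 0.93...}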

def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ('gzip', 'x-gzip', 'deflate'):
        if contentEncoding.lower() == 'deflate':
            # http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
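            # NOTE: the -15 window size makes zlib expect a raw deflate stream
            # (without zlib header and checksum), which is what web servers
            # commonly send when advertising "Content-Encoding: deflate"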
            data = StringIO.StringIO(zlib.decompress(page, -15))
        else:
            data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(page))

        page = data.read()

    httpCharset, metaCharset = None, None

    # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
    if contentType and (contentType.find('charset=') != -1):
        httpCharset = checkCharEncoding(contentType.split('charset=')[-1])

    metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))
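
    # Trust a detected charset only when the HTTP header and the HTML <meta>
    # declaration do not contradict each other: either exactly one of the two
    # is present, or both are present and agree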
    if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset])) \
            or (httpCharset == metaCharset and all([httpCharset, metaCharset])):
        kb.pageEncoding = httpCharset or metaCharset
    else:
        kb.pageEncoding = None

    if contentType and any(map(lambda x: x in contentType.lower(), ('text/txt', 'text/raw', 'text/html', 'text/xml'))):
        # can't do for all responses because we need to support binary files too
        kb.pageEncoding = kb.pageEncoding or getHeuristicCharEncoding(page)

    page = getUnicode(page, kb.pageEncoding)

    return page
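
# A minimal usage sketch (hypothetical values):
#
#   page = decodePage(rawBody, "gzip", "text/html; charset=utf-8")
#   # gunzips rawBody, sets kb.pageEncoding to "utf-8" (assuming the page
#   # contains no conflicting <meta> charset) and returns a unicode object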

def processResponse(page, responseHeaders):
    parseResponse(page, responseHeaders)

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.info("parsed error message: '%s'" % msg)

    return page