sqlmap/lib/request/basic.py

206 lines
7.0 KiB
Python
Raw Normal View History

2008-10-15 19:38:22 +04:00
#!/usr/bin/env python
"""
2008-10-15 19:56:32 +04:00
$Id$
2008-10-15 19:38:22 +04:00
2011-07-08 00:10:03 +04:00
Copyright (c) 2006-2011 sqlmap developers (http://www.sqlmap.org/)
2010-10-15 03:18:29 +04:00
See the file 'doc/COPYING' for copying permission
2008-10-15 19:38:22 +04:00
"""
2010-06-30 16:09:33 +04:00
import codecs
import gzip
2011-04-20 02:54:13 +04:00
import logging
import os
2008-10-15 19:38:22 +04:00
import re
import StringIO
import zlib
2008-10-15 19:38:22 +04:00
from extra.chardet import detect
from lib.core.common import extractErrorMessage
from lib.core.common import extractRegexResult
from lib.core.common import getUnicode
from lib.core.common import isWindowsDriveLetterPath
from lib.core.common import posixToNtSlashes
from lib.core.common import sanitizeAsciiString
2011-06-08 18:42:48 +04:00
from lib.core.common import singleTimeLogMessage
2008-10-15 19:38:22 +04:00
from lib.core.data import conf
from lib.core.data import kb
2010-06-30 16:09:33 +04:00
from lib.core.data import logger
2011-11-29 23:17:07 +04:00
from lib.core.enums import HTTPHEADER
from lib.core.exception import sqlmapDataException
2011-04-01 20:40:28 +04:00
from lib.core.settings import ML
from lib.core.settings import META_CHARSET_REGEX
2011-11-22 16:18:24 +04:00
from lib.core.settings import PARSE_HEADERS_LIMIT
2011-01-30 14:36:03 +03:00
from lib.core.settings import UNICODE_ENCODING
from lib.parse.headers import headersParser
2008-10-15 19:38:22 +04:00
from lib.parse.html import htmlParser
2011-02-12 02:07:03 +03:00
def forgeHeaders(cookie, ua, referer):
    """
    Build the dictionary of HTTP headers to send with a request

    Starts from the (header, value) pairs in conf.httpHeaders and, for
    the Cookie, User-Agent and Referer headers, substitutes the value
    passed by the caller whenever that value is non-empty. When a
    cookie set during a redirect is stored in kb.redirectSetCookie
    (and conf.dropSetCookie is not set) it is appended to the Cookie
    header, or used as the Cookie header if there is none.

    @param cookie: value overriding the configured Cookie header
    @param ua: value overriding the configured User-Agent header
    @param referer: value overriding the configured Referer header
    @return: dictionary mapping header names to header values
    """

    overrides = {
        HTTPHEADER.COOKIE: cookie,
        HTTPHEADER.USER_AGENT: ua,
        HTTPHEADER.REFERER: referer,
    }

    forged = {}

    for name, configured in conf.httpHeaders:
        # an empty override (None, '') keeps the configured value
        forged[name] = overrides.get(name) or configured

    if not kb.redirectSetCookie or conf.dropSetCookie:
        return forged

    if HTTPHEADER.COOKIE in forged:
        forged[HTTPHEADER.COOKIE] = "%s; %s" % (forged[HTTPHEADER.COOKIE], kb.redirectSetCookie)
    else:
        forged[HTTPHEADER.COOKIE] = kb.redirectSetCookie

    return forged
def parseResponse(page, headers):
    """
    Hand the HTTP response over to the headers and HTML parsers

    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages return
    through the web application) list and absFilePaths (absolute file
    paths) set.
    @param headers: response headers passed to headersParser(); skipped
    when empty
    """

    # headers are always parsed before the page body
    for parser, payload in ((headersParser, headers), (htmlParser, page)):
        if payload:
            parser(payload)
2008-10-15 19:38:22 +04:00
2010-06-30 16:09:33 +04:00
def checkCharEncoding(encoding):
    """
    Normalize a declared charset name and check that Python knows it

    The name is lower-cased, stripped of trailing parameters,
    surrounding whitespace and quotes, corrected for a handful of
    popular typos and naming variants and finally validated with
    codecs.lookup().

    @param encoding: charset name as declared by the server or the page
    @return: the cleaned-up charset name; the original value if it is
    empty (None or ''); None if the name is a placeholder ('null',
    '{charset}', '*'), blank after clean-up or not recognized by
    codecs.lookup() (a single warning is logged in the last case)
    """

    if not encoding:
        return encoding

    encoding = encoding.lower()

    # http://www.destructor.de/charsets/index.htm
    translate = { 'windows-874': 'iso-8859-11', 'en_us': 'utf8', 'macintosh': 'iso-8859-1', 'euc_tw': 'big5_tw', 'th': 'tis-620' }

    # drop anything following the charset name (e.g. 'utf-8; q=0.8')
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)]

    # quoted parameter values (charset="utf-8") are legal; without this
    # the quoted and unquoted spellings would yield different names
    encoding = encoding.strip().strip('"\'').strip()

    if not encoding:
        # nothing left to look up - not worth an "unknown charset" report
        return None

    # popular typos/errors
    if '8858' in encoding:
        encoding = encoding.replace('8858', '8859') # iso-8858 -> iso-8859
    elif '8559' in encoding:
        encoding = encoding.replace('8559', '8859') # iso-8559 -> iso-8859
    elif '5889' in encoding:
        encoding = encoding.replace('5889', '8859') # iso-5889 -> iso-8859
    elif '2313' in encoding:
        encoding = encoding.replace('2313', '2312') # gb2313 -> gb2312
    elif 'x-euc' in encoding:
        encoding = encoding.replace('x-euc', 'euc') # x-euc-kr -> euc-kr

    # name adjustment for compatibility
    if encoding.startswith('8859'):
        encoding = 'iso-%s' % encoding
    elif encoding.startswith('cp-'):
        encoding = 'cp%s' % encoding[3:]
    elif encoding.startswith('euc-'):
        encoding = 'euc_%s' % encoding[4:]
    elif encoding.startswith('windows') and not encoding.startswith('windows-'):
        encoding = 'windows-%s' % encoding[7:]
    elif encoding.find('iso-88') > 0:
        # '> 0' on purpose: only cut when something precedes 'iso-88'
        encoding = encoding[encoding.find('iso-88'):]
    elif encoding.startswith('is0-'):
        encoding = 'iso%s' % encoding[4:]
    elif encoding.find('ascii') > 0:
        encoding = 'ascii'

    # http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ('null', '{charset}', '*'):
        return None

    # http://www.iana.org/assignments/character-sets
    # http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except (LookupError, TypeError, ValueError):
        # NOTE: besides LookupError, malformed names coming from the
        # remote page (e.g. non-ASCII characters, embedded NULs) may be
        # rejected with UnicodeError (a ValueError) or TypeError
        warnMsg = "unknown web page charset '%s'. " % encoding
        warnMsg += "Please report by e-mail to %s." % ML
        singleTimeLogMessage(warnMsg, logging.WARN, encoding)
        encoding = None

    return encoding
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics
    Reference: http://chardet.feedparser.org/docs/

    @param page: page content handed over to the chardet detect()
    @return: value of the 'encoding' item of the detect() result
    (presumably None when nothing could be detected - verify against
    the bundled chardet)
    """

    retVal = detect(page)['encoding']

    # report only a real detection; an empty result used to be logged
    # as "heuristics detected web page charset 'None'"
    if retVal:
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal
def decodePage(page, contentEncoding, contentType):
    """
    Decode compressed/charset HTTP response

    @param page: raw HTTP response body
    @param contentEncoding: value of the Content-Encoding header; the
    'gzip', 'x-gzip' and 'deflate' codings (in any letter case) are
    decompressed, anything else leaves the body untouched
    @param contentType: value of the Content-Type header, used to find
    the declared charset and to decide whether heuristic charset
    detection should be attempted
    @return: result of getUnicode() for the (decompressed) page and the
    charset stored into kb.pageEncoding
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if isinstance(contentEncoding, basestring):
        # normalize once so that the membership test and the branch
        # selection below agree (e.g. for 'Deflate')
        coding = contentEncoding.lower()

        if coding in ('gzip', 'x-gzip', 'deflate'):
            # every decompression step is inside the try block so that
            # corrupted data is reported instead of aborting the request;
            # on failure the page is left as received
            try:
                if coding == 'deflate':
                    try:
                        # raw DEFLATE stream (no zlib header)
                        # http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
                        page = zlib.decompress(page, -15)
                    except zlib.error:
                        # zlib-wrapped DEFLATE stream
                        page = zlib.decompress(page)
                else:
                    page = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(page)).read()
            except Exception as msg:
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, msg)
                singleTimeLogMessage(errMsg, logging.ERROR)

    if conf.charset:
        # charset forced by the user wins over anything declared
        kb.pageEncoding = conf.charset
    else:
        httpCharset, metaCharset = None, None

        # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType and (contentType.find('charset=') != -1):
            httpCharset = checkCharEncoding(contentType.split('charset=')[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))

        if httpCharset and metaCharset:
            # both declared: trust them only when they agree
            kb.pageEncoding = httpCharset if httpCharset == metaCharset else None
        else:
            # at most one declared: use it (None when there is none)
            kb.pageEncoding = (httpCharset or metaCharset) or None

    if contentType:
        loweredType = contentType.lower()

        if any(_ in loweredType for _ in ('text/txt', 'text/raw', 'text/html', 'text/xml')):
            # can't do for all responses because we need to support binary files too
            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

    page = getUnicode(page, kb.pageEncoding)

    return page
def processResponse(page, responseHeaders):
    """
    Post-process a received HTTP response

    Increments kb.processResponseCounter, passes the response to
    parseResponse() unless kb.dumpMode is set (headers are passed only
    while the counter is below PARSE_HEADERS_LIMIT) and, when
    conf.parseErrors is set, logs the error message extracted from the
    page (if any).

    @param page: HTTP response body
    @param responseHeaders: HTTP response headers
    """

    kb.processResponseCounter += 1

    if not kb.dumpMode:
        if kb.processResponseCounter < PARSE_HEADERS_LIMIT:
            headersToParse = responseHeaders
        else:
            headersToParse = None

        parseResponse(page, headersToParse)

    if not conf.parseErrors:
        return

    errorText = extractErrorMessage(page)

    if errorText:
        logger.info("parsed error message: '%s'" % errorText)