sqlmap/lib/request/basic.py

128 lines
3.9 KiB
Python
Raw Normal View History

2008-10-15 19:38:22 +04:00
#!/usr/bin/env python
"""
2008-10-15 19:56:32 +04:00
$Id$
2008-10-15 19:38:22 +04:00
Copyright (c) 2006-2010 sqlmap developers (http://sqlmap.sourceforge.net/)
2010-10-15 03:18:29 +04:00
See the file 'doc/COPYING' for copying permission
2008-10-15 19:38:22 +04:00
"""
2010-06-30 16:09:33 +04:00
import codecs
import gzip
import os
2008-10-15 19:38:22 +04:00
import re
import StringIO
import zlib
2008-10-15 19:38:22 +04:00
2010-05-21 18:42:59 +04:00
from lib.core.common import getCompiledRegex
from lib.core.common import getUnicode
from lib.core.common import isWindowsDriveLetterPath
from lib.core.common import posixToNtSlashes
2008-10-15 19:38:22 +04:00
from lib.core.data import conf
from lib.core.data import kb
2010-06-30 16:09:33 +04:00
from lib.core.data import logger
from lib.parse.headers import headersParser
2008-10-15 19:38:22 +04:00
from lib.parse.html import htmlParser
def forgeHeaders(cookie, ua):
"""
Prepare HTTP Cookie and HTTP User-Agent headers to use when performing
the HTTP requests
"""
headers = {}
for header, value in conf.httpHeaders:
2010-11-08 16:26:45 +03:00
if cookie and header == "Cookie":
2008-10-15 19:38:22 +04:00
headers[header] = cookie
2010-11-08 16:26:45 +03:00
elif ua and header == "User-Agent":
2008-10-15 19:38:22 +04:00
headers[header] = ua
else:
headers[header] = value
return headers
def parseResponse(page, headers):
2008-10-15 19:38:22 +04:00
"""
@param page: the page to parse to feed the knowledge base htmlFp
(back-end DBMS fingerprint based upon DBMS error messages return
through the web application) list and absFilePaths (absolute file
paths) set.
"""
if headers:
headersParser(headers)
2008-10-15 19:38:22 +04:00
if page:
htmlParser(page)
2008-10-15 19:38:22 +04:00
# Detect injectable page absolute system path
# NOTE: this regular expression works if the remote web application
# is written in PHP and debug/error messages are enabled.
2008-10-15 19:38:22 +04:00
2010-05-21 18:42:59 +04:00
for regex in ( r" in <b>(?P<result>.*?)</b> on line", r"(?:>|\s)(?P<result>[A-Za-z]:[\\/][\w.\\/]*)", r"(?:>|\s)(?P<result>/\w[/\w.]+)" ):
regObj = getCompiledRegex(regex)
for match in regObj.finditer(page):
2010-01-05 14:30:33 +03:00
absFilePath = match.group("result").strip()
page = page.replace(absFilePath, "")
if isWindowsDriveLetterPath(absFilePath):
absFilePath = posixToNtSlashes(absFilePath)
if absFilePath not in kb.absFilePaths:
kb.absFilePaths.add(absFilePath)
2010-06-10 15:34:17 +04:00
2010-06-30 16:09:33 +04:00
def checkCharEncoding(encoding):
if encoding:
encoding = encoding.lower()
else:
return encoding
2010-11-07 19:23:03 +03:00
# http://www.destructor.de/charsets/index.htm
translate = { 'windows-874': 'iso-8859-11', 'en_us': 'utf8' }
for delimiter in (';', ','):
if delimiter in encoding:
encoding = encoding[:encoding.find(delimiter)]
2010-10-14 19:28:54 +04:00
# http://philip.html5.org/data/charsets-2.html
if encoding in translate:
encoding = translate[encoding]
elif encoding.startswith('cp-'):
encoding = 'cp%s' % encoding[3:]
elif encoding.startswith('windows') and not encoding.startswith('windows-'):
encoding = 'windows-%s' % encoding[7:]
2010-10-14 19:28:54 +04:00
2010-06-30 16:09:33 +04:00
try:
codecs.lookup(encoding)
except LookupError:
warnMsg = "unknown charset '%s'. " % encoding
2010-10-14 19:28:54 +04:00
warnMsg += "Please report by e-mail to sqlmap-users@lists.sourceforge.net."
2010-06-30 16:09:33 +04:00
logger.warn(warnMsg)
encoding = conf.dataEncoding
2010-06-30 16:09:33 +04:00
return encoding
def decodePage(page, contentEncoding, contentType):
"""
Decode compressed/charset HTTP response
"""
if isinstance(contentEncoding, basestring) and contentEncoding.lower() in ('gzip', 'x-gzip', 'deflate'):
if contentEncoding == 'deflate':
# http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
data = StringIO.StringIO(zlib.decompress(page, -15))
else:
data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(page))
page = data.read()
2010-11-07 19:23:03 +03:00
# http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
if contentType and (contentType.find('charset=') != -1):
2010-06-30 16:09:33 +04:00
charset = checkCharEncoding(contentType.split('charset=')[-1])
2010-11-07 19:23:03 +03:00
2010-06-30 16:09:33 +04:00
if charset:
page = getUnicode(page, charset)
return page