2019-05-08 13:47:52 +03:00
#!/usr/bin/env python
2008-10-15 19:38:22 +04:00
"""
2020-01-01 15:25:15 +03:00
Copyright (c) 2006-2020 sqlmap developers (http://sqlmap.org/)
2017-10-11 15:50:46 +03:00
See the file 'LICENSE' for copying permission
2008-10-15 19:38:22 +04:00
"""
2010-06-30 16:09:33 +04:00
import codecs
2013-07-13 21:25:49 +04:00
import gzip
2019-03-26 16:37:01 +03:00
import io
2011-04-20 02:54:13 +04:00
import logging
2008-10-15 19:38:22 +04:00
import re
2012-09-12 13:50:38 +04:00
import struct
2013-07-13 21:25:49 +04:00
import zlib
2008-10-15 19:38:22 +04:00
2016-05-31 12:08:23 +03:00
from lib . core . common import Backend
2010-12-25 13:16:20 +03:00
from lib . core . common import extractErrorMessage
2011-01-04 18:49:20 +03:00
from lib . core . common import extractRegexResult
2019-03-29 04:28:16 +03:00
from lib . core . common import filterNone
2014-01-02 15:09:58 +04:00
from lib . core . common import getPublicTypeMembers
2019-01-22 03:20:27 +03:00
from lib . core . common import getSafeExString
2018-01-02 02:42:20 +03:00
from lib . core . common import isListLike
2015-10-29 18:52:17 +03:00
from lib . core . common import randomStr
2012-01-11 18:28:08 +04:00
from lib . core . common import readInput
2012-03-08 14:19:34 +04:00
from lib . core . common import resetCookieJar
2011-06-08 18:42:48 +04:00
from lib . core . common import singleTimeLogMessage
2012-09-11 16:58:52 +04:00
from lib . core . common import singleTimeWarnMessage
2018-01-02 02:42:20 +03:00
from lib . core . common import unArrayizeValue
2019-05-03 14:20:15 +03:00
from lib . core . convert import decodeHex
from lib . core . convert import getBytes
2019-05-27 14:09:13 +03:00
from lib . core . convert import getText
2019-05-06 01:54:21 +03:00
from lib . core . convert import getUnicode
2008-10-15 19:38:22 +04:00
from lib . core . data import conf
from lib . core . data import kb
2010-06-30 16:09:33 +04:00
from lib . core . data import logger
2016-09-09 12:06:38 +03:00
from lib . core . decorators import cachedmethod
2019-10-31 22:59:14 +03:00
from lib . core . decorators import lockedmethod
2019-07-03 11:56:05 +03:00
from lib . core . dicts import HTML_ENTITIES
2016-05-31 12:08:23 +03:00
from lib . core . enums import DBMS
2013-03-20 14:10:24 +04:00
from lib . core . enums import HTTP_HEADER
2012-01-11 18:28:08 +04:00
from lib . core . enums import PLACE
2012-12-06 17:14:19 +04:00
from lib . core . exception import SqlmapCompressionException
2014-12-03 12:06:21 +03:00
from lib . core . settings import BLOCKED_IP_REGEX
2012-01-11 18:28:08 +04:00
from lib . core . settings import DEFAULT_COOKIE_DELIMITER
2012-10-19 13:02:14 +04:00
from lib . core . settings import EVENTVALIDATION_REGEX
2019-05-28 15:12:35 +03:00
from lib . core . settings import IDENTYWAF_PARSE_LIMIT
2012-09-12 13:50:38 +04:00
from lib . core . settings import MAX_CONNECTION_TOTAL_SIZE
2011-01-04 18:49:20 +03:00
from lib . core . settings import META_CHARSET_REGEX
2011-11-22 16:18:24 +04:00
from lib . core . settings import PARSE_HEADERS_LIMIT
2016-05-31 12:08:23 +03:00
from lib . core . settings import SELECT_FROM_TABLE_REGEX
2019-11-11 14:24:42 +03:00
from lib . core . settings import UNICODE_ENCODING
2012-10-19 13:02:14 +04:00
from lib . core . settings import VIEWSTATE_REGEX
2008-11-17 03:00:54 +03:00
from lib . parse . headers import headersParser
2008-10-15 19:38:22 +04:00
from lib . parse . html import htmlParser
2019-04-19 12:24:34 +03:00
from thirdparty import six
2012-07-14 19:01:04 +04:00
from thirdparty . chardet import detect
2019-05-24 14:54:10 +03:00
from thirdparty . identywaf import identYwaf
2019-03-11 16:36:01 +03:00
from thirdparty . odict import OrderedDict
2019-05-15 11:57:22 +03:00
from thirdparty . six import unichr as _unichr
2019-05-24 14:54:10 +03:00
from thirdparty . six . moves import http_client as _http_client
2008-10-15 19:38:22 +04:00
2019-10-31 22:59:14 +03:00
@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Builds the ordered collection of HTTP headers (e.g. Cookie, User-Agent,
    Referer) to be used when performing the HTTP requests

    Starting point is 'base' (conf.httpHeaders when 'base' is None), over
    which non-None entries of 'items' are applied. Header names are then
    de-duplicated case-insensitively and, when a cookie jar (conf.cj) is in
    use, its cookies are reconciled with the HTTP Cookie header
    """

    items = items or {}

    # entries explicitly set to None are removed from the given mapping itself
    for name in [name for name in items if items[name] is None]:
        del items[name]

    combined = OrderedDict(conf.httpHeaders if base is None else base)
    combined.update(items.items())

    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    headers = OrderedDict()

    for key, value in combined.items():
        # an already stored header differing only in letter case gets replaced
        duplicate = next((name for name in headers if name.upper() == key.upper()), None)
        if duplicate is not None:
            del headers[duplicate]

        wellKnown = any(key.upper() == member.upper() for member in getPublicTypeMembers(HTTP_HEADER, True))
        stored = False

        if not wellKnown:
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
                stored = True
            except UnicodeEncodeError:  # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass

        if not stored:
            headers['-'.join(part.capitalize() for part in key.split('-'))] = value

    if not conf.cj:
        return headers

    if HTTP_HEADER.COOKIE in headers:
        for cookie in conf.cj:
            # cookies bound to a foreign domain are of no interest
            if cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                continue

            if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                if conf.loadCookies:
                    conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                elif kb.mergeCookies is None:
                    message = (
                        "you provided a HTTP %s header value, while "
                        "target URL provides its own cookies within "
                        "HTTP %s header which intersect with yours. "
                        "Do you want to merge them in further requests? [Y/n] "
                    ) % (HTTP_HEADER.COOKIE, HTTP_HEADER.SET_COOKIE)

                    kb.mergeCookies = readInput(message, default='Y', boolean=True)

                if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                    def _refresh(value):
                        # replaces the value of the current cookie inside of the given cookie string
                        pattern = r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER)
                        replacement = ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\')
                        return re.sub(pattern, replacement, value)

                    headers[HTTP_HEADER.COOKIE] = _refresh(headers[HTTP_HEADER.COOKIE])

                    if PLACE.COOKIE in conf.parameters:
                        conf.parameters[PLACE.COOKIE] = _refresh(conf.parameters[PLACE.COOKIE])

                    conf.httpHeaders = [(item[0], _refresh(item[1]) if item[0] == HTTP_HEADER.COOKIE else item[1]) for item in conf.httpHeaders]

            elif not kb.testMode:
                # cookie not yet present inside of the header value gets appended
                headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

    if kb.testMode and not (conf.csrfToken or conf.safeUrl):
        resetCookieJar(conf.cj)

    return headers
2017-05-17 01:22:18 +03:00
def parseResponse(page, headers, status=None):
    """
    Passes the HTTP response to the header and HTML parsers

    @param page: the page content handed over to htmlParser (according to
    the original note, used for feeding the knowledge base htmlFp list
    and absFilePaths set); skipped when empty
    @param headers: response headers handed over to headersParser; skipped
    when empty
    @param status: optional status text which, when given, is put in front
    of the page (separated by a blank line) before parsing
    """

    if headers:
        headersParser(headers)

    if not page:
        return

    content = "%s\n\n%s" % (status, page) if status else page
    htmlParser(content)
2008-10-15 19:38:22 +04:00
2016-09-09 12:06:38 +03:00
@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    Returns the adjusted encoding name, the given (empty) value itself when
    nothing has been provided, or None when the name is a known placeholder,
    is not recognized by the codecs module or can't be used for decoding
    (in which case a single time warning is issued if 'warn' is set)

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # anything following a delimiter is not part of the charset name
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot;", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]              # x-euc-kr -> euc-kr  /  x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except Exception:  # Note: deliberately broad (best-effort check), while no longer swallowing KeyboardInterrupt/SystemExit
        encoding = None

    if encoding:
        # trial decoding, as a name known to the codecs module is not necessarily usable for text
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except Exception:  # Note: same reasoning as above
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
2011-04-18 17:38:46 +04:00
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics

    Detection results are kept inside of kb.cache.encoding (keyed by the
    hash of the page content)

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    cacheKey = hash(page)

    detected = kb.cache.encoding.get(cacheKey)
    if not detected:
        detected = detect(page)["encoding"]

    kb.cache.encoding[cacheKey] = detected

    if detected:
        # comparison is done regardless of letter case and dashes (e.g. UTF-8 vs utf8)
        if detected.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
            infoMsg = "heuristics detected web page charset '%s'" % detected
            singleTimeLogMessage(infoMsg, logging.INFO, detected)

    return detected
2019-11-13 00:51:11 +03:00
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    The content is decompressed according to 'contentEncoding' (gzip, x-gzip
    or deflate), page charset is established (conf.encoding, otherwise the
    charset declared in 'contentType' and/or meta tag) and stored into
    kb.pageEncoding and, for textual content types, HTML entities (along
    with percent-encoded characters if 'percentDecode' is set) are decoded
    while the page is being converted to Unicode

    Returns None when compressed content is met while page compression is
    turned off (kb.pageCompress). Raises SqlmapCompressionException (after
    turning off page compression) when content declared as compressed can't
    be decompressed and does not look like a plain HTML

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    if hasattr(contentEncoding, "lower"):
        contentEncoding = contentEncoding.lower()
    else:
        contentEncoding = ""

    if hasattr(contentType, "lower"):
        contentType = contentType.lower()
    else:
        contentType = ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                data = io.BytesIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                size = struct.unpack("<l", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            # Note: marker has to be of the same type as the page, as mixing of bytes and text
            # in membership test raises TypeError in Python 3 (masking the intended exception)
            marker = b"<html" if isinstance(page, six.binary_type) else "<html"

            if marker not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        # declared charset is used only if there is a single one or both of them agree
        if (any((httpCharset, metaCharset)) and not all((httpCharset, metaCharset))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                # Note: single hex digit is padded by usage of bytes concatenation (text formatting of bytes gives "0b'9'" in Python 3)
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0" + _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page)  # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                # Note: page is binary at this point, hence BOM has to be given as a bytes literal
                if page and page.startswith(b"\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page
2010-12-25 13:16:20 +03:00
2019-05-24 14:54:10 +03:00
def processResponse(page, responseHeaders, code=None, status=None):
    """
    Inspects the HTTP response for information of interest

    Response is passed to the header/HTML parsers, (optionally) searched for
    DBMS error messages, checked against WAF/IPS signatures (identYwaf) and
    searched for signs of browser verification, CAPTCHA and IP blocking
    protection mechanisms. While the original page is still unknown,
    values of POST parameters matching EVENTVALIDATION_REGEX/VIEWSTATE_REGEX
    are adjusted to the ones found inside of the response
    """

    kb.processResponseCounter += 1

    page = page or ""

    # headers are being parsed only for a limited number of first responses
    headersToParse = responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None
    parseResponse(page, headersToParse, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        errorText = extractErrorMessage(page)

        if errorText:
            logger.warning("parsed DBMS error message: '%s'" % errorText.rstrip('.'))

    if kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
        headerLines = responseHeaders.headers if responseHeaders else []
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", getUnicode("".join(headerLines)), page)

        identYwaf.non_blind.clear()
        if identYwaf.non_blind_check(rawResponse, silent=True):
            for waf in identYwaf.non_blind:
                if waf in kb.identifiedWafs:
                    continue

                kb.identifiedWafs.add(waf)
                errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                singleTimeLogMessage(errMsg, logging.CRITICAL)

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            found = re.search(regex, page)

            if not found or PLACE.POST not in conf.parameters:
                continue

            name, value = found.groups()

            if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                # nothing to adjust if the currently used value is the one found in the response
                if conf.paramDict[PLACE.POST][name] in page:
                    continue

                msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name

                if not readInput(msg, default='N', boolean=True):
                    continue

                conf.paramDict[PLACE.POST][name] = value

            conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True

        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"

        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        # CAPTCHA is expected either inside of a form or as a target of meta refresh
        if any(re.search(r"(?i)captcha", form.group(0)) for form in re.finditer(r"(?si)<form.+?</form>", page)):
            kb.captchaDetected = True

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"

            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        singleTimeWarnMessage("it appears that you have been blocked by the target server")