sqlmap/lib/parse/html.py

100 lines
3.0 KiB
Python
Raw Permalink Normal View History

2019-05-08 13:47:52 +03:00
#!/usr/bin/env python
2008-10-15 19:38:22 +04:00
"""
2023-01-03 01:24:59 +03:00
Copyright (c) 2006-2023 sqlmap developers (https://sqlmap.org/)
2017-10-11 15:50:46 +03:00
See the file 'LICENSE' for copying permission
2008-10-15 19:38:22 +04:00
"""
import re
from xml.sax.handler import ContentHandler
2018-06-01 11:21:59 +03:00
from lib.core.common import urldecode
2010-04-16 23:57:00 +04:00
from lib.core.common import parseXmlFile
from lib.core.data import kb
from lib.core.data import paths
2021-01-07 16:20:03 +03:00
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
from lib.core.threads import getCurrentThreadData
2008-10-15 19:38:22 +04:00
class HTMLHandler(ContentHandler):
    """
    This class defines methods to parse the input HTML page to
    fingerprint the back-end database management system

    It is used as a SAX content handler: "dbms" elements set the
    candidate DBMS name and the "error" elements that follow carry the
    regular expressions that are matched against the page
    """

    def __init__(self, page):
        """
        Stores the page (None is treated as an empty string) together
        with its lower-cased form and the form returned by urldecode()
        """

        ContentHandler.__init__(self)

        # DBMS name taken from the most recently seen "dbms" element
        self._dbms = None
        self._page = (page or "")

        try:
            self._lower_page = self._page.lower()
        except SystemError:  # https://bugs.python.org/issue18183
            # None disables the keyword pre-check in startElement()
            self._lower_page = None

        self._urldecoded_page = urldecode(self._page)

        # Final result: DBMS whose error regexp matched the page (if any)
        self.dbms = None

    def _markAsErrorPage(self):
        """
        Records the page as the last error page of the current thread,
        paired with the UID of the last request
        """

        threadData = getCurrentThreadData()
        threadData.lastErrorPage = (threadData.lastRequestUID, self._page)

    def startElement(self, name, attrs):
        """
        Handles an opening XML element

        A "dbms" element updates the candidate DBMS name (attribute
        "value"). An "error" element has its "regexp" attribute matched
        (case-insensitively) against the URL-decoded page; on a match the
        candidate DBMS becomes the result, the page is marked as an error
        page and the "fork" attribute (if any) is kept in kb.forkNote
        """

        # Once a DBMS has been recognized the remaining elements are skipped
        if self.dbms:
            return

        if name == "dbms":
            self._dbms = attrs.get("value")

        elif name == "error":
            regexp = attrs.get("regexp")

            # Note: an "error" element without a (non-empty) regexp can't
            # match anything (and would otherwise break re.sub() below)
            if not regexp:
                return

            if regexp not in kb.cache.regex:
                # Longest literal word of the regexp (escape sequences
                # removed) is cached and used as a cheap pre-filter
                keywords = re.findall(r"\w+", re.sub(r"\\.", " ", regexp))
                keywords = sorted(keywords, key=len)

                # Note: an empty keyword (regexp without word characters)
                # is contained in any page, leaving the decision to the
                # regexp itself
                kb.cache.regex[regexp] = keywords[-1].lower() if keywords else ""

            keyword = kb.cache.regex[regexp]

            # The keyword pre-check is skipped for regexps with alternation
            # ('|') and when the lower-cased page is empty or unavailable
            if ('|' in regexp or keyword in (self._lower_page or keyword)) and re.search(regexp, self._urldecoded_page, re.I):
                self.dbms = self._dbms
                self._markAsErrorPage()
                kb.forkNote = kb.forkNote or attrs.get("fork")
2008-10-15 19:38:22 +04:00
2008-11-17 03:13:49 +03:00
def htmlParser(page):
    """
    This function calls a class that parses the input HTML page to
    fingerprint the back-end database management system

    Returns the recognized DBMS name (None if nothing has been
    recognized). Results are cached in kb.cache.parsedDbms by the hash
    of the (truncated) page

    >>> from lib.core.enums import DBMS
    >>> htmlParser("Warning: mysql_fetch_array() expects parameter 1 to be resource") == DBMS.MYSQL
    True
    >>> threadData = getCurrentThreadData()
    >>> threadData.lastErrorPage = None
    """

    # Note: None is treated as an empty page (same as in HTMLHandler)
    # instead of failing on slicing; only the leading part is inspected
    page = (page or "")[:HEURISTIC_PAGE_SIZE_THRESHOLD]

    xmlfile = paths.ERRORS_XML
    handler = HTMLHandler(page)
    key = hash(page)

    # generic SQL warning/error messages
    if re.search(r"SQL (warning|error|syntax)", page, re.I):
        handler._markAsErrorPage()

    # Already parsed page - reuse the cached result
    if key in kb.cache.parsedDbms:
        retVal = kb.cache.parsedDbms[key]

        if retVal:
            handler._markAsErrorPage()

        return retVal

    parseXmlFile(xmlfile, handler)

    # kb.lastParserStatus is set only for a DBMS not yet present in kb.htmlFp
    if handler.dbms and handler.dbms not in kb.htmlFp:
        kb.lastParserStatus = handler.dbms
        kb.htmlFp.append(handler.dbms)
    else:
        kb.lastParserStatus = None

    kb.cache.parsedDbms[key] = handler.dbms

    return handler.dbms