sqlmap/lib/parse/html.py
2022-01-03 11:30:34 +01:00

100 lines
3.0 KiB
Python

#!/usr/bin/env python
"""
Copyright (c) 2006-2022 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
import re
from xml.sax.handler import ContentHandler
from lib.core.common import urldecode
from lib.core.common import parseXmlFile
from lib.core.data import kb
from lib.core.data import paths
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
from lib.core.threads import getCurrentThreadData
class HTMLHandler(ContentHandler):
"""
This class defines methods to parse the input HTML page to
fingerprint the back-end database management system
"""
def __init__(self, page):
ContentHandler.__init__(self)
self._dbms = None
self._page = (page or "")
try:
self._lower_page = self._page.lower()
except SystemError: # https://bugs.python.org/issue18183
self._lower_page = None
self._urldecoded_page = urldecode(self._page)
self.dbms = None
def _markAsErrorPage(self):
threadData = getCurrentThreadData()
threadData.lastErrorPage = (threadData.lastRequestUID, self._page)
def startElement(self, name, attrs):
if self.dbms:
return
if name == "dbms":
self._dbms = attrs.get("value")
elif name == "error":
regexp = attrs.get("regexp")
if regexp not in kb.cache.regex:
keywords = re.findall(r"\w+", re.sub(r"\\.", " ", regexp))
keywords = sorted(keywords, key=len)
kb.cache.regex[regexp] = keywords[-1].lower()
if ('|' in regexp or kb.cache.regex[regexp] in (self._lower_page or kb.cache.regex[regexp])) and re.search(regexp, self._urldecoded_page, re.I):
self.dbms = self._dbms
self._markAsErrorPage()
kb.forkNote = kb.forkNote or attrs.get("fork")
def htmlParser(page):
"""
This function calls a class that parses the input HTML page to
fingerprint the back-end database management system
>>> from lib.core.enums import DBMS
>>> htmlParser("Warning: mysql_fetch_array() expects parameter 1 to be resource") == DBMS.MYSQL
True
>>> threadData = getCurrentThreadData()
>>> threadData.lastErrorPage = None
"""
page = page[:HEURISTIC_PAGE_SIZE_THRESHOLD]
xmlfile = paths.ERRORS_XML
handler = HTMLHandler(page)
key = hash(page)
# generic SQL warning/error messages
if re.search(r"SQL (warning|error|syntax)", page, re.I):
handler._markAsErrorPage()
if key in kb.cache.parsedDbms:
retVal = kb.cache.parsedDbms[key]
if retVal:
handler._markAsErrorPage()
return retVal
parseXmlFile(xmlfile, handler)
if handler.dbms and handler.dbms not in kb.htmlFp:
kb.lastParserStatus = handler.dbms
kb.htmlFp.append(handler.dbms)
else:
kb.lastParserStatus = None
kb.cache.parsedDbms[key] = handler.dbms
return handler.dbms