From 5d5ebd49b617a383bfb2df0d92c883a77b0ada8f Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Fri, 21 May 2010 14:42:59 +0000 Subject: [PATCH] introducing regex caching mechanism --- lib/core/common.py | 10 ++++++++++ lib/parse/banner.py | 9 ++++++--- lib/request/basic.py | 9 ++++----- lib/utils/detection.py | 6 ++++-- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lib/core/common.py b/lib/core/common.py index 59c59acd4..c0e4507e5 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -70,6 +70,8 @@ from lib.core.settings import SQLITE_ALIASES from lib.core.settings import ACCESS_ALIASES from lib.core.settings import FIREBIRD_ALIASES +__compiledRegularExpressions = {} + def paramToDict(place, parameters=None): """ Split the parameters into names and values, check if these parameters @@ -1222,3 +1224,11 @@ def getGoodSamaritanCharsets(part, prevValue, originalCharset): return predictedCharset, otherCharset else: return None, originalTable + +def getCompiledRegex(regex): + if regex in __compiledRegularExpressions: + return __compiledRegularExpressions[regex] + else: + retVal = re.compile(regex) + __compiledRegularExpressions[regex] = retVal + return retVal diff --git a/lib/parse/banner.py b/lib/parse/banner.py index 1c8c20eb6..3672ce500 100644 --- a/lib/parse/banner.py +++ b/lib/parse/banner.py @@ -27,6 +27,7 @@ import re from xml.sax.handler import ContentHandler from lib.core.common import checkFile +from lib.core.common import getCompiledRegex from lib.core.common import parseXmlFile from lib.core.common import sanitizeStr from lib.core.data import kb @@ -76,7 +77,8 @@ class MSSQLBannerHandler(ContentHandler): def endElement(self, name): if name == "signature": for version in (self.__version, self.__versionAlt): - if version and re.search(" %s[\.\ ]+" % version, self.__banner): + regObj = getCompiledRegex(" %s[\.\ ]+" % version) + if version and regObj.search(self.__banner): self.__feedInfo("dbmsRelease", self.__release) 
self.__feedInfo("dbmsVersion", self.__version) self.__feedInfo("dbmsServicePack", self.__servicePack) @@ -89,8 +91,9 @@ class MSSQLBannerHandler(ContentHandler): elif name == "version": self.__inVersion = False self.__version = self.__version.replace(" ", "") - - match = re.search(r"\A(?P<major>\d+)\.00\.(?P<build>\d+)\Z", self.__version) + + regObj = getCompiledRegex(r"\A(?P<major>\d+)\.00\.(?P<build>\d+)\Z") + match = regObj.search(self.__version) self.__versionAlt = "%s.0.%s.0" % (match.group('major'), match.group('build')) if match else None elif name == "servicepack": diff --git a/lib/request/basic.py b/lib/request/basic.py index 4c94882d7..67cb912f1 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -28,6 +28,7 @@ import re import StringIO import zlib +from lib.core.common import getCompiledRegex from lib.core.common import isWindowsDriveLetterPath from lib.core.common import posixToNtSlashes from lib.core.common import urlEncodeCookieValues @@ -36,9 +37,6 @@ from lib.core.data import kb from lib.parse.headers import headersParser from lib.parse.html import htmlParser -__absFilePathsRegExp = ( r" in (?P<result>.*?) on line", r"(?:>|\s)(?P<result>[A-Za-z]:[\\/][\w.\\/]*)", r"(?:>|\s)(?P<result>/\w[/\w.]+)" ) -__absFilePathsRegObj = [re.compile(absFilePathRegExp) for absFilePathRegExp in __absFilePathsRegExp] - def forgeHeaders(cookie, ua): """ Prepare HTTP Cookie and HTTP User-Agent headers to use when performing @@ -78,8 +76,9 @@ def parseResponse(page, headers): # NOTE: this regular expression works if the remote web application # is written in PHP and debug/error messages are enabled. - for reobj in __absFilePathsRegObj: - for match in reobj.finditer(page): + for regex in ( r" in (?P<result>.*?) 
on line", r"(?:>|\s)(?P<result>[A-Za-z]:[\\/][\w.\\/]*)", r"(?:>|\s)(?P<result>/\w[/\w.]+)" ): + regObj = getCompiledRegex(regex) + for match in regObj.finditer(page): absFilePath = match.group("result").strip() page = page.replace(absFilePath, "") diff --git a/lib/utils/detection.py b/lib/utils/detection.py index 5aeee6837..a207aed08 100644 --- a/lib/utils/detection.py +++ b/lib/utils/detection.py @@ -24,6 +24,7 @@ Franklin St, Fifth Floor, Boston, MA 02110-1301 USA import re, sre_constants from xml.dom import minidom +from lib.core.common import getCompiledRegex from lib.core.data import paths from lib.core.data import logger @@ -50,12 +51,13 @@ def checkPayload(string): rules = [] for xmlrule in xmlrules.getElementsByTagName("filter"): try: - rule = re.compile(xmlrule.getElementsByTagName('rule')[0].childNodes[0].nodeValue) + rule = xmlrule.getElementsByTagName('rule')[0].childNodes[0].nodeValue desc = __adjustGrammar(xmlrule.getElementsByTagName('description')[0].childNodes[0].nodeValue) rules.append((rule, desc)) except sre_constants.error: #some issues with some regex expressions in Python 2.5 pass for rule, desc in rules: - if rule.search(string, re.IGNORECASE): + regObj = getCompiledRegex(rule) + if regObj.search(string, re.IGNORECASE): logger.warn("highly probable IDS/IPS detection: '%s'" % desc)