Code refactoring (especially Google search code)

This commit is contained in:
Miroslav Stampar 2012-10-30 18:38:10 +01:00
parent 76b793b199
commit 2de52927f3
5 changed files with 62 additions and 88 deletions

View File

@@ -242,22 +242,22 @@ def start():
         return True

     if conf.url and not any((conf.forms, conf.crawlDepth)):
-        kb.targetUrls.add((conf.url, conf.method, conf.data, conf.cookie))
+        kb.targets.add((conf.url, conf.method, conf.data, conf.cookie))

-    if conf.configFile and not kb.targetUrls:
+    if conf.configFile and not kb.targets:
         errMsg = "you did not edit the configuration file properly, set "
         errMsg += "the target url, list of targets or google dork"
         logger.error(errMsg)
         return False

-    if kb.targetUrls and len(kb.targetUrls) > 1:
-        infoMsg = "sqlmap got a total of %d targets" % len(kb.targetUrls)
+    if kb.targets and len(kb.targets) > 1:
+        infoMsg = "sqlmap got a total of %d targets" % len(kb.targets)
         logger.info(infoMsg)

     hostCount = 0
     cookieStr = ""

-    for targetUrl, targetMethod, targetData, targetCookie in kb.targetUrls:
+    for targetUrl, targetMethod, targetData, targetCookie in kb.targets:
         try:
             conf.url = targetUrl
             conf.method = targetMethod
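
The tuples stored here give kb.targets its shape for the rest of the commit: an ordered, de-duplicated set of (url, method, data, cookie) entries. A minimal sketch of that contract (sqlmap uses its own bundled oset type, not this toy class):

# Toy stand-in for the ordered-set behaviour kb.targets relies on:
# insertion order is preserved and duplicate targets are ignored.
from collections import OrderedDict

class ToyOset(object):
    def __init__(self):
        self._items = OrderedDict()

    def add(self, item):
        self._items.setdefault(item, None)

    def __len__(self):
        return len(self._items)

    def __iter__(self):
        return iter(self._items)

targets = ToyOset()
targets.add(("http://www.target.com/page.php?id=1", "GET", None, None))
targets.add(("http://www.target.com/page.php?id=1", "GET", None, None))  # duplicate, ignored
assert len(targets) == 1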

View File

@@ -3093,7 +3093,7 @@ def findPageForms(content, url, raise_=False, addToTargets=False):
     if addToTargets and retVal:
         for target in retVal:
-            kb.targetUrls.add(target)
+            kb.targets.add(target)

     return retVal

View File

@@ -85,37 +85,38 @@ from lib.core.log import FORMATTER
 from lib.core.log import LOGGER_HANDLER
 from lib.core.optiondict import optDict
 from lib.core.purge import purge
+from lib.core.settings import ACCESS_ALIASES
+from lib.core.settings import BURP_REQUEST_REGEX
 from lib.core.settings import CODECS_LIST_PAGE
 from lib.core.settings import CRAWL_EXCLUDE_EXTENSIONS
+from lib.core.settings import DB2_ALIASES
 from lib.core.settings import DEFAULT_GET_POST_DELIMITER
 from lib.core.settings import DEFAULT_PAGE_ENCODING
 from lib.core.settings import DEFAULT_TOR_HTTP_PORTS
 from lib.core.settings import DEFAULT_TOR_SOCKS_PORT
+from lib.core.settings import FIREBIRD_ALIASES
 from lib.core.settings import IS_WIN
-from lib.core.settings import NULL
-from lib.core.settings import PYVERSION
-from lib.core.settings import SITE
-from lib.core.settings import SUPPORTED_DBMS
-from lib.core.settings import SUPPORTED_OS
-from lib.core.settings import VERSION_STRING
+from lib.core.settings import LOCALHOST
+from lib.core.settings import MAXDB_ALIASES
+from lib.core.settings import MAX_NUMBER_OF_THREADS
 from lib.core.settings import MSSQL_ALIASES
 from lib.core.settings import MYSQL_ALIASES
-from lib.core.settings import PGSQL_ALIASES
+from lib.core.settings import NULL
 from lib.core.settings import ORACLE_ALIASES
-from lib.core.settings import SQLITE_ALIASES
-from lib.core.settings import ACCESS_ALIASES
-from lib.core.settings import FIREBIRD_ALIASES
-from lib.core.settings import MAXDB_ALIASES
-from lib.core.settings import SYBASE_ALIASES
-from lib.core.settings import DB2_ALIASES
-from lib.core.settings import BURP_REQUEST_REGEX
-from lib.core.settings import LOCALHOST
-from lib.core.settings import MAX_NUMBER_OF_THREADS
 from lib.core.settings import PARAMETER_SPLITTING_REGEX
+from lib.core.settings import PGSQL_ALIASES
+from lib.core.settings import PYVERSION
+from lib.core.settings import SITE
+from lib.core.settings import SQLITE_ALIASES
+from lib.core.settings import SUPPORTED_DBMS
+from lib.core.settings import SUPPORTED_OS
+from lib.core.settings import SYBASE_ALIASES
 from lib.core.settings import TIME_DELAY_CANDIDATES
 from lib.core.settings import UNENCODED_ORIGINAL_VALUE
 from lib.core.settings import UNION_CHAR_REGEX
 from lib.core.settings import UNKNOWN_DBMS_VERSION
+from lib.core.settings import URI_INJECTABLE_REGEX
+from lib.core.settings import VERSION_STRING
 from lib.core.settings import WEBSCARAB_SPLITTER
 from lib.core.threads import getCurrentThreadData
 from lib.core.update import update
@@ -212,8 +213,8 @@ def __feedTargetsDict(reqFile, addedTargetUrls):
                 continue

             if not(conf.scope and not re.search(conf.scope, url, re.I)):
-                if not kb.targetUrls or url not in addedTargetUrls:
-                    kb.targetUrls.add((url, method, None, cookie))
+                if not kb.targets or url not in addedTargetUrls:
+                    kb.targets.add((url, method, None, cookie))
                     addedTargetUrls.add(url)

 def __parseBurpLog(content):
@@ -322,8 +323,8 @@ def __feedTargetsDict(reqFile, addedTargetUrls):
                 port = None

             if not(conf.scope and not re.search(conf.scope, url, re.I)):
-                if not kb.targetUrls or url not in addedTargetUrls:
-                    kb.targetUrls.add((url, method, urldecode(data) if data and urlencode(DEFAULT_GET_POST_DELIMITER, None) not in data else data, cookie))
+                if not kb.targets or url not in addedTargetUrls:
+                    kb.targets.add((url, method, urldecode(data) if data and urlencode(DEFAULT_GET_POST_DELIMITER, None) not in data else data, cookie))
                     addedTargetUrls.add(url)

     fp = openFile(reqFile, "rb")
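
A note on the ternary above: the logged POST data is only run through urldecode() when the URL-encoded form of the default GET/POST delimiter ("%26" for "&") is absent, so a literal encoded ampersand inside a value is not turned into a parameter separator. A rough standalone illustration, using the standard urllib module as a stand-in for sqlmap's own urlencode()/urldecode() helpers (an assumption about their behaviour here):

# Only decode request data when the encoded delimiter "%26" is absent;
# otherwise decoding would corrupt values containing a literal "&".
import urllib

DEFAULT_GET_POST_DELIMITER = "&"

def guardedDecode(data):
    encodedDelimiter = urllib.quote(DEFAULT_GET_POST_DELIMITER, safe="")  # "%26"
    return urllib.unquote(data) if data and encodedDelimiter not in data else data

print(guardedDecode("a=1%20or%202&b=2"))  # decoded: a=1 or 2&b=2
print(guardedDecode("a=1%262&b=2"))       # kept as-is: %26 is part of a value
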
@@ -374,7 +375,7 @@ def __setMultipleTargets():
     mode.
     """

-    initialTargetsCount = len(kb.targetUrls)
+    initialTargetsCount = len(kb.targets)
     addedTargetUrls = set()

     if not conf.logFile:
@@ -405,7 +406,7 @@ def __setMultipleTargets():
         errMsg += "nor a directory"
         raise sqlmapFilePathException, errMsg

-    updatedTargetsCount = len(kb.targetUrls)
+    updatedTargetsCount = len(kb.targets)

     if updatedTargetsCount > initialTargetsCount:
         infoMsg = "sqlmap parsed %d " % (updatedTargetsCount - initialTargetsCount)
@@ -493,37 +494,48 @@ def __setGoogleDorking():
         handlers.append(keepAliveHandler)

     googleObj = Google(handlers)
     googleObj.getCookie()

+    kb.data.onlyGETs = None
+
-    def search():
-        matches = googleObj.search(conf.googleDork)
+    def retrieve():
+        links = googleObj.search(conf.googleDork)

-        if not matches:
+        if not links:
             errMsg = "unable to find results for your "
             errMsg += "Google dork expression"
             raise sqlmapGenericException, errMsg

-        googleObj.getTargetUrls()
-        return matches
+        for link in links:
+            link = urldecode(link)
+            if re.search(r"(.*?)\?(.+)", link):
+                kb.targets.add((link, conf.method, conf.data, conf.cookie))
+            elif re.search(URI_INJECTABLE_REGEX, link, re.I):
+                if kb.data.onlyGETs is None and conf.data is None:
+                    message = "do you want to scan only results containing GET parameters? [Y/n] "
+                    test = readInput(message, default="Y")
+                    kb.data.onlyGETs = test.lower() != 'n'
+                if not kb.data.onlyGETs:
+                    kb.targets.add((link, conf.method, conf.data, conf.cookie))
+
+        return links

     while True:
-        matches = search()
+        links = retrieve()

-        if kb.targetUrls:
-            infoMsg = "sqlmap got %d results for your " % len(matches)
+        if kb.targets:
+            infoMsg = "sqlmap got %d results for your " % len(links)
             infoMsg += "Google dork expression, "

-            if len(matches) == len(kb.targetUrls):
+            if len(links) == len(kb.targets):
                 infoMsg += "all "
             else:
-                infoMsg += "%d " % len(kb.targetUrls)
+                infoMsg += "%d " % len(kb.targets)

             infoMsg += "of them are testable targets"
             logger.info(infoMsg)
             break
         else:
-            message = "sqlmap got %d results " % len(matches)
+            message = "sqlmap got %d results " % len(links)
             message += "for your Google dork expression, but none of them "
             message += "have GET parameters to test for SQL injection. "
             message += "Do you want to skip to the next result page? [Y/n]"
@@ -550,7 +562,7 @@ def __setBulkMultipleTargets():
     for line in getFileItems(conf.bulkFile):
         if re.search(r"[^ ]+\?(.+)", line, re.I):
-            kb.targetUrls.add((line.strip(), None, None, None))
+            kb.targets.add((line.strip(), None, None, None))

 def __findPageForms():
     if not conf.forms or conf.crawlDepth:
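
For reference, the bulk-file filter above keeps only lines that contain a non-empty query string:

# Lines accepted/rejected by the r"[^ ]+\?(.+)" bulk-file filter.
import re

for line in ("http://www.target.com/page.php?id=1",  # kept
             "http://www.target.com/page.php",       # skipped: no query string
             "http://www.target.com/page.php?"):     # skipped: empty query string
    print("%s -> %s" % (line, bool(re.search(r"[^ ]+\?(.+)", line, re.I))))
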
@@ -1571,9 +1583,8 @@ def __setKnowledgeBaseAttributes(flushAll=True):
     kb.headerPaths = {}
     kb.keywords = set(getFileItems(paths.SQL_KEYWORDS))
     kb.passwordMgr = None
-    kb.scanOnlyGoogleGETs = None
     kb.tamperFunctions = []
-    kb.targetUrls = oset()
+    kb.targets = oset()
     kb.testedParams = set()
     kb.userAgents = None
     kb.vainRun = True

View File

@@ -126,4 +126,4 @@ class Crawler:
             logger.warn(warnMsg)
         else:
             for url in threadData.shared.outputs:
-                kb.targetUrls.add(( url, None, None, None ))
+                kb.targets.add(( url, None, None, None ))

View File

@@ -33,7 +33,6 @@ class Google:
     """

     def __init__(self, handlers):
-        self._matches = []
         self._cj = cookielib.CookieJar()

         handlers.append(urllib2.HTTPCookieProcessor(self._cj))
@@ -41,52 +40,16 @@ class Google:
         self.opener = urllib2.build_opener(*handlers)
         self.opener.addheaders = conf.httpHeaders

-    def _parsePage(self, page):
-        """
-        Parse Google dork search results page to get the list of
-        HTTP addresses
-        """
-
-        retVal = [urllib.unquote(match.group(1)) for match in re.finditer(GOOGLE_REGEX, page, re.I | re.S)]
-
-        return retVal
-
-    def getTargetUrls(self):
-        """
-        This method returns the list of hosts with parameters out of
-        your Google dork search results
-        """
-
-        for _ in self._matches:
-            _ = urldecode(_)
-
-            if re.search(r"(.*?)\?(.+)", _):
-                kb.targetUrls.add((_, conf.method, conf.data, conf.cookie))
-            elif re.search(URI_INJECTABLE_REGEX, _, re.I):
-                if kb.scanOnlyGoogleGETs is None:
-                    message = "do you want to scan only results containing GET parameters? [Y/n] "
-                    test = readInput(message, default="Y")
-                    kb.scanOnlyGoogleGETs = test.lower() != 'n'
-                if not kb.scanOnlyGoogleGETs:
-                    kb.targetUrls.add((_, conf.method, conf.data, conf.cookie))
-
     def getCookie(self):
         """
         This method is the first to be called when initializing a
         Google dorking object through this library. It is used to
         retrieve the Google session cookie needed to perform the
         further search
         """

         try:
             conn = self.opener.open("http://www.google.com/ncr")
-            _ = conn.info()
+            _ = conn.info() # retrieve session cookie
         except urllib2.HTTPError, e:
             _ = e.info()
         except urllib2.URLError:
             errMsg = "unable to connect to Google"
             raise sqlmapConnectionException, errMsg

-    def search(self, googleDork):
+    def search(self, dork):
         """
         This method performs the effective search on Google providing
         the google dork and the Google session cookie
@@ -95,11 +58,11 @@ class Google:
         gpage = conf.googlePage if conf.googlePage > 1 else 1
         logger.info("using Google result page #%d" % gpage)

-        if not googleDork:
+        if not dork:
             return None

         url = "http://www.google.com/search?"
-        url += "q=%s&" % urlencode(googleDork, convall=True)
+        url += "q=%s&" % urlencode(dork, convall=True)
         url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
         url += "&start=%d" % ((gpage-1) * 100)
@@ -136,11 +99,11 @@ class Google:
             errMsg = "unable to connect to Google"
             raise sqlmapConnectionException, errMsg

-        self._matches = self._parsePage(page)
+        retVal = [urllib.unquote(match.group(1)) for match in re.finditer(GOOGLE_REGEX, page, re.I | re.S)]

-        if not self._matches and "detected unusual traffic" in page:
+        if not retVal and "detected unusual traffic" in page:
             warnMsg = "Google has detected 'unusual' traffic from "
             warnMsg += "this computer disabling further searches"
             raise sqlmapGenericException, warnMsg

-        return self._matches
+        return retVal
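
With _parsePage() inlined, extraction is a single pass of re.finditer over the raw results page. A toy reproduction against a fabricated snippet (GOOGLE_REGEX is defined in lib/core/settings.py; the pattern below is a simplified assumption about the 2012-era "/url?q=..." result links):

# Toy reproduction of the inlined link extraction; GOOGLE_REGEX is an
# assumed, simplified pattern, not sqlmap's exact one.
import re
import urllib

GOOGLE_REGEX = r"url\?q=(http[^\"'>]+?)&(amp;)?sa=U&"  # assumption

page = '<a href="/url?q=http://www.target.com/page.php%3Fid%3D1&amp;sa=U&amp;ei=x">result</a>'

retVal = [urllib.unquote(match.group(1)) for match in re.finditer(GOOGLE_REGEX, page, re.I | re.S)]
print(retVal)  # ['http://www.target.com/page.php?id=1']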