Patch for Issue #169

Author: Miroslav Stampar
Date:   2013-01-09 15:22:21 +01:00
parent 55a552ddc4
commit 3d4f381ab5
2 changed files with 110 additions and 113 deletions

Changed file 1 of 2:

@@ -134,7 +134,7 @@ from lib.request.httpshandler import HTTPSHandler
 from lib.request.rangehandler import HTTPRangeHandler
 from lib.request.redirecthandler import SmartRedirectHandler
 from lib.request.templates import getPageTemplate
-from lib.utils.crawler import Crawler
+from lib.utils.crawler import crawl
 from lib.utils.deps import checkDependencies
 from lib.utils.google import Google
 from thirdparty.colorama.initialise import init as coloramainit
@@ -461,8 +461,7 @@ def _setCrawler():
     if not conf.crawlDepth:
         return
 
-    crawler = Crawler()
-    crawler.getTargetUrls()
+    crawl(conf.url)
 
 def _setGoogleDorking():
     """
@@ -570,15 +569,19 @@ def _findPageForms():
     if not conf.forms or conf.crawlDepth:
         return
 
-    if not checkConnection():
+    if conf.url and not checkConnection():
        return
 
     infoMsg = "searching for forms"
     logger.info(infoMsg)
 
-    page, _ = Request.queryPage(content=True)
-
-    findPageForms(page, conf.url, True, True)
+    if not conf.bulkFile:
+        page, _ = Request.queryPage(content=True)
+        findPageForms(page, conf.url, True, True)
+    else:
+        for target, _, _, _ in kb.targets[:]:
+            page, _, _= Request.getPage(url=target, crawling=True, raise404=False)
+            findPageForms(page, target, False, True)
 
 def _setDBMSAuthentication():
     """
@@ -1961,8 +1964,8 @@ def _basicOptionValidation():
         errMsg = "maximum number of used threads is %d avoiding possible connection issues" % MAX_NUMBER_OF_THREADS
         raise SqlmapSyntaxException(errMsg)
 
-    if conf.forms and not conf.url:
-        errMsg = "switch '--forms' requires usage of option '-u' (--url)"
+    if conf.forms and not any ((conf.url, conf.bulkFile)):
+        errMsg = "switch '--forms' requires usage of option '-u' (--url) or '-m'"
         raise SqlmapSyntaxException(errMsg)
 
     if conf.requestFile and conf.url:
@@ -2005,8 +2008,8 @@ def _basicOptionValidation():
         errMsg = "option '--proxy' is incompatible with switch '--ignore-proxy'"
         raise SqlmapSyntaxException(errMsg)
 
-    if conf.forms and any([conf.logFile, conf.bulkFile, conf.direct, conf.requestFile, conf.googleDork]):
-        errMsg = "switch '--forms' is compatible only with option '-u' (--url)"
+    if conf.forms and any([conf.logFile, conf.direct, conf.requestFile, conf.googleDork]):
+        errMsg = "switch '--forms' is compatible only with options '-u' (--url) and '-m'"
         raise SqlmapSyntaxException(errMsg)
 
     if conf.timeSec < 1:
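
Taken together, the two validation hunks mean '--forms' now pairs with either a single URL (-u) or a bulk file (-m), while remaining incompatible with -l, -d, -r and -g. A standalone sketch of the combined rule, where conf stands in for sqlmap's parsed options and a generic exception replaces SqlmapSyntaxException:

def validateFormsOptions(conf):
    # sketch only; names mirror the two hunks above
    if conf.forms and not any((conf.url, conf.bulkFile)):
        raise Exception("switch '--forms' requires usage of option '-u' (--url) or '-m'")
    if conf.forms and any([conf.logFile, conf.direct, conf.requestFile, conf.googleDork]):
        raise Exception("switch '--forms' is compatible only with options '-u' (--url) and '-m'")
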

Changed file 2 of 2:

@@ -25,116 +25,110 @@ from lib.request.connect import Connect as Request
 from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
 from thirdparty.oset.pyoset import oset
 
-class Crawler(object):
-    """
-    This class defines methods used to perform crawling (command
-    line option '--crawl'
-    """
-
-    def getTargetUrls(self):
-        try:
-            threadData = getCurrentThreadData()
-            threadData.shared.value = oset()
-
-            def crawlThread():
-                threadData = getCurrentThreadData()
-
-                while kb.threadContinue:
-                    with kb.locks.limit:
-                        if threadData.shared.unprocessed:
-                            current = threadData.shared.unprocessed.pop()
-                        else:
-                            break
-
-                    content = None
-                    try:
-                        if current:
-                            content = Request.getPage(url=current, crawling=True, raise404=False)[0]
-                    except SqlmapConnectionException, e:
-                        errMsg = "connection exception detected (%s). skipping " % e
-                        errMsg += "url '%s'" % current
-                        logger.critical(errMsg)
-                    except httplib.InvalidURL, e:
-                        errMsg = "invalid url detected (%s). skipping " % e
-                        errMsg += "url '%s'" % current
-                        logger.critical(errMsg)
-
-                    if not kb.threadContinue:
-                        break
-
-                    if isinstance(content, unicode):
-                        try:
-                            match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
-                            if match:
-                                content = "<html>%s</html>" % match.group(1)
-
-                            soup = BeautifulSoup(content)
-                            tags = soup('a')
-
-                            if not tags:
-                                tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
-
-                            for tag in tags:
-                                href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
-
-                                if href:
-                                    url = urlparse.urljoin(conf.url, href)
-
-                                    # flag to know if we are dealing with the same target host
-                                    _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, conf.url)))
-
-                                    if conf.scope:
-                                        if not re.search(conf.scope, url, re.I):
-                                            continue
-                                    elif not _:
-                                        continue
-
-                                    if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
-                                        with kb.locks.value:
-                                            threadData.shared.deeper.add(url)
-                                            if re.search(r"(.*?)\?(.+)", url):
-                                                threadData.shared.value.add(url)
-                        except UnicodeEncodeError: # for non-HTML files
-                            pass
-                        finally:
-                            if conf.forms:
-                                findPageForms(content, current, False, True)
-
-                    if conf.verbose in (1, 2):
-                        threadData.shared.count += 1
-                        status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
-                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
-
-            threadData.shared.deeper = set()
-            threadData.shared.unprocessed = set([conf.url])
-
-            logger.info("starting crawler")
-
-            for i in xrange(conf.crawlDepth):
-                if i > 0 and conf.threads == 1:
-                    singleTimeWarnMessage("running in a single-thread mode. This could take a while.")
-                threadData.shared.count = 0
-                threadData.shared.length = len(threadData.shared.unprocessed)
-                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
-                logger.info("searching for links with depth %d" % (i + 1))
-                runThreads(numThreads, crawlThread)
-                clearConsoleLine(True)
-
-                if threadData.shared.deeper:
-                    threadData.shared.unprocessed = set(threadData.shared.deeper)
-                else:
-                    break
-
-        except KeyboardInterrupt:
-            warnMsg = "user aborted during crawling. sqlmap "
-            warnMsg += "will use partial list"
-            logger.warn(warnMsg)
-
-        finally:
-            clearConsoleLine(True)
-
-            if not threadData.shared.value:
-                warnMsg = "no usable links found (with GET parameters)"
-                logger.warn(warnMsg)
-            else:
-                for url in threadData.shared.value:
-                    kb.targets.add(( url, None, None, None ))
+def crawl(target):
+    try:
+        threadData = getCurrentThreadData()
+        threadData.shared.value = oset()
+
+        def crawlThread():
+            threadData = getCurrentThreadData()
+
+            while kb.threadContinue:
+                with kb.locks.limit:
+                    if threadData.shared.unprocessed:
+                        current = threadData.shared.unprocessed.pop()
+                    else:
+                        break
+
+                content = None
+                try:
+                    if current:
+                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
+                except SqlmapConnectionException, e:
+                    errMsg = "connection exception detected (%s). skipping " % e
+                    errMsg += "url '%s'" % current
+                    logger.critical(errMsg)
+                except httplib.InvalidURL, e:
+                    errMsg = "invalid url detected (%s). skipping " % e
+                    errMsg += "url '%s'" % current
+                    logger.critical(errMsg)
+
+                if not kb.threadContinue:
+                    break
+
+                if isinstance(content, unicode):
+                    try:
+                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
+                        if match:
+                            content = "<html>%s</html>" % match.group(1)
+
+                        soup = BeautifulSoup(content)
+                        tags = soup('a')
+
+                        if not tags:
+                            tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
+
+                        for tag in tags:
+                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
+
+                            if href:
+                                url = urlparse.urljoin(target, href)
+
+                                # flag to know if we are dealing with the same target host
+                                _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, target)))
+
+                                if conf.scope:
+                                    if not re.search(conf.scope, url, re.I):
+                                        continue
+                                elif not _:
+                                    continue
+
+                                if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
+                                    with kb.locks.value:
+                                        threadData.shared.deeper.add(url)
+                                        if re.search(r"(.*?)\?(.+)", url):
+                                            threadData.shared.value.add(url)
+                    except UnicodeEncodeError: # for non-HTML files
+                        pass
+                    finally:
+                        if conf.forms:
+                            findPageForms(content, current, False, True)
+
+                if conf.verbose in (1, 2):
+                    threadData.shared.count += 1
+                    status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
+                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
+
+        threadData.shared.deeper = set()
+        threadData.shared.unprocessed = set([target])
+
+        logger.info("starting crawler")
+
+        for i in xrange(conf.crawlDepth):
+            if i > 0 and conf.threads == 1:
+                singleTimeWarnMessage("running in a single-thread mode. This could take a while.")
+            threadData.shared.count = 0
+            threadData.shared.length = len(threadData.shared.unprocessed)
+            numThreads = min(conf.threads, len(threadData.shared.unprocessed))
+            logger.info("searching for links with depth %d" % (i + 1))
+            runThreads(numThreads, crawlThread)
+            clearConsoleLine(True)
+
+            if threadData.shared.deeper:
+                threadData.shared.unprocessed = set(threadData.shared.deeper)
+            else:
+                break
+
+    except KeyboardInterrupt:
+        warnMsg = "user aborted during crawling. sqlmap "
+        warnMsg += "will use partial list"
+        logger.warn(warnMsg)
+
+    finally:
+        clearConsoleLine(True)
+
+        if not threadData.shared.value:
+            warnMsg = "no usable links found (with GET parameters)"
+            logger.warn(warnMsg)
+        else:
+            for url in threadData.shared.value:
+                kb.targets.add((url, None, None, None))
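
Structurally, crawl() is a breadth-first walk: worker threads pop links of the current depth from a shared set, extract anchors from each response, and feed a "deeper" set that becomes the next round's work queue; only links carrying GET parameters are kept as scan targets. Stripped of sqlmap's threading, locking and logging, the control flow reduces to the following single-threaded sketch (extract_links is a naive stand-in for the BeautifulSoup/regex extraction above, and the whole snippet is illustrative rather than sqlmap code):

import re
import urllib2
import urlparse

def extract_links(url):
    # naive anchor extraction standing in for the BeautifulSoup logic above
    content = urllib2.urlopen(url).read()
    return [urlparse.urljoin(url, href) for href in re.findall(r'href="([^">]+)"', content)]

def crawl_sketch(target, depth):
    found = set()                        # links carrying GET parameters
    unprocessed = set([target])
    for _ in xrange(depth):
        deeper = set()
        for current in unprocessed:
            for url in extract_links(current):
                deeper.add(url)
                if '?' in url:
                    found.add(url)
        if not deeper:
            break
        unprocessed = deeper
    return found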