Minor improvement for crawling

Miroslav Stampar 2019-10-02 13:08:13 +02:00
parent d72660ef04
commit 5cc36b452e
2 changed files with 4 additions and 3 deletions

lib/core/settings.py

@@ -18,7 +18,7 @@ from lib.core.enums import OS
from thirdparty.six import unichr as _unichr
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.3.10.0"
VERSION = "1.3.10.1"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
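Side note, not part of the diff: a minimal sketch of how the TYPE expression above resolves for the old and new version strings, given the <major>.<minor>.<month>.<monthly commit> scheme noted in the comment:

    # A non-zero fourth version component marks a per-commit "dev" build;
    # a trailing ".0" marks the stable monthly release
    for version in ("1.3.10.0", "1.3.10.1"):
        type_ = "dev" if version.count('.') > 2 and version.split('.')[-1] != '0' else "stable"
        print(version, "->", type_)  # 1.3.10.0 -> stable, 1.3.10.1 -> dev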

lib/utils/crawler.py

@@ -15,6 +15,7 @@ import time
from lib.core.common import checkSameHost
from lib.core.common import clearConsoleLine
from lib.core.common import dataToStdout
+from lib.core.common import extractRegexResult
from lib.core.common import findPageForms
from lib.core.common import getSafeExString
from lib.core.common import openFile
@@ -92,7 +93,7 @@ def crawl(target):
soup = BeautifulSoup(content)
tags = soup('a')
-tags += re.finditer(r'(?i)<a[^>]+href=["\'](?P<href>[^>"\']+)', content)
+tags += re.finditer(r'(?i)\b(href|src)=["\'](?P<href>[^>"\']+)', content)
for tag in tags:
    href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
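What the broadened pattern buys, shown with a standalone snippet (not from the repository): the old regex only caught href attributes inside <a> tags, while the new one also picks up href/src attributes on any tag (e.g. <script src=...>, <frame src=...>), yielding more crawl candidates:

    import re

    content = '<a href="/page?id=1">x</a><script src="/static/app.js"></script>'

    old = re.finditer(r'(?i)<a[^>]+href=["\'](?P<href>[^>"\']+)', content)
    new = re.finditer(r'(?i)\b(href|src)=["\'](?P<href>[^>"\']+)', content)

    print([m.group("href") for m in old])  # ['/page?id=1']
    print([m.group("href") for m in new])  # ['/page?id=1', '/static/app.js']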
@@ -111,7 +112,7 @@ def crawl(target):
elif not _:
    continue
-if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
+if (extractRegexResult(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower() not in CRAWL_EXCLUDE_EXTENSIONS:
    with kb.locks.value:
        threadData.shared.deeper.add(url)
        if re.search(r"(.*?)\?(.+)", url):
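Why the extension check changed, shown with a standalone sketch (extract_regex_result below is an assumed stand-in mimicking lib.core.common.extractRegexResult, i.e. returning the named "result" group of a match, or None): the old naive split misreads the extension as soon as the URL carries a query string, while the new regex takes it from the path part only:

    import re

    def extract_regex_result(regex, content):
        # Assumed stand-in for lib.core.common.extractRegexResult
        match = re.search(regex, content or "")
        return match.group("result") if match else None

    url = "http://target/archive.tar?download=1"
    # Old check: extension garbled by the query string
    print(url.split('.')[-1].lower())  # 'tar?download=1'
    # New check: extension taken only from the path, before any '?'
    print((extract_regex_result(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower())  # 'tar'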