From 5cc36b452ef217496ff022100640c39087a4a14a Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Wed, 2 Oct 2019 13:08:13 +0200 Subject: [PATCH] Minor improvement for crawling --- lib/core/settings.py | 2 +- lib/utils/crawler.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/core/settings.py b/lib/core/settings.py index 182fc1168..af988f7c4 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -18,7 +18,7 @@ from lib.core.enums import OS from thirdparty.six import unichr as _unichr # sqlmap version (...) -VERSION = "1.3.10.0" +VERSION = "1.3.10.1" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) diff --git a/lib/utils/crawler.py b/lib/utils/crawler.py index 624f31dce..98f1cf544 100644 --- a/lib/utils/crawler.py +++ b/lib/utils/crawler.py @@ -15,6 +15,7 @@ import time from lib.core.common import checkSameHost from lib.core.common import clearConsoleLine from lib.core.common import dataToStdout +from lib.core.common import extractRegexResult from lib.core.common import findPageForms from lib.core.common import getSafeExString from lib.core.common import openFile @@ -92,7 +93,7 @@ def crawl(target): soup = BeautifulSoup(content) tags = soup('a') - tags += re.finditer(r'(?i)<a[^>]+href=["\'](?P<href>[^>"\']+)', content) + tags += re.finditer(r'(?i)\b(href|src)=["\'](?P<href>[^>"\']+)', content) for tag in tags: href = tag.get("href") if hasattr(tag, "get") else tag.group("href") @@ -111,7 +112,7 @@ def crawl(target): elif not _: continue - if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS: + if (extractRegexResult(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower() not in CRAWL_EXCLUDE_EXTENSIONS: with kb.locks.value: threadData.shared.deeper.add(url) if re.search(r"(.*?)\?(.+)", url):