Minor improvement for crawling

Miroslav Stampar 2019-10-02 13:08:13 +02:00
parent d72660ef04
commit 5cc36b452e
2 changed files with 4 additions and 3 deletions

lib/core/settings.py

@@ -18,7 +18,7 @@ from lib.core.enums import OS
from thirdparty.six import unichr as _unichr
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.3.10.0"
VERSION = "1.3.10.1"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
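Side note, not part of the diff: a minimal sketch of how the TYPE expression above resolves for the old and new version strings, given the <major>.<minor>.<month>.<monthly commit> scheme noted in the comment:

    # A non-zero fourth version component marks a per-commit "dev" build;
    # a trailing ".0" marks the stable monthly release
    for version in ("1.3.10.0", "1.3.10.1"):
        type_ = "dev" if version.count('.') > 2 and version.split('.')[-1] != '0' else "stable"
        print(version, "->", type_)  # 1.3.10.0 -> stable, 1.3.10.1 -> dev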

lib/utils/crawler.py

@@ -15,6 +15,7 @@ import time
from lib.core.common import checkSameHost
from lib.core.common import clearConsoleLine
from lib.core.common import dataToStdout
+from lib.core.common import extractRegexResult
from lib.core.common import findPageForms
from lib.core.common import getSafeExString
from lib.core.common import openFile
@@ -92,7 +93,7 @@ def crawl(target):
soup = BeautifulSoup(content)
tags = soup('a')
-tags += re.finditer(r'(?i)<a[^>]+href=["\'](?P<href>[^>"\']+)', content)
+tags += re.finditer(r'(?i)\b(href|src)=["\'](?P<href>[^>"\']+)', content)
for tag in tags:
    href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
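What the broadened pattern buys, shown with a standalone snippet (not from the repository): the old regex only caught href attributes inside <a> tags, while the new one also picks up href/src attributes on any tag (e.g. <script src=...>, <frame src=...>), yielding more crawl candidates:

    import re

    content = '<a href="/page?id=1">x</a><script src="/static/app.js"></script>'

    old = re.finditer(r'(?i)<a[^>]+href=["\'](?P<href>[^>"\']+)', content)
    new = re.finditer(r'(?i)\b(href|src)=["\'](?P<href>[^>"\']+)', content)

    print([m.group("href") for m in old])  # ['/page?id=1']
    print([m.group("href") for m in new])  # ['/page?id=1', '/static/app.js']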
@@ -111,7 +112,7 @@ def crawl(target):
elif not _:
    continue
-if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS:
+if (extractRegexResult(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower() not in CRAWL_EXCLUDE_EXTENSIONS:
    with kb.locks.value:
        threadData.shared.deeper.add(url)
        if re.search(r"(.*?)\?(.+)", url):
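Why the extension check changed, shown with a standalone sketch (extract_regex_result below is an assumed stand-in mimicking lib.core.common.extractRegexResult, i.e. returning the named "result" group of a match, or None): the old naive split misreads the extension as soon as the URL carries a query string, while the new regex takes it from the path part only:

    import re

    def extract_regex_result(regex, content):
        # Assumed stand-in for lib.core.common.extractRegexResult
        match = re.search(regex, content or "")
        return match.group("result") if match else None

    url = "http://target/archive.tar?download=1"
    # Old check: extension garbled by the query string
    print(url.split('.')[-1].lower())  # 'tar?download=1'
    # New check: extension taken only from the path, before any '?'
    print((extract_regex_result(r"\A[^?]+\.(?P<result>\w+)(\?|\Z)", url) or "").lower())  # 'tar'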