mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-06-19 04:23:12 +03:00
Fix for Issue #324 (crawling when HTML is not well-formed)
This commit is contained in:
parent
127b880577
commit
cb91729913
|
@ -64,10 +64,21 @@ class Crawler(object):
|
||||||
|
|
||||||
if isinstance(content, unicode):
|
if isinstance(content, unicode):
|
||||||
try:
|
try:
|
||||||
|
match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
|
||||||
|
if match:
|
||||||
|
content = "<html>%s</html>" % match.group(1)
|
||||||
|
|
||||||
soup = BeautifulSoup(content)
|
soup = BeautifulSoup(content)
|
||||||
for tag in soup('a'):
|
tags = soup('a')
|
||||||
if tag.get("href"):
|
|
||||||
url = urlparse.urljoin(conf.url, tag.get("href"))
|
if not tags:
|
||||||
|
tags = re.finditer(r'(?si)<a[^>]+href="(?P<href>[^>"]+)"', content)
|
||||||
|
|
||||||
|
for tag in tags:
|
||||||
|
href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
|
||||||
|
|
||||||
|
if href:
|
||||||
|
url = urlparse.urljoin(conf.url, href)
|
||||||
|
|
||||||
# flag to know if we are dealing with the same target host
|
# flag to know if we are dealing with the same target host
|
||||||
_ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, conf.url)))
|
_ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, conf.url)))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user