diff --git a/lib/core/threads.py b/lib/core/threads.py index 849da683f..0d924f5eb 100644 --- a/lib/core/threads.py +++ b/lib/core/threads.py @@ -49,6 +49,7 @@ class _ThreadData(threading.local): self.lastQueryDuration = 0 self.lastRequestMsg = None self.lastRequestUID = 0 + self.lastRedirectURL = None self.resumed = False self.retriesCount = 0 self.seqMatcher = difflib.SequenceMatcher(None) diff --git a/lib/request/redirecthandler.py b/lib/request/redirecthandler.py index b4c08f8cf..3e2376cbf 100644 --- a/lib/request/redirecthandler.py +++ b/lib/request/redirecthandler.py @@ -117,9 +117,10 @@ class SmartRedirectHandler(urllib2.HTTPRedirectHandler): else: result = fp + threadData.lastRedirectURL = (threadData.lastRequestUID, redurl) + result.redcode = code result.redurl = redurl - return result http_error_301 = http_error_303 = http_error_307 = http_error_302 diff --git a/lib/utils/crawler.py b/lib/utils/crawler.py index 7b226751a..0cbe73ea3 100644 --- a/lib/utils/crawler.py +++ b/lib/utils/crawler.py @@ -72,6 +72,8 @@ def crawl(target): href = tag.get("href") if hasattr(tag, "get") else tag.group("href") if href: + if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID: + current = threadData.lastRedirectURL[1] url = urlparse.urljoin(current, href) # flag to know if we are dealing with the same target host