diff --git a/doc/THANKS b/doc/THANKS index 0cb9595f7..8bbc051ee 100644 --- a/doc/THANKS +++ b/doc/THANKS @@ -648,6 +648,9 @@ Phil P <@superevr> ragos for reporting a minor bug +rmillet + for reporting a bug + shiftzwei for reporting a couple of bugs diff --git a/lib/core/common.py b/lib/core/common.py index ea1d2c32c..9335fb56a 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -3127,4 +3127,12 @@ def findPageForms(content, url, raise_=False, addToTargets=False): for target in retVal: kb.targetUrls.add(target) + return retVal + +def getHostHeader(url): + retVal = urlparse.urlparse(url).netloc + + if any(map(lambda x: retVal.endswith(':%d' % x), [80, 443])): + retVal = retVal.split(':')[0] + return retVal \ No newline at end of file diff --git a/lib/request/connect.py b/lib/request/connect.py index 2d51197c7..c40f2a82d 100644 --- a/lib/request/connect.py +++ b/lib/request/connect.py @@ -25,6 +25,7 @@ from lib.core.common import cpuThrottle from lib.core.common import extractRegexResult from lib.core.common import getCurrentThreadData from lib.core.common import getFilteredPageContent +from lib.core.common import getHostHeader from lib.core.common import getUnicode from lib.core.common import logHTTPTraffic from lib.core.common import parseTargetUrl @@ -234,10 +235,7 @@ class Connect: headers[HTTPHEADER.ACCEPT] = HTTP_ACCEPT_HEADER_VALUE - headers[HTTPHEADER.HOST] = urlparse.urlparse(url).netloc - - if any(map(lambda x: headers[HTTPHEADER.HOST].endswith(':%d' % x), [80, 443])): - headers[HTTPHEADER.HOST] = headers[HTTPHEADER.HOST].split(':')[0] + headers[HTTPHEADER.HOST] = getHostHeader(url) if auxHeaders: for key, item in auxHeaders.items(): diff --git a/lib/request/redirecthandler.py b/lib/request/redirecthandler.py index 3d95f8f96..70e15c96d 100644 --- a/lib/request/redirecthandler.py +++ b/lib/request/redirecthandler.py @@ -12,6 +12,7 @@ import urlparse from lib.core.data import conf from lib.core.data import logger +from lib.core.common import getHostHeader from lib.core.common import getUnicode from lib.core.common import logHTTPTraffic from lib.core.enums import HTTPHEADER @@ -28,6 +29,16 @@ class SmartRedirectHandler(urllib2.HTTPRedirectHandler): # assuming we're in a loop max_redirections = 10 + def _get_header_redirect(self, headers): + retVal = None + + if "location" in headers: + retVal = headers.getheaders("location")[0].split("?")[0] + elif "uri" in headers: + retVal = headers.getheaders("uri")[0].split("?")[0] + + return retVal + def common_http_redirect(self, result, headers, code, content, msg): content = decodePage(content, headers.get(HTTPHEADER.CONTENT_ENCODING), headers.get(HTTPHEADER.CONTENT_TYPE)) @@ -49,10 +60,8 @@ class SmartRedirectHandler(urllib2.HTTPRedirectHandler): logger.log(7, responseMsg) if result: - if "location" in headers: - result.redurl = headers.getheaders("location")[0].split("?")[0] - elif "uri" in headers: - result.redurl = headers.getheaders("uri")[0].split("?")[0] + if self._get_header_redirect(headers): + result.redurl = self._get_header_redirect(headers) if hasattr(result, 'redurl'): if not urlparse.urlsplit(result.redurl).netloc: @@ -76,6 +85,9 @@ class SmartRedirectHandler(urllib2.HTTPRedirectHandler): dbgMsg += "redirect response content (%s)" % msg logger.debug(dbgMsg) + if self._get_header_redirect(headers): + req.headers[HTTPHEADER.HOST] = getHostHeader(self._get_header_redirect(headers)) + result = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers) return self.common_http_redirect(result, headers, code, content, msg) @@ -90,6 +102,9 @@ class SmartRedirectHandler(urllib2.HTTPRedirectHandler): dbgMsg += "redirect response content (%s)" % msg logger.debug(dbgMsg) + if self._get_header_redirect(headers): + req.headers[HTTPHEADER.HOST] = getHostHeader(self._get_header_redirect(headers)) + result = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers) return self.common_http_redirect(result, headers, code, content, msg)