diff --git a/lib/request/connect.py b/lib/request/connect.py index 549a261fb..436bbaeaa 100644 --- a/lib/request/connect.py +++ b/lib/request/connect.py @@ -239,7 +239,8 @@ class Connect(object): retrying = kwargs.get("retrying", False) crawling = kwargs.get("crawling", False) skipRead = kwargs.get("skipRead", False) - is_websocket = conf.url.startswith("ws") + + websocket_ = url.lower().startswith("ws") if not urlparse.urlsplit(url).netloc: url = urlparse.urljoin(conf.url, url) @@ -372,75 +373,89 @@ class Connect(object): url = unicodeencode(url) post = unicodeencode(post, kb.pageEncoding) - if is_websocket: - # WebSocket will add Host field of headers automatically - disallowed_headers = ['Host'] + if websocket_: ws = websocket.WebSocket() - ws.connect(url, header=["%s: %s" % _ for _ in headers.items() if _[0] not in disallowed_headers], cookie=cookie) - ws.send(urldecode(post) if post else '') - response = ws.recv() + ws.connect(url, header=("%s: %s" % _ for _ in headers.items() if _[0] not in ("Host",)), cookie=cookie) # WebSocket will add Host field of headers automatically + ws.send(urldecode(post or "")) + page = ws.recv() ws.close() - # WebSocket class does not have response headers - return response, {}, 101 + code = ws.status + status = httplib.responses[code] + class _(dict): + pass + responseHeaders = _(ws.getheaders()) + responseHeaders.headers = ["%s: %s\r\n" % (_[0].capitalize(), _[1]) for _ in responseHeaders.items()] - elif method and method not in (HTTPMETHOD.GET, HTTPMETHOD.POST): - method = unicodeencode(method) - req = MethodRequest(url, post, headers) - req.set_method(method) + requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items()) + requestMsg += "\n%s" % requestHeaders + + if post is not None: + requestMsg += "\n\n%s" % getUnicode(post) + + requestMsg += "\n" + + threadData.lastRequestMsg = requestMsg + + logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg) else: - req = urllib2.Request(url, post, headers) + if method and method not in (HTTPMETHOD.GET, HTTPMETHOD.POST): + method = unicodeencode(method) + req = MethodRequest(url, post, headers) + req.set_method(method) + else: + req = urllib2.Request(url, post, headers) - requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in req.header_items()) + requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in req.header_items()) - if not getRequestHeader(req, HTTP_HEADER.COOKIE) and conf.cj: - conf.cj._policy._now = conf.cj._now = int(time.time()) - cookies = conf.cj._cookies_for_request(req) - requestHeaders += "\n%s" % ("Cookie: %s" % ";".join("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value)) for cookie in cookies)) + if not getRequestHeader(req, HTTP_HEADER.COOKIE) and conf.cj: + conf.cj._policy._now = conf.cj._now = int(time.time()) + cookies = conf.cj._cookies_for_request(req) + requestHeaders += "\n%s" % ("Cookie: %s" % ";".join("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value)) for cookie in cookies)) - if post is not None: - if not getRequestHeader(req, HTTP_HEADER.CONTENT_LENGTH): - requestHeaders += "\n%s: %d" % (string.capwords(HTTP_HEADER.CONTENT_LENGTH), len(post)) + if post is not None: + if not getRequestHeader(req, HTTP_HEADER.CONTENT_LENGTH): + requestHeaders += "\n%s: %d" % (string.capwords(HTTP_HEADER.CONTENT_LENGTH), len(post)) - if not getRequestHeader(req, HTTP_HEADER.CONNECTION): - requestHeaders += "\n%s: close" % HTTP_HEADER.CONNECTION + if not getRequestHeader(req, HTTP_HEADER.CONNECTION): + requestHeaders += "\n%s: close" % HTTP_HEADER.CONNECTION - requestMsg += "\n%s" % requestHeaders + requestMsg += "\n%s" % requestHeaders - if post is not None: - requestMsg += "\n\n%s" % getUnicode(post) + if post is not None: + requestMsg += "\n\n%s" % getUnicode(post) - requestMsg += "\n" + requestMsg += "\n" - threadData.lastRequestMsg = requestMsg + threadData.lastRequestMsg = requestMsg - logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg) + logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg) - conn = urllib2.urlopen(req) + conn = urllib2.urlopen(req) - if not kb.authHeader and getRequestHeader(req, HTTP_HEADER.AUTHORIZATION) and (conf.authType or "").lower() == AUTH_TYPE.BASIC.lower(): - kb.authHeader = getRequestHeader(req, HTTP_HEADER.AUTHORIZATION) + if not kb.authHeader and getRequestHeader(req, HTTP_HEADER.AUTHORIZATION) and (conf.authType or "").lower() == AUTH_TYPE.BASIC.lower(): + kb.authHeader = getRequestHeader(req, HTTP_HEADER.AUTHORIZATION) - if not kb.proxyAuthHeader and getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION): - kb.proxyAuthHeader = getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION) + if not kb.proxyAuthHeader and getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION): + kb.proxyAuthHeader = getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION) - # Return response object - if response: - return conn, None, None + # Return response object + if response: + return conn, None, None - # Get HTTP response - if hasattr(conn, 'redurl'): - page = (threadData.lastRedirectMsg[1] if kb.redirectChoice == REDIRECTION.NO\ - else Connect._connReadProxy(conn)) if not skipRead else None - skipLogTraffic = kb.redirectChoice == REDIRECTION.NO - code = conn.redcode - else: - page = Connect._connReadProxy(conn) if not skipRead else None + # Get HTTP response + if hasattr(conn, 'redurl'): + page = (threadData.lastRedirectMsg[1] if kb.redirectChoice == REDIRECTION.NO\ + else Connect._connReadProxy(conn)) if not skipRead else None + skipLogTraffic = kb.redirectChoice == REDIRECTION.NO + code = conn.redcode + else: + page = Connect._connReadProxy(conn) if not skipRead else None - code = code or conn.code - responseHeaders = conn.info() - responseHeaders[URI_HTTP_HEADER] = conn.geturl() - page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE)) - status = getUnicode(conn.msg) + code = code or conn.code + responseHeaders = conn.info() + responseHeaders[URI_HTTP_HEADER] = conn.geturl() + page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE)) + status = getUnicode(conn.msg) if extractRegexResult(META_REFRESH_REGEX, page) and not refreshing: url = extractRegexResult(META_REFRESH_REGEX, page) @@ -474,7 +489,7 @@ class Connect(object): pass # Explicit closing of connection object - if not conf.keepAlive: + if conn and not conf.keepAlive: try: if hasattr(conn.fp, '_sock'): conn.fp._sock.close()