Fixes related to the #1206

This commit is contained in:
Miroslav Stampar 2015-05-11 10:56:10 +02:00
parent c8aac19f75
commit 91bc02e3ba

View File

@ -239,7 +239,8 @@ class Connect(object):
retrying = kwargs.get("retrying", False)
crawling = kwargs.get("crawling", False)
skipRead = kwargs.get("skipRead", False)
is_websocket = conf.url.startswith("ws")
websocket_ = url.lower().startswith("ws")
if not urlparse.urlsplit(url).netloc:
url = urlparse.urljoin(conf.url, url)
@ -372,75 +373,89 @@ class Connect(object):
url = unicodeencode(url)
post = unicodeencode(post, kb.pageEncoding)
if is_websocket:
# WebSocket will add Host field of headers automatically
disallowed_headers = ['Host']
if websocket_:
ws = websocket.WebSocket()
ws.connect(url, header=["%s: %s" % _ for _ in headers.items() if _[0] not in disallowed_headers], cookie=cookie)
ws.send(urldecode(post) if post else '')
response = ws.recv()
ws.connect(url, header=("%s: %s" % _ for _ in headers.items() if _[0] not in ("Host",)), cookie=cookie) # WebSocket will add Host field of headers automatically
ws.send(urldecode(post or ""))
page = ws.recv()
ws.close()
# WebSocket class does not have response headers
return response, {}, 101
code = ws.status
status = httplib.responses[code]
class _(dict):
pass
responseHeaders = _(ws.getheaders())
responseHeaders.headers = ["%s: %s\r\n" % (_[0].capitalize(), _[1]) for _ in responseHeaders.items()]
elif method and method not in (HTTPMETHOD.GET, HTTPMETHOD.POST):
method = unicodeencode(method)
req = MethodRequest(url, post, headers)
req.set_method(method)
requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in responseHeaders.items())
requestMsg += "\n%s" % requestHeaders
if post is not None:
requestMsg += "\n\n%s" % getUnicode(post)
requestMsg += "\n"
threadData.lastRequestMsg = requestMsg
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
else:
req = urllib2.Request(url, post, headers)
if method and method not in (HTTPMETHOD.GET, HTTPMETHOD.POST):
method = unicodeencode(method)
req = MethodRequest(url, post, headers)
req.set_method(method)
else:
req = urllib2.Request(url, post, headers)
requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in req.header_items())
requestHeaders += "\n".join("%s: %s" % (getUnicode(key.capitalize() if isinstance(key, basestring) else key), getUnicode(value)) for (key, value) in req.header_items())
if not getRequestHeader(req, HTTP_HEADER.COOKIE) and conf.cj:
conf.cj._policy._now = conf.cj._now = int(time.time())
cookies = conf.cj._cookies_for_request(req)
requestHeaders += "\n%s" % ("Cookie: %s" % ";".join("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value)) for cookie in cookies))
if not getRequestHeader(req, HTTP_HEADER.COOKIE) and conf.cj:
conf.cj._policy._now = conf.cj._now = int(time.time())
cookies = conf.cj._cookies_for_request(req)
requestHeaders += "\n%s" % ("Cookie: %s" % ";".join("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value)) for cookie in cookies))
if post is not None:
if not getRequestHeader(req, HTTP_HEADER.CONTENT_LENGTH):
requestHeaders += "\n%s: %d" % (string.capwords(HTTP_HEADER.CONTENT_LENGTH), len(post))
if post is not None:
if not getRequestHeader(req, HTTP_HEADER.CONTENT_LENGTH):
requestHeaders += "\n%s: %d" % (string.capwords(HTTP_HEADER.CONTENT_LENGTH), len(post))
if not getRequestHeader(req, HTTP_HEADER.CONNECTION):
requestHeaders += "\n%s: close" % HTTP_HEADER.CONNECTION
if not getRequestHeader(req, HTTP_HEADER.CONNECTION):
requestHeaders += "\n%s: close" % HTTP_HEADER.CONNECTION
requestMsg += "\n%s" % requestHeaders
requestMsg += "\n%s" % requestHeaders
if post is not None:
requestMsg += "\n\n%s" % getUnicode(post)
if post is not None:
requestMsg += "\n\n%s" % getUnicode(post)
requestMsg += "\n"
requestMsg += "\n"
threadData.lastRequestMsg = requestMsg
threadData.lastRequestMsg = requestMsg
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
conn = urllib2.urlopen(req)
conn = urllib2.urlopen(req)
if not kb.authHeader and getRequestHeader(req, HTTP_HEADER.AUTHORIZATION) and (conf.authType or "").lower() == AUTH_TYPE.BASIC.lower():
kb.authHeader = getRequestHeader(req, HTTP_HEADER.AUTHORIZATION)
if not kb.authHeader and getRequestHeader(req, HTTP_HEADER.AUTHORIZATION) and (conf.authType or "").lower() == AUTH_TYPE.BASIC.lower():
kb.authHeader = getRequestHeader(req, HTTP_HEADER.AUTHORIZATION)
if not kb.proxyAuthHeader and getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION):
kb.proxyAuthHeader = getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION)
if not kb.proxyAuthHeader and getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION):
kb.proxyAuthHeader = getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION)
# Return response object
if response:
return conn, None, None
# Return response object
if response:
return conn, None, None
# Get HTTP response
if hasattr(conn, 'redurl'):
page = (threadData.lastRedirectMsg[1] if kb.redirectChoice == REDIRECTION.NO\
else Connect._connReadProxy(conn)) if not skipRead else None
skipLogTraffic = kb.redirectChoice == REDIRECTION.NO
code = conn.redcode
else:
page = Connect._connReadProxy(conn) if not skipRead else None
# Get HTTP response
if hasattr(conn, 'redurl'):
page = (threadData.lastRedirectMsg[1] if kb.redirectChoice == REDIRECTION.NO\
else Connect._connReadProxy(conn)) if not skipRead else None
skipLogTraffic = kb.redirectChoice == REDIRECTION.NO
code = conn.redcode
else:
page = Connect._connReadProxy(conn) if not skipRead else None
code = code or conn.code
responseHeaders = conn.info()
responseHeaders[URI_HTTP_HEADER] = conn.geturl()
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
status = getUnicode(conn.msg)
code = code or conn.code
responseHeaders = conn.info()
responseHeaders[URI_HTTP_HEADER] = conn.geturl()
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
status = getUnicode(conn.msg)
if extractRegexResult(META_REFRESH_REGEX, page) and not refreshing:
url = extractRegexResult(META_REFRESH_REGEX, page)
@ -474,7 +489,7 @@ class Connect(object):
pass
# Explicit closing of connection object
if not conf.keepAlive:
if conn and not conf.keepAlive:
try:
if hasattr(conn.fp, '_sock'):
conn.fp._sock.close()