From 305115a68bfdf1800f29fd79cd3dfca591f5be61 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Sun, 3 Apr 2011 15:02:52 +0000 Subject: [PATCH] important improvement of data handling (POST data and header values) --- extra/keepalive/keepalive.py | 13 +++++-------- lib/core/common.py | 31 ++++++++++++++++--------------- lib/request/connect.py | 6 +++--- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/extra/keepalive/keepalive.py b/extra/keepalive/keepalive.py index 184acea04..bb032ebc1 100644 --- a/extra/keepalive/keepalive.py +++ b/extra/keepalive/keepalive.py @@ -71,7 +71,8 @@ EXTRA ATTRIBUTES AND METHODS """ from httplib import _CS_REQ_STARTED, _CS_REQ_SENT, _CS_IDLE, CannotSendHeader -from lib.core.common import unicodeToSafeHTMLValue +from lib.core.common import encodeUnicode +from lib.core.data import kb import threading import urllib2 @@ -193,8 +194,6 @@ class HTTPHandler(urllib2.HTTPHandler): r._host = host r._url = req.get_full_url() - - #if r.status == 200 or not HANDLE_ERRORS: #return r if r.status == 200 or not HANDLE_ERRORS: @@ -316,7 +315,6 @@ class HTTPConnection(httplib.HTTPConnection): self._headers[header] = value - def endheaders(self): """Indicate that the last header line has been sent to the server.""" @@ -325,10 +323,6 @@ class HTTPConnection(httplib.HTTPConnection): else: raise CannotSendHeader() - for key, item in self._headers.items(): - del self._headers[key] - self._headers[unicodeToSafeHTMLValue(key)] = unicodeToSafeHTMLValue(item) - for header in ['Host', 'Accept-Encoding']: if header in self._headers: str = '%s: %s' % (header, self._headers[header]) @@ -341,6 +335,9 @@ class HTTPConnection(httplib.HTTPConnection): self._send_output() + def send(self, str): + httplib.HTTPConnection.send(self, encodeUnicode(str, kb.pageEncoding)) + ######################################################################### ##### TEST FUNCTIONS ######################################################################### diff --git a/lib/core/common.py b/lib/core/common.py index 84b4d7785..0e9bc1983 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -1724,6 +1724,22 @@ def getUnicode(value, encoding=None, system=False): except: return getUnicode(value, UNICODE_ENCODING) +def encodeUnicode(value, encoding=None): + """ + Return 8-bit string representation of the supplied unicode value: + + >>> encodeUnicode(u'test') + 'test' + """ + + retVal = value + if isinstance(value, unicode): + try: + retVal = value.encode(encoding or UNICODE_ENCODING) + except UnicodeEncodeError: + retVal = value.encode(UNICODE_ENCODING, errors="replace") + return retVal + # http://boredzo.org/blog/archives/2007-01-06/longest-common-prefix-in-python-2 def longestCommonPrefix(*sequences): if len(sequences) == 1: @@ -2262,21 +2278,6 @@ def filterListValue(value, regex): else: return value -def unicodeToSafeHTMLValue(value): - """ - Returns HTML representation of unicode string value safe for sending - over HTTP(s) - """ - - retVal = value - - if value: - for char in value: - if ord(char) > 127: - retVal = retVal.replace(char, "&#%d;" % ord(char)) - - return retVal - def showHttpErrorCodes(): """ Shows all HTTP error codes raised till now diff --git a/lib/request/connect.py b/lib/request/connect.py index 903a20666..7567a52aa 100644 --- a/lib/request/connect.py +++ b/lib/request/connect.py @@ -21,10 +21,10 @@ from lib.core.common import average from lib.core.common import calculateDeltaSeconds from lib.core.common import clearConsoleLine from lib.core.common import cpuThrottle +from lib.core.common import encodeUnicode from lib.core.common import extractRegexResult from lib.core.common import getCurrentThreadData from lib.core.common import getFilteredPageContent -from lib.core.common import unicodeToSafeHTMLValue from lib.core.common import getUnicode from lib.core.common import logHTTPTraffic from lib.core.common import parseTargetUrl @@ -173,9 +173,9 @@ class Connect: for key, item in headers.items(): del headers[key] - headers[unicodeToSafeHTMLValue(key)] = unicodeToSafeHTMLValue(item) + headers[encodeUnicode(key, kb.pageEncoding)] = encodeUnicode(item, kb.pageEncoding) - post = unicodeToSafeHTMLValue(post) + post = encodeUnicode(post, kb.pageEncoding) if method: req = MethodRequest(url, post, headers)