From 24435a2c20179bf822dd3369ff86b4666b8160d7 Mon Sep 17 00:00:00 2001
From: Miroslav Stampar <miroslav.stampar@gmail.com>
Date: Wed, 20 Apr 2011 08:35:47 +0000
Subject: [PATCH] implemented "break a tie" request by Andres Riancho

---
 lib/request/basic.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lib/request/basic.py b/lib/request/basic.py
index 6984f8fbc..3133fa5bf 100644
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@@ -165,18 +165,23 @@ def decodePage(page, contentEncoding, contentType):
 
         page = data.read()
 
-    charset = None
+    httpCharset, metaCharset = None, None
 
     # http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
     if contentType and (contentType.find('charset=') != -1):
-        charset = contentType.split('charset=')[-1]
-    elif extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE):
-        charset = extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE)
+        httpCharset = checkCharEncoding(contentType.split('charset=')[-1])
 
-    kb.pageEncoding = checkCharEncoding(charset) or getHeuristicCharEncoding(page)
+    metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))
+
+    if (httpCharset or metaCharset and not all([httpCharset, metaCharset]))\
+        or (httpCharset == metaCharset and all([httpCharset, metaCharset])):
+        kb.pageEncoding = httpCharset or metaCharset
+    else:
+        kb.pageEncoding = None
 
     if contentType and any(map(lambda x: x in contentType.lower(), ('text/txt', 'text/raw', 'text/html', 'text/xml'))):
         # can't do for all responses because we need to support binary files too
+        kb.pageEncoding = kb.pageEncoding or getHeuristicCharEncoding(page)
         page = getUnicode(page, kb.pageEncoding)
 
     return page