minor update regarding default page encoding

This commit is contained in:
Miroslav Stampar 2011-01-17 10:23:37 +00:00
parent 5c857779c1
commit 34d13be0d3
2 changed files with 6 additions and 3 deletions

View File

@ -86,7 +86,10 @@ UNKNOWN_DBMS_VERSION = "Unknown"
DYNAMICITY_MARK_LENGTH = 32
# dummy user prefix used in dictionary attack
DUMMY_USER_PREFIX = '__dummy__'
DUMMY_USER_PREFIX = "__dummy__"
# Fallback page encoding used when a page's charset cannot be determined
# Reference: http://en.wikipedia.org/wiki/ISO/IEC_8859-1
DEFAULT_PAGE_ENCODING = "iso-8859-1"
# System variables
IS_WIN = subprocess.mswindows

View File

@ -25,6 +25,7 @@ from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import DEFAULT_PAGE_ENCODING
from lib.parse.headers import headersParser
from lib.parse.html import htmlParser
@ -139,8 +140,7 @@ def decodePage(page, contentEncoding, contentType):
charset = extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE)
charset = checkCharEncoding(charset)
if charset:
kb.pageEncoding = charset
kb.pageEncoding = charset or DEFAULT_PAGE_ENCODING
return getUnicode(page)