minor update regarding default page encoding

This commit is contained in:
Miroslav Stampar 2011-01-17 10:23:37 +00:00
parent 5c857779c1
commit 34d13be0d3
2 changed files with 6 additions and 3 deletions

View File

@@ -86,7 +86,10 @@ UNKNOWN_DBMS_VERSION = "Unknown"
DYNAMICITY_MARK_LENGTH = 32 DYNAMICITY_MARK_LENGTH = 32
# dummy user prefix used in dictionary attack # dummy user prefix used in dictionary attack
DUMMY_USER_PREFIX = '__dummy__' DUMMY_USER_PREFIX = "__dummy__"
# Reference: http://en.wikipedia.org/wiki/ISO/IEC_8859-1
DEFAULT_PAGE_ENCODING = "iso-8859-1"
# System variables # System variables
IS_WIN = subprocess.mswindows IS_WIN = subprocess.mswindows

View File

@@ -25,6 +25,7 @@ from lib.core.data import conf
from lib.core.data import kb from lib.core.data import kb
from lib.core.data import logger from lib.core.data import logger
from lib.core.settings import META_CHARSET_REGEX from lib.core.settings import META_CHARSET_REGEX
from lib.core.settings import DEFAULT_PAGE_ENCODING
from lib.parse.headers import headersParser from lib.parse.headers import headersParser
from lib.parse.html import htmlParser from lib.parse.html import htmlParser
@@ -139,8 +140,7 @@ def decodePage(page, contentEncoding, contentType):
charset = extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE) charset = extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE)
charset = checkCharEncoding(charset) charset = checkCharEncoding(charset)
if charset: kb.pageEncoding = charset or DEFAULT_PAGE_ENCODING
kb.pageEncoding = charset
return getUnicode(page) return getUnicode(page)