diff --git a/lib/core/settings.py b/lib/core/settings.py index 4ee65e0ab..3f02fc6b9 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -86,7 +86,10 @@ UNKNOWN_DBMS_VERSION = "Unknown" DYNAMICITY_MARK_LENGTH = 32 # dummy user prefix used in dictionary attack -DUMMY_USER_PREFIX = '__dummy__' +DUMMY_USER_PREFIX = "__dummy__" + +# Reference: http://en.wikipedia.org/wiki/ISO/IEC_8859-1 +DEFAULT_PAGE_ENCODING = "iso-8859-1" # System variables IS_WIN = subprocess.mswindows diff --git a/lib/request/basic.py b/lib/request/basic.py index dffe93580..9823b1e78 100644 --- a/lib/request/basic.py +++ b/lib/request/basic.py @@ -25,6 +25,7 @@ from lib.core.data import conf from lib.core.data import kb from lib.core.data import logger from lib.core.settings import META_CHARSET_REGEX +from lib.core.settings import DEFAULT_PAGE_ENCODING from lib.parse.headers import headersParser from lib.parse.html import htmlParser @@ -139,8 +140,7 @@ def decodePage(page, contentEncoding, contentType): charset = extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE) charset = checkCharEncoding(charset) - if charset: - kb.pageEncoding = charset + kb.pageEncoding = charset or DEFAULT_PAGE_ENCODING return getUnicode(page)