diff --git a/lib/core/settings.py b/lib/core/settings.py
index e81e651c3..fac3e9b24 100644
--- a/lib/core/settings.py
+++ b/lib/core/settings.py
@@ -222,10 +222,10 @@ ERROR_PARSING_REGEXES = (
 )
 
 # Regular expression used for parsing charset info from meta html headers
-META_CHARSET_REGEX = r'<meta http-equiv="?content-type"?[^>]+charset=(?P<result>[^">]+)'
+META_CHARSET_REGEX = r'(?si)<html>.*<meta http-equiv="?content-type"?[^>]+charset=(?P<result>[^">]+).*</html>'
 
 # Regular expression used for parsing refresh info from meta html headers
-META_REFRESH_REGEX = r'<meta http-equiv="?refresh"?[^>]+content="?[^">]+url=(?P<result>[^">]+)'
+META_REFRESH_REGEX = r'(?si)<html>.*<meta http-equiv="?refresh"?[^>]+content="?[^">]+url=(?P<result>[^">]+).*</html>'
 
 # Regular expression used for parsing empty fields in tested form data
 EMPTY_FORM_FIELDS_REGEX = r'(&|\A)(?P<result>[^=]+=(&|\Z))'
diff --git a/lib/request/basic.py b/lib/request/basic.py
index d64609622..0238e920c 100644
--- a/lib/request/basic.py
+++ b/lib/request/basic.py
@@ -220,7 +220,7 @@ def decodePage(page, contentEncoding, contentType):
         if contentType and (contentType.find("charset=") != -1):
             httpCharset = checkCharEncoding(contentType.split("charset=")[-1])
 
-        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page, re.DOTALL | re.IGNORECASE))
+        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))
 
         if ((httpCharset or metaCharset) and not all([httpCharset, metaCharset]))\
             or (httpCharset == metaCharset and all([httpCharset, metaCharset])):
diff --git a/lib/request/connect.py b/lib/request/connect.py
index 87814567d..dd0fa0b26 100644
--- a/lib/request/connect.py
+++ b/lib/request/connect.py
@@ -372,8 +372,8 @@ class Connect:
             page = decodePage(page, responseHeaders.get(HTTPHEADER.CONTENT_ENCODING), responseHeaders.get(HTTPHEADER.CONTENT_TYPE))
             status = getUnicode(conn.msg)
 
-            if extractRegexResult(META_REFRESH_REGEX, page, re.DOTALL | re.IGNORECASE) and not refreshing:
-                url = extractRegexResult(META_REFRESH_REGEX, page, re.DOTALL | re.IGNORECASE)
+            if extractRegexResult(META_REFRESH_REGEX, page) and not refreshing:
+                url = extractRegexResult(META_REFRESH_REGEX, page)
 
                 debugMsg = "got HTML meta refresh header"
                 logger.debug(debugMsg)