From 2a72fcce2b68da0ff960b27a15b96c6234731d60 Mon Sep 17 00:00:00 2001
From: Miroslav Stampar
Date: Thu, 28 Jun 2012 13:55:30 +0200
Subject: [PATCH] Fix for Issue #42

---
 lib/core/option.py   | 6 +++---
 lib/core/settings.py | 2 +-
 lib/utils/google.py  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/core/option.py b/lib/core/option.py
index ce6a26e9e..9cf1f4614 100644
--- a/lib/core/option.py
+++ b/lib/core/option.py
@@ -1154,14 +1154,14 @@ def __setHTTPUserAgent():
         conf.httpHeaders.append(("User-Agent", conf.agent))
 
     elif not conf.randomAgent:
-        addDefaultUserAgent = True
+        _ = True
 
         for header, _ in conf.httpHeaders:
             if header == "User-Agent":
-                addDefaultUserAgent = False
+                _ = False
                 break
 
-        if addDefaultUserAgent:
+        if _:
             conf.httpHeaders.append(("User-Agent", __defaultHTTPUserAgent()))
 
     else:
diff --git a/lib/core/settings.py b/lib/core/settings.py
index 490b0de46..fc38fa69b 100644
--- a/lib/core/settings.py
+++ b/lib/core/settings.py
@@ -63,7 +63,7 @@ CHAR_INFERENCE_MARK = "%c"
 PRINTABLE_CHAR_REGEX = r"[^\x00-\x1f\x7e-\xff]"
 
 # regular expression used for extracting results from google search
-GOOGLE_REGEX = r"url\?q=(http[^>]+)&sa=U&"
+GOOGLE_REGEX = r"url\?\w+=(http[^>]+)&(sa=U|rct=j)"
 
 # regular expression used for extracting content from "textual" tags
 TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h\d|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)"
diff --git a/lib/utils/google.py b/lib/utils/google.py
index 833e74168..97ef7b17c 100644
--- a/lib/utils/google.py
+++ b/lib/utils/google.py
@@ -46,7 +46,7 @@ class Google:
         HTTP addresses
         """
 
-        retVal = re.findall(GOOGLE_REGEX, page, re.I | re.S)
+        retVal = [match.group(1) for match in re.finditer(GOOGLE_REGEX, page, re.I | re.S)]
 
         return retVal
 