From bc4dd7c0ddd87788e20b3f7d25a91eba1baa570d Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Mon, 20 Feb 2012 10:02:19 +0000 Subject: [PATCH] fix for -g --- lib/core/settings.py | 3 +++ lib/utils/google.py | 34 +++++++++++++++++----------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/lib/core/settings.py b/lib/core/settings.py index e786e0877..3f9d2918e 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -65,6 +65,9 @@ PAYLOAD_DELIMITER = "\x00" CHAR_INFERENCE_MARK = "%c" PRINTABLE_CHAR_REGEX = r'[^\x00-\x1f\x7e-\xff]' +# regular expression used for extracting results from google search +GOOGLE_REGEX = r'url\?q=(http[^>]+)&sa=U&' + # dumping characters used in GROUP_CONCAT MySQL technique CONCAT_ROW_DELIMITER = ',' CONCAT_VALUE_DELIMITER = '|' diff --git a/lib/utils/google.py b/lib/utils/google.py index c3df9e4de..2a996e1d4 100644 --- a/lib/utils/google.py +++ b/lib/utils/google.py @@ -16,12 +16,14 @@ import urllib2 from lib.core.common import getUnicode from lib.core.common import readInput from lib.core.convert import htmlunescape +from lib.core.convert import urldecode from lib.core.convert import urlencode from lib.core.data import conf from lib.core.data import kb from lib.core.data import logger from lib.core.exception import sqlmapConnectionException from lib.core.exception import sqlmapGenericException +from lib.core.settings import GOOGLE_REGEX from lib.core.settings import UNICODE_ENCODING from lib.core.settings import URI_INJECTABLE_REGEX from lib.request.basic import decodePage @@ -33,26 +35,23 @@ class Google: """ def __init__(self, handlers): - self.__matches = [] - self.__cj = cookielib.LWPCookieJar() + self._matches = [] + self._cj = cookielib.LWPCookieJar() - handlers.append(urllib2.HTTPCookieProcessor(self.__cj)) + handlers.append(urllib2.HTTPCookieProcessor(self._cj)) self.opener = urllib2.build_opener(*handlers) self.opener.addheaders = conf.httpHeaders - def __parsePage(self, page): + def _parsePage(self, page): """ Parse Google dork search results page to get the list of HTTP addresses """ - matches = [] + retVal = re.findall(GOOGLE_REGEX, page, re.I | re.S) - regExpr = r'h3 class="?r"?>