sqlmap/lib/utils/google.py

#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2011 sqlmap developers (http://www.sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""

import cookielib
import httplib
import re
import socket
import urllib2

from lib.core.common import getUnicode
from lib.core.common import readInput
from lib.core.convert import htmlunescape
from lib.core.convert import urlencode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.core.exception import sqlmapGenericException
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import URI_INJECTABLE_REGEX
from lib.request.basic import decodePage

class Google:
"""
This class defines methods used to perform Google dorking (command
line option '-g <google dork>'
"""
def __init__(self, handlers):
2008-10-15 19:38:22 +04:00
self.__matches = []
self.__cj = cookielib.LWPCookieJar()
handlers.append(urllib2.HTTPCookieProcessor(self.__cj))
self.opener = urllib2.build_opener(*handlers)
2008-10-15 19:38:22 +04:00
self.opener.addheaders = conf.httpHeaders
def __parsePage(self, page):
"""
Parse Google dork search results page to get the list of
HTTP addresses
"""
matches = []
2011-11-06 12:55:09 +04:00
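        # result links appear in Google's markup as <h3 class="r"><a href="...">
        # followed by either class="l" or an onmousedown handler; the first
        # capture group extracts the href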
        regExpr = r'h3 class="?r"?><a href="(http[s]?://[^"]+?)"\s(class="?l"?|onmousedown=)'
        matches = re.findall(regExpr, page, re.I | re.S)

        return [match[0] for match in matches]

    def getTargetUrls(self):
        """
        This method returns the list of hosts with parameters out of
        your Google dork search results
        """
        for match in self.__matches:
            if re.search(r"(.*?)\?(.+)", match):
                kb.targetUrls.add((htmlunescape(htmlunescape(match)), None, None, None))
            elif re.search(URI_INJECTABLE_REGEX, match, re.I):
                if kb.scanOnlyGoogleGETs is None:
                    message = "do you want to scan only results containing GET parameters? [Y/n] "
                    test = readInput(message, default="Y")
                    kb.scanOnlyGoogleGETs = test.lower() != 'n'

                if not kb.scanOnlyGoogleGETs:
                    kb.targetUrls.add((htmlunescape(htmlunescape(match)), None, None, None))

    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        subsequent search
        """
        try:
            conn = self.opener.open("http://www.google.com/ncr")
            _ = conn.info()
        except urllib2.HTTPError, e:
            _ = e.info()
        except urllib2.URLError, _:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

    def search(self, googleDork):
        """
        This method performs the actual search on Google, providing
        the google dork and the Google session cookie
        """

        gpage = conf.googlePage if conf.googlePage > 1 else 1
        logger.info("using Google result page #%d" % gpage)

        if not googleDork:
            return None
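        # query-string knobs used below: num=100 asks for 100 results per
        # page, filter=0 disables Google's duplicate-result filtering, and
        # start is the zero-based offset of the first result for this page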
        url = "http://www.google.com/search?"
        url += "q=%s&" % urlencode(googleDork, convall=True)
        url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
        url += "&start=%d" % ((gpage - 1) * 100)

        try:
            conn = self.opener.open(url)
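            # raw HTTP traffic goes to sqlmap's custom numeric log levels:
            # 8 for the outgoing request here, 7 for the response below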
            requestMsg = "HTTP request:\nGET %s" % url
            requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
            logger.log(8, requestMsg)

            page = conn.read()
            code = conn.code
            status = conn.msg
            responseHeaders = conn.info()
            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))

            responseMsg = "HTTP response (%s - %d):\n" % (status, code)

            if conf.verbose <= 4:
                responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
            else:
                responseMsg += "%s\n%s\n" % (responseHeaders, page)

            logger.log(7, responseMsg)
        except urllib2.HTTPError, e:
            try:
                page = e.read()
            except socket.timeout:
                warnMsg = "connection timed out while trying "
                warnMsg += "to get error page information (%d)" % e.code
                logger.critical(warnMsg)
                return None
        except (urllib2.URLError, socket.error, socket.timeout), _:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

        self.__matches = self.__parsePage(page)

        if not self.__matches and "detected unusual traffic" in page:
            warnMsg = "Google has detected 'unusual' traffic from "
            warnMsg += "this computer, disabling further searches"
            raise sqlmapGenericException, warnMsg

        return self.__matches
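# Illustrative driver sketch (an assumption for clarity, not part of this
# module: sqlmap's real call site lives elsewhere). Per the docstrings above,
# getCookie() comes first, then search(), then getTargetUrls(), which fills
# kb.targetUrls:
#
#     google = Google(handlers)                           # "handlers" supplied by the caller
#     google.getCookie()
#     matches = google.search("inurl:article.php?id=")    # hypothetical dork
#     google.getTargetUrls()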