sqlmap/lib/utils/google.py

#!/usr/bin/env python

"""
Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""

import cookielib
import httplib
import re
import socket
import urllib
import urllib2

from lib.core.common import getUnicode
from lib.core.common import readInput
from lib.core.convert import urldecode
from lib.core.convert import urlencode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.core.exception import sqlmapGenericException
from lib.core.settings import GOOGLE_REGEX
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import URI_INJECTABLE_REGEX
from lib.request.basic import decodePage

class Google:
    """
    This class defines methods used to perform Google dorking (command
    line option '-g <google dork>'
    """

    def __init__(self, handlers):
        self._matches = []
        self._cj = cookielib.CookieJar()

        handlers.append(urllib2.HTTPCookieProcessor(self._cj))

        self.opener = urllib2.build_opener(*handlers)
        self.opener.addheaders = conf.httpHeaders

    def _parsePage(self, page):
        """
        Parse Google dork search results page to get the list of
        HTTP addresses
        """

        retVal = [urllib.unquote(match.group(1)) for match in re.finditer(GOOGLE_REGEX, page, re.I | re.S)]

        return retVal

    def getTargetUrls(self):
        """
        This method returns the list of hosts with parameters out of
        your Google dork search results
        """

        for _ in self._matches:
            _ = urldecode(_)
            if re.search(r"(.*?)\?(.+)", _):
                kb.targetUrls.add((_, None, None, None))
            elif re.search(URI_INJECTABLE_REGEX, _, re.I):
                if kb.scanOnlyGoogleGETs is None:
                    message = "do you want to scan only results containing GET parameters? [Y/n] "
                    test = readInput(message, default="Y")
                    kb.scanOnlyGoogleGETs = test.lower() != 'n'
                if not kb.scanOnlyGoogleGETs:
                    kb.targetUrls.add((_, None, None, None))

    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        further search
        """

        try:
            conn = self.opener.open("http://www.google.com/ncr")
            _ = conn.info()
        except urllib2.HTTPError, e:
            _ = e.info()
        except urllib2.URLError:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

    def search(self, googleDork):
        """
        This method performs the effective search on Google providing
        the google dork and the Google session cookie
        """

        gpage = conf.googlePage if conf.googlePage > 1 else 1
        logger.info("using Google result page #%d" % gpage)

        if not googleDork:
            return None

        url = "http://www.google.com/search?"
        url += "q=%s&" % urlencode(googleDork, convall=True)
        url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
        url += "&start=%d" % ((gpage-1) * 100)

        try:
            conn = self.opener.open(url)

            requestMsg = "HTTP request:\nGET %s" % url
            requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
            logger.log(8, requestMsg)

            page = conn.read()
            code = conn.code
            status = conn.msg
            responseHeaders = conn.info()
            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))

            responseMsg = "HTTP response (%s - %d):\n" % (status, code)

            if conf.verbose <= 4:
                responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
            elif conf.verbose > 4:
                responseMsg += "%s\n%s\n" % (responseHeaders, page)

            logger.log(7, responseMsg)
        except urllib2.HTTPError, e:
            try:
                page = e.read()
            except socket.timeout:
                warnMsg = "connection timed out while trying "
                warnMsg += "to get error page information (%d)" % e.code
                logger.critical(warnMsg)
                return None
        except (urllib2.URLError, socket.error, socket.timeout):
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

        self._matches = self._parsePage(page)

        if not self._matches and "detected unusual traffic" in page:
            warnMsg = "Google has detected 'unusual' traffic from "
            warnMsg += "this computer disabling further searches"
            raise sqlmapGenericException, warnMsg

        return self._matches
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`#!/usr/bin/env python`

			`"""`
modified homepage address 2012-07-12 21:38:03 +04:00			`Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)`
sorry, cosmetics 2010-10-15 03:18:29 +04:00			`See the file 'doc/COPYING' for copying permission`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`"""`

			`import cookielib`
Proper HTTP version display 2010-10-31 18:41:28 +03:00			`import httplib`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`import re`
added socket timeout exception handling regarding that timeout message from Fahad Al Shunaiber 2010-03-26 14:51:23 +03:00			`import socket`
Fix for Issue #59 2012-07-05 14:34:27 +04:00			`import urllib`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`import urllib2`

more unicode refactoring 2010-06-02 16:45:40 +04:00			`from lib.core.common import getUnicode`
minor update 2011-11-06 15:18:16 +04:00			`from lib.core.common import readInput`
fix for -g 2012-02-20 14:02:19 +04:00			`from lib.core.convert import urldecode`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`from lib.core.convert import urlencode`
			`from lib.core.data import conf`
Completed support to get the list of targets from WebScarab/Burp proxies log file and updated the documentation 2008-11-28 01:33:33 +03:00			`from lib.core.data import kb`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`from lib.core.data import logger`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`from lib.core.exception import sqlmapConnectionException`
raising critical when google detects strange traffic and also removing obsolete sqlmapSiteTooDynamic 2011-01-03 17:21:41 +03:00			`from lib.core.exception import sqlmapGenericException`
fix for -g 2012-02-20 14:02:19 +04:00			`from lib.core.settings import GOOGLE_REGEX`
refactoring 2011-01-30 14:36:03 +03:00			`from lib.core.settings import UNICODE_ENCODING`
update (now URIs like www.site.com/id82 are automatically treated as possible URI injectable) 2011-01-31 23:36:01 +03:00			`from lib.core.settings import URI_INJECTABLE_REGEX`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`from lib.request.basic import decodePage`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`class Google:`
			`"""`
			`This class defines methods used to perform Google dorking (command`
			`line option '-g <google dork>'`
			`"""`

Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`def __init__(self, handlers):`
fix for -g 2012-02-20 14:02:19 +04:00			`self._matches = []`
added switch --load-cookies 2012-03-07 18:48:45 +04:00			`self._cj = cookielib.CookieJar()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00
fix for -g 2012-02-20 14:02:19 +04:00			`handlers.append(urllib2.HTTPCookieProcessor(self._cj))`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00
			`self.opener = urllib2.build_opener(*handlers)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`self.opener.addheaders = conf.httpHeaders`

fix for -g 2012-02-20 14:02:19 +04:00			`def _parsePage(self, page):`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`"""`
			`Parse Google dork search results page to get the list of`
			`HTTP addresses`
			`"""`

Fix for Issue #59 2012-07-05 14:34:27 +04:00			`retVal = [urllib.unquote(match.group(1)) for match in re.finditer(GOOGLE_REGEX, page, re.I \| re.S)]`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
removing of unused imports together with some general code refactoring 2012-02-22 14:40:11 +04:00			`return retVal`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`def getTargetUrls(self):`
			`"""`
			`This method returns the list of hosts with parameters out of`
			`your Google dork search results`
			`"""`

fix for -g 2012-02-20 14:02:19 +04:00			`for _ in self._matches:`
			`_ = urldecode(_)`
			`if re.search(r"(.*?)\?(.+)", _):`
			`kb.targetUrls.add((_, None, None, None))`
			`elif re.search(URI_INJECTABLE_REGEX, _, re.I):`
minor update 2011-11-06 15:18:16 +04:00			`if kb.scanOnlyGoogleGETs is None:`
			`message = "do you want to scan only results containing GET parameters? [Y/n] "`
			`test = readInput(message, default="Y")`
			`kb.scanOnlyGoogleGETs = test.lower() != 'n'`
			`if not kb.scanOnlyGoogleGETs:`
fix for -g 2012-02-20 14:02:19 +04:00			`kb.targetUrls.add((_, None, None, None))`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`def getCookie(self):`
			`"""`
			`This method is the first to be called when initializing a`
			`Google dorking object through this library. It is used to`
			`retrieve the Google session cookie needed to perform the`
			`further search`
			`"""`

			`try:`
			`conn = self.opener.open("http://www.google.com/ncr")`
Minor bug fixes to --os-shell (altought web backdoor functionality still to be reviewed). Minor common library code refactoring. Code cleanup. Set back the default User-Agent to sqlmap for comparison algorithm reasons. Updated THANKS. 2009-04-28 03:05:11 +04:00			`_ = conn.info()`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`except urllib2.HTTPError, e:`
Minor bug fixes to --os-shell (altought web backdoor functionality still to be reviewed). Minor common library code refactoring. Code cleanup. Set back the default User-Agent to sqlmap for comparison algorithm reasons. Updated THANKS. 2009-04-28 03:05:11 +04:00			`_ = e.info()`
removing of unused imports together with some general code refactoring 2012-02-22 14:40:11 +04:00			`except urllib2.URLError:`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`errMsg = "unable to connect to Google"`
			`raise sqlmapConnectionException, errMsg`

			`def search(self, googleDork):`
			`"""`
			`This method performs the effective search on Google providing`
			`the google dork and the Google session cookie`
			`"""`

sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`gpage = conf.googlePage if conf.googlePage > 1 else 1`
Minor log adjustments 2010-03-05 17:59:33 +03:00			`logger.info("using Google result page #%d" % gpage)`
minor update 2010-09-27 17:41:18 +04:00
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`if not googleDork:`
			`return None`

Minor code restyling 2011-04-30 17:20:05 +04:00			`url = "http://www.google.com/search?"`
minor bug fix 2010-12-22 02:09:41 +03:00			`url += "q=%s&" % urlencode(googleDork, convall=True)`
minor update 2011-11-06 15:42:02 +04:00			`url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`url += "&start=%d" % ((gpage-1) * 100)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`try:`
			`conn = self.opener.open(url)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
Proper HTTP version display 2010-10-31 18:41:28 +03:00			`requestMsg = "HTTP request:\nGET %s" % url`
			`requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str`
Added one new verbose level, -v 3 now shows the full injected payload. Fixed also -d verbose output. 2010-11-08 01:34:29 +03:00			`logger.log(8, requestMsg)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`page = conn.read()`
			`code = conn.code`
			`status = conn.msg`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`responseHeaders = conn.info()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
			`responseMsg = "HTTP response (%s - %d):\n" % (status, code)`
minor update 2010-09-27 17:41:18 +04:00
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`if conf.verbose <= 4:`
refactoring 2011-01-30 14:36:03 +03:00			`responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`elif conf.verbose > 4:`
			`responseMsg += "%s\n%s\n" % (responseHeaders, page)`
minor update 2010-09-27 17:41:18 +04:00
Added one new verbose level, -v 3 now shows the full injected payload. Fixed also -d verbose output. 2010-11-08 01:34:29 +03:00			`logger.log(7, responseMsg)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`except urllib2.HTTPError, e:`
minor update 2010-05-16 00:44:08 +04:00			`try:`
			`page = e.read()`
			`except socket.timeout:`
Minor code restyling 2011-04-30 17:20:05 +04:00			`warnMsg = "connection timed out while trying "`
minor update 2010-05-16 00:44:08 +04:00			`warnMsg += "to get error page information (%d)" % e.code`
minor update 2010-09-27 17:41:18 +04:00			`logger.critical(warnMsg)`
minor update 2010-05-16 00:44:08 +04:00			`return None`
removing of unused imports together with some general code refactoring 2012-02-22 14:40:11 +04:00			`except (urllib2.URLError, socket.error, socket.timeout):`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`errMsg = "unable to connect to Google"`
			`raise sqlmapConnectionException, errMsg`

fix for -g 2012-02-20 14:02:19 +04:00			`self._matches = self._parsePage(page)`
Minor code cleanup 2011-02-08 03:02:54 +03:00
fix for -g 2012-02-20 14:02:19 +04:00			`if not self._matches and "detected unusual traffic" in page:`
Minor code restyling 2011-04-30 17:20:05 +04:00			`warnMsg = "Google has detected 'unusual' traffic from "`
			`warnMsg += "this computer disabling further searches"`
raising critical when google detects strange traffic and also removing obsolete sqlmapSiteTooDynamic 2011-01-03 17:21:41 +03:00			`raise sqlmapGenericException, warnMsg`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
fix for -g 2012-02-20 14:02:19 +04:00			`return self._matches`