sqlmap/lib/utils/google.py

#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2010 sqlmap developers (http://sqlmap.sourceforge.net/)
See the file doc/COPYING for copying permission.
"""

import cookielib
import re
import socket
import urllib2

from lib.core.common import getUnicode
from lib.core.convert import htmlunescape
from lib.core.convert import urlencode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.request.basic import decodePage

class Google:
    """
    This class defines methods used to perform Google dorking (command
    line option '-g <google dork>')
    """

    def __init__(self, handlers):
        """
        Build the urllib2 opener used for every request sent to Google

        handlers -- urllib2 handler instances supplied by the caller; the
                    sequence is copied and never modified in place
        """

        self.__matches = []
        self.__cj = cookielib.LWPCookieJar()

        # Work on a copy: appending to the caller's own list would leave
        # one more cookie processor in it after each instantiation
        handlers = list(handlers or [])
        handlers.append(urllib2.HTTPCookieProcessor(self.__cj))

        self.opener = urllib2.build_opener(*handlers)
        self.opener.addheaders = conf.httpHeaders

    def __parsePage(self, page):
        """
        Parse Google dork search results page to get the list of
        HTTP addresses

        Returns the list of URLs exactly as they are written in the page
        (no HTML unescaping is done here), empty when the page is empty
        or nothing matches
        """

        if not page:
            return []

        # Raw-string spelling of the former octal-escaped literal
        # (\042 is '"' and \076 is '>'); the pattern itself is unchanged
        regExpr = r'li class="?g"?>.+?a href="(http[s]*://.+?)"\sclass="?l"?'

        return re.findall(regExpr, page, re.I | re.M)

    def getTargetUrls(self):
        """
        This method adds to kb.targetUrls the addresses with parameters
        (a '?' followed by at least one character) found by the last
        search() call. Nothing is returned
        """

        for match in self.__matches:
            if re.search(r"(.*?)\?(.+)", match, re.I):
                kb.targetUrls.add((htmlunescape(match), None, None, None))

    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        further search

        Raises sqlmapConnectionException when Google can not be reached
        """

        try:
            # Cookie handling is delegated to the HTTPCookieProcessor
            # installed on the opener in __init__
            self.opener.open("http://www.google.com/ncr")
        except urllib2.HTTPError:
            # An HTTP error status is still an answer from Google; only a
            # failure to connect is treated as fatal here
            pass
        except (urllib2.URLError, socket.error, socket.timeout):
            # Same set of connection failures that search() handles
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException(errMsg)

    def search(self, googleDork):
        """
        This method performs the effective search on Google providing
        the google dork and the Google session cookie

        Returns the list of addresses parsed out of the results page, or
        None when no dork is given or when reading the body of an HTTP
        error response times out

        Raises sqlmapConnectionException when Google can not be reached
        """

        # Nothing to search for: leave before logging or building the URL
        if not googleDork:
            return None

        # Result pages are numbered from 1; anything lower (or unset)
        # falls back to the first page
        gpage = max(1, conf.googlePage or 1)
        logger.info("using Google result page #%d" % gpage)

        url = "http://www.google.com/search?"
        url += "q=%s&" % urlencode(googleDork)
        url += "num=100&hl=en&safe=off&filter=0&btnG=Search"
        url += "&start=%d" % ((gpage - 1) * 100)

        try:
            conn = self.opener.open(url)

            # Log the headers configured on the opener: conn.headers holds
            # the response headers, which do not belong in the request log
            requestMsg = "HTTP request:\nGET %s HTTP/1.1\n" % url
            requestMsg += "\n".join(["%s: %s" % (header, value) for header, value in self.opener.addheaders])
            requestMsg += "\n"
            logger.log(9, requestMsg)

            page = conn.read()
            code = conn.code
            status = conn.msg
            responseHeaders = conn.info()
            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))

            responseMsg = "HTTP response (%s - %d):\n" % (status, code)

            # The page body is only dumped at the highest verbosity levels
            if conf.verbose <= 4:
                responseMsg += getUnicode(responseHeaders)
            else:
                responseMsg += "%s\n%s\n" % (responseHeaders, page)

            logger.log(8, responseMsg)
        except urllib2.HTTPError as e:
            # The body of an error response is still parsed for results
            try:
                page = e.read()
            except socket.timeout:
                warnMsg = "connection timed out while trying "
                warnMsg += "to get error page information (%d)" % e.code
                logger.critical(warnMsg)
                return None
        except (urllib2.URLError, socket.error, socket.timeout):
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException(errMsg)

        self.__matches = self.__parsePage(page)

        return self.__matches
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`#!/usr/bin/env python`

			`"""`
propsets.. 2008-10-15 19:56:32 +04:00			$Id$
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
large commit with copyright header modifications 2010-10-14 18:41:14 +04:00			`Copyright (c) 2006-2010 sqlmap developers (http://sqlmap.sourceforge.net/)`
			`See the file doc/COPYING for copying permission.`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`"""`

			`import cookielib`
			`import re`
added socket timeout exception handling regarding that timeout message from Fahad Al Shunaiber 2010-03-26 14:51:23 +03:00			`import socket`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`import urllib2`

more unicode refactoring 2010-06-02 16:45:40 +04:00			`from lib.core.common import getUnicode`
fix for a google bug reported by Brandon E. 2010-10-01 12:03:39 +04:00			`from lib.core.convert import htmlunescape`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`from lib.core.convert import urlencode`
			`from lib.core.data import conf`
Completed support to get the list of targets from WebScarab/Burp proxies log file and updated the documentation 2008-11-28 01:33:33 +03:00			`from lib.core.data import kb`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`from lib.core.data import logger`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`from lib.core.exception import sqlmapConnectionException`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`from lib.request.basic import decodePage`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`class Google:`
			`"""`
			`This class defines methods used to perform Google dorking (command`
			`line option '-g <google dork>'`
			`"""`

Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`def __init__(self, handlers):`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`self.__matches = []`
			`self.__cj = cookielib.LWPCookieJar()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00
			`handlers.append(urllib2.HTTPCookieProcessor(self.__cj))`

			`self.opener = urllib2.build_opener(*handlers)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`self.opener.addheaders = conf.httpHeaders`

			`def __parsePage(self, page):`
			`"""`
			`Parse Google dork search results page to get the list of`
			`HTTP addresses`
			`"""`

			`matches = []`

better regex used avoiding garbage google images 2010-05-16 02:02:28 +04:00			`regExpr = "li class=\042?g\042?\076.+?a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?"`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`matches = re.findall(regExpr, page, re.I \| re.M)`

			`return matches`

			`def getTargetUrls(self):`
			`"""`
			`This method returns the list of hosts with parameters out of`
			`your Google dork search results`
			`"""`

			`for match in self.__matches:`
			`if re.search("(.*?)\?(.+)", match, re.I):`
fix for a google bug reported by Brandon E. 2010-10-01 12:03:39 +04:00			`kb.targetUrls.add(( htmlunescape(match), None, None, None ))`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`def getCookie(self):`
			`"""`
			`This method is the first to be called when initializing a`
			`Google dorking object through this library. It is used to`
			`retrieve the Google session cookie needed to perform the`
			`further search`
			`"""`

			`try:`
			`conn = self.opener.open("http://www.google.com/ncr")`
Minor bug fixes to --os-shell (altought web backdoor functionality still to be reviewed). Minor common library code refactoring. Code cleanup. Set back the default User-Agent to sqlmap for comparison algorithm reasons. Updated THANKS. 2009-04-28 03:05:11 +04:00			`_ = conn.info()`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`except urllib2.HTTPError, e:`
Minor bug fixes to --os-shell (altought web backdoor functionality still to be reviewed). Minor common library code refactoring. Code cleanup. Set back the default User-Agent to sqlmap for comparison algorithm reasons. Updated THANKS. 2009-04-28 03:05:11 +04:00			`_ = e.info()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`except urllib2.URLError, _:`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`errMsg = "unable to connect to Google"`
			`raise sqlmapConnectionException, errMsg`

			`def search(self, googleDork):`
			`"""`
			`This method performs the effective search on Google providing`
			`the google dork and the Google session cookie`
			`"""`

sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`gpage = conf.googlePage if conf.googlePage > 1 else 1`
Minor log adjustments 2010-03-05 17:59:33 +03:00			`logger.info("using Google result page #%d" % gpage)`
minor update 2010-09-27 17:41:18 +04:00
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`if not googleDork:`
			`return None`

			`url = "http://www.google.com/search?"`
			`url += "q=%s&" % urlencode(googleDork)`
			`url += "num=100&hl=en&safe=off&filter=0&btnG=Search"`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`url += "&start=%d" % ((gpage-1) * 100)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`try:`
			`conn = self.opener.open(url)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`requestMsg = "HTTP request:\nGET %s HTTP/1.1\n" % url`
			`requestMsg += "\n".join(["%s: %s" % (header, value) for header, value in conn.headers.items()])`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`requestMsg += "\n"`
			`logger.log(9, requestMsg)`

Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`page = conn.read()`
			`code = conn.code`
			`status = conn.msg`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`responseHeaders = conn.info()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
			`responseMsg = "HTTP response (%s - %d):\n" % (status, code)`
minor update 2010-09-27 17:41:18 +04:00
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`if conf.verbose <= 4:`
more unicode refactoring 2010-06-02 16:45:40 +04:00			`responseMsg += getUnicode(responseHeaders)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`elif conf.verbose > 4:`
			`responseMsg += "%s\n%s\n" % (responseHeaders, page)`
minor update 2010-09-27 17:41:18 +04:00
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`logger.log(8, responseMsg)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`except urllib2.HTTPError, e:`
minor update 2010-05-16 00:44:08 +04:00			`try:`
			`page = e.read()`
			`except socket.timeout:`
			`warnMsg = "connection timed out while trying "`
			`warnMsg += "to get error page information (%d)" % e.code`
minor update 2010-09-27 17:41:18 +04:00			`logger.critical(warnMsg)`
minor update 2010-05-16 00:44:08 +04:00			`return None`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`except (urllib2.URLError, socket.error, socket.timeout), _:`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`errMsg = "unable to connect to Google"`
			`raise sqlmapConnectionException, errMsg`

			`self.__matches = self.__parsePage(page)`

			`return self.__matches`