sqlmap/lib/utils/google.py

#!/usr/bin/env python

"""
$Id$

This file is part of the sqlmap project, http://sqlmap.sourceforge.net.

Copyright (c) 2007-2010 Bernardo Damele A. G. <bernardo.damele@gmail.com>
Copyright (c) 2006 Daniele Bellucci <daniele.bellucci@gmail.com>

sqlmap is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation version 2 of the License.

sqlmap is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along
with sqlmap; if not, write to the Free Software Foundation, Inc., 51
Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
"""

import cookielib
import re
import socket
import urllib2

from lib.core.common import getUnicode
from lib.core.convert import urlencode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.request.basic import decodePage

class Google:
    """
    This class defines methods used to perform Google dorking (command
    line option '-g <google dork>'
    """

    def __init__(self, handlers):
        self.__matches = []
        self.__cj = cookielib.LWPCookieJar()

        handlers.append(urllib2.HTTPCookieProcessor(self.__cj))

        self.opener = urllib2.build_opener(*handlers)
        self.opener.addheaders = conf.httpHeaders

    def __parsePage(self, page):
        """
        Parse Google dork search results page to get the list of
        HTTP addresses
        """

        matches = []

        regExpr = "li class=\042?g\042?\076.+?a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?"
        matches = re.findall(regExpr, page, re.I | re.M)

        return matches

    def getTargetUrls(self):
        """
        This method returns the list of hosts with parameters out of
        your Google dork search results
        """

        for match in self.__matches:
            if re.search("(.*?)\?(.+)", match, re.I):
                kb.targetUrls.add(( match, None, None, None ))

    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        further search
        """

        try:
            conn = self.opener.open("http://www.google.com/ncr")
            _ = conn.info()
        except urllib2.HTTPError, e:
            _ = e.info()
        except urllib2.URLError, _:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

    def search(self, googleDork):
        """
        This method performs the effective search on Google providing
        the google dork and the Google session cookie
        """

        gpage = conf.googlePage if conf.googlePage > 1 else 1
        logger.info("using Google result page #%d" % gpage)
        
        if not googleDork:
            return None

        url  = "http://www.google.com/search?"
        url += "q=%s&" % urlencode(googleDork)
        url += "num=100&hl=en&safe=off&filter=0&btnG=Search"
        url += "&start=%d" % ((gpage-1) * 100)

        try:
            conn = self.opener.open(url)

            requestMsg = "HTTP request:\nGET %s HTTP/1.1\n" % url
            requestMsg += "\n".join(["%s: %s" % (header, value) for header, value in conn.headers.items()])
            requestMsg += "\n"
            logger.log(9, requestMsg)

            page = conn.read()
            code = conn.code
            status = conn.msg
            responseHeaders = conn.info()
            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))

            responseMsg = "HTTP response (%s - %d):\n" % (status, code)
    
            if conf.verbose <= 4:
                responseMsg += getUnicode(responseHeaders)
            elif conf.verbose > 4:
                responseMsg += "%s\n%s\n" % (responseHeaders, page)
    
            logger.log(8, responseMsg)
        except urllib2.HTTPError, e:
            try:
                page = e.read()
            except socket.timeout:
                warnMsg  = "connection timed out while trying "
                warnMsg += "to get error page information (%d)" % e.code
                logger.warn(warnMsg)
                return None
        except (urllib2.URLError, socket.error, socket.timeout), _:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

        self.__matches = self.__parsePage(page)

        return self.__matches
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`#!/usr/bin/env python`

			`"""`
propsets.. 2008-10-15 19:56:32 +04:00			$Id$
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`This file is part of the sqlmap project, http://sqlmap.sourceforge.net.`

Updated copyright 2010-03-03 18:26:27 +03:00			`Copyright (c) 2007-2010 Bernardo Damele A. G. <bernardo.damele@gmail.com>`
Updated to sqlmap 0.7 release candidate 1 2009-04-22 15:48:07 +04:00			`Copyright (c) 2006 Daniele Bellucci <daniele.bellucci@gmail.com>`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`sqlmap is free software; you can redistribute it and/or modify it under`
			`the terms of the GNU General Public License as published by the Free`
			`Software Foundation version 2 of the License.`

			`sqlmap is distributed in the hope that it will be useful, but WITHOUT ANY`
			`WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS`
			`FOR A PARTICULAR PURPOSE. See the GNU General Public License for more`
			`details.`

			`You should have received a copy of the GNU General Public License along`
			`with sqlmap; if not, write to the Free Software Foundation, Inc., 51`
			`Franklin St, Fifth Floor, Boston, MA 02110-1301 USA`
			`"""`

			`import cookielib`
			`import re`
added socket timeout exception handling regarding that timeout message from Fahad Al Shunaiber 2010-03-26 14:51:23 +03:00			`import socket`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`import urllib2`

more unicode refactoring 2010-06-02 16:45:40 +04:00			`from lib.core.common import getUnicode`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`from lib.core.convert import urlencode`
			`from lib.core.data import conf`
Completed support to get the list of targets from WebScarab/Burp proxies log file and updated the documentation 2008-11-28 01:33:33 +03:00			`from lib.core.data import kb`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`from lib.core.data import logger`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`from lib.core.exception import sqlmapConnectionException`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`from lib.request.basic import decodePage`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`class Google:`
			`"""`
			`This class defines methods used to perform Google dorking (command`
			`line option '-g <google dork>'`
			`"""`

Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`def __init__(self, handlers):`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`self.__matches = []`
			`self.__cj = cookielib.LWPCookieJar()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00
			`handlers.append(urllib2.HTTPCookieProcessor(self.__cj))`

			`self.opener = urllib2.build_opener(*handlers)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`self.opener.addheaders = conf.httpHeaders`

			`def __parsePage(self, page):`
			`"""`
			`Parse Google dork search results page to get the list of`
			`HTTP addresses`
			`"""`

			`matches = []`

better regex used avoiding garbage google images 2010-05-16 02:02:28 +04:00			`regExpr = "li class=\042?g\042?\076.+?a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?"`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`matches = re.findall(regExpr, page, re.I \| re.M)`

			`return matches`

			`def getTargetUrls(self):`
			`"""`
			`This method returns the list of hosts with parameters out of`
			`your Google dork search results`
			`"""`

			`for match in self.__matches:`
			`if re.search("(.*?)\?(.+)", match, re.I):`
Completed support to get the list of targets from WebScarab/Burp proxies log file and updated the documentation 2008-11-28 01:33:33 +03:00			`kb.targetUrls.add(( match, None, None, None ))`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`def getCookie(self):`
			`"""`
			`This method is the first to be called when initializing a`
			`Google dorking object through this library. It is used to`
			`retrieve the Google session cookie needed to perform the`
			`further search`
			`"""`

			`try:`
			`conn = self.opener.open("http://www.google.com/ncr")`
Minor bug fixes to --os-shell (altought web backdoor functionality still to be reviewed). Minor common library code refactoring. Code cleanup. Set back the default User-Agent to sqlmap for comparison algorithm reasons. Updated THANKS. 2009-04-28 03:05:11 +04:00			`_ = conn.info()`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`except urllib2.HTTPError, e:`
Minor bug fixes to --os-shell (altought web backdoor functionality still to be reviewed). Minor common library code refactoring. Code cleanup. Set back the default User-Agent to sqlmap for comparison algorithm reasons. Updated THANKS. 2009-04-28 03:05:11 +04:00			`_ = e.info()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`except urllib2.URLError, _:`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`errMsg = "unable to connect to Google"`
			`raise sqlmapConnectionException, errMsg`

			`def search(self, googleDork):`
			`"""`
			`This method performs the effective search on Google providing`
			`the google dork and the Google session cookie`
			`"""`

sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`gpage = conf.googlePage if conf.googlePage > 1 else 1`
Minor log adjustments 2010-03-05 17:59:33 +03:00			`logger.info("using Google result page #%d" % gpage)`
update 2010-03-05 17:06:03 +03:00
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`if not googleDork:`
			`return None`

			`url = "http://www.google.com/search?"`
			`url += "q=%s&" % urlencode(googleDork)`
			`url += "num=100&hl=en&safe=off&filter=0&btnG=Search"`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`url += "&start=%d" % ((gpage-1) * 100)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00
			`try:`
			`conn = self.opener.open(url)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`requestMsg = "HTTP request:\nGET %s HTTP/1.1\n" % url`
			`requestMsg += "\n".join(["%s: %s" % (header, value) for header, value in conn.headers.items()])`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`requestMsg += "\n"`
			`logger.log(9, requestMsg)`

Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`page = conn.read()`
			`code = conn.code`
			`status = conn.msg`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`responseHeaders = conn.info()`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
			`responseMsg = "HTTP response (%s - %d):\n" % (status, code)`

			`if conf.verbose <= 4:`
more unicode refactoring 2010-06-02 16:45:40 +04:00			`responseMsg += getUnicode(responseHeaders)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`elif conf.verbose > 4:`
			`responseMsg += "%s\n%s\n" % (responseHeaders, page)`

			`logger.log(8, responseMsg)`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`except urllib2.HTTPError, e:`
minor update 2010-05-16 00:44:08 +04:00			`try:`
			`page = e.read()`
			`except socket.timeout:`
			`warnMsg = "connection timed out while trying "`
			`warnMsg += "to get error page information (%d)" % e.code`
			`logger.warn(warnMsg)`
			`return None`
Minor bug fix and adjustment to deal with Keep-Alive also against Google (-g) 2010-06-11 14:08:19 +04:00			`except (urllib2.URLError, socket.error, socket.timeout), _:`
After the storm, a restore.. 2008-10-15 19:38:22 +04:00			`errMsg = "unable to connect to Google"`
			`raise sqlmapConnectionException, errMsg`

			`self.__matches = self.__parsePage(page)`

			`return self.__matches`