2008-10-15 19:38:22 +04:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
"""
|
2008-10-15 19:56:32 +04:00
|
|
|
$Id$
|
2008-10-15 19:38:22 +04:00
|
|
|
|
2010-10-14 18:41:14 +04:00
|
|
|
Copyright (c) 2006-2010 sqlmap developers (http://sqlmap.sourceforge.net/)
|
2010-10-15 03:18:29 +04:00
|
|
|
See the file 'doc/COPYING' for copying permission
|
2008-10-15 19:38:22 +04:00
|
|
|
"""
|
|
|
|
|
|
|
|
import cookielib
|
2010-10-31 18:41:28 +03:00
|
|
|
import httplib
|
2008-10-15 19:38:22 +04:00
|
|
|
import re
|
2010-03-26 14:51:23 +03:00
|
|
|
import socket
|
2008-10-15 19:38:22 +04:00
|
|
|
import urllib2
|
|
|
|
|
2010-06-02 16:45:40 +04:00
|
|
|
from lib.core.common import getUnicode
|
2010-10-01 12:03:39 +04:00
|
|
|
from lib.core.convert import htmlunescape
|
2008-10-15 19:38:22 +04:00
|
|
|
from lib.core.convert import urlencode
|
|
|
|
from lib.core.data import conf
|
2008-11-28 01:33:33 +03:00
|
|
|
from lib.core.data import kb
|
2010-01-02 05:02:12 +03:00
|
|
|
from lib.core.data import logger
|
2008-10-15 19:38:22 +04:00
|
|
|
from lib.core.exception import sqlmapConnectionException
|
2010-01-02 05:02:12 +03:00
|
|
|
from lib.request.basic import decodePage
|
2008-10-15 19:38:22 +04:00
|
|
|
|
|
|
|
class Google:
    """
    This class defines methods used to perform Google dorking (command
    line option '-g <google dork>')

    Call sequence, as far as this class shows: build the object with the
    urllib2 handlers to use, call getCookie() to obtain the Google
    session cookie, call search() with the dork and then getTargetUrls()
    to store the usable results into kb.targetUrls.
    """

    def __init__(self, handlers):
        """
        Build the urllib2 opener used for every request sent to Google

        @param handlers: urllib2 handler instances to install in the
        opener; the sequence itself is left untouched.
        """

        self.__matches = []
        self.__cj = cookielib.LWPCookieJar()

        # Work on a copy: appending to the caller's own list would stack
        # one more cookie processor on it for every Google object built
        handlers = list(handlers)
        handlers.append(urllib2.HTTPCookieProcessor(self.__cj))

        self.opener = urllib2.build_opener(*handlers)
        self.opener.addheaders = conf.httpHeaders

    def __parsePage(self, page):
        """
        Parse Google dork search results page to get the list of
        HTTP addresses

        @param page: content of the search results page.

        @return: list of the addresses captured from the result links,
        in page order; empty list when the page is empty or nothing
        matches.
        """

        if not page:
            return []

        # Raw string, so that the regular expression escapes reach the
        # re module exactly as written
        regExpr = r'li class="?g"?>.+?a href="(http[s]*://.+?)"\sclass="?l"?'

        return re.findall(regExpr, page, re.I | re.M)

    def getTargetUrls(self):
        """
        This method adds to kb.targetUrls the addresses with parameters
        out of your Google dork search results

        Only the results holding a '?' followed by at least one
        character are kept; each one is HTML-unescaped and stored as the
        first item of a four items tuple. Nothing is returned.
        """

        for match in self.__matches:
            if re.search(r"(.*?)\?(.+)", match, re.I):
                kb.targetUrls.add((htmlunescape(match), None, None, None))

    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        further search

        @raise sqlmapConnectionException: when Google can not be reached.
        """

        try:
            # The response body is not needed: the request is only made
            # through the opener carrying the cookie processor installed
            # by __init__()
            conn = self.opener.open("http://www.google.com/ncr")
            conn.close()
        except urllib2.HTTPError:
            # An HTTP error status is tolerated here, it is not treated
            # as a connection failure
            pass
        except (urllib2.URLError, socket.error, socket.timeout):
            # Same failure handling as in search()
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException(errMsg)

    def search(self, googleDork):
        """
        This method performs the effective search on Google providing
        the google dork and the Google session cookie

        @param googleDork: Google dork to search for.

        @return: list of the addresses parsed out of the results page
        (also kept for getTargetUrls()); None when no dork is provided
        or when reading an HTTP error page times out.

        @raise sqlmapConnectionException: when Google can not be reached.
        """

        # Nothing to search for, hence nothing worth logging either
        if not googleDork:
            return None

        gpage = conf.googlePage if conf.googlePage and conf.googlePage > 1 else 1
        logger.info("using Google result page #%d" % gpage)

        url = "http://www.google.com/search?"
        url += "q=%s&" % urlencode(googleDork)
        url += "num=100&hl=en&safe=off&filter=0&btnG=Search"
        url += "&start=%d" % ((gpage - 1) * 100)

        try:
            conn = self.opener.open(url)

            try:
                requestMsg = "HTTP request:\nGET %s" % url
                requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
                logger.log(9, requestMsg)

                page = conn.read()
                code = conn.code
                status = conn.msg
                responseHeaders = conn.info()
            finally:
                # Release the connection whatever happened while reading
                conn.close()

            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))

            responseMsg = "HTTP response (%s - %d):\n" % (status, code)

            # The page content is logged only at the highest verbosity
            if conf.verbose <= 4:
                responseMsg += getUnicode(responseHeaders)
            else:
                responseMsg += "%s\n%s\n" % (responseHeaders, page)

            logger.log(8, responseMsg)
        except urllib2.HTTPError as e:
            # The body of an HTTP error response is parsed like a regular
            # results page
            try:
                page = e.read()
            except socket.timeout:
                warnMsg = "connection timed out while trying "
                warnMsg += "to get error page information (%d)" % e.code
                logger.critical(warnMsg)
                return None
        except (urllib2.URLError, socket.error, socket.timeout):
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException(errMsg)

        self.__matches = self.__parsePage(page)

        return self.__matches
|