sqlmap/lib/utils/google.py

#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2011 sqlmap developers (http://www.sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""

import cookielib
import httplib
import re
import socket
import urllib2

from lib.core.common import getUnicode
from lib.core.common import readInput
from lib.core.convert import htmlunescape
from lib.core.convert import urlencode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.core.exception import sqlmapGenericException
from lib.core.settings import UNICODE_ENCODING
from lib.core.settings import URI_INJECTABLE_REGEX
from lib.request.basic import decodePage

class Google:
"""
This class defines methods used to perform Google dorking (command
line option '-g <google dork>'
"""
def __init__(self, handlers):
2008-10-15 19:38:22 +04:00
self.__matches = []
self.__cj = cookielib.LWPCookieJar()
handlers.append(urllib2.HTTPCookieProcessor(self.__cj))
self.opener = urllib2.build_opener(*handlers)
2008-10-15 19:38:22 +04:00
self.opener.addheaders = conf.httpHeaders
def __parsePage(self, page):
"""
Parse Google dork search results page to get the list of
HTTP addresses
"""
matches = []
2011-11-06 12:55:09 +04:00
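        # result links appear in Google's markup as <h3 class="r"><a href="...">
        # followed by either class="l" or an onmousedown handler; the first
        # capture group extracts the href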
        regExpr = r'h3 class="?r"?><a href="(http[s]?://[^"]+?)"\s(class="?l"?|onmousedown=)'
        matches = re.findall(regExpr, page, re.I | re.S)

        return [match[0] for match in matches]

    def getTargetUrls(self):
        """
        This method returns the list of hosts with parameters out of
        your Google dork search results
        """
        for match in self.__matches:
            if re.search(r"(.*?)\?(.+)", match):
                kb.targetUrls.add((htmlunescape(htmlunescape(match)), None, None, None))
            elif re.search(URI_INJECTABLE_REGEX, match, re.I):
                if kb.scanOnlyGoogleGETs is None:
                    message = "do you want to scan only results containing GET parameters? [Y/n] "
                    test = readInput(message, default="Y")
                    kb.scanOnlyGoogleGETs = test.lower() != 'n'

                if not kb.scanOnlyGoogleGETs:
                    kb.targetUrls.add((htmlunescape(htmlunescape(match)), None, None, None))

    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        subsequent search
        """
        try:
            conn = self.opener.open("http://www.google.com/ncr")
            _ = conn.info()
        except urllib2.HTTPError, e:
            _ = e.info()
        except urllib2.URLError, _:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

    def search(self, googleDork):
        """
        This method performs the actual search on Google, providing
        the google dork and the Google session cookie
        """

        gpage = conf.googlePage if conf.googlePage > 1 else 1
        logger.info("using Google result page #%d" % gpage)

        if not googleDork:
            return None
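        # query-string knobs used below: num=100 asks for 100 results per
        # page, filter=0 disables Google's duplicate-result filtering, and
        # start is the zero-based offset of the first result for this page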
        url = "http://www.google.com/search?"
        url += "q=%s&" % urlencode(googleDork, convall=True)
        url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
        url += "&start=%d" % ((gpage - 1) * 100)

        try:
            conn = self.opener.open(url)
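            # raw HTTP traffic goes to sqlmap's custom numeric log levels:
            # 8 for the outgoing request here, 7 for the response below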
            requestMsg = "HTTP request:\nGET %s" % url
            requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
            logger.log(8, requestMsg)

            page = conn.read()
            code = conn.code
            status = conn.msg
            responseHeaders = conn.info()
            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))

            responseMsg = "HTTP response (%s - %d):\n" % (status, code)

            if conf.verbose <= 4:
                responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
            else:
                responseMsg += "%s\n%s\n" % (responseHeaders, page)

            logger.log(7, responseMsg)
        except urllib2.HTTPError, e:
            try:
                page = e.read()
            except socket.timeout:
                warnMsg = "connection timed out while trying "
                warnMsg += "to get error page information (%d)" % e.code
                logger.critical(warnMsg)
                return None
        except (urllib2.URLError, socket.error, socket.timeout), _:
            errMsg = "unable to connect to Google"
            raise sqlmapConnectionException, errMsg

        self.__matches = self.__parsePage(page)

        if not self.__matches and "detected unusual traffic" in page:
            warnMsg = "Google has detected 'unusual' traffic from "
            warnMsg += "this computer, disabling further searches"
            raise sqlmapGenericException, warnMsg

        return self.__matches
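# Illustrative driver sketch (an assumption for clarity, not part of this
# module: sqlmap's real call site lives elsewhere). Per the docstrings above,
# getCookie() comes first, then search(), then getTargetUrls(), which fills
# kb.targetUrls:
#
#     google = Google(handlers)                           # "handlers" supplied by the caller
#     google.getCookie()
#     matches = google.search("inurl:article.php?id=")    # hypothetical dork
#     google.getTargetUrls()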