Lots of fixes and refactoring in the search department

Miroslav Stampar 2015-11-08 16:37:46 +01:00
parent b4526a3d51
commit 42649005c2
5 changed files with 258 additions and 270 deletions

lib/core/option.py · View File

@ -27,7 +27,7 @@ import lib.core.common
import lib.core.threads
import lib.core.convert
import lib.request.connect
import lib.utils.google
import lib.utils.search
from lib.controller.checks import checkConnection
from lib.core.common import Backend
@ -148,7 +148,7 @@ from lib.request.redirecthandler import SmartRedirectHandler
from lib.request.templates import getPageTemplate
from lib.utils.crawler import crawl
from lib.utils.deps import checkDependencies
from lib.utils.google import Google
from lib.utils.search import search
from lib.utils.purge import purge
from thirdparty.colorama.initialise import init as coloramainit
from thirdparty.keepalive import keepalive
@ -503,46 +503,23 @@ def _setCrawler():
errMsg = "problem occurred while crawling at '%s' ('%s')" % (target, ex)
logger.error(errMsg)
def _setGoogleDorking():
def _doSearch():
"""
This function checks if the way to request testable hosts is through
Google dorking then requests to Google the search parameter, parses
the results and saves the testable hosts into the knowledge base.
This function performs search dorking, parses results
and saves the testable hosts into the knowledge base.
"""
if not conf.googleDork:
return
global keepAliveHandler
global proxyHandler
debugMsg = "initializing Google dorking requests"
logger.debug(debugMsg)
infoMsg = "first request to Google to get the session cookie"
logger.info(infoMsg)
handlers = [proxyHandler]
# Reference: http://www.w3.org/Protocols/rfc2616/rfc2616-sec8.html
if conf.keepAlive:
if conf.proxy:
warnMsg = "persistent HTTP(s) connections, Keep-Alive, has "
warnMsg += "been disabled because of its incompatibility "
warnMsg += "with HTTP(s) proxy"
logger.warn(warnMsg)
else:
handlers.append(keepAliveHandler)
googleObj = Google(handlers)
kb.data.onlyGETs = None
def retrieve():
links = googleObj.search(conf.googleDork)
links = search(conf.googleDork)
if not links:
errMsg = "unable to find results for your "
errMsg += "Google dork expression"
errMsg += "search dork expression"
raise SqlmapGenericException(errMsg)
for link in links:
@ -564,7 +541,7 @@ def _setGoogleDorking():
if kb.targets:
infoMsg = "sqlmap got %d results for your " % len(links)
infoMsg += "Google dork expression, "
infoMsg += "search dork expression, "
if len(links) == len(kb.targets):
infoMsg += "all "
@ -577,7 +554,7 @@ def _setGoogleDorking():
else:
message = "sqlmap got %d results " % len(links)
message += "for your Google dork expression, but none of them "
message += "for your search dork expression, but none of them "
message += "have GET parameters to test for SQL injection. "
message += "Do you want to skip to the next result page? [Y/n]"
test = readInput(message, default="Y")
@ -1041,7 +1018,7 @@ def _setDNSCache():
socket._getaddrinfo = socket.getaddrinfo
socket.getaddrinfo = _getaddrinfo
def _setHTTPProxy():
def _setHTTPHandlers():
"""
Check and set the HTTP/SOCKS proxy for all HTTP requests.
"""
@ -1066,63 +1043,62 @@ def _setHTTPProxy():
if conf.hostname in ("localhost", "127.0.0.1") or conf.ignoreProxy:
proxyHandler.proxies = {}
return
if conf.proxy:
debugMsg = "setting the HTTP/SOCKS proxy for all HTTP requests"
logger.debug(debugMsg)
debugMsg = "setting the HTTP/SOCKS proxy for all HTTP requests"
logger.debug(debugMsg)
try:
_ = urlparse.urlsplit(conf.proxy)
except Exception, ex:
errMsg = "invalid proxy address '%s' ('%s')" % (conf.proxy, ex)
raise SqlmapSyntaxException, errMsg
hostnamePort = _.netloc.split(":")
scheme = _.scheme.upper()
hostname = hostnamePort[0]
port = None
username = None
password = None
if len(hostnamePort) == 2:
try:
port = int(hostnamePort[1])
except:
pass # drops into the next check block
_ = urlparse.urlsplit(conf.proxy)
except Exception, ex:
errMsg = "invalid proxy address '%s' ('%s')" % (conf.proxy, ex)
raise SqlmapSyntaxException, errMsg
if not all((scheme, hasattr(PROXY_TYPE, scheme), hostname, port)):
errMsg = "proxy value must be in format '(%s)://address:port'" % "|".join(_[0].lower() for _ in getPublicTypeMembers(PROXY_TYPE))
raise SqlmapSyntaxException(errMsg)
hostnamePort = _.netloc.split(":")
if conf.proxyCred:
_ = re.search("^(.*?):(.*?)$", conf.proxyCred)
if not _:
errMsg = "proxy authentication credentials "
errMsg += "value must be in format username:password"
scheme = _.scheme.upper()
hostname = hostnamePort[0]
port = None
username = None
password = None
if len(hostnamePort) == 2:
try:
port = int(hostnamePort[1])
except:
pass # drops into the next check block
if not all((scheme, hasattr(PROXY_TYPE, scheme), hostname, port)):
errMsg = "proxy value must be in format '(%s)://address:port'" % "|".join(_[0].lower() for _ in getPublicTypeMembers(PROXY_TYPE))
raise SqlmapSyntaxException(errMsg)
else:
username = _.group(1)
password = _.group(2)
if scheme in (PROXY_TYPE.SOCKS4, PROXY_TYPE.SOCKS5):
proxyHandler.proxies = {}
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5 if scheme == PROXY_TYPE.SOCKS5 else socks.PROXY_TYPE_SOCKS4, hostname, port, username=username, password=password)
socks.wrapmodule(urllib2)
else:
socks.unwrapmodule(urllib2)
if conf.proxyCred:
# Reference: http://stackoverflow.com/questions/34079/how-to-specify-an-authenticated-proxy-for-a-python-http-connection
proxyString = "%s@" % conf.proxyCred
_ = re.search("^(.*?):(.*?)$", conf.proxyCred)
if not _:
errMsg = "proxy authentication credentials "
errMsg += "value must be in format username:password"
raise SqlmapSyntaxException(errMsg)
else:
username = _.group(1)
password = _.group(2)
if scheme in (PROXY_TYPE.SOCKS4, PROXY_TYPE.SOCKS5):
proxyHandler.proxies = {}
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5 if scheme == PROXY_TYPE.SOCKS5 else socks.PROXY_TYPE_SOCKS4, hostname, port, username=username, password=password)
socks.wrapmodule(urllib2)
else:
proxyString = ""
socks.unwrapmodule(urllib2)
proxyString += "%s:%d" % (hostname, port)
proxyHandler.proxies = {"http": proxyString, "https": proxyString}
if conf.proxyCred:
# Reference: http://stackoverflow.com/questions/34079/how-to-specify-an-authenticated-proxy-for-a-python-http-connection
proxyString = "%s@" % conf.proxyCred
else:
proxyString = ""
proxyHandler.__init__(proxyHandler.proxies)
proxyString += "%s:%d" % (hostname, port)
proxyHandler.proxies = {"http": proxyString, "https": proxyString}
proxyHandler.__init__(proxyHandler.proxies)
debugMsg = "creating HTTP requests opener object"
logger.debug(debugMsg)
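
As an aside, the proxy parsing above can be exercised in isolation; a minimal sketch (not part of the commit), with sqlmap's PROXY_TYPE members hardcoded and its exception type replaced by ValueError:

import urlparse

def parse_proxy(proxy):
    split = urlparse.urlsplit(proxy)
    scheme = split.scheme.upper()
    hostnamePort = split.netloc.split(":")
    hostname = hostnamePort[0]
    port = None
    if len(hostnamePort) == 2:
        try:
            port = int(hostnamePort[1])
        except ValueError:
            pass  # port stays None, so the format check below fails
    # mirrors all((scheme, hasattr(PROXY_TYPE, scheme), hostname, port))
    if not all((scheme in ("HTTP", "HTTPS", "SOCKS4", "SOCKS5"), hostname, port)):
        raise ValueError("proxy value must be in format '(http|https|socks4|socks5)://address:port'")
    return scheme, hostname, port

print parse_proxy("socks5://127.0.0.1:1080")  # ('SOCKS5', '127.0.0.1', 1080)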
@ -2489,8 +2465,8 @@ def _resolveCrossReferences():
lib.core.threads.readInput = readInput
lib.core.common.getPageTemplate = getPageTemplate
lib.core.convert.singleTimeWarnMessage = singleTimeWarnMessage
lib.request.connect.setHTTPProxy = _setHTTPProxy
lib.utils.google.setHTTPProxy = _setHTTPProxy
lib.request.connect.setHTTPHandlers = _setHTTPHandlers
lib.utils.search.setHTTPHandlers = _setHTTPHandlers
lib.controller.checks.setVerbosity = setVerbosity
def initOptions(inputOptions=AttribDict(), overrideOptions=False):
@ -2539,10 +2515,10 @@ def init():
_setHTTPHost()
_setHTTPUserAgent()
_setHTTPAuthentication()
_setHTTPProxy()
_setHTTPHandlers()
_setDNSCache()
_setSafeVisit()
_setGoogleDorking()
_doSearch()
_setBulkMultipleTargets()
_setSitemapTargets()
_checkTor()

lib/core/settings.py · View File

@ -73,7 +73,7 @@ PERMISSION_DENIED_REGEX = r"(command|permission|access)\s*(was|is)?\s*denied"
MAX_CONNECTIONS_REGEX = r"max.+connections"
# Regular expression used for extracting results from Google search
GOOGLE_REGEX = r"url\?\w+=((?![^>]+webcache\.googleusercontent\.com)http[^>]+)&(sa=U|rct=j)"
GOOGLE_REGEX = r"webcache\.googleusercontent\.com/search\?q=cache:[^:]+:([^+]+)\+&cd=|url\?\w+=((?![^>]+webcache\.googleusercontent\.com)http[^>]+)&(sa=U|rct=j)"
# Regular expression used for extracting results from DuckDuckGo search
DUCKDUCKGO_REGEX = r'"u":"([^"]+)'
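
For illustration (not part of the commit), a minimal sketch of what the new GOOGLE_REGEX buys: result URLs are now captured either from webcache links (group 1) or from regular /url?... links (group 2), which is why the extraction code in lib/utils/search.py below reads match.group(1) or match.group(2). The HTML fragments are hypothetical:

import re
import urllib

GOOGLE_REGEX = r"webcache\.googleusercontent\.com/search\?q=cache:[^:]+:([^+]+)\+&cd=|url\?\w+=((?![^>]+webcache\.googleusercontent\.com)http[^>]+)&(sa=U|rct=j)"
DUCKDUCKGO_REGEX = r'"u":"([^"]+)'

# hypothetical fragments of a Google result page
page = '<a href="/url?q=http://www.example.com/page.php%3Fid%3D1&sa=U">'
page += '<a href="http://webcache.googleusercontent.com/search?q=cache:AbCdEf123:www.example.com/other.php%3Fid%3D2+&cd=4">'

links = [urllib.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)]
print links  # ['http://www.example.com/page.php?id=1', 'www.example.com/other.php?id=2']

# DUCKDUCKGO_REGEX works on the JSON-like payload served by duckduckgo.com/d.js
print re.search(DUCKDUCKGO_REGEX, '{"u":"http://www.example.net/item.php?id=7"}').group(1)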

lib/request/connect.py · View File

@ -134,7 +134,7 @@ class Connect(object):
conf.proxy = None
threadData.retriesCount = 0
setHTTPProxy()
setHTTPHandlers()
if kb.testMode and kb.previousMethod == PAYLOAD.METHOD.TIME:
# timed based payloads can cause web server unresponsiveness
@ -1118,5 +1118,5 @@ class Connect(object):
else:
return comparison(page, headers, code, getRatioValue, pageLength)
def setHTTPProxy(): # Cross-linked function
def setHTTPHandlers(): # Cross-linked function
raise NotImplementedError
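
The stub above is sqlmap's cross-linking idiom: low-level modules such as lib/request/connect.py cannot import lib/core/option.py without creating an import cycle, so they declare a placeholder that option.py rebinds at startup (see the _resolveCrossReferences() hunk earlier). A minimal sketch with hypothetical module names:

# connect_like.py
def setHTTPHandlers():  # Cross-linked function
    raise NotImplementedError  # rebound at runtime by the options module

# option_like.py
import connect_like

def _setHTTPHandlers():
    print "performing the real handler setup"

connect_like.setHTTPHandlers = _setHTTPHandlers  # the actual cross-link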

lib/utils/google.py · View File

@ -1,183 +0,0 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2015 sqlmap developers (http://sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""
import cookielib
import httplib
import re
import socket
import urllib
import urllib2
from lib.core.common import getSafeExString
from lib.core.common import getUnicode
from lib.core.common import readInput
from lib.core.common import urlencode
from lib.core.data import conf
from lib.core.data import logger
from lib.core.enums import CUSTOM_LOGGING
from lib.core.enums import HTTP_HEADER
from lib.core.exception import SqlmapConnectionException
from lib.core.exception import SqlmapGenericException
from lib.core.exception import SqlmapUserQuitException
from lib.core.settings import DUMMY_SEARCH_USER_AGENT
from lib.core.settings import DUCKDUCKGO_REGEX
from lib.core.settings import DISCONNECT_SEARCH_REGEX
from lib.core.settings import GOOGLE_REGEX
from lib.core.settings import HTTP_ACCEPT_ENCODING_HEADER_VALUE
from lib.core.settings import UNICODE_ENCODING
from lib.request.basic import decodePage
from lib.request.httpshandler import HTTPSHandler
from thirdparty.socks import socks
class Google(object):
"""
This class defines methods used to perform Google dorking (command
line option '-g <google dork>')
"""
def __init__(self, handlers):
self._cj = cookielib.CookieJar()
handlers.append(urllib2.HTTPCookieProcessor(self._cj))
handlers.append(HTTPSHandler())
self.opener = urllib2.build_opener(*handlers)
self.opener.addheaders = conf.httpHeaders
try:
conn = self.opener.open("https://www.google.com/ncr")
conn.info() # retrieve session cookie
except Exception, ex:
errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex)
raise SqlmapConnectionException(errMsg)
def search(self, dork):
"""
This method performs the effective search on Google providing
the google dork and the Google session cookie
"""
gpage = conf.googlePage if conf.googlePage > 1 else 1
logger.info("using Google result page #%d" % gpage)
if not dork:
return None
url = "https://www.google.com/search?"
url += "q=%s&" % urlencode(dork, convall=True)
url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
url += "&start=%d" % ((gpage - 1) * 100)
try:
conn = self.opener.open(url)
requestMsg = "HTTP request:\nGET %s" % url
requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except urllib2.HTTPError, e:
try:
page = e.read()
except Exception, ex:
warnMsg = "problem occurred while trying to get "
warnMsg += "an error page information (%s)" % getSafeExString(ex)
logger.critical(warnMsg)
return None
except (urllib2.URLError, httplib.error, socket.error, socket.timeout, socks.ProxyError):
errMsg = "unable to connect to Google"
raise SqlmapConnectionException(errMsg)
retVal = [urllib.unquote(match.group(1)) for match in re.finditer(GOOGLE_REGEX, page, re.I | re.S)]
if not retVal and "detected unusual traffic" in page:
warnMsg = "Google has detected 'unusual' traffic from "
warnMsg += "used IP address disabling further searches"
raise SqlmapGenericException(warnMsg)
if not retVal:
message = "no usable links found. What do you want to do?"
message += "\n[1] (re)try with DuckDuckGo (default)"
message += "\n[2] (re)try with Disconnect Search"
message += "\n[3] quit"
choice = readInput(message, default="1").strip().upper()
if choice == "Q":
raise SqlmapUserQuitException
elif choice == "2":
url = "https://search.disconnect.me/searchTerms/search?"
url += "start=nav&option=Web"
url += "&query=%s" % urlencode(dork, convall=True)
url += "&ses=Google&location_option=US"
url += "&nextDDG=%s" % urlencode("/search?q=&num=100&hl=en&start=%d&sa=N" % ((gpage - 1) * 10), convall=True)
url += "&sa=N&showIcons=false&filterIcons=none&js_enabled=1"
regex = DISCONNECT_SEARCH_REGEX
else:
url = "https://duckduckgo.com/d.js?"
url += "q=%s&p=%d&s=100" % (urlencode(dork, convall=True), gpage)
regex = DUCKDUCKGO_REGEX
if not conf.randomAgent:
self.opener.addheaders = [_ for _ in self.opener.addheaders if _[0].lower() != HTTP_HEADER.USER_AGENT.lower()]
self.opener.addheaders.append((HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT))
self.opener.addheaders = [_ for _ in self.opener.addheaders if _[0].lower() != HTTP_HEADER.ACCEPT_ENCODING.lower()]
self.opener.addheaders.append((HTTP_HEADER.ACCEPT_ENCODING, HTTP_ACCEPT_ENCODING_HEADER_VALUE))
try:
conn = self.opener.open(url)
requestMsg = "HTTP request:\nGET %s" % url
requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except urllib2.HTTPError, e:
try:
page = e.read()
except socket.timeout:
warnMsg = "connection timed out while trying "
warnMsg += "to get error page information (%d)" % e.code
logger.critical(warnMsg)
return None
except:
errMsg = "unable to connect"
raise SqlmapConnectionException(errMsg)
retVal = [urllib.unquote(match.group(1)) for match in re.finditer(regex, page, re.I | re.S)]
return retVal
def setHTTPProxy(): # Cross-linked function
raise NotImplementedError

lib/utils/search.py (new file, 195 lines) · View File

@ -0,0 +1,195 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2015 sqlmap developers (http://sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""
import cookielib
import httplib
import re
import socket
import urllib
import urllib2
from lib.core.common import getSafeExString
from lib.core.common import getUnicode
from lib.core.common import popValue
from lib.core.common import pushValue
from lib.core.common import readInput
from lib.core.common import urlencode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.enums import CUSTOM_LOGGING
from lib.core.enums import HTTP_HEADER
from lib.core.enums import REDIRECTION
from lib.core.exception import SqlmapBaseException
from lib.core.exception import SqlmapConnectionException
from lib.core.exception import SqlmapGenericException
from lib.core.exception import SqlmapUserQuitException
from lib.core.settings import DUMMY_SEARCH_USER_AGENT
from lib.core.settings import DUCKDUCKGO_REGEX
from lib.core.settings import DISCONNECT_SEARCH_REGEX
from lib.core.settings import GOOGLE_REGEX
from lib.core.settings import HTTP_ACCEPT_ENCODING_HEADER_VALUE
from lib.core.settings import UNICODE_ENCODING
from lib.request.basic import decodePage
from lib.request.httpshandler import HTTPSHandler
from thirdparty.socks import socks
def _search(dork):
"""
This function performs the effective search on Google for the given
dork, falling back to DuckDuckGo or Disconnect Search when needed
"""
if not dork:
return None
headers = {}
headers[HTTP_HEADER.USER_AGENT] = dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT)
headers[HTTP_HEADER.ACCEPT_ENCODING] = HTTP_ACCEPT_ENCODING_HEADER_VALUE
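# note: unlike the removed Google class, no cookie jar or dedicated opener is
# kept anymore; every request simply carries these two headers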
try:
req = urllib2.Request("https://www.google.com/ncr", headers=headers)
conn = urllib2.urlopen(req)
except Exception, ex:
errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex)
raise SqlmapConnectionException(errMsg)
gpage = conf.googlePage if conf.googlePage > 1 else 1
logger.info("using search result page #%d" % gpage)
url = "https://www.google.com/search?"
url += "q=%s&" % urlencode(dork, convall=True)
url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
url += "&start=%d" % ((gpage - 1) * 100)
try:
req = urllib2.Request(url, headers=headers)
conn = urllib2.urlopen(req)
requestMsg = "HTTP request:\nGET %s" % url
requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except urllib2.HTTPError, e:
try:
page = e.read()
except Exception, ex:
warnMsg = "problem occurred while trying to get "
warnMsg += "an error page information (%s)" % getSafeExString(ex)
logger.critical(warnMsg)
return None
except (urllib2.URLError, httplib.error, socket.error, socket.timeout, socks.ProxyError):
errMsg = "unable to connect to Google"
raise SqlmapConnectionException(errMsg)
retVal = [urllib.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)]
if not retVal and "detected unusual traffic" in page:
warnMsg = "Google has detected 'unusual' traffic from "
warnMsg += "used IP address disabling further searches"
logger.warn(warnMsg)
if not retVal:
message = "no usable links found. What do you want to do?"
message += "\n[1] (re)try with DuckDuckGo (default)"
message += "\n[2] (re)try with Disconnect Search"
message += "\n[3] quit"
choice = readInput(message, default="1").strip().upper()
if choice == "Q":
raise SqlmapUserQuitException
elif choice == "2":
url = "https://search.disconnect.me/searchTerms/search?"
url += "start=nav&option=Web"
url += "&query=%s" % urlencode(dork, convall=True)
url += "&ses=Google&location_option=US"
url += "&nextDDG=%s" % urlencode("/search?q=%s&setmkt=en-US&setplang=en-us&setlang=en-us&first=%d&FORM=PORE" % (urlencode(dork, convall=True), (gpage - 1) * 10), convall=True)
url += "&sa=N&showIcons=false&filterIcons=none&js_enabled=1"
regex = DISCONNECT_SEARCH_REGEX
else:
url = "https://duckduckgo.com/d.js?"
url += "q=%s&p=%d&s=100" % (urlencode(dork, convall=True), gpage)
regex = DUCKDUCKGO_REGEX
try:
req = urllib2.Request(url, headers=headers)
conn = urllib2.urlopen(req)
requestMsg = "HTTP request:\nGET %s" % url
requestMsg += " %s" % httplib.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except urllib2.HTTPError, e:
try:
page = e.read()
except socket.timeout:
warnMsg = "connection timed out while trying "
warnMsg += "to get error page information (%d)" % e.code
logger.critical(warnMsg)
return None
except:
errMsg = "unable to connect"
raise SqlmapConnectionException(errMsg)
retVal = [urllib.unquote(match.group(1)) for match in re.finditer(regex, page, re.I | re.S)]
return retVal
def search(dork):
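# force automatic following of redirections for the duration of the search,
# keeping the user's original choice aside (see the sketch below)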
pushValue(kb.redirectChoice)
kb.redirectChoice = REDIRECTION.YES
try:
return _search(dork)
except SqlmapBaseException, ex:
if conf.proxyList:
logger.critical(getSafeExString(ex))
warnMsg = "changing proxy"
logger.warn(warnMsg)
conf.proxy = None
setHTTPHandlers()
return search(dork)
else:
raise
finally:
kb.redirectChoice = popValue()
def setHTTPHandlers(): # Cross-linked function
raise NotImplementedError
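
pushValue() and popValue() (from lib.core.common) maintain a simple value stack, which is what makes the save-force-restore pattern in search() safe even for nested calls; a minimal self-contained sketch of the idiom (stand-alone names, not sqlmap's implementation):

_stack = []

def pushValue(value):
    _stack.append(value)

def popValue():
    return _stack.pop()

choice = "ASK"      # stand-in for kb.redirectChoice
pushValue(choice)   # save the user's original setting
choice = "YES"      # force automatic redirects while searching
try:
    pass  # ... perform the search ...
finally:
    choice = popValue()  # always restore, even on exceptions

print choice  # ASK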