sqlmap/lib/utils/search.py

214 lines
7.8 KiB
Python
Raw Permalink Normal View History

2019-05-08 13:47:52 +03:00
#!/usr/bin/env python
"""
2023-01-03 01:24:59 +03:00
Copyright (c) 2006-2023 sqlmap developers (https://sqlmap.org/)
2017-10-11 15:50:46 +03:00
See the file 'LICENSE' for copying permission
"""
import re
import socket
from lib.core.common import getSafeExString
from lib.core.common import popValue
from lib.core.common import pushValue
from lib.core.common import readInput
from lib.core.common import urlencode
2020-01-16 00:38:34 +03:00
from lib.core.convert import getBytes
2019-05-06 01:54:21 +03:00
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.decorators import stackedmethod
from lib.core.enums import CUSTOM_LOGGING
from lib.core.enums import HTTP_HEADER
from lib.core.enums import REDIRECTION
from lib.core.exception import SqlmapBaseException
from lib.core.exception import SqlmapConnectionException
from lib.core.exception import SqlmapUserQuitException
from lib.core.settings import BING_REGEX
from lib.core.settings import DUCKDUCKGO_REGEX
2019-06-04 15:44:06 +03:00
from lib.core.settings import DUMMY_SEARCH_USER_AGENT
2021-04-28 19:06:15 +03:00
from lib.core.settings import GOOGLE_CONSENT_COOKIE
from lib.core.settings import GOOGLE_REGEX
from lib.core.settings import HTTP_ACCEPT_ENCODING_HEADER_VALUE
from lib.core.settings import UNICODE_ENCODING
from lib.request.basic import decodePage
from thirdparty.six.moves import http_client as _http_client
from thirdparty.six.moves import urllib as _urllib
from thirdparty.socks import socks
def _search(dork):
"""
This method performs the effective search on Google providing
the google dork and the Google session cookie
"""
if not dork:
return None
2019-06-04 11:55:07 +03:00
page = None
2019-01-17 17:06:00 +03:00
data = None
2019-06-04 11:55:07 +03:00
requestHeaders = {}
responseHeaders = {}
2019-06-04 11:55:07 +03:00
requestHeaders[HTTP_HEADER.USER_AGENT] = dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT)
requestHeaders[HTTP_HEADER.ACCEPT_ENCODING] = HTTP_ACCEPT_ENCODING_HEADER_VALUE
2021-04-28 19:06:15 +03:00
requestHeaders[HTTP_HEADER.COOKIE] = GOOGLE_CONSENT_COOKIE
try:
2019-06-04 11:55:07 +03:00
req = _urllib.request.Request("https://www.google.com/ncr", headers=requestHeaders)
conn = _urllib.request.urlopen(req)
2019-01-22 02:40:48 +03:00
except Exception as ex:
errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex)
raise SqlmapConnectionException(errMsg)
gpage = conf.googlePage if conf.googlePage > 1 else 1
logger.info("using search result page #%d" % gpage)
2021-04-28 19:06:15 +03:00
url = "https://www.google.com/search?" # NOTE: if consent fails, try to use the "http://"
url += "q=%s&" % urlencode(dork, convall=True)
url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
url += "&start=%d" % ((gpage - 1) * 100)
try:
2019-06-04 11:55:07 +03:00
req = _urllib.request.Request(url, headers=requestHeaders)
conn = _urllib.request.urlopen(req)
requestMsg = "HTTP request:\nGET %s" % url
2019-03-28 13:22:38 +03:00
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except _urllib.error.HTTPError as ex:
try:
page = ex.read()
2019-06-04 11:55:07 +03:00
responseHeaders = ex.info()
except Exception as _:
warnMsg = "problem occurred while trying to get "
warnMsg += "an error page information (%s)" % getSafeExString(_)
logger.critical(warnMsg)
return None
except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
errMsg = "unable to connect to Google"
raise SqlmapConnectionException(errMsg)
2019-06-04 11:55:07 +03:00
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
2023-08-01 12:45:20 +03:00
page = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
2020-05-13 13:53:58 +03:00
retVal = [_urllib.parse.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)]
if not retVal and "detected unusual traffic" in page:
warnMsg = "Google has detected 'unusual' traffic from "
warnMsg += "used IP address disabling further searches"
if conf.proxyList:
raise SqlmapBaseException(warnMsg)
else:
logger.critical(warnMsg)
if not retVal:
message = "no usable links found. What do you want to do?"
message += "\n[1] (re)try with DuckDuckGo (default)"
message += "\n[2] (re)try with Bing"
message += "\n[3] quit"
2017-04-19 15:46:27 +03:00
choice = readInput(message, default='1')
if choice == '3':
raise SqlmapUserQuitException
elif choice == '2':
url = "https://www.bing.com/search?q=%s&first=%d" % (urlencode(dork, convall=True), (gpage - 1) * 10 + 1)
regex = BING_REGEX
2017-11-06 12:00:29 +03:00
else:
2020-12-10 16:22:44 +03:00
url = "https://html.duckduckgo.com/html/"
2019-01-17 17:06:00 +03:00
data = "q=%s&s=%d" % (urlencode(dork, convall=True), (gpage - 1) * 30)
regex = DUCKDUCKGO_REGEX
try:
2020-01-16 00:38:34 +03:00
req = _urllib.request.Request(url, data=getBytes(data), headers=requestHeaders)
conn = _urllib.request.urlopen(req)
requestMsg = "HTTP request:\nGET %s" % url
2019-03-28 13:22:38 +03:00
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except _urllib.error.HTTPError as ex:
try:
page = ex.read()
page = decodePage(page, ex.headers.get("Content-Encoding"), ex.headers.get("Content-Type"))
except socket.timeout:
warnMsg = "connection timed out while trying "
warnMsg += "to get error page information (%d)" % ex.code
logger.critical(warnMsg)
return None
except:
errMsg = "unable to connect"
raise SqlmapConnectionException(errMsg)
2023-08-01 12:45:20 +03:00
page = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
retVal = [_urllib.parse.unquote(match.group(1).replace("&amp;", "&")) for match in re.finditer(regex, page, re.I | re.S)]
2019-01-17 17:06:00 +03:00
if not retVal and "issue with the Tor Exit Node you are currently using" in page:
warnMsg = "DuckDuckGo has detected 'unusual' traffic from "
warnMsg += "used (Tor) IP address"
if conf.proxyList:
raise SqlmapBaseException(warnMsg)
else:
logger.critical(warnMsg)
return retVal
@stackedmethod
def search(dork):
2021-01-12 15:21:51 +03:00
pushValue(kb.choices.redirect)
kb.choices.redirect = REDIRECTION.YES
try:
return _search(dork)
2019-01-22 02:40:48 +03:00
except SqlmapBaseException as ex:
if conf.proxyList:
logger.critical(getSafeExString(ex))
warnMsg = "changing proxy"
logger.warning(warnMsg)
conf.proxy = None
setHTTPHandlers()
return search(dork)
else:
raise
finally:
2021-01-12 15:21:51 +03:00
kb.choices.redirect = popValue()
2018-03-21 16:29:54 +03:00
def setHTTPHandlers(): # Cross-referenced function
raise NotImplementedError