sqlmap/extra/sqlharvest/sqlharvest.py

142 lines
4.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python
2010-10-29 13:59:18 +04:00
"""
2017-01-02 16:19:18 +03:00
Copyright (c) 2006-2017 sqlmap developers (http://sqlmap.org/)
2017-10-11 15:50:46 +03:00
See the file 'LICENSE' for copying permission
2010-10-29 13:59:18 +04:00
"""
import cookielib
import re
import socket
import sys
import urllib
import urllib2
import ConfigParser
from operator import itemgetter
# Default socket timeout, in seconds, installed via socket.setdefaulttimeout()
TIMEOUT = 10
# INI-style file whose [options] section stores the search result page index to resume from
CONFIG_FILE = 'sqlharvest.cfg'
# Output file holding one "<table name>,<count>" record per line
TABLES_FILE = 'tables.txt'
# Value sent as the User-Agent header on search requests
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; AskTB5.3)'
# Base search URL; the query ("q") and offset ("start") parameters are appended to it
SEARCH_URL = 'http://www.google.com/m?source=mobileproducts&dc=gorganic'
2013-01-10 16:18:44 +04:00
MAX_FILE_SIZE = 2 * 1024 * 1024 # if a result (.sql) file for downloading is more than 2MB in size just skip it
# Search terms; spaces are replaced with '+' when the search URL is built
QUERY = 'CREATE TABLE ext:sql'
# Captures (still URL-quoted) result links from a search result page
REGEX_URLS = r';u=([^"]+?)&q='
# Captures, in the named group "result", the text following CREATE TABLE (after an optional
# /* ... */ comment and an optional IF NOT EXISTS) up to the first '(' or ';'
REGEX_RESULT = r'(?i)CREATE TABLE\s*(/\*.*\*/)?\s*(IF NOT EXISTS)?\s*(?P<result>[^\(;]+)'
2010-10-29 13:59:18 +04:00
def _load_tables(path):
    """
    Returns the {table name: count} mapping stored in the given file

    Each line is expected to look like "<name>,<count>". A missing or
    unreadable file yields an empty mapping, and malformed lines are skipped
    one by one so that a single bad record does not discard the rest.
    """

    tables = {}

    try:
        with open(path, 'r') as f:
            for line in f:
                # split on the LAST comma so that names which themselves
                # contain a comma survive a save/load round trip
                name, separator, count = line.rstrip("\r\n").rpartition(',')

                if not separator:
                    continue

                try:
                    tables[name] = int(count)
                except ValueError:
                    continue
    except IOError:
        # best effort: e.g. first run, when the file does not exist yet
        pass

    return tables


def _harvest_file(url, tables):
    """
    Downloads a single (.sql) file and counts its CREATE TABLE names

    Counts are accumulated in place into the 'tables' dictionary. One '*' is
    written to stdout per accepted table name, followed by a newline if at
    least one name was accepted. Files larger than MAX_FILE_SIZE are skipped.
    Network and parsing errors propagate to the caller.
    """

    response = urllib2.urlopen(urllib2.Request(url))

    try:
        length = response.headers.get("Content-Length")

        if length is not None and int(length) > MAX_FILE_SIZE:
            return

        # the Content-Length header can be absent, hence read at most one
        # byte past the limit to detect (and skip) oversized files anyway
        page = response.read(MAX_FILE_SIZE + 1)
    finally:
        response.close()

    if len(page) > MAX_FILE_SIZE:
        return

    found = False

    for match in re.finditer(REGEX_RESULT, page):
        table = match.group("result").strip().strip("`\"'").replace('"."', ".").replace("].[", ".").strip('[]')

        # names containing whitespace (spaces, but also tabs and newlines which
        # would break the one-record-per-line output file), '<', '>' or '--'
        # are treated as parsing noise
        if not table or re.search(r"\s|[<>]|--", table):
            continue

        found = True
        sys.stdout.write('*')
        tables[table] = tables.get(table, 0) + 1

    if found:
        sys.stdout.write("\n")


def main():
    """
    Harvests table names from (.sql) files found through the search engine

    Result pages for QUERY are requested one by one, starting at the page
    index stored in CONFIG_FILE. Every listed file is downloaded and the
    table names from its CREATE TABLE statements are counted. The loop ends
    when a result page lists the same files as the previous one, or on
    Ctrl-C. In both cases the counts are written to TABLES_FILE (sorted by
    count, descending) and the index following the current page is saved to
    CONFIG_FILE, so the page that was current when the run stopped is not
    revisited by the next run.
    """

    # installed before the very first request so that it can not hang forever
    socket.setdefaulttimeout(TIMEOUT)

    cookies = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))
    opener.addheaders = [("User-Agent", USER_AGENT)]

    # the body is not used; the request is made to set initial cookie values
    conn = opener.open(SEARCH_URL)
    try:
        conn.read()
    finally:
        conn.close()

    config = ConfigParser.ConfigParser()
    config.read(CONFIG_FILE)

    if not config.has_section("options"):
        config.add_section("options")
    if not config.has_option("options", "index"):
        config.set("options", "index", "0")

    i = int(config.get("options", "index"))
    tables = _load_tables(TABLES_FILE)

    files = None

    try:
        while True:
            abort = False
            old_files = files
            files = []

            try:
                conn = opener.open("%s&q=%s&start=%d&sa=N" % (SEARCH_URL, QUERY.replace(' ', '+'), i * 10))
                try:
                    page = conn.read()
                finally:
                    conn.close()

                for match in re.finditer(REGEX_URLS, page):
                    files.append(urllib.unquote(match.group(1)))
                    if len(files) >= 10:
                        break

                # an identical listing means that the results have run out
                abort = (files == old_files)

            except KeyboardInterrupt:
                raise

            except Exception as ex:
                # a failed search request is only reported; the page is then
                # handled as one without results and the index still advances
                print(ex)

            if abort:
                break

            sys.stdout.write("\n---------------\n")
            sys.stdout.write("Result page #%d\n" % (i + 1))
            sys.stdout.write("---------------\n")

            for sqlfile in files:
                print(sqlfile)

                try:
                    _harvest_file(sqlfile, tables)

                except KeyboardInterrupt:
                    raise

                except Exception as ex:
                    # a single bad download must not stop the whole harvest
                    print(ex)

            i += 1

    except KeyboardInterrupt:
        pass

    finally:
        ranked = sorted(tables.items(), key=itemgetter(1), reverse=True)

        with open(TABLES_FILE, 'w+') as f:
            for table, count in ranked:
                f.write("%s,%d\n" % (table, count))

        config.set("options", "index", str(i + 1))

        with open(CONFIG_FILE, 'w+') as f:
            config.write(f)
2010-10-29 13:59:18 +04:00
# Run the harvester only when this file is executed as a script (not when imported)
if __name__ == "__main__":
    main()