2013-02-14 15:32:17 +04:00
|
|
|
#!/usr/bin/env python
|
2011-06-20 15:32:30 +04:00
|
|
|
|
|
|
|
"""
|
2017-01-02 16:19:18 +03:00
|
|
|
Copyright (c) 2006-2017 sqlmap developers (http://sqlmap.org/)
|
2017-10-11 15:50:46 +03:00
|
|
|
See the file 'LICENSE' for copying permission
|
2011-06-20 15:32:30 +04:00
|
|
|
"""
|
|
|
|
|
2011-07-11 12:54:39 +04:00
|
|
|
import httplib
|
2014-11-20 18:29:17 +03:00
|
|
|
import os
|
2011-06-20 15:32:30 +04:00
|
|
|
import re
|
|
|
|
import urlparse
|
2014-11-20 18:29:17 +03:00
|
|
|
import tempfile
|
2011-06-20 18:27:24 +04:00
|
|
|
import time
|
2011-06-20 15:32:30 +04:00
|
|
|
|
2016-12-20 11:53:44 +03:00
|
|
|
from lib.core.common import checkSameHost
|
2011-06-20 18:27:24 +04:00
|
|
|
from lib.core.common import clearConsoleLine
|
2011-06-20 15:32:30 +04:00
|
|
|
from lib.core.common import dataToStdout
|
2011-10-29 13:32:20 +04:00
|
|
|
from lib.core.common import findPageForms
|
2015-11-08 01:30:24 +03:00
|
|
|
from lib.core.common import getSafeExString
|
2014-11-26 15:38:21 +03:00
|
|
|
from lib.core.common import openFile
|
2014-11-20 18:29:17 +03:00
|
|
|
from lib.core.common import readInput
|
|
|
|
from lib.core.common import safeCSValue
|
2017-04-24 00:50:30 +03:00
|
|
|
from lib.core.common import urldecode
|
2011-06-20 15:32:30 +04:00
|
|
|
from lib.core.data import conf
|
|
|
|
from lib.core.data import kb
|
|
|
|
from lib.core.data import logger
|
2016-05-31 14:02:26 +03:00
|
|
|
from lib.core.enums import MKSTEMP_PREFIX
|
2012-12-06 17:14:19 +04:00
|
|
|
from lib.core.exception import SqlmapConnectionException
|
2015-10-05 17:33:10 +03:00
|
|
|
from lib.core.exception import SqlmapSyntaxException
|
2011-06-21 02:41:38 +04:00
|
|
|
from lib.core.settings import CRAWL_EXCLUDE_EXTENSIONS
|
2011-06-20 15:32:30 +04:00
|
|
|
from lib.core.threads import getCurrentThreadData
|
|
|
|
from lib.core.threads import runThreads
|
2015-01-20 12:03:35 +03:00
|
|
|
from lib.parse.sitemap import parseSitemap
|
2011-06-20 15:32:30 +04:00
|
|
|
from lib.request.connect import Connect as Request
|
2012-07-14 19:01:04 +04:00
|
|
|
from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
|
|
|
|
from thirdparty.oset.pyoset import oset
|
2011-06-20 15:32:30 +04:00
|
|
|
|
2013-01-09 18:22:21 +04:00
|
|
|
def crawl(target):
    """
    Crawls pages reachable from the given target URL and registers the
    discovered links as scan targets

    Starting from `target`, pages are fetched by worker threads for up to
    conf.crawlDepth levels. Every link passing the scope/host and extension
    filters is queued for the next level, while links carrying a query string
    are additionally collected into threadData.shared.value. On exit (also when
    the user aborts with Ctrl-C) collected links are added to kb.targets and
    handed over to storeResultsToFile().

    @param target: URL used as the starting point of crawling
    """

    # Note: acquired before the 'try' so that the 'finally' clause at the
    # bottom can never run into an unbound 'threadData'
    threadData = getCurrentThreadData()
    threadData.shared.value = oset()

    try:
        visited = set()

        def crawlThread():
            """
            Worker routine: pops URLs from the shared 'unprocessed' set until
            it gets empty (or threads are told to stop), retrieves each page
            and harvests its links
            """

            threadData = getCurrentThreadData()

            while kb.threadContinue:
                # Picking of the next URL and bookkeeping of already visited
                # ones is done under the same lock
                with kb.locks.limit:
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        if current in visited:
                            continue
                        elif conf.crawlExclude and re.search(conf.crawlExclude, current):
                            dbgMsg = "skipping '%s'" % current
                            logger.debug(dbgMsg)
                            continue
                        else:
                            visited.add(current)
                    else:
                        break

                content = None
                try:
                    if current:
                        content = Request.getPage(url=current, crawling=True, raise404=False)[0]
                except SqlmapConnectionException as ex:
                    errMsg = "connection exception detected (%s). skipping " % getSafeExString(ex)
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)
                except SqlmapSyntaxException:
                    errMsg = "invalid URL detected. skipping '%s'" % current
                    logger.critical(errMsg)
                except httplib.InvalidURL as ex:
                    errMsg = "invalid URL detected (%s). skipping " % getSafeExString(ex)
                    errMsg += "URL '%s'" % current
                    logger.critical(errMsg)

                if not kb.threadContinue:
                    break

                if isinstance(content, unicode):
                    try:
                        # Narrow down the content to the <html> element (if any)
                        match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
                        if match:
                            content = "<html>%s</html>" % match.group(1)

                        soup = BeautifulSoup(content)
                        tags = soup('a')

                        # Fallback to the plain regular expression matching
                        # when the parser finds no anchors
                        if not tags:
                            tags = re.finditer(r'(?i)<a[^>]+href="(?P<href>[^>"]+)"', content)

                        for tag in tags:
                            # Parsed tags expose get(), regex matches expose group()
                            href = tag.get("href") if hasattr(tag, "get") else tag.group("href")

                            if href:
                                # In case of a redirection of this very request
                                # resolve relative links against the final URL
                                if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID:
                                    current = threadData.lastRedirectURL[1]
                                url = urlparse.urljoin(current, href)

                                # flag to know if we are dealing with the same target host
                                _ = checkSameHost(url, target)

                                if conf.scope:
                                    if not re.search(conf.scope, url, re.I):
                                        continue
                                elif not _:
                                    continue

                                # Note: extension is taken from the URL path only, so that
                                # dots inside of the host name or the query string are not
                                # mistaken for a file extension
                                extension = os.path.splitext(urlparse.urlsplit(url).path)[1][1:].lower()

                                if extension not in CRAWL_EXCLUDE_EXTENSIONS:
                                    with kb.locks.value:
                                        threadData.shared.deeper.add(url)
                                        # Only links with a query string are usable as targets
                                        if re.search(r"(.*?)\?(.+)", url):
                                            threadData.shared.value.add(url)
                    except UnicodeEncodeError:  # for non-HTML files
                        pass
                    except ValueError:          # for non-valid links
                        pass
                    finally:
                        if conf.forms:
                            findPageForms(content, current, False, True)

                if conf.verbose in (1, 2):
                    threadData.shared.count += 1
                    status = '%d/%d links visited (%d%%)' % (threadData.shared.count, threadData.shared.length, round(100.0 * threadData.shared.count / threadData.shared.length))
                    dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)

        threadData.shared.deeper = set()
        threadData.shared.unprocessed = set([target])

        if not conf.sitemapUrl:
            message = "do you want to check for the existence of "
            message += "site's sitemap(.xml) [y/N] "

            if readInput(message, default='N', boolean=True):
                found = True
                items = None
                url = urlparse.urljoin(target, "/sitemap.xml")

                try:
                    items = parseSitemap(url)
                except SqlmapConnectionException as ex:
                    if "page not found" in getSafeExString(ex):
                        found = False
                        logger.warn("'sitemap.xml' not found")
                except Exception as ex:
                    # Sitemap processing is best-effort, though (unlike with a bare
                    # 'except') Ctrl-C is left to the outer KeyboardInterrupt handler
                    logger.debug("problem occurred while processing sitemap '%s' (%s)" % (url, getSafeExString(ex)))

                if found:
                    if items:
                        for item in items:
                            if re.search(r"(.*?)\?(.+)", item):
                                threadData.shared.value.add(item)
                        if conf.crawlDepth > 1:
                            threadData.shared.unprocessed.update(items)
                    logger.info("%s links found" % ("no" if not items else len(items)))

        infoMsg = "starting crawler"
        if conf.bulkFile:
            infoMsg += " for target URL '%s'" % target
        logger.info(infoMsg)

        for i in xrange(conf.crawlDepth):
            threadData.shared.count = 0
            threadData.shared.length = len(threadData.shared.unprocessed)
            numThreads = min(conf.threads, len(threadData.shared.unprocessed))

            if not conf.bulkFile:
                logger.info("searching for links with depth %d" % (i + 1))

            runThreads(numThreads, crawlThread, threadChoice=(i > 0))
            clearConsoleLine(True)

            # Links found at this level become the work queue of the next one
            if threadData.shared.deeper:
                threadData.shared.unprocessed = set(threadData.shared.deeper)
            else:
                break

    except KeyboardInterrupt:
        warnMsg = "user aborted during crawling. sqlmap "
        warnMsg += "will use partial list"
        logger.warn(warnMsg)

    finally:
        clearConsoleLine(True)

        if not threadData.shared.value:
            warnMsg = "no usable links found (with GET parameters)"
            logger.warn(warnMsg)
        else:
            for url in threadData.shared.value:
                kb.targets.add((urldecode(url, kb.pageEncoding), None, None, None, None))

        storeResultsToFile(kb.targets)
|
|
|
|
|
|
|
|
def storeResultsToFile(results):
    """
    Optionally writes crawling results to a temporary file

    Nothing is done for empty results. Otherwise, the user is asked (only
    while kb.storeCrawlingChoice is still None) whether results should be
    stored, with the answer being kept in kb.storeCrawlingChoice. If accepted,
    a temporary file is created: .csv with the "URL,POST" header and
    URL/POST data pairs when conf.forms is set, .txt with one URL per line
    otherwise.

    @param results: iterable of 5-item tuples where the first item is the URL
        and the third one the POST data
    """

    if not results:
        return

    if kb.storeCrawlingChoice is None:
        question = "do you want to store crawling results to a temporary file "
        question += "for eventual further processing with other tools [y/N] "
        kb.storeCrawlingChoice = readInput(question, default='N', boolean=True)

    if not kb.storeCrawlingChoice:
        return

    descriptor, path = tempfile.mkstemp(prefix=MKSTEMP_PREFIX.CRAWLER, suffix=".csv" if conf.forms else ".txt")

    # Only the file name is needed as the file gets reopened below
    os.close(descriptor)

    logger.info("writing crawling results to a temporary file '%s' " % path)

    with openFile(path, "w+b") as output:
        if conf.forms:
            output.write("URL,POST\n")

        for entry in results:
            url, _, data, _, _ = entry

            if conf.forms:
                record = "%s,%s\n" % (safeCSValue(url), safeCSValue(data or ""))
            else:
                record = "%s\n" % url

            output.write(record)
|