#!/usr/bin/env python """ Copyright (c) 2006-2014 sqlmap developers (http://sqlmap.org/) See the file 'doc/COPYING' for copying permission """ import codecs import httplib import os import re import urlparse import tempfile import time from lib.core.common import clearConsoleLine from lib.core.common import dataToStdout from lib.core.common import findPageForms from lib.core.common import readInput from lib.core.common import safeCSValue from lib.core.common import singleTimeWarnMessage from lib.core.data import conf from lib.core.data import kb from lib.core.data import logger from lib.core.exception import SqlmapConnectionException from lib.core.settings import CRAWL_EXCLUDE_EXTENSIONS from lib.core.settings import UNICODE_ENCODING from lib.core.threads import getCurrentThreadData from lib.core.threads import runThreads from lib.request.connect import Connect as Request from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup from thirdparty.oset.pyoset import oset def crawl(target): try: visited = set() threadData = getCurrentThreadData() threadData.shared.value = oset() def crawlThread(): threadData = getCurrentThreadData() while kb.threadContinue: with kb.locks.limit: if threadData.shared.unprocessed: current = threadData.shared.unprocessed.pop() if current in visited: continue else: visited.add(current) else: break content = None try: if current: content = Request.getPage(url=current, crawling=True, raise404=False)[0] except SqlmapConnectionException, e: errMsg = "connection exception detected (%s). skipping " % e errMsg += "URL '%s'" % current logger.critical(errMsg) except httplib.InvalidURL, e: errMsg = "invalid URL detected (%s). skipping " % e errMsg += "URL '%s'" % current logger.critical(errMsg) if not kb.threadContinue: break if isinstance(content, unicode): try: match = re.search(r"(?si)]*>(.+)", content) if match: content = "%s" % match.group(1) soup = BeautifulSoup(content) tags = soup('a') if not tags: tags = re.finditer(r'(?si)]+href="(?P[^>"]+)"', content) for tag in tags: href = tag.get("href") if hasattr(tag, "get") else tag.group("href") if href: if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID: current = threadData.lastRedirectURL[1] url = urlparse.urljoin(current, href) # flag to know if we are dealing with the same target host _ = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], (url, target))) if conf.scope: if not re.search(conf.scope, url, re.I): continue elif not _: continue if url.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS: with kb.locks.value: threadData.shared.deeper.add(url) if re.search(r"(.*?)\?(.+)", url): threadData.shared.value.add(url) except UnicodeEncodeError: # for non-HTML files pass finally: if conf.forms: findPageForms(content, current, False, True) if conf.verbose in (1, 2): threadData.shared.count += 1 status = '%d/%d links visited (%d%%)' % (threadData.shared.count, threadData.shared.length, round(100.0 * threadData.shared.count / threadData.shared.length)) dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True) threadData.shared.deeper = set() threadData.shared.unprocessed = set([target]) infoMsg = "starting crawler" if conf.bulkFile: infoMsg += " for target URL '%s'" % target logger.info(infoMsg) for i in xrange(conf.crawlDepth): threadData.shared.count = 0 threadData.shared.length = len(threadData.shared.unprocessed) numThreads = min(conf.threads, len(threadData.shared.unprocessed)) if not conf.bulkFile: logger.info("searching for links with depth %d" % (i + 1)) runThreads(numThreads, crawlThread, threadChoice=(i>0)) clearConsoleLine(True) if threadData.shared.deeper: threadData.shared.unprocessed = set(threadData.shared.deeper) else: break except KeyboardInterrupt: warnMsg = "user aborted during crawling. sqlmap " warnMsg += "will use partial list" logger.warn(warnMsg) finally: clearConsoleLine(True) if not threadData.shared.value: warnMsg = "no usable links found (with GET parameters)" logger.warn(warnMsg) else: for url in threadData.shared.value: kb.targets.add((url, None, None, None, None)) storeResultsToFile(kb.targets) def storeResultsToFile(results): if not results: return if kb.storeCrawlingChoice is None: message = "do you want to store crawling results to a temporary file " message += "for eventual further processing with other tools [y/N] " test = readInput(message, default="N") kb.storeCrawlingChoice = test[0] in ("y", "Y") if kb.storeCrawlingChoice: handle, filename = tempfile.mkstemp(prefix="sqlmapcrawling-", suffix=".csv" if conf.forms else ".txt") os.close(handle) infoMsg = "writing crawling results to a temporary file '%s' " % filename logger.info(infoMsg) with codecs.open(filename, "w+b", UNICODE_ENCODING) as f: if conf.forms: f.write("URL,POST\n") for url, _, data, _, _ in results: if conf.forms: f.write("%s,%s\n" % (safeCSValue(url), safeCSValue(data or ""))) else: f.write("%s\n" % url)