#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2011 sqlmap developers (http://sqlmap.sourceforge.net/)
See the file 'doc/COPYING' for copying permission
"""

import re
import threading
import urlparse
import time

from lib.core.common import clearConsoleLine
from lib.core.common import dataToStdout
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.exception import sqlmapConnectionException
from lib.core.threads import getCurrentThreadData
from lib.core.threads import runThreads
from lib.request.connect import Connect as Request
from extra.beautifulsoup.beautifulsoup import BeautifulSoup
from extra.oset.pyoset import oset

class Crawler:
    """
    This class defines methods used to perform crawling (command
    line option '--crawl')
    """

    def getTargetUrls(self, depth=1):
        try:
            threadData = getCurrentThreadData()
            threadData.shared.outputs = oset()

            lockNames = ('limits', 'outputs', 'ioLock')
            for lock in lockNames:
                kb.locks[lock] = threading.Lock()
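
            # Worker routine run by each crawling thread: it pops URLs from
            # the shared "unprocessed" set, fetches them and collects the
            # links found on each page.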
            def crawlThread():
                threadData = getCurrentThreadData()

                while kb.threadContinue:
                    kb.locks.limits.acquire()
                    if threadData.shared.unprocessed:
                        current = threadData.shared.unprocessed.pop()
                        kb.locks.limits.release()
                    else:
                        kb.locks.limits.release()
                        break

                    content = Request.getPage(url=current, raise404=False)[0]

                    if not kb.threadContinue:
                        break

                    if content:
                        soup = BeautifulSoup(content)
                        for tag in soup('a'):
                            if tag.get("href"):
                                url = urlparse.urljoin(conf.url, tag.get("href"))

                                # flag to know if we are dealing with the same target host
                                target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url]))

                                if conf.scope:
                                    if not re.search(conf.scope, url, re.I):
                                        continue
                                elif not target:
                                    continue

                                kb.locks.outputs.acquire()
                                threadData.shared.deeper.add(url)
                                if re.search(r"(.*?)\?(.+)", url):
                                    threadData.shared.outputs.add(url)
                                kb.locks.outputs.release()

                    if conf.verbose in (1, 2):
                        kb.locks.ioLock.acquire()
                        threadData.shared.count += 1
                        status = '%d/%d links visited (%d%s)' % (threadData.shared.count, threadData.shared.length, round(100.0*threadData.shared.count/threadData.shared.length), '%')
                        dataToStdout("\r[%s] [INFO] %s" % (time.strftime("%X"), status), True)
                        kb.locks.ioLock.release()

            threadData.shared.deeper = set()
            threadData.shared.unprocessed = set([conf.url])

            logger.info("starting crawler")
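
            # Breadth-first pass per depth level: the current frontier in
            # "unprocessed" is crawled by the worker threads, and the links
            # gathered in "deeper" become the frontier for the next level.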
            for i in xrange(depth):
                threadData.shared.count = 0
                threadData.shared.length = len(threadData.shared.unprocessed)
                numThreads = min(conf.threads, len(threadData.shared.unprocessed))
                logger.info("searching for links with depth %d" % (i + 1))
                runThreads(numThreads, crawlThread)
                clearConsoleLine(True)
                threadData.shared.unprocessed = threadData.shared.deeper

        except KeyboardInterrupt:
            warnMsg = "user aborted during crawling. sqlmap "
            warnMsg += "will use partial list"
            logger.warn(warnMsg)

        except sqlmapConnectionException, e:
            errMsg = "connection exception detected. sqlmap "
            errMsg += "will use partial list. "
            errMsg += "'%s'" % e
            logger.critical(errMsg)

        finally:
            clearConsoleLine(True)

            if not threadData.shared.outputs:
                warnMsg = "no usable links found (with GET parameters)"
                logger.warn(warnMsg)
            else:
                for url in threadData.shared.outputs:
                    kb.targetUrls.add(( url, None, None, None ))
            kb.suppressResumeInfo = False
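
# A minimal usage sketch (hypothetical call site; in sqlmap the crawler is
# driven from the option-handling code, and the attribute name "crawlDepth"
# below is an assumption for illustration only):
#
#     crawler = Crawler()
#     crawler.getTargetUrls(depth=conf.crawlDepth)
#
# Links carrying GET parameters are collected into kb.targetUrls as
# (url, None, None, None) tuples and later fed to the scanning engine.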