Added support for --scope in --crawl mode

This commit is contained in:
Miroslav Stampar 2011-06-20 12:37:51 +00:00
parent 42746cc706
commit 4d1fa5596b
2 changed files with 14 additions and 7 deletions

View File

@ -192,7 +192,7 @@ def __feedTargetsDict(reqFile, addedTargetUrls):
continue
if conf.scope:
getPostReq &= re.search(conf.scope, url) is not None
getPostReq &= re.search(conf.scope, url, re.I) is not None
if getPostReq:
if not kb.targetUrls or url not in addedTargetUrls:

View File

@ -58,14 +58,21 @@ class Crawler:
for tag in soup('a'):
if tag.get("href"):
url = urlparse.urljoin(conf.url, tag.get("href"))
# flag to know if we are dealing with the same target host
target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url]))
if target:
kb.locks.outputs.acquire()
threadData.shared.deeper.add(url)
if re.search(r"(.*?)\?(.+)", url):
threadData.shared.outputs.add(url)
kb.locks.outputs.release()
if conf.scope:
if not re.search(conf.scope, url, re.I):
continue
elif not target:
continue
kb.locks.outputs.acquire()
threadData.shared.deeper.add(url)
if re.search(r"(.*?)\?(.+)", url):
threadData.shared.outputs.add(url)
kb.locks.outputs.release()
threadData.shared.deeper = set()
threadData.shared.unprocessed = set([conf.url])