From d6062e8fc9394d3a99c7a1ccb81a4a093c0581c5 Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Mon, 20 Jun 2011 21:18:12 +0000 Subject: [PATCH] minor fix for crawler and far less message overlaps in future --- lib/core/common.py | 2 ++ lib/core/settings.py | 2 +- lib/utils/crawler.py | 33 +++++++++++++++++---------------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/lib/core/common.py b/lib/core/common.py index b1fa6e090..d5ddb6046 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -754,6 +754,8 @@ def readInput(message, default=None): elif message[-1] == ']': message += " " + message = "\r%s" % message + if conf.batch: if isinstance(default, (list, tuple, set)): options = ",".join([getUnicode(opt, UNICODE_ENCODING) for opt in default]) diff --git a/lib/core/settings.py b/lib/core/settings.py index b0620af6f..abdc26d46 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -39,7 +39,7 @@ logging.addLevelName(7, "TRAFFIC IN") LOGGER = logging.getLogger("sqlmapLog") LOGGER_HANDLER = logging.StreamHandler(sys.stdout) -FORMATTER = logging.Formatter("[%(asctime)s] [%(levelname)s] %(message)s", "%H:%M:%S") +FORMATTER = logging.Formatter("\r[%(asctime)s] [%(levelname)s] %(message)s", "%H:%M:%S") LOGGER_HANDLER.setFormatter(FORMATTER) LOGGER.addHandler(LOGGER_HANDLER) diff --git a/lib/utils/crawler.py b/lib/utils/crawler.py index 5477377b0..99df57dc9 100644 --- a/lib/utils/crawler.py +++ b/lib/utils/crawler.py @@ -51,30 +51,31 @@ class Crawler: kb.locks.limits.release() break - content = Request.getPage(url=current)[0] + content = Request.getPage(url=current, raise404=False)[0] if not kb.threadContinue: break - soup = BeautifulSoup(content) - for tag in soup('a'): - if tag.get("href"): - url = urlparse.urljoin(conf.url, tag.get("href")) + if content: + soup = BeautifulSoup(content) + for tag in soup('a'): + if tag.get("href"): + url = urlparse.urljoin(conf.url, tag.get("href")) - # flag to know if we are dealing with the same target host - target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url])) + # flag to know if we are dealing with the same target host + target = reduce(lambda x, y: x == y, map(lambda x: urlparse.urlparse(x).netloc.split(':')[0], [url, conf.url])) - if conf.scope: - if not re.search(conf.scope, url, re.I): + if conf.scope: + if not re.search(conf.scope, url, re.I): + continue + elif not target: continue - elif not target: - continue - kb.locks.outputs.acquire() - threadData.shared.deeper.add(url) - if re.search(r"(.*?)\?(.+)", url): - threadData.shared.outputs.add(url) - kb.locks.outputs.release() + kb.locks.outputs.acquire() + threadData.shared.deeper.add(url) + if re.search(r"(.*?)\?(.+)", url): + threadData.shared.outputs.add(url) + kb.locks.outputs.release() if conf.verbose in (1, 2): kb.locks.ioLock.acquire()