mirror of
				https://github.com/sqlmapproject/sqlmap.git
				synced 2025-10-26 05:31:04 +03:00 
			
		
		
		
	crawler fix (skip binary files)
This commit is contained in:
		
							parent
							
								
									20bb1a685b
								
							
						
					
					
						commit
						2a4a284a29
					
				|  | @ -549,9 +549,6 @@ class Tag(PageElement): | |||
|                                           val)) | ||||
|         self.attrs = map(convert, self.attrs) | ||||
| 
 | ||||
|         # Reference: http://bytes.com/topic/python/answers/552874-py-2-5-bug-sgmllib | ||||
|         SGMLParser.convert_codepoint = lambda self, codepoint: unichr(codepoint) | ||||
| 
 | ||||
|     def getString(self): | ||||
|         if (len(self.contents) == 1 | ||||
|             and isinstance(self.contents[0], NavigableString)): | ||||
|  |  | |||
|  | @ -354,3 +354,6 @@ HTML_TITLE_REGEX = "<title>(?P<result>[^<]+)</title>" | |||
| 
 | ||||
| # Chars used to quickly distinguish if the user provided tainted parameter values | ||||
| DUMMY_SQL_INJECTION_CHARS = ";()\"'" | ||||
| 
 | ||||
| # Extensions skipped by crawler | ||||
| CRAWL_EXCLUDE_EXTENSIONS = ("gif","jpg","jar","tif","bmp","war","ear","mpg","wmv","mpeg","scm","iso","dmp","dll","cab","so","avi","bin","exe","iso","tar","png","pdf","ps","mp3","zip","rar","gz") | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ from lib.core.data import conf | |||
| from lib.core.data import kb | ||||
| from lib.core.data import logger | ||||
| from lib.core.exception import sqlmapConnectionException | ||||
| from lib.core.settings import CRAWL_EXCLUDE_EXTENSIONS | ||||
| from lib.core.threads import getCurrentThreadData | ||||
| from lib.core.threads import runThreads | ||||
| from lib.request.connect import Connect as Request | ||||
|  | @ -51,10 +52,11 @@ class Crawler: | |||
|                         kb.locks.limits.release() | ||||
|                         break | ||||
| 
 | ||||
|                     content = None | ||||
|                     try: | ||||
|                         if current.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS: | ||||
|                             content = Request.getPage(url=current, raise404=False)[0] | ||||
|                     except sqlmapConnectionException, e: | ||||
|                         content = None | ||||
|                         errMsg = "connection exception detected (%s). skipping " % e | ||||
|                         errMsg += "url '%s'" % current | ||||
|                         logger.critical(errMsg) | ||||
|  | @ -62,7 +64,7 @@ class Crawler: | |||
|                     if not kb.threadContinue: | ||||
|                         break | ||||
| 
 | ||||
|                     if content: | ||||
|                     if isinstance(content, unicode): | ||||
|                         soup = BeautifulSoup(content) | ||||
|                         for tag in soup('a'): | ||||
|                             if tag.get("href"): | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user