mirror of
				https://github.com/sqlmapproject/sqlmap.git
				synced 2025-10-25 13:11:00 +03:00 
			
		
		
		
	crawler fix (skip binary files)
This commit is contained in:
		
							parent
							
								
									20bb1a685b
								
							
						
					
					
						commit
						2a4a284a29
					
				|  | @ -549,9 +549,6 @@ class Tag(PageElement): | ||||||
|                                           val)) |                                           val)) | ||||||
|         self.attrs = map(convert, self.attrs) |         self.attrs = map(convert, self.attrs) | ||||||
| 
 | 
 | ||||||
|         # Reference: http://bytes.com/topic/python/answers/552874-py-2-5-bug-sgmllib |  | ||||||
|         SGMLParser.convert_codepoint = lambda self, codepoint: unichr(codepoint) |  | ||||||
| 
 |  | ||||||
|     def getString(self): |     def getString(self): | ||||||
|         if (len(self.contents) == 1 |         if (len(self.contents) == 1 | ||||||
|             and isinstance(self.contents[0], NavigableString)): |             and isinstance(self.contents[0], NavigableString)): | ||||||
|  |  | ||||||
|  | @ -354,3 +354,6 @@ HTML_TITLE_REGEX = "<title>(?P<result>[^<]+)</title>" | ||||||
| 
 | 
 | ||||||
| # Chars used to quickly distinguish if the user provided tainted parameter values | # Chars used to quickly distinguish if the user provided tainted parameter values | ||||||
| DUMMY_SQL_INJECTION_CHARS = ";()\"'" | DUMMY_SQL_INJECTION_CHARS = ";()\"'" | ||||||
|  | 
 | ||||||
|  | # Extensions skipped by crawler | ||||||
|  | CRAWL_EXCLUDE_EXTENSIONS = ("gif","jpg","jar","tif","bmp","war","ear","mpg","wmv","mpeg","scm","iso","dmp","dll","cab","so","avi","bin","exe","iso","tar","png","pdf","ps","mp3","zip","rar","gz") | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ from lib.core.data import conf | ||||||
| from lib.core.data import kb | from lib.core.data import kb | ||||||
| from lib.core.data import logger | from lib.core.data import logger | ||||||
| from lib.core.exception import sqlmapConnectionException | from lib.core.exception import sqlmapConnectionException | ||||||
|  | from lib.core.settings import CRAWL_EXCLUDE_EXTENSIONS | ||||||
| from lib.core.threads import getCurrentThreadData | from lib.core.threads import getCurrentThreadData | ||||||
| from lib.core.threads import runThreads | from lib.core.threads import runThreads | ||||||
| from lib.request.connect import Connect as Request | from lib.request.connect import Connect as Request | ||||||
|  | @ -51,10 +52,11 @@ class Crawler: | ||||||
|                         kb.locks.limits.release() |                         kb.locks.limits.release() | ||||||
|                         break |                         break | ||||||
| 
 | 
 | ||||||
|  |                     content = None | ||||||
|                     try: |                     try: | ||||||
|                         content = Request.getPage(url=current, raise404=False)[0] |                         if current.split('.')[-1].lower() not in CRAWL_EXCLUDE_EXTENSIONS: | ||||||
|  |                             content = Request.getPage(url=current, raise404=False)[0] | ||||||
|                     except sqlmapConnectionException, e: |                     except sqlmapConnectionException, e: | ||||||
|                         content = None |  | ||||||
|                         errMsg = "connection exception detected (%s). skipping " % e |                         errMsg = "connection exception detected (%s). skipping " % e | ||||||
|                         errMsg += "url '%s'" % current |                         errMsg += "url '%s'" % current | ||||||
|                         logger.critical(errMsg) |                         logger.critical(errMsg) | ||||||
|  | @ -62,7 +64,7 @@ class Crawler: | ||||||
|                     if not kb.threadContinue: |                     if not kb.threadContinue: | ||||||
|                         break |                         break | ||||||
| 
 | 
 | ||||||
|                     if content: |                     if isinstance(content, unicode): | ||||||
|                         soup = BeautifulSoup(content) |                         soup = BeautifulSoup(content) | ||||||
|                         for tag in soup('a'): |                         for tag in soup('a'): | ||||||
|                             if tag.get("href"): |                             if tag.get("href"): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user