From 1088011bf093f4f0e759b5f861624165a6f6f9bd Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Fri, 2 Aug 2013 23:07:13 +0200 Subject: [PATCH] Adding new binary file formats for excluding in crawling --- lib/core/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/core/settings.py b/lib/core/settings.py index 67d4dc153..1b367e576 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -380,7 +380,7 @@ DUMMY_SQL_INJECTION_CHARS = ";()'" DUMMY_USER_INJECTION = r"(?i)[^\w](AND|OR)\s+[^\s]+[=><]" # Extensions skipped by crawler -CRAWL_EXCLUDE_EXTENSIONS = ("gif", "jpg", "jar", "tif", "bmp", "war", "ear", "mpg", "wmv", "mpeg", "scm", "iso", "dmp", "dll", "cab", "so", "avi", "bin", "exe", "iso", "tar", "png", "pdf", "ps", "mp3", "zip", "rar", "gz") +CRAWL_EXCLUDE_EXTENSIONS = ("gif", "jpg", "jpeg", "image", "jar", "tif", "bmp", "war", "ear", "mpg", "mpeg", "wmv", "mpeg", "scm", "iso", "dmp", "dll", "cab", "so", "avi", "mkv", "bin", "exe", "iso", "tar", "png", "pdf", "ps", "wav", "mp3", "mp4", "au", "aiff", "aac", "zip", "rar", "7z", "gz", "flv", "mov") # Patterns often seen in HTTP headers containing custom injection marking character PROBLEMATIC_CUSTOM_INJECTION_PATTERNS = r"(\bq=[^;']+)|(\*/\*)"