From 56d76e6bfdb29968af352e2bf960b5ab257daa9a Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Fri, 14 Mar 2014 21:34:16 +0100 Subject: [PATCH] Updating list of extensions to exclude from crawling --- lib/core/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/core/settings.py b/lib/core/settings.py index aa14734c0..f5b9698cd 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -392,7 +392,7 @@ DUMMY_SQL_INJECTION_CHARS = ";()'" DUMMY_USER_INJECTION = r"(?i)[^\w](AND|OR)\s+[^\s]+[=><]|\bUNION\b.+\bSELECT\b" # Extensions skipped by crawler -CRAWL_EXCLUDE_EXTENSIONS = ("gif", "jpg", "jpeg", "image", "jar", "tif", "bmp", "war", "ear", "mpg", "mpeg", "wmv", "mpeg", "scm", "iso", "dmp", "dll", "cab", "so", "avi", "mkv", "bin", "iso", "tar", "png", "pdf", "ps", "wav", "mp3", "mp4", "au", "aiff", "aac", "zip", "rar", "7z", "gz", "flv", "mov") +CRAWL_EXCLUDE_EXTENSIONS = ("gif", "jpg", "jpeg", "image", "jar", "tif", "bmp", "war", "ear", "mpg", "mpeg", "wmv", "mpeg", "scm", "iso", "dmp", "dll", "cab", "so", "avi", "mkv", "bin", "iso", "tar", "png", "pdf", "ps", "wav", "mp3", "mp4", "au", "aiff", "aac", "zip", "rar", "7z", "gz", "flv", "mov", "doc", "docx", "xls", "dot", "dotx", "xlt", "xlsx", "ppt", "pps", "pptx") # Patterns often seen in HTTP headers containing custom injection marking character PROBLEMATIC_CUSTOM_INJECTION_PATTERNS = r"(;q=[^;']+)|(\*/\*)"