Update inference.py

2025-11-13 14:25:33 +03:00 · 2025-09-01 01:12:54 +08:00 · 2025-09-01 01:12:54 +08:00 · bb1120d183
commit bb1120d183
parent d9d9b5eeb7
1 changed files with 193 additions and 0 deletions
--- a/lib/techniques/blind/inference.py
+++ b/lib/techniques/blind/inference.py
@ -9,6 +9,8 @@ from __future__ import division

 import re
 import time
+import os
+from difflib import SequenceMatcher

 from lib.core.agent import agent
 from lib.core.common import Backend
@ -34,6 +36,7 @@ from lib.core.data import conf
 from lib.core.data import kb
 from lib.core.data import logger
 from lib.core.data import queries
+from lib.core.data import paths
 from lib.core.enums import ADJUST_TIME_DELAY
 from lib.core.enums import CHARSET_TYPE
 from lib.core.enums import DBMS
@ -64,6 +67,146 @@ from lib.utils.safe2bin import safecharencode
 from lib.utils.xrange import xrange
 from thirdparty import six

+# Similarity threshold for dictionary matching
+SIMILARITY_THRESHOLD = 0.8
+
+# Cache for dictionary content
+_dict_cache = None
+_dict_cache_columns = None
+
+def loadDictionary():
+    """
+    Load common database/table names from dictionary file
+    """
+    global _dict_cache
+    
+    if _dict_cache is not None:
+        return _dict_cache
+    
+    _dict_cache = []
+    dict_file = paths.COMMON_TABLES
+    
+    if os.path.exists(dict_file):
+        try:
+            with open(dict_file, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = line.strip()
+                    if line and not line.startswith('#'):
+                        _dict_cache.append(line.lower())
+        except Exception as e:
+            logger.debug("Failed to load dictionary file: %s" % e)
+    
+    return _dict_cache
+
+def loadColumnsDictionary():
+    """
+    Load common column names from dictionary file
+    """
+    global _dict_cache_columns
+    
+    if _dict_cache_columns is not None:
+        return _dict_cache_columns
+    
+    _dict_cache_columns = []
+    dict_file = paths.COMMON_COLUMNS
+    
+    if os.path.exists(dict_file):
+        try:
+            with open(dict_file, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = line.strip()
+                    if line and not line.startswith('#'):
+                        _dict_cache_columns.append(line.lower())
+        except Exception as e:
+            logger.debug("Failed to load columns dictionary file: %s" % e)
+    
+    return _dict_cache_columns
+
+def isColumnQuery(expression):
+    """
+    Check if the query is related to columns
+    """
+    if not expression:
+        return False
+    
+    expression_lower = expression.lower()
+    
+    # Common patterns that indicate column queries
+    column_patterns = [
+        'information_schema.*columns',
+        'pg_attribute',
+        'sys\\.columns',
+        'describe',
+        'show.*columns',
+        'show.*fields',
+        'column_name',
+        'col_name'
+    ]
+    
+    for pattern in column_patterns:
+        if pattern in expression_lower:
+            return True
+    
+    return False
+
+def checkSimilarity(partial_value, expression=None):
+    """
+    Check similarity between partial inferred value and dictionary items
+    Returns the best match and similarity ratio
+    """
+    if not partial_value:
+        return None, 0.0
+    
+    # Choose dictionary based on query type
+    if expression and isColumnQuery(expression):
+        dictionary = loadColumnsDictionary()
+        dict_type = "columns"
+    else:
+        dictionary = loadDictionary()
+        dict_type = "tables"
+    
+    if not dictionary:
+        return None, 0.0
+    
+    best_match = None
+    best_ratio = 0.0
+    
+    partial_lower = partial_value.lower()
+    
+    for dict_item in dictionary:
+        # Calculate similarity ratio
+        ratio = SequenceMatcher(None, partial_lower, dict_item).ratio()
+        
+        # Give higher weight if partial value matches dictionary item prefix
+        if dict_item.startswith(partial_lower):
+            ratio = max(ratio, 0.9)
+        
+        if ratio > best_ratio:
+            best_ratio = ratio
+            best_match = dict_item
+    
+    return best_match, best_ratio, dict_type
+
+def testDictionaryMatch(expression, payload, best_match, timeBasedCompare):
+    """
+    Test if dictionary match is successful
+    """
+    try:
+        # Build test query
+        testValue = unescaper.escape("'%s'" % best_match) if "'" not in best_match else unescaper.escape("%s" % best_match, quote=False)
+        
+        query = getTechniqueData().vector
+        query = agent.prefixQuery(query.replace(INFERENCE_MARKER, "(%s)%s%s" % (expression, INFERENCE_EQUALS_CHAR, testValue)))
+        query = agent.suffixQuery(query)
+        
+        result = Request.queryPage(agent.payload(newValue=query), timeBasedCompare=timeBasedCompare, raise404=False)
+        incrementCounter(getTechnique())
+        
+        return result
+    except Exception as e:
+        logger.debug("Dictionary match test failed: %s" % e)
+        return False
+
 def bisection(payload, expression, length=None, charsetType=None, firstChar=None, lastChar=None, dump=False):
    """
    Bisection algorithm that can be used to perform blind SQL injection
@ -517,6 +660,35 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
                        else:
                            break

+                        # Dictionary similarity check for multi-threading
+                        # Check if we have enough characters to test similarity
+                        with kb.locks.value:
+                            currentPartialValue = ""
+                            for i in xrange(currentCharIndex - firstChar):
+                                if threadData.shared.value[i] is not None:
+                                    currentPartialValue += threadData.shared.value[i]
+                            
+                            if len(currentPartialValue) >= 3:
+                                best_match, similarity_ratio, dict_type = checkSimilarity(currentPartialValue, expression)
+                                
+                                if best_match and similarity_ratio >= SIMILARITY_THRESHOLD:
+                                    infoMsg = "checking %s dictionary similarity for '%s' (similarity: %.2f)" % (dict_type, currentPartialValue, similarity_ratio)
+                                    logger.info(infoMsg)
+                                    
+                                    if testDictionaryMatch(expressionUnescaped, payload, best_match, timeBasedCompare):
+                                        infoMsg = "%s dictionary match successful: '%s'" % (dict_type.capitalize(), best_match)
+                                        logger.info(infoMsg)
+                                        
+                                        # Fill the remaining characters with the matched value
+                                        remaining_chars = best_match[len(currentPartialValue):]
+                                        for i, char in enumerate(remaining_chars):
+                                            if currentCharIndex + i - firstChar < len(threadData.shared.value):
+                                                threadData.shared.value[currentCharIndex + i - 1 - firstChar] = char
+                                        
+                                        # Update the index to skip the matched characters
+                                        threadData.shared.index[0] += len(remaining_chars)
+                                        continue
+
                        # NOTE: https://github.com/sqlmapproject/sqlmap/issues/4629
                        if not isListLike(threadData.shared.value):
                            break
@ -596,6 +768,27 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
            while True:
                index += 1

+                # Dictionary similarity check feature
+                # Check if partial value has high similarity with dictionary items
+                if len(partialValue) >= 3:  # Only check if we have at least 3 characters
+                    best_match, similarity_ratio, dict_type = checkSimilarity(partialValue, expression)
+                    
+                    if best_match and similarity_ratio >= SIMILARITY_THRESHOLD:
+                        infoMsg = "checking %s dictionary similarity for '%s' (similarity: %.2f)" % (dict_type, partialValue, similarity_ratio)
+                        logger.info(infoMsg)
+                        
+                        if testDictionaryMatch(expressionUnescaped, payload, best_match, timeBasedCompare):
+                            infoMsg = "%s dictionary match successful: '%s'" % (dict_type.capitalize(), best_match)
+                            logger.info(infoMsg)
+                            
+                            if showEta:
+                                progress.progress(len(best_match))
+                            elif conf.verbose in (1, 2) or conf.api:
+                                dataToStdout(filterControlChars(best_match[index - 1:]))
+                            
+                            finalValue = best_match
+                            break
+                
                # Common prediction feature (a.k.a. "good samaritan")
                # NOTE: to be used only when multi-threading is not set for
                # the moment