sqlmap/lib/request/comparison.py

#!/usr/bin/env python

"""
$Id$

Copyright (c) 2006-2010 sqlmap developers (http://sqlmap.sourceforge.net/)
See the file doc/COPYING for copying permission.
"""

import re

from lib.core.common import getFilteredPageContent
from lib.core.common import preparePageForLineComparison
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.session import setMatchRatio

def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
    """
    Evaluate an HTTP response page against the user-provided match
    options or, when none applies, against the sequence already stored
    in conf.seqMatcher.

    The page is first stripped of the first occurrence of conf.eString
    and of the text returned by re.findall() for conf.eRegexp. Then:

    * if conf.string is set, the result is whether it occurs in the page
    * else if conf.regexp is set, the result is whether it matches the
      page (case-insensitive, multi-line)
    * otherwise a similarity ratio is computed and either returned as is
      (getSeqMatcher=True) or compared against conf.matchRatio

    @param page: response body to evaluate
    @param headers: not used by this function
    @param getSeqMatcher: when True, return the ratio instead of the
    boolean comparison result
    @param pageLength: response length; used in place of the page content
    when kb.nullConnection is set and neither conf.eString nor
    conf.eRegexp is in use

    @return: boolean outcome of the comparison, or the ratio (float) when
    getSeqMatcher is True
    """

    # String to be excluded before comparing (first occurrence only)
    if conf.eString and conf.eString in page:
        page = page.replace(conf.eString, '', 1)

    # Regular expression matches to be excluded before comparing; each
    # result returned by re.findall() is removed once
    # NOTE(review): if conf.eRegexp contains capturing groups, findall()
    # returns the group text (or tuples, which str.replace() rejects)
    # rather than the whole match - confirm which patterns are expected
    if conf.eRegexp:
        for regExpResult in re.findall(conf.eRegexp, page, re.I | re.M):
            page = page.replace(regExpResult, '', 1)

    # String to match in page when the query is valid
    if conf.string:
        return conf.string in page

    # Regular expression to match in page when the query is valid
    if conf.regexp:
        return re.search(conf.regexp, page, re.I | re.M) is not None

    # Dynamic content lines to be excluded before comparing
    if kb.dynamicContent:
        lines = preparePageForLineComparison(page)

        for item in kb.dynamicContent:
            # The marker only applies to pages with the recorded number of lines
            if len(lines) != item.pageTotal:
                continue

            # A marker refers either to one line (int) or to a sequence of lines
            if isinstance(item.lineNumber, int):
                lineNumbers = [item.lineNumber]
            else:
                lineNumbers = item.lineNumber

            before = lineNumbers[0] - 1
            after = lineNumbers[-1] + 1

            # Skip the marker if the recorded neighbouring lines do not
            # match the ones found in this page
            if item.lineContentBefore and lines[before] != item.lineContentBefore:
                continue

            if item.lineContentAfter and lines[after] != item.lineContentAfter:
                continue

            for i in lineNumbers:
                page = page.replace(lines[i], '')

    # conf.seqMatcher is shared, hence guarded by conf.seqLock when set.
    # The lock is released in a finally clause so that an exception raised
    # while computing the ratio (e.g. pageLength left to None) can not
    # leave it acquired and block all the other threads
    seqLock = conf.seqLock

    if seqLock:
        seqLock.acquire()

    try:
        if not conf.eRegexp and not conf.eString and kb.nullConnection:
            # Only the length is available: use the lengths quotient,
            # inverted when needed to keep it within the 0-1 range
            ratio = 1. * pageLength / len(conf.seqMatcher.a)

            if ratio > 1.:
                ratio = 1. / ratio
        else:
            conf.seqMatcher.set_seq2(page if not conf.textOnly else getFilteredPageContent(page))
            ratio = round(conf.seqMatcher.ratio(), 3)
    finally:
        if seqLock:
            seqLock.release()

    # If the url is stable and we did not set yet the match ratio and the
    # current injected value changes the url page content
    # NOTE(review): when conf.thold is not set, conf.md5hash is set and the
    # ratio is exactly 0.6 or >= 1, no branch applies and conf.matchRatio
    # stays None - confirm this is intended
    if conf.matchRatio is None:
        if conf.thold:
            conf.matchRatio = conf.thold

        elif conf.md5hash is not None and ratio > 0.6 and ratio < 1:
            logger.debug("setting match ratio to %.3f" % ratio)
            conf.matchRatio = ratio

        elif conf.md5hash is None or (conf.md5hash is not None and ratio < 0.6):
            logger.debug("setting match ratio to default value 0.900")
            conf.matchRatio = 0.900

        if conf.matchRatio is not None:
            setMatchRatio()

    # If it has been requested to return the ratio and not a comparison
    # response
    if getSeqMatcher:
        return ratio

    # NOTE: an older implementation compared the MD5 hash of the page with
    # conf.md5hash; it did not handle automatically the fact that the url
    # could be not stable (due to VIEWSTATE, counter, etc.)

    # Return whether the ratio is above the match ratio threshold
    return ratio > conf.matchRatio
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`#!/usr/bin/env python`

			`"""`
			$Id$

large commit with copyright header modifications 2010-10-14 18:41:14 +04:00			`Copyright (c) 2006-2010 sqlmap developers (http://sqlmap.sourceforge.net/)`
			`See the file doc/COPYING for copying permission.`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`"""`

			`import re`

added --text-only switch 2010-10-12 23:41:29 +04:00			`from lib.core.common import getFilteredPageContent`
fix for Bug #165 2010-09-13 17:31:01 +04:00			`from lib.core.common import preparePageForLineComparison`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`from lib.core.data import conf`
fix for Bug #165 2010-09-13 17:31:01 +04:00			`from lib.core.data import kb`
Major bug fix in the comparison algorithm to correctly handle also the case that the url is stable and the False response changes the page content very little. 2009-02-09 13:28:03 +03:00			`from lib.core.data import logger`
Updated to sqlmap 0.7 release candidate 1 2009-04-22 15:48:07 +04:00			`from lib.core.session import setMatchRatio`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00
implementation of HEAD/Range methods 2010-09-16 13:32:09 +04:00			`def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`regExpResults = None`

sqlmap 0.6.3-rc4: minor enhancement to be able to specify extra HTTP headers by providing option --headers. By default Accept, Accept-Language and Accept-Charset headers are set. Added support to get the injection payload prefix and postfix from user. Minor bug fix to exclude image files when parsing (-l) proxies log files. Minor code adjustments. Updated documentation. 2008-12-09 00:24:24 +03:00			`# String to be excluded before calculating page hash`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`if conf.eString and conf.eString in page:`
			`index = page.index(conf.eString)`
			`length = len(conf.eString)`
			`pageWithoutString = page[:index]`
			`pageWithoutString += page[index+length:]`
			`page = pageWithoutString`

sqlmap 0.6.3-rc4: minor enhancement to be able to specify extra HTTP headers by providing option --headers. By default Accept, Accept-Language and Accept-Charset headers are set. Added support to get the injection payload prefix and postfix from user. Minor bug fix to exclude image files when parsing (-l) proxies log files. Minor code adjustments. Updated documentation. 2008-12-09 00:24:24 +03:00			`# Regular expression matches to be excluded before calculating page hash`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`if conf.eRegexp:`
			`regExpResults = re.findall(conf.eRegexp, page, re.I \| re.M)`

sqlmap 0.6.3-rc4: minor enhancement to be able to specify extra HTTP headers by providing option --headers. By default Accept, Accept-Language and Accept-Charset headers are set. Added support to get the injection payload prefix and postfix from user. Minor bug fix to exclude image files when parsing (-l) proxies log files. Minor code adjustments. Updated documentation. 2008-12-09 00:24:24 +03:00			`if regExpResults:`
			`for regExpResult in regExpResults:`
			`index = page.index(regExpResult)`
			`length = len(regExpResult)`
			`pageWithoutRegExp = page[:index]`
			`pageWithoutRegExp += page[index+length:]`
			`page = pageWithoutRegExp`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00
sqlmap 0.6.3-rc4: minor enhancement to be able to specify extra HTTP headers by providing option --headers. By default Accept, Accept-Language and Accept-Charset headers are set. Added support to get the injection payload prefix and postfix from user. Minor bug fix to exclude image files when parsing (-l) proxies log files. Minor code adjustments. Updated documentation. 2008-12-09 00:24:24 +03:00			`# String to match in page when the query is valid`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00			`if conf.string:`
some code refactoring 2010-05-14 18:21:13 +04:00			`return conf.string in page`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00
sqlmap 0.6.3-rc4: minor enhancement to be able to specify extra HTTP headers by providing option --headers. By default Accept, Accept-Language and Accept-Charset headers are set. Added support to get the injection payload prefix and postfix from user. Minor bug fix to exclude image files when parsing (-l) proxies log files. Minor code adjustments. Updated documentation. 2008-12-09 00:24:24 +03:00			`# Regular expression to match in page when the query is valid`
			`if conf.regexp:`
some code refactoring 2010-05-14 18:21:13 +04:00			`return re.search(conf.regexp, page, re.I \| re.M) is not None`
Minor enhancemet to support also --regexp, --excl-str and --excl-reg options rather than only --string when comparing HTTP responses page content 2008-12-05 18:34:13 +03:00
fix for Bug #165 2010-09-13 17:31:01 +04:00			`# Dynamic content lines to be excluded before calculating page hash`
			`if kb.dynamicContent:`
			`lines = preparePageForLineComparison(page)`
			`for item in kb.dynamicContent:`
			`if len(lines) == item.pageTotal:`
			`before = item.lineNumber - 1 if isinstance(item.lineNumber, int) else item.lineNumber[0] - 1`
			`after = item.lineNumber + 1 if isinstance(item.lineNumber, int) else item.lineNumber[-1] + 1`
			`if (item.lineContentBefore and lines[before] != item.lineContentBefore) or (item.lineContentAfter and lines[after] != item.lineContentAfter):`
			`continue`
			`if isinstance(item.lineNumber, int):`
			`page = page.replace(lines[item.lineNumber], '')`
			`else:`
			`for i in item.lineNumber:`
			`page = page.replace(lines[i], '')`

fixed threading bug (difflib :) 2010-03-10 17:14:27 +03:00			`if conf.seqLock:`
			`conf.seqLock.acquire()`

implementation of HEAD/Range methods 2010-09-16 13:32:09 +04:00			`if not conf.eRegexp and not conf.eString and kb.nullConnection:`
			`ratio = 1. * pageLength / len(conf.seqMatcher.a)`
			`if ratio > 1.:`
			`ratio = 1. / ratio`
			`else:`
added --text-only switch 2010-10-12 23:41:29 +04:00			`conf.seqMatcher.set_seq2(page if not conf.textOnly else getFilteredPageContent(page))`
implementation of HEAD/Range methods 2010-09-16 13:32:09 +04:00			`ratio = round(conf.seqMatcher.ratio(), 3)`
Major bug fix in the comparison algorithm to correctly handle also the case that the url is stable and the False response changes the page content very little. 2009-02-09 13:28:03 +03:00
fixed threading bug (difflib :) 2010-03-10 17:14:27 +03:00			`if conf.seqLock:`
			`conf.seqLock.release()`

Major bug fix in the comparison algorithm to correctly handle also the case that the url is stable and the False response changes the page content very little. 2009-02-09 13:28:03 +03:00			`# If the url is stable and we did not set yet the match ratio and the`
			`# current injected value changes the url page content`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`if conf.matchRatio is None:`
fix for a Bug #200 2010-09-14 14:35:01 +04:00			`if conf.thold:`
			`conf.matchRatio = conf.thold`

			`elif conf.md5hash is not None and ratio > 0.6 and ratio < 1:`
Major enhancement in comparison algorithm 2009-02-12 03:17:44 +03:00			`logger.debug("setting match ratio to %.3f" % ratio)`
Updated to sqlmap 0.7 release candidate 1 2009-04-22 15:48:07 +04:00			`conf.matchRatio = ratio`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00
			`elif conf.md5hash is None or ( conf.md5hash is not None and ratio < 0.6 ):`
Major enhancement in comparison algorithm 2009-02-12 03:17:44 +03:00			`logger.debug("setting match ratio to default value 0.900")`
Updated to sqlmap 0.7 release candidate 1 2009-04-22 15:48:07 +04:00			`conf.matchRatio = 0.900`

fix for a Bug #200 2010-09-14 14:35:01 +04:00			`if conf.matchRatio is not None:`
			`setMatchRatio()`
Major bug fix in the comparison algorithm to correctly handle also the case that the url is stable and the False response changes the page content very little. 2009-02-09 13:28:03 +03:00
			`# If it has been requested to return the ratio and not a comparison`
			`# response`
Major enhancement to make the comparison algorithm work properly also on url not stables automatically by using the difflib SequenceMatcher object: this changed a lot into the structure of the code, has to be extensively beta-tested! Please, do report bugs on sqlmap-users mailing list if you scout them. Cheers, Bernardo 2008-12-20 04:54:08 +03:00			`if getSeqMatcher:`
Major bug fix in the comparison algorithm to correctly handle also the case that the url is stable and the False response changes the page content very little. 2009-02-09 13:28:03 +03:00			`return ratio`

			`# If the url is stable it returns True if the page has the same MD5`
			`# hash of the original one`
			`# NOTE: old implementation, it did not handle automatically the fact`
			`# that the url could be not stable (due to VIEWSTATE, counter, etc.)`
sqlmap 0.8-rc3: Merge from Miroslav Stampar's branch fixing a bug when verbosity > 2, another major bug with urlencoding/urldecoding of POST data and Cookies, adding --drop-set-cookie option, implementing support to automatically decode gzip and deflate HTTP responses, support for Google dork page result (--gpage) and a minor code cleanup. 2010-01-02 05:02:12 +03:00			`#elif conf.md5hash is not None:`
Major bug fix in the comparison algorithm to correctly handle also the case that the url is stable and the False response changes the page content very little. 2009-02-09 13:28:03 +03:00			`# return conf.md5hash == md5hash(page)`

			`# If the url is not stable it returns sequence matcher between the`
			`# first untouched HTTP response page content and this content`
Major enhancement to make the comparison algorithm work properly also on url not stables automatically by using the difflib SequenceMatcher object: this changed a lot into the structure of the code, has to be extensively beta-tested! Please, do report bugs on sqlmap-users mailing list if you scout them. Cheers, Bernardo 2008-12-20 04:54:08 +03:00			`else:`
some code refactoring 2010-05-14 18:21:13 +04:00			`return ratio > conf.matchRatio`