Implement null-connection support: obtain the page length via HEAD or Range requests and compare pages by length

This commit is contained in:
Miroslav Stampar 2010-09-16 09:32:09 +00:00
parent b745331974
commit 1741801ade
2 changed files with 44 additions and 23 deletions

View File

@ -30,7 +30,7 @@ from lib.core.data import kb
from lib.core.data import logger
from lib.core.session import setMatchRatio
def comparison(page, headers=None, getSeqMatcher=False):
def comparison(page, headers=None, getSeqMatcher=False, pageLength=None):
regExpResults = None
# String to be excluded before calculating page hash
@ -79,8 +79,13 @@ def comparison(page, headers=None, getSeqMatcher=False):
if conf.seqLock:
conf.seqLock.acquire()
conf.seqMatcher.set_seq2(page)
ratio = round(conf.seqMatcher.ratio(), 3)
if not conf.eRegexp and not conf.eString and kb.nullConnection:
ratio = 1. * pageLength / len(conf.seqMatcher.a)
if ratio > 1.:
ratio = 1. / ratio
else:
conf.seqMatcher.set_seq2(page)
ratio = round(conf.seqMatcher.ratio(), 3)
if conf.seqLock:
conf.seqLock.release()

View File

@ -69,17 +69,17 @@ class Connect:
delay = 0.00001 * (conf.cpuThrottle ** 2)
time.sleep(delay)
url = kwargs.get('url', conf.url).replace(" ", "%20")
get = kwargs.get('get', None)
post = kwargs.get('post', None)
method = kwargs.get('method', None)
cookie = kwargs.get('cookie', None)
ua = kwargs.get('ua', None)
direct = kwargs.get('direct', False)
multipart = kwargs.get('multipart', False)
silent = kwargs.get('silent', False)
raise404 = kwargs.get('raise404', True)
auxHeaders = kwargs.get('auxHeaders', None)
url = kwargs.get('url', conf.url).replace(" ", "%20")
get = kwargs.get('get', None)
post = kwargs.get('post', None)
method = kwargs.get('method', None)
cookie = kwargs.get('cookie', None)
ua = kwargs.get('ua', None)
direct = kwargs.get('direct', False)
multipart = kwargs.get('multipart', False)
silent = kwargs.get('silent', False)
raise404 = kwargs.get('raise404', True)
auxHeaders = kwargs.get('auxHeaders', None)
page = ""
cookieStr = ""
@ -277,7 +277,7 @@ class Connect:
return page, responseHeaders
@staticmethod
def queryPage(value=None, place=None, content=False, getSeqMatcher=False, silent=False, method=None, auxHeaders=dict()):
def queryPage(value=None, place=None, content=False, getSeqMatcher=False, silent=False, method=None, auxHeaders=None):
"""
This method calls a function to get the target url page content
and returns its page MD5 hash or a boolean value in case of
@ -287,10 +287,12 @@ class Connect:
if conf.direct:
return direct(value, content)
get = None
post = None
cookie = None
ua = None
get = None
post = None
cookie = None
ua = None
page = None
pageLength = None
if not place:
place = kb.injPlace
@ -310,13 +312,27 @@ class Connect:
if conf.safUrl and conf.saFreq > 0:
kb.queryCounter += 1
if kb.queryCounter % conf.saFreq == 0:
Connect.getPage(url=conf.safUrl, cookie=cookie, direct=True, silent=True, ua=ua, auxHeaders=auxHeaders)
Connect.getPage(url=conf.safUrl, cookie=cookie, direct=True, silent=True, ua=ua)
page, headers = Connect.getPage(get=get, post=post, cookie=cookie, ua=ua, silent=silent, method=method)
if not content and kb.nullConnection:
if kb.nullConnection == "HEAD":
_, headers = Connect.getPage(get=get, post=post, cookie=cookie, ua=ua, silent=silent, method="HEAD", auxHeaders=auxHeaders)
pageLength = int(headers['Content-Length'])
elif kb.nullConnection == "Range":
if not auxHeaders:
auxHeaders = {}
auxHeaders["Range"] = "bytes=-1"
_, headers = Connect.getPage(get=get, post=post, cookie=cookie, ua=ua, silent=silent, method=method, auxHeaders=auxHeaders)
pageLength = int(headers['Content-Range'][headers['Content-Range'].find('/') + 1:])
else:
kb.nullConnection = None
page, headers = Connect.getPage(get=get, post=post, cookie=cookie, ua=ua, silent=silent, method=method, auxHeaders=auxHeaders)
else:
page, headers = Connect.getPage(get=get, post=post, cookie=cookie, ua=ua, silent=silent, method=method, auxHeaders=auxHeaders)
if content:
return page, headers
elif page:
return comparison(page, headers, getSeqMatcher)
elif pageLength or page:
return comparison(page, headers, getSeqMatcher, pageLength)
else:
return False