Fix for Issue #56 (Google has changed a few things for retrieving PageRank)

This commit is contained in:
Miroslav Stampar 2012-07-03 21:00:18 +02:00
parent 27fdccc858
commit 40fc6488bf
2 changed files with 9 additions and 6 deletions

View File

@ -7,20 +7,22 @@
# this version was adapted from http://www.djangosnippets.org/snippets/221/
# by Corey Goldberg - 2010
#
# important update (http://www.seroundtable.com/google-pagerank-change-14132.html)
# by Miroslav Stampar - 2012
#
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
import urllib
def get_pagerank(url):
hsh = check_hash(hash_url(url))
gurl = 'http://www.google.com/search?client=navclient-auto&features=Rank:&q=info:%s&ch=%s' % (urllib.quote(url), hsh)
_ = 'http://toolbarqueries.google.com/tbr?client=navclient-auto&features=Rank&ch=%s&q=info:%s' % (check_hash(hash_url(url)), urllib.quote(url))
try:
f = urllib.urlopen(gurl)
f = urllib.urlopen(_)
rank = f.read().strip()[9:]
except Exception:
rank = 'N/A'
if rank == '':
rank = '0'
else:
rank = '0' if not rank or not rank.isdigit() else rank
return rank
def int_str(string_, integer, factor):
@ -28,6 +30,7 @@ def int_str(string_, integer, factor):
integer *= factor
integer &= 0xFFFFFFFF
integer += ord(string_[i])
return integer
def hash_url(string_):

View File

@ -1067,7 +1067,7 @@ def parseTargetUrl():
if urlSplit[3]:
conf.parameters[PLACE.GET] = urldecode(urlSplit[3]) if urlSplit[3] and urlencode(DEFAULT_GET_POST_DELIMITER, None) not in urlSplit[3] else urlSplit[3]
conf.url = "%s://%s:%d%s" % (conf.scheme, ("[%s]" % conf.hostname) if conf.ipv6 else conf.hostname, conf.port, conf.path)
conf.url = getUnicode("%s://%s:%d%s" % (conf.scheme, ("[%s]" % conf.hostname) if conf.ipv6 else conf.hostname, conf.port, conf.path))
conf.url = conf.url.replace(URI_QUESTION_MARKER, '?')
if not conf.referer and intersect(REFERER_ALIASES, conf.testParameter, True):