Use a better regex to avoid matching garbage Google Images links

This commit is contained in:
Miroslav Stampar 2010-05-15 22:02:28 +00:00
parent d20b99ed65
commit e938331d8e

View File

@@ -54,7 +54,7 @@ class Google:
matches = []
regExpr = "class=\042?r\042?\076\074a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?"
regExpr = "li class=\042?g\042?\076.+?a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?"
matches = re.findall(regExpr, page, re.I | re.M)
return matches