#!/usr/bin/env python """ Copyright (c) 2006-2016 sqlmap developers (http://sqlmap.org/) See the file 'doc/COPYING' for copying permission """ import httplib import re import socket import urllib import urllib2 from lib.core.common import getSafeExString from lib.core.common import getUnicode from lib.core.common import popValue from lib.core.common import pushValue from lib.core.common import readInput from lib.core.common import urlencode from lib.core.data import conf from lib.core.data import kb from lib.core.data import logger from lib.core.enums import CUSTOM_LOGGING from lib.core.enums import HTTP_HEADER from lib.core.enums import REDIRECTION from lib.core.exception import SqlmapBaseException from lib.core.exception import SqlmapConnectionException from lib.core.exception import SqlmapUserQuitException from lib.core.settings import DUMMY_SEARCH_USER_AGENT from lib.core.settings import DUCKDUCKGO_REGEX from lib.core.settings import DISCONNECT_SEARCH_REGEX from lib.core.settings import GOOGLE_REGEX from lib.core.settings import BAIDU_SEARCH_REGEX from lib.core.settings import HTTP_ACCEPT_ENCODING_HEADER_VALUE from lib.core.settings import UNICODE_ENCODING from lib.request.basic import decodePage from thirdparty.socks import socks def _remove_duplicate(links): if not links: return [] tmplinks = map(lambda url: url[:url.find("?")], links) tmplinks = set(tmplinks) ret = [] for link in links: for tmplink in tmplinks: if link.lower().find(tmplink.lower()) == 0: ret.append(link) tmplinks.remove(tmplink) break return ret def _locate_real_url_from_baidu_results(links): retVal = [] for link in links: try: req = urllib2.Request(link) conn = urllib2.urlopen(req, timeout=conf.timeout) page = conn.read() responseHeaders = conn.info() page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type")) if page: url = conn.geturl() if link != url: logger.info(url) retVal.append(url) else: # baidu sometimes will just use Javascript to redirect the page # rather than responding a 302 HTTP code. m = re.search('