mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-10-24 20:51:23 +03:00
support for non-latin (e.g. cyrillic) URLs
This commit is contained in:
parent
1c3f4e9e54
commit
3f0517d3f3
|
@ -3004,3 +3004,57 @@ def randomizeParameterValue(value):
|
|||
retVal = retVal.replace(match.group(), str(randomInt(len(match.group()))))
|
||||
|
||||
return retVal
|
||||
|
||||
def asciifyUrl(url, force_quote=False):
    """
    Attempts to make a unicode URL usable with ``urllib/urllib2``.

    More specifically, it attempts to convert the unicode object ``url``,
    which is meant to represent an IRI, to a unicode object that,
    containing only ASCII characters, is a valid URI. This involves:

        * IDNA/Puny-encoding the domain name.
        * UTF8-quoting the username, password, path and querystring parts.

    The fragment is copied over as-is (it is not quoted here).

    The conversion is best-effort: whenever ``url`` can not be processed
    (no scheme, no host, malformed port or IPv6 literal, host name that
    can not be IDNA-encoded) it is returned unchanged instead of raising.

    :param url: the (possibly non-ASCII) URL to convert
    :param force_quote: when True, the username, password, path and
        querystring are quoted even if they contain ASCII characters only
    :return: the converted URL, or ``url`` itself if it is not convertible

    See also RFC 3987.

    Reference: http://blog.elsdoerfer.name/2008/12/12/opening-iris-in-python/
    """

    # Function-scope imports so that the helper resolves the URL utilities
    # on both Python 2 (urllib/urlparse) and Python 3 (urllib.parse)
    try:
        from urllib import quote as _quote
        from urlparse import urlsplit, urlunsplit
    except ImportError:
        from urllib.parse import quote as _quote, urlsplit, urlunsplit

    try:
        parts = urlsplit(url)
        hostname = parts.hostname
        # the port is validated lazily - a non-numeric one raises here
        port = parts.port
    except ValueError:
        # malformed authority (e.g. bad port, unbalanced IPv6 brackets)
        return url

    # hostname is None for authorities like ':8080' or 'user@'
    if not parts.scheme or not parts.netloc or not hostname:
        # apparently not an url
        return url

    # idna-encode domain
    try:
        hostname = hostname.encode('idna')
    except UnicodeError:
        # e.g. empty or too long (> 63 characters) label
        return url

    # Python 3 returns bytes from encode(); go back to text so that the
    # pieces can be concatenated (no-op on Python 2 where bytes is str)
    if not isinstance(hostname, str):
        hostname = hostname.decode('ascii')

    # the hostname attribute strips the brackets of IPv6 literals; they
    # are mandatory inside of a netloc so they have to be put back
    if ':' in hostname:
        hostname = '[' + hostname + ']'

    # UTF8-quote the other parts. We check each part individually if
    # it needs to be quoted - that should catch some additional user
    # errors, say for example an umlaut in the username even though
    # the path *is* already quoted.
    def quote(s, safe):
        s = s or ''

        # Triggers on non-ascii characters - another option would be:
        #     urllib.quote(s.replace('%', '')) != s.replace('%', '')
        # which would trigger on all %-characters, e.g. "&".
        if force_quote or any(ord(char) > 127 for char in s):
            # byte strings are quoted as they are, text is UTF8-encoded
            raw = s if isinstance(s, bytes) else s.encode('utf8')
            return _quote(raw, safe=safe)

        return s

    username = quote(parts.username, safe='')
    password = quote(parts.password, safe='')
    path = quote(parts.path, safe='/')
    query = quote(parts.query, safe='&=')

    # put everything back together
    netloc = hostname

    if username or password:
        credentials = username
        if password:
            credentials += ':' + password
        netloc = credentials + '@' + netloc

    # compare against None so that an explicit port 0 is not dropped
    if port is not None:
        netloc += ':' + str(port)

    return urlunsplit((parts.scheme, netloc, path, query, parts.fragment))
|
|
@ -17,6 +17,7 @@ import traceback
|
|||
|
||||
from extra.multipart import multipartpost
|
||||
from lib.core.agent import agent
|
||||
from lib.core.common import asciifyUrl
|
||||
from lib.core.common import average
|
||||
from lib.core.common import calculateDeltaSeconds
|
||||
from lib.core.common import clearConsoleLine
|
||||
|
@ -160,6 +161,10 @@ class Connect:
|
|||
responseHeaders = None
|
||||
logHeaders = ""
|
||||
|
||||
# support for non-latin URLs (e.g. Cyrillic) as urllib/urllib2 doesn't
|
||||
# support those by default
|
||||
url = asciifyUrl(url)
|
||||
|
||||
# fix for known issues when using url in unicode format
|
||||
# (e.g. UnicodeDecodeError: "url = url + '?' + query" in redirect case)
|
||||
url = unicodeencode(url)
|
||||
|
|
Loading…
Reference in New Issue
Block a user