Change simple_url_2_re back to the simpler pattern and catch ValueError from smart_urlquote (issue #1386)

This commit is contained in:
Artem Mezhenin 2014-02-13 18:59:05 +04:00
parent 35f4908e48
commit d00ea3bcac
2 changed files with 9 additions and 11 deletions

View File

@ -185,7 +185,7 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'),
('"', '"'), ("'", "'")]
word_split_re = re.compile(r'(\s+)')
simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
simple_url_2_re = re.compile(r'^\w[^@\[\]\:\/,]+\.(com|edu|gov|int|mil|net|org)(:\d{2,5})?(/(\w[^@\[\]\:\,]+)?)?$', re.IGNORECASE)
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
@ -234,7 +234,11 @@ def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=Tru
if simple_url_re.match(middle):
url = smart_urlquote(middle)
elif simple_url_2_re.match(middle):
url = smart_urlquote('http://%s' % middle)
# ValueError("Invalid IPv6 URL") can be raised here, see issue #1386
try:
url = smart_urlquote('http://%s' % middle)
except ValueError:
pass
elif not ':' in middle and simple_email_re.match(middle):
local, domain = middle.rsplit('@', 1)
try:

View File

@ -29,26 +29,20 @@ class Issue1386Tests(TestCase):
Test function urlize_quoted_links with different args
"""
correct_urls = [
"asdf.com/zxvc",
"asdf.com",
"asdf.net",
"www.as_df.org",
"as.d8f.ghj8.gov",
"www.a-op.s.d.edu/asdf/dfff_908/",
"cd8fr.com:80/hello",
"cdfr.com:808/hello",
"cdfr.com:8080/hello",
"cdfr.com:44808/hello/asdf/",
]
for i in correct_urls:
res = urlize_quoted_links(i)
self.assertGreater(len(res), len(i))
self.assertNotEqual(res, i)
self.assertIn(i, res)
incorrect_urls = [
"mailto://asdf@fdf.com",
"asdf://asdf.com",
"asdf.netnet",
"asdf:[/p]zxcv.com" # example from issue #1386
"asdf:[/p]zxcv.com", # example from issue #1386
]
for i in incorrect_urls:
res = urlize_quoted_links(i)