From 41eb313e1c18051614809e2040e6ac8584936962 Mon Sep 17 00:00:00 2001 From: Artem Mezhenin Date: Sun, 9 Feb 2014 01:01:05 +0400 Subject: [PATCH 1/5] update regex for matching URLs, fixes issue #1386 --- rest_framework/templatetags/rest_framework.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rest_framework/templatetags/rest_framework.py b/rest_framework/templatetags/rest_framework.py index 83c046f99..7a70fd460 100644 --- a/rest_framework/templatetags/rest_framework.py +++ b/rest_framework/templatetags/rest_framework.py @@ -6,7 +6,7 @@ from django.utils.encoding import iri_to_uri from django.utils.html import escape from django.utils.safestring import SafeData, mark_safe from rest_framework.compat import urlparse, force_text, six, smart_urlquote -import re, string +import re register = template.Library() @@ -185,7 +185,7 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('<', '>'), ('"', '"'), ("'", "'")] word_split_re = re.compile(r'(\s+)') simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) -simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE) +simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@\[\]]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE) simple_email_re = re.compile(r'^\S+@\S+\.\S+$') @@ -211,7 +211,6 @@ def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=Tru safe_input = isinstance(text, SafeData) words = word_split_re.split(force_text(text)) for i, word in enumerate(words): - match = None if '.' in word or '@' in word or ':' in word: # Deal with punctuation. lead, middle, trail = '', word, '' From 35f4908e48cc18e94be239f8065c95e87b2fb007 Mon Sep 17 00:00:00 2001 From: Artem Mezhenin Date: Sun, 9 Feb 2014 02:46:25 +0400 Subject: [PATCH 2/5] issue #1386 * regex for matching URLs was rewritten * added unittests --- rest_framework/templatetags/rest_framework.py | 2 +- rest_framework/tests/test_templatetags.py | 38 ++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/rest_framework/templatetags/rest_framework.py b/rest_framework/templatetags/rest_framework.py index 7a70fd460..8a0e11ba7 100644 --- a/rest_framework/templatetags/rest_framework.py +++ b/rest_framework/templatetags/rest_framework.py @@ -185,7 +185,7 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('<', '>'), ('"', '"'), ("'", "'")] word_split_re = re.compile(r'(\s+)') simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) -simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@\[\]]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE) +simple_url_2_re = re.compile(r'^\w[^@\[\]\:\/,]+\.(com|edu|gov|int|mil|net|org)(:\d{2,5})?(/(\w[^@\[\]\:\,]+)?)?$', re.IGNORECASE) simple_email_re = re.compile(r'^\S+@\S+\.\S+$') diff --git a/rest_framework/tests/test_templatetags.py b/rest_framework/tests/test_templatetags.py index 609a9e089..0c2259b99 100644 --- a/rest_framework/tests/test_templatetags.py +++ b/rest_framework/tests/test_templatetags.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from django.test import TestCase from rest_framework.test import APIRequestFactory -from rest_framework.templatetags.rest_framework import add_query_param +from rest_framework.templatetags.rest_framework import add_query_param, urlize_quoted_links factory = APIRequestFactory() @@ -17,3 +17,39 @@ class TemplateTagTests(TestCase): json_url = add_query_param(request, "format", "json") self.assertIn("q=%E6%9F%A5%E8%AF%A2", json_url) self.assertIn("format=json", json_url) + + +class Issue1386Tests(TestCase): + """ + Covers #1386 + """ + + def test_issue_1386(self): + """ + Test function urlize_quoted_links with different args + """ + correct_urls = [ + "asdf.com/zxvc", + "asdf.net", + "www.as_df.org", + "as.d8f.ghj8.gov", + "www.a-op.s.d.edu/asdf/dfff_908/", + "cd8fr.com:80/hello", + "cdfr.com:808/hello", + "cdfr.com:8080/hello", + "cdfr.com:44808/hello/asdf/", + ] + for i in correct_urls: + res = urlize_quoted_links(i) + self.assertGreater(len(res), len(i)) + self.assertIn(i, res) + + incorrect_urls = [ + "mailto://asdf@fdf.com", + "asdf://asdf.com", + "asdf.netnet", + "asdf:[/p]zxcv.com" # example from issue #1386 + ] + for i in incorrect_urls: + res = urlize_quoted_links(i) + self.assertEqual(i, res) \ No newline at end of file From d00ea3bcac5d622c586b267d18aef4700657f269 Mon Sep 17 00:00:00 2001 From: Artem Mezhenin Date: Thu, 13 Feb 2014 18:59:05 +0400 Subject: [PATCH 3/5] change regex back, issue #1386 --- rest_framework/templatetags/rest_framework.py | 8 ++++++-- rest_framework/tests/test_templatetags.py | 12 +++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/rest_framework/templatetags/rest_framework.py b/rest_framework/templatetags/rest_framework.py index 8a0e11ba7..0fcc53463 100644 --- a/rest_framework/templatetags/rest_framework.py +++ b/rest_framework/templatetags/rest_framework.py @@ -185,7 +185,7 @@ WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('<', '>'), ('"', '"'), ("'", "'")] word_split_re = re.compile(r'(\s+)') simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) -simple_url_2_re = re.compile(r'^\w[^@\[\]\:\/,]+\.(com|edu|gov|int|mil|net|org)(:\d{2,5})?(/(\w[^@\[\]\:\,]+)?)?$', re.IGNORECASE) +simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE) simple_email_re = re.compile(r'^\S+@\S+\.\S+$') @@ -234,7 +234,11 @@ def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=Tru if simple_url_re.match(middle): url = smart_urlquote(middle) elif simple_url_2_re.match(middle): - url = smart_urlquote('http://%s' % middle) + # ValueError("Invalid IPv6 URL") can be raised here, see issue #1386 + try: + url = smart_urlquote('http://%s' % middle) + except ValueError: + pass elif not ':' in middle and simple_email_re.match(middle): local, domain = middle.rsplit('@', 1) try: diff --git a/rest_framework/tests/test_templatetags.py b/rest_framework/tests/test_templatetags.py index 0c2259b99..999c718ac 100644 --- a/rest_framework/tests/test_templatetags.py +++ b/rest_framework/tests/test_templatetags.py @@ -29,26 +29,20 @@ class Issue1386Tests(TestCase): Test function urlize_quoted_links with different args """ correct_urls = [ - "asdf.com/zxvc", + "asdf.com", "asdf.net", "www.as_df.org", "as.d8f.ghj8.gov", - "www.a-op.s.d.edu/asdf/dfff_908/", - "cd8fr.com:80/hello", - "cdfr.com:808/hello", - "cdfr.com:8080/hello", - "cdfr.com:44808/hello/asdf/", ] for i in correct_urls: res = urlize_quoted_links(i) - self.assertGreater(len(res), len(i)) + self.assertNotEqual(res, i) self.assertIn(i, res) incorrect_urls = [ "mailto://asdf@fdf.com", - "asdf://asdf.com", "asdf.netnet", - "asdf:[/p]zxcv.com" # example from issue #1386 + "asdf:[/p]zxcv.com", # example from issue #1386 ] for i in incorrect_urls: res = urlize_quoted_links(i) From 08ec23268dbb4a40000b6c4bf877f5563a4ba57b Mon Sep 17 00:00:00 2001 From: Artem Mezhenin Date: Thu, 13 Feb 2014 19:39:53 +0400 Subject: [PATCH 4/5] (I hope) tests are fixed, issue #1386 --- rest_framework/tests/test_templatetags.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rest_framework/tests/test_templatetags.py b/rest_framework/tests/test_templatetags.py index 999c718ac..d4da0c23b 100644 --- a/rest_framework/tests/test_templatetags.py +++ b/rest_framework/tests/test_templatetags.py @@ -42,8 +42,10 @@ class Issue1386Tests(TestCase): incorrect_urls = [ "mailto://asdf@fdf.com", "asdf.netnet", - "asdf:[/p]zxcv.com", # example from issue #1386 ] for i in incorrect_urls: res = urlize_quoted_links(i) - self.assertEqual(i, res) \ No newline at end of file + self.assertEqual(i, res) + + # example from issue #1386, this shouldn't raise an exception + _ = urlize_quoted_links("asdf:[/p]zxcv.com") From dbd993d108b51bebbf9fd8d567d1c782cf941404 Mon Sep 17 00:00:00 2001 From: Artem Mezhenin Date: Thu, 13 Feb 2014 20:14:47 +0400 Subject: [PATCH 5/5] wrapper for smart_urlquote, issue #1386 --- rest_framework/templatetags/rest_framework.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/rest_framework/templatetags/rest_framework.py b/rest_framework/templatetags/rest_framework.py index 0fcc53463..beb8c5b0e 100644 --- a/rest_framework/templatetags/rest_framework.py +++ b/rest_framework/templatetags/rest_framework.py @@ -189,6 +189,17 @@ simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net simple_email_re = re.compile(r'^\S+@\S+\.\S+$') +def smart_urlquote_wrapper(matched_url): + """ + Simple wrapper for smart_urlquote. ValueError("Invalid IPv6 URL") can + be raised here, see issue #1386 + """ + try: + return smart_urlquote(matched_url) + except ValueError: + return None + + @register.filter def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=True): """ @@ -232,13 +243,9 @@ def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=Tru url = None nofollow_attr = ' rel="nofollow"' if nofollow else '' if simple_url_re.match(middle): - url = smart_urlquote(middle) + url = smart_urlquote_wrapper(middle) elif simple_url_2_re.match(middle): - # ValueError("Invalid IPv6 URL") can be raised here, see issue #1386 - try: - url = smart_urlquote('http://%s' % middle) - except ValueError: - pass + url = smart_urlquote_wrapper('http://%s' % middle) elif not ':' in middle and simple_email_re.match(middle): local, domain = middle.rsplit('@', 1) try: