Merge pull request #766 from glic3rinu/master

Refactored urlize_quoted_links based on Dango 1.5 urlize
This commit is contained in:
Tom Christie 2013-03-29 06:40:23 -07:00
commit ff3ebd979d
2 changed files with 77 additions and 38 deletions

View File

@ -395,6 +395,37 @@ except ImportError:
kw = dict((k, int(v)) for k, v in kw.iteritems() if v is not None) kw = dict((k, int(v)) for k, v in kw.iteritems() if v is not None)
return datetime.datetime(**kw) return datetime.datetime(**kw)
# smart_urlquote is new on Django 1.4
try:
from django.utils.html import smart_urlquote
except ImportError:
try:
from urllib.parse import quote, urlsplit, urlunsplit
except ImportError: # Python 2
from urllib import quote
from urlparse import urlsplit, urlunsplit
def smart_urlquote(url):
"Quotes a URL if it isn't already quoted."
# Handle IDN before quoting.
scheme, netloc, path, query, fragment = urlsplit(url)
try:
netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE
except UnicodeError: # invalid domain part
pass
else:
url = urlunsplit((scheme, netloc, path, query, fragment))
# An URL is considered unquoted if it contains no % characters or
# contains a % not followed by two hexadecimal digits. See #9655.
if '%' not in url or unquoted_percents_re.search(url):
# See http://bugs.python.org/issue2637
url = quote(force_bytes(url), safe=b'!*\'();:@&=+$,/?#[]~')
return force_text(url)
# Markdown is optional # Markdown is optional
try: try:
import markdown import markdown

View File

@ -4,11 +4,8 @@ from django.core.urlresolvers import reverse, NoReverseMatch
from django.http import QueryDict from django.http import QueryDict
from django.utils.html import escape from django.utils.html import escape
from django.utils.safestring import SafeData, mark_safe from django.utils.safestring import SafeData, mark_safe
from rest_framework.compat import urlparse from rest_framework.compat import urlparse, force_text, six, smart_urlquote
from rest_framework.compat import force_text import re, string
from rest_framework.compat import six
import re
import string
register = template.Library() register = template.Library()
@ -112,22 +109,6 @@ def replace_query_param(url, key, val):
class_re = re.compile(r'(?<=class=["\'])(.*)(?=["\'])') class_re = re.compile(r'(?<=class=["\'])(.*)(?=["\'])')
# Bunch of stuff cloned from urlize
LEADING_PUNCTUATION = ['(', '<', '&lt;', '"', "'"]
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;', '"', "'"]
DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
word_split_re = re.compile(r'(\s+)')
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
'|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
# And the template tags themselves... # And the template tags themselves...
@register.simple_tag @register.simple_tag
@ -195,15 +176,25 @@ def add_class(value, css_class):
return value return value
# Bunch of stuff cloned from urlize
TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', "'"]
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'),
('"', '"'), ("'", "'")]
word_split_re = re.compile(r'(\s+)')
simple_url_re = re.compile(r'^https?://\w', re.IGNORECASE)
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
@register.filter @register.filter
def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=True): def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=True):
""" """
Converts any URLs in text into clickable links. Converts any URLs in text into clickable links.
Works on http://, https://, www. links and links ending in .org, .net or Works on http://, https://, www. links, and also on links ending in one of
.com. Links can have trailing punctuation (periods, commas, close-parens) the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
and leading punctuation (opening parens) and it'll still do the right Links can have trailing punctuation (periods, commas, close-parens) and
thing. leading punctuation (opening parens) and it'll still do the right thing.
If trim_url_limit is not None, the URLs in link text longer than this limit If trim_url_limit is not None, the URLs in link text longer than this limit
will truncated to trim_url_limit-3 characters and appended with an elipsis. will truncated to trim_url_limit-3 characters and appended with an elipsis.
@ -216,24 +207,41 @@ def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=Tru
trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
safe_input = isinstance(text, SafeData) safe_input = isinstance(text, SafeData)
words = word_split_re.split(force_text(text)) words = word_split_re.split(force_text(text))
nofollow_attr = nofollow and ' rel="nofollow"' or ''
for i, word in enumerate(words): for i, word in enumerate(words):
match = None match = None
if '.' in word or '@' in word or ':' in word: if '.' in word or '@' in word or ':' in word:
match = punctuation_re.match(word) # Deal with punctuation.
if match: lead, middle, trail = '', word, ''
lead, middle, trail = match.groups() for punctuation in TRAILING_PUNCTUATION:
if middle.endswith(punctuation):
middle = middle[:-len(punctuation)]
trail = punctuation + trail
for opening, closing in WRAPPING_PUNCTUATION:
if middle.startswith(opening):
middle = middle[len(opening):]
lead = lead + opening
# Keep parentheses at the end only if they're balanced.
if (middle.endswith(closing)
and middle.count(closing) == middle.count(opening) + 1):
middle = middle[:-len(closing)]
trail = closing + trail
# Make URL we want to point to. # Make URL we want to point to.
url = None url = None
if middle.startswith('http://') or middle.startswith('https://'): nofollow_attr = ' rel="nofollow"' if nofollow else ''
url = middle if simple_url_re.match(middle):
elif middle.startswith('www.') or ('@' not in middle and \ url = smart_urlquote(middle)
middle and middle[0] in string.ascii_letters + string.digits and \ elif simple_url_2_re.match(middle):
(middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): url = smart_urlquote('http://%s' % middle)
url = 'http://%s' % middle elif not ':' in middle and simple_email_re.match(middle):
elif '@' in middle and not ':' in middle and simple_email_re.match(middle): local, domain = middle.rsplit('@', 1)
url = 'mailto:%s' % middle try:
domain = domain.encode('idna').decode('ascii')
except UnicodeError:
continue
url = 'mailto:%s@%s' % (local, domain)
nofollow_attr = '' nofollow_attr = ''
# Make link. # Make link.
if url: if url:
trimmed = trim_url(middle) trimmed = trim_url(middle)
@ -251,4 +259,4 @@ def urlize_quoted_links(text, trim_url_limit=None, nofollow=True, autoescape=Tru
words[i] = mark_safe(word) words[i] = mark_safe(word)
elif autoescape: elif autoescape:
words[i] = escape(word) words[i] = escape(word)
return mark_safe(''.join(words)) return ''.join(words)