diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py index 0a671ab1d..c3bb4a8ff 100644 --- a/spacy/lang/lex_attrs.py +++ b/spacy/lang/lex_attrs.py @@ -74,8 +74,6 @@ def like_email(text): def like_url(text): - if '@' in text: # prevent matches on e-mail addresses - return False # We're looking for things that function in text like URLs. So, valid URL # or not, anything they say http:// is going to be good. if text.startswith('http://') or text.startswith('https://'): @@ -84,6 +82,10 @@ def like_url(text): return True if text[0] == '.' or text[-1] == '.': return False + if '@' in text: + # prevent matches on e-mail addresses – check after splitting the text + # to still allow URLs containing an '@' character (see #1715) + return False for i in range(len(text)): if text[i] == '.': break