From 6455b574fc91bbe2605921544f1bbc3a6a5883af Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 12 Dec 2017 10:25:13 +0100 Subject: [PATCH] Check for email address first --- spacy/lang/lex_attrs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py index 8a1bc2b50..0a671ab1d 100644 --- a/spacy/lang/lex_attrs.py +++ b/spacy/lang/lex_attrs.py @@ -74,6 +74,8 @@ def like_email(text): def like_url(text): + if '@' in text: # prevent matches on e-mail addresses + return False # We're looking for things that function in text like URLs. So, valid URL # or not, anything they say http:// is going to be good. if text.startswith('http://') or text.startswith('https://'): @@ -82,8 +84,6 @@ def like_url(text): return True if text[0] == '.' or text[-1] == '.': return False - if '@' in text: #prevent matches on e-mail addresses - return False for i in range(len(text)): if text[i] == '.': break