mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Fix check for '@' in like_url (see #1715)
This commit is contained in:
parent
21482b391b
commit
22dc744b48
|
@@ -74,8 +74,6 @@ def like_email(text):
|
||||||
|
|
||||||
|
|
||||||
def like_url(text):
|
def like_url(text):
|
||||||
if '@' in text: # prevent matches on e-mail addresses
|
|
||||||
return False
|
|
||||||
# We're looking for things that function in text like URLs. So, valid URL
|
# We're looking for things that function in text like URLs. So, valid URL
|
||||||
# or not, anything they say http:// is going to be good.
|
# or not, anything they say http:// is going to be good.
|
||||||
if text.startswith('http://') or text.startswith('https://'):
|
if text.startswith('http://') or text.startswith('https://'):
|
||||||
|
@@ -84,6 +82,10 @@ def like_url(text):
|
||||||
return True
|
return True
|
||||||
if text[0] == '.' or text[-1] == '.':
|
if text[0] == '.' or text[-1] == '.':
|
||||||
return False
|
return False
|
||||||
|
if '@' in text:
|
||||||
|
# prevent matches on e-mail addresses – check after splitting the text
|
||||||
|
# to still allow URLs containing an '@' character (see #1715)
|
||||||
|
return False
|
||||||
for i in range(len(text)):
|
for i in range(len(text)):
|
||||||
if text[i] == '.':
|
if text[i] == '.':
|
||||||
break
|
break
|
||||||
|
|
Loading…
Reference in New Issue
Block a user