mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
* Fix https for url detection
This commit is contained in:
parent
aa12b374c0
commit
3879d28457
|
@ -69,7 +69,7 @@ TLDs = set("com|org|edu|gov|net|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|mu
|
|||
cpdef bint like_url(unicode string):
|
||||
# We're looking for things that function in text like URLs. So, valid URL
|
||||
# or not, anything they say http:// is going to be good.
|
||||
if string.startswith('http://'):
|
||||
if string.startswith('http://') or string.startswith('https://'):
|
||||
return True
|
||||
elif string.startswith('www.') and len(string) >= 5:
|
||||
return True
|
||||
|
|
Loading…
Reference in New Issue
Block a user