mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
12 lines
322 B
Python
12 lines
322 B
Python
from __future__ import unicode_literals
|
|
|
|
import re
|
|
|
|
# Regular expression that recognises a whole string as a URL-like token.
#
# The pattern is anchored with ``^`` and ``$``, so it only succeeds when the
# *entire* input looks like a URL; it never matches a URL embedded in a longer
# string. It is assembled from adjacent raw-string pieces purely for
# readability -- the pieces concatenate into one single-line pattern.
_URL_PATTERN = (
    r'^('
    # Alternative 1: "<scheme>:" (3-9 letters), optional "//", optional
    # "userinfo@", then a host made of letters, digits, dots and hyphens.
    r'(([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+'
    # Alternative 2: no scheme, but either a literal "www." prefix or a
    # "userinfo@" prefix, followed by the host. The dot after "www" is
    # escaped so that strings such as "wwwabc" are not treated as URLs.
    r'|(?:www\.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)'
    # Optional tail: "/path", optional "?" and query characters, optional "#"
    # and a word-character fragment.
    r'((?:\/[\+~%\/.\w\-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?'
    r')$'
)

# Bound ``match`` method of the compiled pattern: call ``TOKEN_MATCH(text)`` to
# get a match object when ``text`` as a whole is URL-like, or ``None``
# otherwise. Compiled once at import time so callers do not recompile it.
TOKEN_MATCH = re.compile(_URL_PATTERN).match

__all__ = ['TOKEN_MATCH']
|