spaCy/spacy/language_data/special_cases.py

6 lines
250 B
Python

from __future__ import unicode_literals
# List of regular-expression strings. The raw literal below holds one
# whitespace-free pattern per line; ``.strip().split()`` turns it into a list
# of pattern strings (currently a single URL / e-mail-like pattern).
#
# That pattern matches:
#   * either a 3-9 letter scheme followed by ":" and an optional "//", then an
#     optional "userinfo@" part and a host made of letters, digits, "." and "-";
#   * or a host introduced by a literal "www." or by a "userinfo@" part;
#   * optionally followed by a path, a "?" query string and a "#" fragment.
#
# The dot after "www" is escaped on purpose: an unescaped "." matches any
# character, which would make plain words such as "wwwhatever" look like URLs.
EXCEPTION_PATTERNS = r'''
((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w\-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)
'''.strip().split()