mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Reversed accidental changes.
This commit is contained in:
parent
1be5da1ac6
commit
be7a7aeb1a
|
@ -4,7 +4,7 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from spacy.language_data.punctuation import ALPHA_LOWER, CURRENCY
|
from spacy.language_data.punctuation import ALPHA_LOWER, CURRENCY
|
||||||
from ..language_data.tokenizer_exceptions import URL_PATTERN
|
from ..language_data.tokenizer_exceptions import _URL_PATTERN
|
||||||
|
|
||||||
ABBREVIATIONS = """
|
ABBREVIATIONS = """
|
||||||
AkH.
|
AkH.
|
||||||
|
@ -548,4 +548,4 @@ NUMS = "(({ne})|({t})|({on})|({c}))({s})?".format(
|
||||||
c=CURRENCY, s=_SUFFIES
|
c=CURRENCY, s=_SUFFIES
|
||||||
)
|
)
|
||||||
|
|
||||||
TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=URL_PATTERN, n=NUMS)).match
|
TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=_URL_PATTERN, n=NUMS)).match
|
||||||
|
|
|
@ -2,10 +2,10 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
URL_PATTERN = r'''
|
_URL_PATTERN = r'''
|
||||||
((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w\-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)
|
^((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w\-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)$
|
||||||
'''.strip()
|
'''.strip()
|
||||||
|
|
||||||
TOKEN_MATCH = re.compile("^{}$".format(URL_PATTERN)).match
|
TOKEN_MATCH = re.compile(_URL_PATTERN).match
|
||||||
|
|
||||||
__all__ = ['TOKEN_MATCH']
|
__all__ = ['TOKEN_MATCH']
|
||||||
|
|
Loading…
Reference in New Issue
Block a user