mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Rename _URL_PATTERN to URL_PATTERN
This commit is contained in:
parent
604f299cf6
commit
a91278cb32
|
@ -144,7 +144,7 @@ _regular_exp += ["^{prefix}[{hyphen}][{alpha}][{alpha}{elision}{other_hyphen}\-]
|
||||||
_regular_exp += ["^{prefix}[{elision}][{alpha}][{alpha}{elision}{hyphen}\-]*$".format(
|
_regular_exp += ["^{prefix}[{elision}][{alpha}][{alpha}{elision}{hyphen}\-]*$".format(
|
||||||
prefix=p, elision=HYPHENS, hyphen=_other_hyphens, alpha=ALPHA_LOWER)
|
prefix=p, elision=HYPHENS, hyphen=_other_hyphens, alpha=ALPHA_LOWER)
|
||||||
for p in _elision_prefix]
|
for p in _elision_prefix]
|
||||||
_regular_exp.append(_URL_PATTERN)
|
_regular_exp.append(URL_PATTERN)
|
||||||
|
|
||||||
|
|
||||||
TOKENIZER_EXCEPTIONS = dict(_exc)
|
TOKENIZER_EXCEPTIONS = dict(_exc)
|
||||||
|
|
|
@ -96,4 +96,4 @@ _nums = "(({ne})|({t})|({on})|({c}))({s})?".format(
|
||||||
|
|
||||||
|
|
||||||
TOKENIZER_EXCEPTIONS = dict(_exc)
|
TOKENIZER_EXCEPTIONS = dict(_exc)
|
||||||
TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=_URL_PATTERN, n=_nums)).match
|
TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=URL_PATTERN, n=_nums)).match
|
||||||
|
|
|
@ -10,7 +10,7 @@ from ..symbols import ORTH, POS, LEMMA, SPACE, PUNCT
|
||||||
|
|
||||||
# URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex
|
# URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex
|
||||||
# A few minor mods to this regex to account for use cases represented in test_urls
|
# A few minor mods to this regex to account for use cases represented in test_urls
|
||||||
_URL_PATTERN = (
|
URL_PATTERN = (
|
||||||
r"^"
|
r"^"
|
||||||
# in order to support the prefix tokenization (see prefix test cases in test_urls).
|
# in order to support the prefix tokenization (see prefix test cases in test_urls).
|
||||||
r"(?=[\w])"
|
r"(?=[\w])"
|
||||||
|
@ -53,7 +53,7 @@ _URL_PATTERN = (
|
||||||
r"$"
|
r"$"
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
TOKEN_MATCH = regex.compile(_URL_PATTERN, regex.UNICODE).match
|
TOKEN_MATCH = regex.compile(URL_PATTERN, regex.UNICODE).match
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user