mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 02:04:07 +03:00
Use inline flags in token_match patterns (#5257)
* Use inline flags in token_match patterns

  Use inline flags in `token_match` patterns so that serializing does not lose the flag information.

* Modify inline flag

* Modify inline flag
This commit is contained in:
parent
e8be15e9b7
commit
c981aa6684
|
@ -461,5 +461,5 @@ _regular_exp.append(URL_PATTERN)
|
||||||
|
|
||||||
TOKENIZER_EXCEPTIONS = _exc
|
TOKENIZER_EXCEPTIONS = _exc
|
||||||
TOKEN_MATCH = re.compile(
|
TOKEN_MATCH = re.compile(
|
||||||
"|".join("(?:{})".format(m) for m in _regular_exp), re.IGNORECASE | re.UNICODE
|
"(?iu)" + "|".join("(?:{})".format(m) for m in _regular_exp)
|
||||||
).match
|
).match
|
||||||
|
|
|
@ -58,7 +58,7 @@ URL_PATTERN = (
|
||||||
# fmt: on
|
# fmt: on
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
TOKEN_MATCH = re.compile(URL_PATTERN, re.UNICODE).match
|
TOKEN_MATCH = re.compile("(?u)" + URL_PATTERN).match
|
||||||
|
|
||||||
|
|
||||||
BASE_EXCEPTIONS = {}
|
BASE_EXCEPTIONS = {}
|
||||||
|
|
|
@ -567,7 +567,7 @@ cdef class Tokenizer:
|
||||||
))
|
))
|
||||||
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
|
exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
|
||||||
msg = util.from_bytes(bytes_data, deserializers, exclude)
|
msg = util.from_bytes(bytes_data, deserializers, exclude)
|
||||||
for key in ["prefix_search", "suffix_search", "infix_finditer"]:
|
for key in ["prefix_search", "suffix_search", "infix_finditer", "token_match"]:
|
||||||
if key in data:
|
if key in data:
|
||||||
data[key] = unescape_unicode(data[key])
|
data[key] = unescape_unicode(data[key])
|
||||||
if "prefix_search" in data and isinstance(data["prefix_search"], basestring_):
|
if "prefix_search" in data and isinstance(data["prefix_search"], basestring_):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user