Use inline flags in token_match patterns (#5257)

* Use inline flags in token_match patterns

Use inline flags in `token_match` patterns so that serializing does not
lose the flag information (see the short illustration after the commit message).

* Modify inline flag

* Modify inline flag
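
As a minimal illustration of the motivation (not part of the commit, and using a placeholder pattern): flags passed as a separate argument to `re.compile` are not part of the pattern string, so anything that round-trips only the pattern string drops them, whereas inline flags travel with the string.

```python
import re

pattern = r"aujourd'hui"  # placeholder; spaCy's real patterns are much longer

# Flag passed as an argument: not recoverable from the pattern string alone.
compiled = re.compile(pattern, re.IGNORECASE)
restored = re.compile(compiled.pattern)      # a pattern-string-only round trip
print(bool(restored.match("Aujourd'hui")))   # False: IGNORECASE was lost

# Inline flag: embedded in the pattern string, so it survives the round trip.
compiled = re.compile("(?i)" + pattern)
restored = re.compile(compiled.pattern)
print(bool(restored.match("Aujourd'hui")))   # True
```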
adrianeboyd 2020-04-06 13:19:04 +02:00 committed by GitHub
parent e8be15e9b7
commit c981aa6684
3 changed files with 3 additions and 3 deletions

@@ -461,5 +461,5 @@ _regular_exp.append(URL_PATTERN)
 TOKENIZER_EXCEPTIONS = _exc
 TOKEN_MATCH = re.compile(
-    "|".join("(?:{})".format(m) for m in _regular_exp), re.IGNORECASE | re.UNICODE
+    "(?iu)" + "|".join("(?:{})".format(m) for m in _regular_exp)
 ).match
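
For reference, the inline `(?iu)` prefix compiles to the same effective flags as passing `re.IGNORECASE | re.UNICODE`; a quick check with a stand-in list (the real `_regular_exp` holds the French-specific patterns plus `URL_PATTERN`, as appended above):

```python
import re

# Stand-in for _regular_exp; not the actual French patterns.
_regular_exp = [r"c['’][a-z]+", r"[a-z]+-[a-z]+"]

old = re.compile(
    "|".join("(?:{})".format(m) for m in _regular_exp), re.IGNORECASE | re.UNICODE
)
new = re.compile("(?iu)" + "|".join("(?:{})".format(m) for m in _regular_exp))

assert old.flags == new.flags                        # same effective flags
assert bool(old.match("C'est")) and bool(new.match("C'est"))
print(new.pattern)                                   # the flags travel with the pattern string
```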

@@ -58,7 +58,7 @@ URL_PATTERN = (
     # fmt: on
 ).strip()
-TOKEN_MATCH = re.compile(URL_PATTERN, re.UNICODE).match
+TOKEN_MATCH = re.compile("(?u)" + URL_PATTERN).match
 BASE_EXCEPTIONS = {}
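
The tokenizer serializes these callables by storing only the pattern string of the bound `match` method (roughly `regex.__self__.pattern`), which is why the flag has to live inside that string. A sketch with a placeholder pattern (the real `URL_PATTERN` is far more involved):

```python
import re

URL_PATTERN = r"\w+://\S+"  # placeholder pattern, for illustration only

TOKEN_MATCH = re.compile("(?u)" + URL_PATTERN).match

# The bound match method still carries its compiled pattern, so the string
# that would be serialized now includes the inline flag.
pattern_string = TOKEN_MATCH.__self__.pattern
print(pattern_string)  # (?u)\w+://\S+
assert re.compile(pattern_string).flags == TOKEN_MATCH.__self__.flags
```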

@@ -567,7 +567,7 @@ cdef class Tokenizer:
         ))
         exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
         msg = util.from_bytes(bytes_data, deserializers, exclude)
-        for key in ["prefix_search", "suffix_search", "infix_finditer"]:
+        for key in ["prefix_search", "suffix_search", "infix_finditer", "token_match"]:
             if key in data:
                 data[key] = unescape_unicode(data[key])
         if "prefix_search" in data and isinstance(data["prefix_search"], basestring_):