mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Handle deprecated tokenizer prefix data
This commit is contained in:
parent
abb09782f9
commit
de5aa92bc2
|
@@ -94,8 +94,13 @@ def read_regex(path):


 def compile_prefix_regex(entries):
-    expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()])
-    return re.compile(expression)
+    if '(' in entries:
+        # Handle deprecated data
+        expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()])
+        return re.compile(expression)
+    else:
+        expression = '|'.join(['^' + piece for piece in entries if piece.strip()])
+        return re.compile(expression)


 def compile_suffix_regex(entries):
Loading…
Reference in New Issue
Block a user