mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
fix loading custom tokenizer rules/exceptions from file
This commit is contained in:
parent
7bec0ebbcb
commit
c54aabc3cd
|
@@ -441,8 +441,13 @@ cdef class Tokenizer:
|
|||
self.infix_finditer = re.compile(data["infix_finditer"]).finditer
|
||||
if data.get("token_match"):
|
||||
self.token_match = re.compile(data["token_match"]).match
|
||||
for string, substrings in data.get("rules", {}).items():
|
||||
self.add_special_case(string, substrings)
|
||||
if data.get("rules"):
|
||||
# make sure to hard reset the cache to remove data from the default exceptions
|
||||
self._rules = {}
|
||||
self._cache = PreshMap()
|
||||
for string, substrings in data.get("rules", {}).items():
|
||||
self.add_special_case(string, substrings)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user