mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
fix loading custom tokenizer rules/exceptions from file
This commit is contained in:
parent
7bec0ebbcb
commit
c54aabc3cd
|
@ -441,8 +441,13 @@ cdef class Tokenizer:
|
||||||
self.infix_finditer = re.compile(data["infix_finditer"]).finditer
|
self.infix_finditer = re.compile(data["infix_finditer"]).finditer
|
||||||
if data.get("token_match"):
|
if data.get("token_match"):
|
||||||
self.token_match = re.compile(data["token_match"]).match
|
self.token_match = re.compile(data["token_match"]).match
|
||||||
|
if data.get("rules"):
|
||||||
|
# make sure to hard reset the cache to remove data from the default exceptions
|
||||||
|
self._rules = {}
|
||||||
|
self._cache = PreshMap()
|
||||||
for string, substrings in data.get("rules", {}).items():
|
for string, substrings in data.get("rules", {}).items():
|
||||||
self.add_special_case(string, substrings)
|
self.add_special_case(string, substrings)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user