Minor renaming / refactoring

* Rename loader to `spacy.LookupsDataLoader.v1`, add debugging message
* Make `Vocab.lookups` a property
This commit is contained in:
Adriane Boyd 2020-09-18 19:43:19 +02:00
parent eed4b785f5
commit 47080fba98
4 changed files with 16 additions and 10 deletions

View File

@ -87,8 +87,9 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
return tokenizer_factory
@registry.misc("spacy.LoadLookupsData.v1")
@registry.misc("spacy.LookupsDataLoader.v1")
def load_lookups_data(lang, tables):
    """Load lookup tables for a language via `load_lookups`.

    Emits a debug log message listing the requested tables before loading.

    lang: Forwarded unchanged to `load_lookups` as `lang`.
    tables: Forwarded unchanged to `load_lookups` as `tables`.
    RETURNS: The object returned by `load_lookups`.
    """
    # NOTE(review): two registry names are applied here. The commit message
    # says the loader was renamed to "spacy.LookupsDataLoader.v1", so the
    # "spacy.LoadLookupsData.v1" line is presumably the removed side of the
    # diff -- confirm against the real file.
    util.logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
    return load_lookups(lang=lang, tables=tables)

View File

@ -258,7 +258,7 @@ def load_vocab_data_into_model(
) -> None:
"""Load vocab data."""
if lookups:
nlp.vocab.load_lookups(lookups)
nlp.vocab.lookups = lookups
def load_model(

View File

@ -28,7 +28,7 @@ cdef class Vocab:
cpdef readonly StringStore strings
cpdef public Morphology morphology
cpdef public object vectors
cpdef public object lookups
cpdef public object _lookups
cpdef public object writing_system
cpdef public object get_noun_chunks
cdef readonly int length

View File

@ -417,13 +417,18 @@ cdef class Vocab:
orth = self.strings.add(orth)
return orth in self.vectors
def load_lookups(self, lookups):
    """Attach `lookups` to the vocab and hook up its "lexeme_norm" table.

    When `lookups` has a "lexeme_norm" table, the currently registered NORM
    getter and that table are passed to `util.add_lookups`, and the result
    replaces the NORM entry in `self.lex_attr_getters`.

    lookups: Object providing `has_table` and `get_table`.
    """
    # NOTE(review): the commit message says `Vocab.lookups` became a
    # property, so this method is presumably the removed side of the diff
    # -- confirm against the real file.
    self.lookups = lookups
    if not lookups.has_table("lexeme_norm"):
        return
    current_norm = self.lex_attr_getters[NORM]
    norm_table = lookups.get_table("lexeme_norm")
    self.lex_attr_getters[NORM] = util.add_lookups(current_norm, norm_table)
property lookups:
    """The lookups object stored on this vocab as `_lookups`.

    Assigning a new value stores it and, when it has a "lexeme_norm" table,
    rebuilds the NORM entry of `lex_attr_getters` through `util.add_lookups`.
    """

    def __get__(self):
        return self._lookups

    def __set__(self, lookups):
        self._lookups = lookups
        if not lookups.has_table("lexeme_norm"):
            return
        # Use the registered NORM getter if there is one; otherwise fall
        # back to the LEX_ATTRS entry for NORM.
        base_getter = self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM])
        # `lookups` is the object just stored in `_lookups`, i.e. exactly
        # what the getter above would return.
        norm_table = lookups.get_table("lexeme_norm")
        self.lex_attr_getters[NORM] = util.add_lookups(base_getter, norm_table)
def to_disk(self, path, *, exclude=tuple()):
"""Save the current state to a directory.