Minor renaming / refactoring

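* Rename the lookups loader from `spacy.LoadLookupsData.v1` to `spacy.LookupsDataLoader.v1`, and add a debug log message listing the tables being loaded
* Make `Vocab.lookups` a property (replacing `Vocab.load_lookups`); setting it stores the lookups and, if a `lexeme_norm` table is present, applies it to the `NORM` lexeme attribute getter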
This commit is contained in:
Adriane Boyd 2020-09-18 19:43:19 +02:00
parent eed4b785f5
commit 47080fba98
4 changed files with 16 additions and 10 deletions

View File

@ -87,8 +87,9 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
return tokenizer_factory return tokenizer_factory
@registry.misc("spacy.LoadLookupsData.v1") @registry.misc("spacy.LookupsDataLoader.v1")
def load_lookups_data(lang, tables): def load_lookups_data(lang, tables):
util.logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
lookups = load_lookups(lang=lang, tables=tables) lookups = load_lookups(lang=lang, tables=tables)
return lookups return lookups

View File

@ -258,7 +258,7 @@ def load_vocab_data_into_model(
) -> None: ) -> None:
"""Load vocab data.""" """Load vocab data."""
if lookups: if lookups:
nlp.vocab.load_lookups(lookups) nlp.vocab.lookups = lookups
def load_model( def load_model(

View File

@ -28,7 +28,7 @@ cdef class Vocab:
cpdef readonly StringStore strings cpdef readonly StringStore strings
cpdef public Morphology morphology cpdef public Morphology morphology
cpdef public object vectors cpdef public object vectors
cpdef public object lookups cpdef public object _lookups
cpdef public object writing_system cpdef public object writing_system
cpdef public object get_noun_chunks cpdef public object get_noun_chunks
cdef readonly int length cdef readonly int length

View File

@ -417,13 +417,18 @@ cdef class Vocab:
orth = self.strings.add(orth) orth = self.strings.add(orth)
return orth in self.vectors return orth in self.vectors
def load_lookups(self, lookups): property lookups:
self.lookups = lookups def __get__(self):
if lookups.has_table("lexeme_norm"): return self._lookups
self.lex_attr_getters[NORM] = util.add_lookups(
self.lex_attr_getters[NORM], def __set__(self, lookups):
lookups.get_table("lexeme_norm"), self._lookups = lookups
) if lookups.has_table("lexeme_norm"):
self.lex_attr_getters[NORM] = util.add_lookups(
self.lex_attr_getters.get(NORM, LEX_ATTRS[NORM]),
self.lookups.get_table("lexeme_norm"),
)
def to_disk(self, path, *, exclude=tuple()): def to_disk(self, path, *, exclude=tuple()):
"""Save the current state to a directory. """Save the current state to a directory.