mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 17:54:39 +03:00
Tidy up vocab init
This commit is contained in:
parent
c22ecc66bb
commit
a62337b3f3
|
@ -63,11 +63,7 @@ def init_pipeline(config: Config, use_gpu: int = -1) -> Language:
|
||||||
train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
|
train_corpus, dev_corpus = resolve_dot_names(config, dot_names)
|
||||||
I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
|
I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
|
||||||
V = I["vocab"]
|
V = I["vocab"]
|
||||||
init_vocab(nlp, data=V["data"], lookups=V["lookups"])
|
init_vocab(nlp, data=V["data"], lookups=V["lookups"], vectors=V["vectors"])
|
||||||
msg.good("Created vocabulary")
|
|
||||||
if V["vectors"] is not None:
|
|
||||||
add_vectors(nlp, V["vectors"])
|
|
||||||
msg.good(f"Added vectors: {V['vectors']}")
|
|
||||||
optimizer = T["optimizer"]
|
optimizer = T["optimizer"]
|
||||||
before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
|
before_to_disk = create_before_to_disk_callback(T["before_to_disk"])
|
||||||
# Components that shouldn't be updated during training
|
# Components that shouldn't be updated during training
|
||||||
|
@ -94,7 +90,11 @@ def init_pipeline(config: Config, use_gpu: int = -1) -> Language:
|
||||||
|
|
||||||
|
|
||||||
def init_vocab(
|
def init_vocab(
|
||||||
nlp: Language, *, data: Optional[Path] = None, lookups: Optional[Lookups] = None,
|
nlp: Language,
|
||||||
|
*,
|
||||||
|
data: Optional[Path] = None,
|
||||||
|
lookups: Optional[Lookups] = None,
|
||||||
|
vectors: Optional[str] = None,
|
||||||
) -> Language:
|
) -> Language:
|
||||||
if lookups:
|
if lookups:
|
||||||
nlp.vocab.lookups = lookups
|
nlp.vocab.lookups = lookups
|
||||||
|
@ -115,6 +115,10 @@ def init_vocab(
|
||||||
oov_prob = DEFAULT_OOV_PROB
|
oov_prob = DEFAULT_OOV_PROB
|
||||||
nlp.vocab.cfg.update({"oov_prob": oov_prob})
|
nlp.vocab.cfg.update({"oov_prob": oov_prob})
|
||||||
msg.good(f"Added {len(nlp.vocab)} lexical entries to the vocab")
|
msg.good(f"Added {len(nlp.vocab)} lexical entries to the vocab")
|
||||||
|
msg.good("Created vocabulary")
|
||||||
|
if vectors is not None:
|
||||||
|
add_vectors(nlp, vectors)
|
||||||
|
msg.good(f"Added vectors: {V['vectors']}")
|
||||||
|
|
||||||
|
|
||||||
def add_tok2vec_weights(
|
def add_tok2vec_weights(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user