mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 09:00:36 +03:00
Pass gold_preproc setting into corpus
This commit is contained in:
parent
03b3da26be
commit
b5cd310543
|
@@ -213,7 +213,11 @@ def train(
|
||||||
nlp.resume_training()
|
nlp.resume_training()
|
||||||
else:
|
else:
|
||||||
msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
|
msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
|
||||||
train_examples = list(corpus.train_dataset(nlp, shuffle=False))
|
train_examples = list(corpus.train_dataset(
|
||||||
|
nlp,
|
||||||
|
shuffle=False,
|
||||||
|
gold_preproc=training["gold_preproc"]
|
||||||
|
))
|
||||||
nlp.begin_training(lambda: train_examples)
|
nlp.begin_training(lambda: train_examples)
|
||||||
|
|
||||||
# Update tag map with provided mapping
|
# Update tag map with provided mapping
|
||||||
|
@@ -305,10 +309,13 @@ def train(
|
||||||
def create_train_batches(nlp, corpus, cfg):
|
def create_train_batches(nlp, corpus, cfg):
|
||||||
epochs_todo = cfg.get("max_epochs", 0)
|
epochs_todo = cfg.get("max_epochs", 0)
|
||||||
while True:
|
while True:
|
||||||
train_examples = list(corpus.train_dataset(nlp))
|
train_examples = list(corpus.train_dataset(
|
||||||
|
nlp,
|
||||||
|
shuffle=True,
|
||||||
|
gold_preproc=cfg["gold_preproc"]
|
||||||
|
))
|
||||||
if len(train_examples) == 0:
|
if len(train_examples) == 0:
|
||||||
raise ValueError(Errors.E988)
|
raise ValueError(Errors.E988)
|
||||||
random.shuffle(train_examples)
|
|
||||||
batches = util.minibatch_by_words(
|
batches = util.minibatch_by_words(
|
||||||
train_examples,
|
train_examples,
|
||||||
size=cfg["batch_size"],
|
size=cfg["batch_size"],
|
||||||
|
|
Loading…
Reference in New Issue
Block a user