mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 00:50:33 +03:00
Pass gold_preproc setting into corpus
This commit is contained in:
parent
03b3da26be
commit
b5cd310543
|
@@ -213,7 +213,11 @@ def train(
|
|||
nlp.resume_training()
|
||||
else:
|
||||
msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
|
||||
train_examples = list(corpus.train_dataset(nlp, shuffle=False))
|
||||
train_examples = list(corpus.train_dataset(
|
||||
nlp,
|
||||
shuffle=False,
|
||||
gold_preproc=training["gold_preproc"]
|
||||
))
|
||||
nlp.begin_training(lambda: train_examples)
|
||||
|
||||
# Update tag map with provided mapping
|
||||
|
@@ -305,10 +309,13 @@ def train(
|
|||
def create_train_batches(nlp, corpus, cfg):
|
||||
epochs_todo = cfg.get("max_epochs", 0)
|
||||
while True:
|
||||
train_examples = list(corpus.train_dataset(nlp))
|
||||
train_examples = list(corpus.train_dataset(
|
||||
nlp,
|
||||
shuffle=True,
|
||||
gold_preproc=cfg["gold_preproc"]
|
||||
))
|
||||
if len(train_examples) == 0:
|
||||
raise ValueError(Errors.E988)
|
||||
random.shuffle(train_examples)
|
||||
batches = util.minibatch_by_words(
|
||||
train_examples,
|
||||
size=cfg["batch_size"],
|
||||
|
|
Loading…
Reference in New Issue
Block a user