From 6af6950e46570a78da4e75ea2871bcf1dcf199d9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 15 Nov 2018 22:45:36 +0000
Subject: [PATCH] Fix pretrain

---
 spacy/cli/pretrain.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 0c849d814..fea8db8fd 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -21,6 +21,7 @@ import time
 import ujson as json
 from pathlib import Path
 import sys
+from collections import Counter
 
 import spacy
 from spacy.attrs import ID
@@ -179,7 +180,7 @@ def pretrain(texts_loc, vectors_model, output_dir, width=128, depth=4,
     optimizer = create_default_optimizer(model.ops)
     tracker = ProgressTracker()
     print('Epoch', '#Words', 'Loss', 'w/s')
-    texts = stream_texts() if text_loc == '-' else load_texts(texts_loc)
+    texts = stream_texts() if texts_loc == '-' else load_texts(texts_loc)
     for epoch in range(nr_iter):
         for batch in minibatch_by_words(texts, tuples=False, size=50000):
             docs = [nlp.make_doc(text) for text in batch]