Fix pretrain

Matthew Honnibal 2018-11-15 22:45:36 +00:00
parent 3e7b214e57
commit 6af6950e46


@@ -21,6 +21,7 @@ import time
 import ujson as json
 from pathlib import Path
 import sys
+from collections import Counter
 import spacy
 from spacy.attrs import ID
@@ -179,7 +180,7 @@ def pretrain(texts_loc, vectors_model, output_dir, width=128, depth=4,
     optimizer = create_default_optimizer(model.ops)
     tracker = ProgressTracker()
     print('Epoch', '#Words', 'Loss', 'w/s')
-    texts = stream_texts() if text_loc == '-' else load_texts(texts_loc)
+    texts = stream_texts() if texts_loc == '-' else load_texts(texts_loc)
     for epoch in range(nr_iter):
         for batch in minibatch_by_words(texts, tuples=False, size=50000):
            docs = [nlp.make_doc(text) for text in batch]
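
For context, the changed line picks between two helpers defined elsewhere in the script. Below is a minimal sketch, not the actual spaCy source: only the names stream_texts, load_texts, and texts_loc come from the diff above; the bodies and the JSONL 'text' field are assumptions made for illustration.

# Minimal sketch of the assumed helpers behind the fixed line.
# The script imports ujson as json; the stdlib json module is used here
# so the sketch runs on its own.
import json
import sys
from pathlib import Path

def stream_texts():
    # Assumed: yield one text per JSONL line read from stdin (the '-' convention).
    for line in sys.stdin:
        yield json.loads(line)['text']

def load_texts(texts_loc):
    # Assumed: read every text from a JSONL file on disk.
    with Path(texts_loc).open('r', encoding='utf8') as file_:
        return [json.loads(line)['text'] for line in file_]

# The fix: the old code tested `text_loc`, which does not match the
# `texts_loc` parameter in the function signature, so the check could not
# work as intended. The corrected line streams from stdin only when the
# location argument is '-'.
texts_loc = '-'  # hypothetical value for illustration
texts = stream_texts() if texts_loc == '-' else load_texts(texts_loc)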