From 482c7cd1b94d9fab299635bc9ee12d8b31b8706a Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Mon, 9 Sep 2019 16:32:11 +0200 Subject: [PATCH] pulling tqdm imports in functions to avoid bug (tmp fix) (#4263) --- bin/ud/ud_run_test.py | 1 - bin/ud/ud_train.py | 4 +++- examples/training/conllu.py | 7 ++++--- examples/training/pretrain_textcat.py | 7 ++++++- examples/vectors_tensorboard.py | 4 +++- spacy/cli/init_model.py | 13 ++++++++++++- spacy/cli/profile.py | 4 +++- spacy/cli/train.py | 8 +++++++- 8 files changed, 38 insertions(+), 10 deletions(-) diff --git a/bin/ud/ud_run_test.py b/bin/ud/ud_run_test.py index b6307f799..1c529c831 100644 --- a/bin/ud/ud_run_test.py +++ b/bin/ud/ud_run_test.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals import plac -import tqdm from pathlib import Path import re import sys diff --git a/bin/ud/ud_train.py b/bin/ud/ud_train.py index 0600ab0ff..8f699db4f 100644 --- a/bin/ud/ud_train.py +++ b/bin/ud/ud_train.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals import plac -import tqdm from pathlib import Path import re import sys @@ -462,6 +461,9 @@ def main( vectors_dir=None, use_oracle_segments=False, ): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + spacy.util.fix_random_seed() lang.zh.Chinese.Defaults.use_jieba = False lang.ja.Japanese.Defaults.use_janome = False diff --git a/examples/training/conllu.py b/examples/training/conllu.py index a7745b93a..dfc790456 100644 --- a/examples/training/conllu.py +++ b/examples/training/conllu.py @@ -3,11 +3,9 @@ """ from __future__ import unicode_literals import plac -import tqdm import attr from pathlib import Path import re -import sys import json import spacy @@ -23,7 +21,7 @@ import itertools import random import numpy.random -import conll17_ud_eval +from bin.ud import conll17_ud_eval import spacy.lang.zh import spacy.lang.ja @@ -394,6 +392,9 @@ class TreebankPaths(object): limit=("Size limit", "option", "n", int), ) def main(ud_dir, parses_dir, config, corpus, limit=0): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + paths = TreebankPaths(ud_dir, corpus) if not (parses_dir / corpus).exists(): (parses_dir / corpus).mkdir() diff --git a/examples/training/pretrain_textcat.py b/examples/training/pretrain_textcat.py index 49dd28060..7c9556913 100644 --- a/examples/training/pretrain_textcat.py +++ b/examples/training/pretrain_textcat.py @@ -18,7 +18,6 @@ import random import spacy import thinc.extra.datasets from spacy.util import minibatch, use_gpu, compounding -import tqdm from spacy._ml import Tok2Vec from spacy.pipeline import TextCategorizer import numpy @@ -107,6 +106,9 @@ def create_pipeline(width, embed_size, vectors_model): def train_tensorizer(nlp, texts, dropout, n_iter): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + tensorizer = nlp.create_pipe("tensorizer") nlp.add_pipe(tensorizer) optimizer = nlp.begin_training() @@ -120,6 +122,9 @@ def train_tensorizer(nlp, texts, dropout, n_iter): def train_textcat(nlp, n_texts, n_iter=10): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + textcat = nlp.get_pipe("textcat") tok2vec_weights = textcat.model.tok2vec.to_bytes() (train_texts, train_cats), (dev_texts, dev_cats) = load_textcat_data(limit=n_texts) diff --git a/examples/vectors_tensorboard.py b/examples/vectors_tensorboard.py index 4cfe7f442..b1160888d 100644 --- a/examples/vectors_tensorboard.py +++ b/examples/vectors_tensorboard.py @@ -13,7 +13,6 @@ import numpy import plac import spacy import tensorflow as tf -import tqdm from tensorflow.contrib.tensorboard.plugins.projector import ( visualize_embeddings, ProjectorConfig, @@ -36,6 +35,9 @@ from tensorflow.contrib.tensorboard.plugins.projector import ( ), ) def main(vectors_loc, out_loc, name="spaCy_vectors"): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + meta_file = "{}.tsv".format(name) out_meta_file = path.join(out_loc, meta_file) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 93d37d4c9..955b420aa 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import plac import math -from tqdm import tqdm import numpy from ast import literal_eval from pathlib import Path @@ -109,6 +108,9 @@ def open_file(loc): def read_attrs_from_deprecated(freqs_loc, clusters_loc): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + from tqdm import tqdm + if freqs_loc is not None: with msg.loading("Counting frequencies..."): probs, _ = read_freqs(freqs_loc) @@ -186,6 +188,9 @@ def add_vectors(nlp, vectors_loc, prune_vectors): def read_vectors(vectors_loc): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + from tqdm import tqdm + f = open_file(vectors_loc) shape = tuple(int(size) for size in next(f).split()) vectors_data = numpy.zeros(shape=shape, dtype="f") @@ -202,6 +207,9 @@ def read_vectors(vectors_loc): def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + from tqdm import tqdm + counts = PreshCounter() total = 0 with freqs_loc.open() as f: @@ -231,6 +239,9 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50): def read_clusters(clusters_loc): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + from tqdm import tqdm + clusters = {} if ftfy is None: user_warning(Warnings.W004) diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py index 45e97b8ba..201ab13d5 100644 --- a/spacy/cli/profile.py +++ b/spacy/cli/profile.py @@ -7,7 +7,6 @@ import srsly import cProfile import pstats import sys -import tqdm import itertools import thinc.extra.datasets from wasabi import Printer @@ -48,6 +47,9 @@ def profile(model, inputs=None, n_texts=10000): def parse_texts(nlp, texts): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16): pass diff --git a/spacy/cli/train.py b/spacy/cli/train.py index c4355f1a1..fe30e1a3c 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals, division, print_function import plac import os from pathlib import Path -import tqdm from thinc.neural._classes.model import Model from timeit import default_timer as timer import shutil @@ -101,6 +100,10 @@ def train( JSON format. To convert data from other formats, use the `spacy convert` command. """ + + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + msg = Printer() util.fix_random_seed() util.set_env_log(verbose) @@ -390,6 +393,9 @@ def _score_for_model(meta): @contextlib.contextmanager def _create_progress_bar(total): + # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200 + import tqdm + if int(os.environ.get("LOG_FRIENDLY", 0)): yield else: