mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-19 05:54:11 +03:00
Merge branch 'master' into spacy.io
This commit is contained in:
commit
1bb11953e8
|
@ -8,6 +8,7 @@ import plac
|
|||
from pathlib import Path
|
||||
import re
|
||||
import json
|
||||
import tqdm
|
||||
|
||||
import spacy
|
||||
import spacy.util
|
||||
|
@ -486,9 +487,6 @@ def main(
|
|||
vectors_dir=None,
|
||||
use_oracle_segments=False,
|
||||
):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
Token.set_extension("get_conllu_lines", method=get_token_conllu)
|
||||
Token.set_extension("begins_fused", default=False)
|
||||
Token.set_extension("inside_fused", default=False)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import logging
|
||||
import random
|
||||
|
||||
from tqdm import tqdm
|
||||
from collections import defaultdict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -119,8 +120,6 @@ def get_eval_results(data, el_pipe=None):
|
|||
Only evaluate entities that overlap between gold and NER, to isolate the performance of the NEL.
|
||||
If the docs in the data require further processing with an entity linker, set el_pipe.
|
||||
"""
|
||||
from tqdm import tqdm
|
||||
|
||||
docs = []
|
||||
golds = []
|
||||
for d, g in tqdm(data, leave=False):
|
||||
|
|
|
@ -6,6 +6,7 @@ import bz2
|
|||
import logging
|
||||
import random
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
|
||||
from functools import partial
|
||||
|
||||
|
@ -457,9 +458,6 @@ def read_training(nlp, entity_file_path, dev, limit, kb, labels_discard=None):
|
|||
""" This method provides training examples that correspond to the entity annotations found by the nlp object.
|
||||
For training, it will include both positive and negative examples by using the candidate generator from the kb.
|
||||
For testing (kb=None), it will include all positive examples only."""
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
if not labels_discard:
|
||||
labels_discard = []
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import attr
|
|||
from pathlib import Path
|
||||
import re
|
||||
import json
|
||||
import tqdm
|
||||
|
||||
import spacy
|
||||
import spacy.util
|
||||
|
@ -386,9 +387,6 @@ class TreebankPaths(object):
|
|||
limit=("Size limit", "option", "n", int),
|
||||
)
|
||||
def main(ud_dir, parses_dir, config, corpus, limit=0):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
Token.set_extension("get_conllu_lines", method=get_token_conllu)
|
||||
Token.set_extension("begins_fused", default=False)
|
||||
Token.set_extension("inside_fused", default=False)
|
||||
|
|
|
@ -14,6 +14,7 @@ pre-train with the development data, but also not *so* terrible: we're not using
|
|||
the development labels, after all --- only the unlabelled text.
|
||||
"""
|
||||
import plac
|
||||
import tqdm
|
||||
import random
|
||||
import spacy
|
||||
import thinc.extra.datasets
|
||||
|
@ -106,9 +107,6 @@ def create_pipeline(width, embed_size, vectors_model):
|
|||
|
||||
|
||||
def train_tensorizer(nlp, texts, dropout, n_iter):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
tensorizer = nlp.create_pipe("tensorizer")
|
||||
nlp.add_pipe(tensorizer)
|
||||
optimizer = nlp.begin_training()
|
||||
|
@ -122,9 +120,6 @@ def train_tensorizer(nlp, texts, dropout, n_iter):
|
|||
|
||||
|
||||
def train_textcat(nlp, n_texts, n_iter=10):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
textcat = nlp.get_pipe("textcat")
|
||||
tok2vec_weights = textcat.model.tok2vec.to_bytes()
|
||||
(train_texts, train_cats), (dev_texts, dev_cats) = load_textcat_data(limit=n_texts)
|
||||
|
|
|
@ -8,6 +8,7 @@ from __future__ import unicode_literals
|
|||
|
||||
from os import path
|
||||
|
||||
import tqdm
|
||||
import math
|
||||
import numpy
|
||||
import plac
|
||||
|
@ -35,9 +36,6 @@ from tensorflow.contrib.tensorboard.plugins.projector import (
|
|||
),
|
||||
)
|
||||
def main(vectors_loc, out_loc, name="spaCy_vectors"):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
meta_file = "{}.tsv".format(name)
|
||||
out_meta_file = path.join(out_loc, meta_file)
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ numpy>=1.15.0
|
|||
requests>=2.13.0,<3.0.0
|
||||
plac>=0.9.6,<1.2.0
|
||||
pathlib==1.0.1; python_version < "3.4"
|
||||
tqdm>=4.38.0,<5.0.0
|
||||
# Optional dependencies
|
||||
jsonschema>=2.6.0,<3.1.0
|
||||
# Development dependencies
|
||||
|
|
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||
|
||||
import plac
|
||||
import math
|
||||
from tqdm import tqdm
|
||||
import numpy
|
||||
from ast import literal_eval
|
||||
from pathlib import Path
|
||||
|
@ -116,9 +117,6 @@ def open_file(loc):
|
|||
|
||||
|
||||
def read_attrs_from_deprecated(freqs_loc, clusters_loc):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
from tqdm import tqdm
|
||||
|
||||
if freqs_loc is not None:
|
||||
with msg.loading("Counting frequencies..."):
|
||||
probs, _ = read_freqs(freqs_loc)
|
||||
|
@ -201,9 +199,6 @@ def add_vectors(nlp, vectors_loc, prune_vectors, name=None):
|
|||
|
||||
|
||||
def read_vectors(vectors_loc):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
from tqdm import tqdm
|
||||
|
||||
f = open_file(vectors_loc)
|
||||
shape = tuple(int(size) for size in next(f).split())
|
||||
vectors_data = numpy.zeros(shape=shape, dtype="f")
|
||||
|
@ -220,9 +215,6 @@ def read_vectors(vectors_loc):
|
|||
|
||||
|
||||
def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
from tqdm import tqdm
|
||||
|
||||
counts = PreshCounter()
|
||||
total = 0
|
||||
with freqs_loc.open() as f:
|
||||
|
@ -252,9 +244,6 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
|
|||
|
||||
|
||||
def read_clusters(clusters_loc):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
from tqdm import tqdm
|
||||
|
||||
clusters = {}
|
||||
if ftfy is None:
|
||||
user_warning(Warnings.W004)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
from __future__ import unicode_literals, division, print_function
|
||||
|
||||
import plac
|
||||
import tqdm
|
||||
from pathlib import Path
|
||||
import srsly
|
||||
import cProfile
|
||||
|
@ -46,9 +47,6 @@ def profile(model, inputs=None, n_texts=10000):
|
|||
|
||||
|
||||
def parse_texts(nlp, texts):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
|
||||
pass
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals, division, print_function
|
|||
|
||||
import plac
|
||||
import os
|
||||
import tqdm
|
||||
from pathlib import Path
|
||||
from thinc.neural._classes.model import Model
|
||||
from timeit import default_timer as timer
|
||||
|
@ -85,10 +86,6 @@ def train(
|
|||
JSON format. To convert data from other formats, use the `spacy convert`
|
||||
command.
|
||||
"""
|
||||
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
util.fix_random_seed()
|
||||
util.set_env_log(verbose)
|
||||
|
||||
|
@ -516,9 +513,6 @@ def _score_for_model(meta):
|
|||
|
||||
@contextlib.contextmanager
|
||||
def _create_progress_bar(total):
|
||||
# temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
|
||||
import tqdm
|
||||
|
||||
if int(os.environ.get("LOG_FRIENDLY", 0)):
|
||||
yield
|
||||
else:
|
||||
|
|
|
@ -53,7 +53,9 @@ class Warnings(object):
|
|||
W009 = ("Custom factory '{name}' provided by entry points of another "
|
||||
"package overwrites built-in factory.")
|
||||
W010 = ("As of v2.1.0, the PhraseMatcher doesn't have a phrase length "
|
||||
"limit anymore, so the max_length argument is now deprecated.")
|
||||
"limit anymore, so the max_length argument is now deprecated. "
|
||||
"If you did not specify this parameter, make sure you call the "
|
||||
"constructor with named arguments instead of positional ones.")
|
||||
W011 = ("It looks like you're calling displacy.serve from within a "
|
||||
"Jupyter notebook or a similar environment. This likely means "
|
||||
"you're already running a local web server, so there's no need to "
|
||||
|
|
|
@ -38,6 +38,7 @@ be shown.
|
|||
| Name | Type | Description |
|
||||
| --------------------------------------- | --------------- | ------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | `Vocab` | The vocabulary object, which must be shared with the documents the matcher will operate on. |
|
||||
| `max_length` | int | Deprecated argument - the `PhraseMatcher` does not have a phrase length limit anymore. |
|
||||
| `attr` <Tag variant="new">2.1</Tag> | int / unicode | The token attribute to match on. Defaults to `ORTH`, i.e. the verbatim token text. |
|
||||
| `validate` <Tag variant="new">2.1</Tag> | bool | Validate patterns added to the matcher. |
|
||||
| **RETURNS** | `PhraseMatcher` | The newly constructed object. |
|
||||
|
|
Loading…
Reference in New Issue
Block a user