diff --git a/spacy/__init__.py b/spacy/__init__.py index 4a311ec86..2c063ce24 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -5,7 +5,7 @@ warnings.filterwarnings("ignore", message="numpy.dtype size changed") warnings.filterwarnings("ignore", message="numpy.ufunc size changed") # These are imported as part of the API -from thinc.util import prefer_gpu, require_gpu +from thinc.api import prefer_gpu, require_gpu from . import pipeline from .cli.info import info as cli_info diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py index 0f7677fd2..585eaea51 100644 --- a/spacy/cli/__init__.py +++ b/spacy/cli/__init__.py @@ -4,7 +4,7 @@ from .link import link # noqa: F401 from .package import package # noqa: F401 from .profile import profile # noqa: F401 from .train import train # noqa: F401 -from .train_from_config import train_from_config_cli # noqa: F401 +from .train_from_config import train_from_config_cli # noqa: F401 from .pretrain import pretrain # noqa: F401 from .debug_data import debug_data # noqa: F401 from .evaluate import evaluate # noqa: F401 diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 4bcafce24..1705bf446 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -192,11 +192,7 @@ def debug_data( has_ws_ents_error = True if gold_train_data["punct_ents"]: - msg.warn( - "{} entity span(s) with punctuation".format( - gold_train_data["punct_ents"] - ) - ) + msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation") has_punct_ents_warning = True for label in new_labels: diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index 2cef378c0..690e3107d 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -4,14 +4,12 @@ import time import re from collections import Counter from pathlib import Path -from thinc.layers import Linear, Maxout -from thinc.util import prefer_gpu +from thinc.api import Linear, Maxout, chain, list2array, prefer_gpu +from thinc.api import CosineDistance, L2Distance from wasabi import msg import srsly -from thinc.layers import chain, list2array -from thinc.loss import CosineDistance, L2Distance -from spacy.gold import Example +from ..gold import Example from ..errors import Errors from ..tokens import Doc from ..attrs import ID, HEAD @@ -85,7 +83,7 @@ def pretrain( ) if not output_dir.exists(): output_dir.mkdir() - msg.good("Created output directory: {}".format(output_dir)) + msg.good(f"Created output directory: {output_dir}") srsly.write_json(output_dir / "config.json", config) msg.good("Saved settings to config.json") diff --git a/spacy/cli/train.py b/spacy/cli/train.py index adae91ff9..d8514095b 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -1,7 +1,7 @@ import os import tqdm from pathlib import Path -from thinc.backends import use_ops +from thinc.api import use_ops from timeit import default_timer as timer import shutil import srsly @@ -89,7 +89,7 @@ def train( ) if not output_path.exists(): output_path.mkdir() - msg.good("Created output directory: {}".format(output_path)) + msg.good(f"Created output directory: {output_path}") tag_map = {} if tag_map_path is not None: @@ -125,17 +125,17 @@ def train( msg.text(f"Training pipeline: {pipeline}") disabled_pipes = None pipes_added = False - msg.text("Training pipeline: {}".format(pipeline)) + msg.text(f"Training pipeline: {pipeline}") if use_gpu >= 0: activated_gpu = None try: activated_gpu = set_gpu(use_gpu) except Exception as e: - msg.warn("Exception: {}".format(e)) + msg.warn(f"Exception: {e}") if activated_gpu is not 
None: - msg.text("Using GPU: {}".format(use_gpu)) + msg.text(f"Using GPU: {use_gpu}") else: - msg.warn("Unable to activate GPU: {}".format(use_gpu)) + msg.warn(f"Unable to activate GPU: {use_gpu}") msg.text("Using CPU only") use_gpu = -1 if base_model: @@ -158,11 +158,11 @@ def train( "positive_label": textcat_positive_label, } if pipe not in nlp.pipe_names: - msg.text("Adding component to base model '{}'".format(pipe)) + msg.text(f"Adding component to base model '{pipe}'") nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg)) pipes_added = True elif replace_components: - msg.text("Replacing component from base model '{}'".format(pipe)) + msg.text(f"Replacing component from base model '{pipe}'") nlp.replace_pipe(pipe, nlp.create_pipe(pipe, config=pipe_cfg)) pipes_added = True else: @@ -180,7 +180,7 @@ def train( f"Existing cfg: {base_cfg}, provided cfg: {pipe_cfg}", exits=1, ) - msg.text("Extending component from base model '{}'".format(pipe)) + msg.text(f"Extending component from base model '{pipe}'") disabled_pipes = nlp.disable_pipes( [p for p in nlp.pipe_names if p not in pipeline] ) @@ -377,7 +377,7 @@ def train( msg.warn( "Did you provide the same parameters during 'train' as during 'pretrain'?" ) - msg.fail("Original error message: {}".format(e), exits=1) + msg.fail(f"Original error message: {e}", exits=1) if raw_text: # If raw text is available, perform 'rehearsal' updates, # which use unlabelled data to reduce overfitting. @@ -504,11 +504,7 @@ def train( ) break except Exception as e: - msg.warn( - "Aborting and saving the final best model. Encountered exception: {}".format( - e - ) - ) + msg.warn(f"Aborting and saving final best model. Encountered exception: {e}") finally: best_pipes = nlp.pipe_names if disabled_pipes: diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train_from_config.py index 0488dd04c..9150da356 100644 --- a/spacy/cli/train_from_config.py +++ b/spacy/cli/train_from_config.py @@ -1,19 +1,20 @@ +from typing import Optional, Dict, List, Union, Sequence import plac -from thinc.util import require_gpu from wasabi import msg from pathlib import Path import thinc import thinc.schedules -from thinc.model import Model -from spacy.gold import GoldCorpus -import spacy -from spacy.pipeline.tok2vec import Tok2VecListener -from typing import Optional, Dict, List, Union, Sequence +from thinc.api import Model from pydantic import BaseModel, FilePath, StrictInt import tqdm -from ..ml import component_models -from .. import util +# TODO: relative imports? 
+import spacy +from spacy.gold import GoldCorpus +from spacy.pipeline.tok2vec import Tok2VecListener +from spacy.ml import component_models +from spacy import util + registry = util.registry @@ -153,10 +154,9 @@ def create_tb_parser_model( hidden_width: StrictInt = 64, maxout_pieces: StrictInt = 3, ): - from thinc.layers import Linear, chain, list2array + from thinc.api import Linear, chain, list2array, use_ops, zero_init from spacy.ml._layers import PrecomputableAffine from spacy.syntax._parser_model import ParserModel - from thinc.api import use_ops, zero_init token_vector_width = tok2vec.get_dim("nO") tok2vec = chain(tok2vec, list2array()) @@ -221,13 +221,9 @@ def train_from_config_cli( def train_from_config( - config_path, - data_paths, - raw_text=None, - meta_path=None, - output_path=None, + config_path, data_paths, raw_text=None, meta_path=None, output_path=None, ): - msg.info("Loading config from: {}".format(config_path)) + msg.info(f"Loading config from: {config_path}") config = util.load_from_config(config_path, create_objects=True) use_gpu = config["training"]["use_gpu"] if use_gpu >= 0: @@ -241,9 +237,7 @@ def train_from_config( msg.info("Loading training corpus") corpus = GoldCorpus(data_paths["train"], data_paths["dev"], limit=limit) msg.info("Initializing the nlp pipeline") - nlp.begin_training( - lambda: corpus.train_examples, device=use_gpu - ) + nlp.begin_training(lambda: corpus.train_examples, device=use_gpu) train_batches = create_train_batches(nlp, corpus, config["training"]) evaluate = create_evaluation_callback(nlp, optimizer, corpus, config["training"]) @@ -260,7 +254,7 @@ def train_from_config( config["training"]["eval_frequency"], ) - msg.info("Training. Initial learn rate: {}".format(optimizer.learn_rate)) + msg.info(f"Training. Initial learn rate: {optimizer.learn_rate}") print_row = setup_printer(config) try: @@ -414,7 +408,7 @@ def subdivide_batch(batch): def setup_printer(config): score_cols = config["training"]["scores"] score_widths = [max(len(col), 6) for col in score_cols] - loss_cols = ["Loss {}".format(pipe) for pipe in config["nlp"]["pipeline"]] + loss_cols = [f"Loss {pipe}" for pipe in config["nlp"]["pipeline"]] loss_widths = [max(len(col), 8) for col in loss_cols] table_header = ["#"] + loss_cols + score_cols + ["Score"] table_header = [col.upper() for col in table_header] diff --git a/spacy/compat.py b/spacy/compat.py index 6fa49353e..8c5c2930b 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -30,7 +30,7 @@ try: except ImportError: cupy = None -from thinc.optimizers import Optimizer # noqa: F401 +from thinc.api import Optimizer # noqa: F401 pickle = pickle copy_reg = copy_reg diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py index d6970aa2f..a721ce480 100644 --- a/spacy/displacy/templates.py +++ b/spacy/displacy/templates.py @@ -1,4 +1,3 @@ - # Setting explicit height and max-width: none on the SVG is required for # Jupyter to render it properly in a cell diff --git a/spacy/glossary.py b/spacy/glossary.py index 5e7e531a9..938a575cd 100644 --- a/spacy/glossary.py +++ b/spacy/glossary.py @@ -1,4 +1,3 @@ - def explain(term): """Get a description for a given POS tag, dependency label or entity type. 
diff --git a/spacy/gold.pxd b/spacy/gold.pxd index 49dba16df..aea691130 100644 --- a/spacy/gold.pxd +++ b/spacy/gold.pxd @@ -1,6 +1,6 @@ from cymem.cymem cimport Pool -from spacy.tokens import Doc +from .tokens import Doc from .typedefs cimport attr_t from .syntax.transition_system cimport Transition @@ -65,5 +65,3 @@ cdef class Example: cdef public TokenAnnotation token_annotation cdef public DocAnnotation doc_annotation cdef public object goldparse - - diff --git a/spacy/kb.pxd b/spacy/kb.pxd index d5aa382b1..518ce0f4e 100644 --- a/spacy/kb.pxd +++ b/spacy/kb.pxd @@ -6,7 +6,7 @@ from libcpp.vector cimport vector from libc.stdint cimport int32_t, int64_t from libc.stdio cimport FILE -from spacy.vocab cimport Vocab +from .vocab cimport Vocab from .typedefs cimport hash_t from .structs cimport KBEntryC, AliasC @@ -113,7 +113,7 @@ cdef class KnowledgeBase: return new_index cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil: - """ + """ Initializing the vectors and making sure the first element of each vector is a dummy, because the PreshMap maps pointing to indices in these vectors can not contain 0 as value cf. https://github.com/explosion/preshed/issues/17 @@ -169,4 +169,3 @@ cdef class Reader: cdef int read_alias(self, int64_t* entry_index, float* prob) except -1 cdef int _read(self, void* value, size_t size) except -1 - diff --git a/spacy/lang/af/stop_words.py b/spacy/lang/af/stop_words.py index dfd144de9..4b5a04a5e 100644 --- a/spacy/lang/af/stop_words.py +++ b/spacy/lang/af/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-af STOP_WORDS = set( diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py index 45a252bc9..aae7692a2 100644 --- a/spacy/lang/bg/stop_words.py +++ b/spacy/lang/bg/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Alir3z4/stop-words STOP_WORDS = set( diff --git a/spacy/lang/bn/examples.py b/spacy/lang/bn/examples.py index 051e59d84..c3be4c556 100644 --- a/spacy/lang/bn/examples.py +++ b/spacy/lang/bn/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/bn/stop_words.py b/spacy/lang/bn/stop_words.py index 6bcd06b37..bf38e3254 100644 --- a/spacy/lang/bn/stop_words.py +++ b/spacy/lang/bn/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ অতএব অথচ অথবা অনুযায়ী অনেক অনেকে অনেকেই অন্তত অবধি অবশ্য অর্থাৎ অন্য অনুযায়ী অর্ধভাগে diff --git a/spacy/lang/ca/examples.py b/spacy/lang/ca/examples.py index 3fbf1fb0a..ae6aa3e24 100644 --- a/spacy/lang/ca/examples.py +++ b/spacy/lang/ca/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/cs/stop_words.py b/spacy/lang/cs/stop_words.py index e8171a7e5..70aab030b 100644 --- a/spacy/lang/cs/stop_words.py +++ b/spacy/lang/cs/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Alir3z4/stop-words STOP_WORDS = set( diff --git a/spacy/lang/da/examples.py b/spacy/lang/da/examples.py index e5c6448f0..80b2b925b 100644 --- a/spacy/lang/da/examples.py +++ b/spacy/lang/da/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/de/examples.py b/spacy/lang/de/examples.py index 530ece629..735d1c316 100644 --- a/spacy/lang/de/examples.py +++ b/spacy/lang/de/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/de/stop_words.py b/spacy/lang/de/stop_words.py index cc5aa0f3c..f52687eb9 100644 --- a/spacy/lang/de/stop_words.py +++ b/spacy/lang/de/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ á a ab aber ach acht achte achten achter achtes ag alle allein allem allen @@ -44,7 +43,7 @@ kleines kommen kommt können könnt konnte könnte konnten kurz lang lange leicht leider lieber los machen macht machte mag magst man manche manchem manchen mancher manches mehr -mein meine meinem meinen meiner meines mich mir mit mittel mochte möchte mochten +mein meine meinem meinen meiner meines mich mir mit mittel mochte möchte mochten mögen möglich mögt morgen muss muß müssen musst müsst musste mussten na nach nachdem nahm natürlich neben nein neue neuen neun neunte neunten neunter diff --git a/spacy/lang/el/get_pos_from_wiktionary.py b/spacy/lang/el/get_pos_from_wiktionary.py index 01deb23a2..369973cc0 100644 --- a/spacy/lang/el/get_pos_from_wiktionary.py +++ b/spacy/lang/el/get_pos_from_wiktionary.py @@ -1,4 +1,3 @@ - def get_pos_from_wiktionary(): import re from gensim.corpora.wikicorpus import extract_pages diff --git a/spacy/lang/el/norm_exceptions.py b/spacy/lang/el/norm_exceptions.py index d540aae2c..aa774c19b 100644 --- a/spacy/lang/el/norm_exceptions.py +++ b/spacy/lang/el/norm_exceptions.py @@ -1,4 +1,3 @@ - # These exceptions are used to add NORM values based on a token's ORTH value. # Norms are only set if no alternative is provided in the tokenizer exceptions. diff --git a/spacy/lang/el/stop_words.py b/spacy/lang/el/stop_words.py index 8484826d1..7c436219f 100644 --- a/spacy/lang/el/stop_words.py +++ b/spacy/lang/el/stop_words.py @@ -1,4 +1,3 @@ - # Stop words # Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0 STOP_WORDS = set( diff --git a/spacy/lang/en/examples.py b/spacy/lang/en/examples.py index 0363a45e7..2cca9e05f 100644 --- a/spacy/lang/en/examples.py +++ b/spacy/lang/en/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/en/norm_exceptions.py b/spacy/lang/en/norm_exceptions.py index 431d9c049..4125cd37b 100644 --- a/spacy/lang/en/norm_exceptions.py +++ b/spacy/lang/en/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Slang and abbreviations "cos": "because", diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py index 4573c9411..1ca5cbc16 100644 --- a/spacy/lang/en/stop_words.py +++ b/spacy/lang/en/stop_words.py @@ -1,4 +1,3 @@ - # Stop words STOP_WORDS = set( """ diff --git a/spacy/lang/es/examples.py b/spacy/lang/es/examples.py index 1c1ad631b..a1db41a16 100644 --- a/spacy/lang/es/examples.py +++ b/spacy/lang/es/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/es/stop_words.py b/spacy/lang/es/stop_words.py index 3d46a88cb..004df4fca 100644 --- a/spacy/lang/es/stop_words.py +++ b/spacy/lang/es/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ actualmente acuerdo adelante ademas además adrede afirmó agregó ahi ahora ahí diff --git a/spacy/lang/et/stop_words.py b/spacy/lang/et/stop_words.py index 3b600a158..e1da1f14d 100644 --- a/spacy/lang/et/stop_words.py +++ b/spacy/lang/et/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-et STOP_WORDS = set( diff --git a/spacy/lang/fa/examples.py b/spacy/lang/fa/examples.py index d89feb6c8..9c6fb0345 100644 --- a/spacy/lang/fa/examples.py +++ b/spacy/lang/fa/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/fa/generate_verbs_exc.py b/spacy/lang/fa/generate_verbs_exc.py index 61586dc3f..62094c6de 100644 --- a/spacy/lang/fa/generate_verbs_exc.py +++ b/spacy/lang/fa/generate_verbs_exc.py @@ -1,4 +1,3 @@ - verb_roots = """ #هست آخت#آهنج diff --git a/spacy/lang/fa/stop_words.py b/spacy/lang/fa/stop_words.py index 372422b67..f462f2e7a 100644 --- a/spacy/lang/fa/stop_words.py +++ b/spacy/lang/fa/stop_words.py @@ -1,4 +1,3 @@ - # Stop words from HAZM package STOP_WORDS = set( """ diff --git a/spacy/lang/fi/stop_words.py b/spacy/lang/fi/stop_words.py index 642cfc369..8e8dcfa56 100644 --- a/spacy/lang/fi/stop_words.py +++ b/spacy/lang/fi/stop_words.py @@ -1,4 +1,3 @@ - # Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt # Reformatted with some minor corrections STOP_WORDS = set( diff --git a/spacy/lang/fr/examples.py b/spacy/lang/fr/examples.py index 57d57f4a6..a74a62204 100644 --- a/spacy/lang/fr/examples.py +++ b/spacy/lang/fr/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/fr/stop_words.py b/spacy/lang/fr/stop_words.py index 9c12e49a3..a331f3c0f 100644 --- a/spacy/lang/fr/stop_words.py +++ b/spacy/lang/fr/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ a à â abord absolument afin ah ai aie ailleurs ainsi ait allaient allo allons diff --git a/spacy/lang/ga/irish_morphology_helpers.py b/spacy/lang/ga/irish_morphology_helpers.py index c8cd36835..d606da975 100644 --- a/spacy/lang/ga/irish_morphology_helpers.py +++ b/spacy/lang/ga/irish_morphology_helpers.py @@ -1,4 +1,3 @@ - # fmt: off consonants = ["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "z"] broad_vowels = ["a", "á", "o", "ó", "u", "ú"] diff --git a/spacy/lang/he/examples.py b/spacy/lang/he/examples.py index 29075c7d4..d54d2a145 100644 --- a/spacy/lang/he/examples.py +++ b/spacy/lang/he/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py index 7639ff940..ecb0b328c 100644 --- a/spacy/lang/hi/examples.py +++ b/spacy/lang/hi/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/hi/stop_words.py b/spacy/lang/hi/stop_words.py index 142fc6f47..475b07da1 100644 --- a/spacy/lang/hi/stop_words.py +++ b/spacy/lang/hi/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6 STOP_WORDS = set( diff --git a/spacy/lang/hu/examples.py b/spacy/lang/hu/examples.py index b60f752ec..711a438bd 100644 --- a/spacy/lang/hu/examples.py +++ b/spacy/lang/hu/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/hu/stop_words.py b/spacy/lang/hu/stop_words.py index 024af68f4..e39a26d35 100644 --- a/spacy/lang/hu/stop_words.py +++ b/spacy/lang/hu/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben diff --git a/spacy/lang/id/examples.py b/spacy/lang/id/examples.py index 2ce46ce5a..1069232ff 100644 --- a/spacy/lang/id/examples.py +++ b/spacy/lang/id/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/is/stop_words.py b/spacy/lang/is/stop_words.py index 5b3ff2f5a..917fb6df4 100644 --- a/spacy/lang/is/stop_words.py +++ b/spacy/lang/is/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Xangis/extra-stopwords STOP_WORDS = set( diff --git a/spacy/lang/it/examples.py b/spacy/lang/it/examples.py index 30327bd14..506721276 100644 --- a/spacy/lang/it/examples.py +++ b/spacy/lang/it/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/it/stop_words.py b/spacy/lang/it/stop_words.py index 5cd1af137..e97613912 100644 --- a/spacy/lang/it/stop_words.py +++ b/spacy/lang/it/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl diff --git a/spacy/lang/ja/examples.py b/spacy/lang/ja/examples.py index 1d532ad77..c3a011862 100644 --- a/spacy/lang/ja/examples.py +++ b/spacy/lang/ja/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/kn/stop_words.py b/spacy/lang/kn/stop_words.py index cfeb0e69d..dba9740af 100644 --- a/spacy/lang/kn/stop_words.py +++ b/spacy/lang/kn/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ ಹಲವು diff --git a/spacy/lang/lt/examples.py b/spacy/lang/lt/examples.py index b2889114c..eaf941f1a 100644 --- a/spacy/lang/lt/examples.py +++ b/spacy/lang/lt/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/lv/stop_words.py b/spacy/lang/lv/stop_words.py index a9612f949..2685c2430 100644 --- a/spacy/lang/lv/stop_words.py +++ b/spacy/lang/lv/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-lv STOP_WORDS = set( diff --git a/spacy/lang/mr/stop_words.py b/spacy/lang/mr/stop_words.py index 0d7501461..9b0cee951 100644 --- a/spacy/lang/mr/stop_words.py +++ b/spacy/lang/mr/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json STOP_WORDS = set( """ diff --git a/spacy/lang/nb/examples.py b/spacy/lang/nb/examples.py index 89e265951..b1a63ad74 100644 --- a/spacy/lang/nb/examples.py +++ b/spacy/lang/nb/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/nl/examples.py b/spacy/lang/nl/examples.py index fcefa9d62..8c8c50c60 100644 --- a/spacy/lang/nl/examples.py +++ b/spacy/lang/nl/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/norm_exceptions.py b/spacy/lang/norm_exceptions.py index c194f05c7..f35f613b1 100644 --- a/spacy/lang/norm_exceptions.py +++ b/spacy/lang/norm_exceptions.py @@ -1,4 +1,3 @@ - # These exceptions are used to add NORM values based on a token's ORTH value. # Individual languages can also add their own exceptions and overwrite them - # for example, British vs. American spelling in English. diff --git a/spacy/lang/pl/examples.py b/spacy/lang/pl/examples.py index 6eabe1843..b1ea5880f 100644 --- a/spacy/lang/pl/examples.py +++ b/spacy/lang/pl/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/pt/examples.py b/spacy/lang/pt/examples.py index 7427f8b25..13f3512cf 100644 --- a/spacy/lang/pt/examples.py +++ b/spacy/lang/pt/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/pt/stop_words.py b/spacy/lang/pt/stop_words.py index 8065fcda7..ff45ad3a7 100644 --- a/spacy/lang/pt/stop_words.py +++ b/spacy/lang/pt/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes diff --git a/spacy/lang/ro/examples.py b/spacy/lang/ro/examples.py index d472f0d6d..bfa258ffc 100644 --- a/spacy/lang/ro/examples.py +++ b/spacy/lang/ro/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/ru/examples.py b/spacy/lang/ru/examples.py index 34cf5a1eb..adb007625 100644 --- a/spacy/lang/ru/examples.py +++ b/spacy/lang/ru/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/ru/norm_exceptions.py b/spacy/lang/ru/norm_exceptions.py index c5d725031..0975bf5b8 100644 --- a/spacy/lang/ru/norm_exceptions.py +++ b/spacy/lang/ru/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Slang "прив": "привет", diff --git a/spacy/lang/si/examples.py b/spacy/lang/si/examples.py index 0ff00e76e..b34051d00 100644 --- a/spacy/lang/si/examples.py +++ b/spacy/lang/si/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/si/stop_words.py b/spacy/lang/si/stop_words.py index 49723c860..bde662bf7 100644 --- a/spacy/lang/si/stop_words.py +++ b/spacy/lang/si/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ අතර diff --git a/spacy/lang/sk/stop_words.py b/spacy/lang/sk/stop_words.py index 269bdc58b..017e7beef 100644 --- a/spacy/lang/sk/stop_words.py +++ b/spacy/lang/sk/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Ardevop-sk/stopwords-sk STOP_WORDS = set( diff --git a/spacy/lang/sl/stop_words.py b/spacy/lang/sl/stop_words.py index c8596ad0b..6fb01a183 100644 --- a/spacy/lang/sl/stop_words.py +++ b/spacy/lang/sl/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-sl # TODO: probably needs to be tidied up – the list seems to have month names in # it, which shouldn't be considered stop words. diff --git a/spacy/lang/sq/examples.py b/spacy/lang/sq/examples.py index e1075f70a..06ed20fa1 100644 --- a/spacy/lang/sq/examples.py +++ b/spacy/lang/sq/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/sq/stop_words.py b/spacy/lang/sq/stop_words.py index 58ee87d05..f2b1a4f4a 100644 --- a/spacy/lang/sq/stop_words.py +++ b/spacy/lang/sq/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/andrixh/index-albanian STOP_WORDS = set( diff --git a/spacy/lang/sr/examples.py b/spacy/lang/sr/examples.py index 1ac867f4c..ec7f57ced 100644 --- a/spacy/lang/sr/examples.py +++ b/spacy/lang/sr/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/sr/norm_exceptions.py b/spacy/lang/sr/norm_exceptions.py index add8350a0..723ab84c0 100644 --- a/spacy/lang/sr/norm_exceptions.py +++ b/spacy/lang/sr/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Slang "ћале": "отац", diff --git a/spacy/lang/sr/stop_words.py b/spacy/lang/sr/stop_words.py index 488c82a75..5df5509d2 100644 --- a/spacy/lang/sr/stop_words.py +++ b/spacy/lang/sr/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ а diff --git a/spacy/lang/sv/examples.py b/spacy/lang/sv/examples.py index 98eee700b..bc6cd7a54 100644 --- a/spacy/lang/sv/examples.py +++ b/spacy/lang/sv/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/sv/stop_words.py b/spacy/lang/sv/stop_words.py index 4d933a76d..2422b2a9e 100644 --- a/spacy/lang/sv/stop_words.py +++ b/spacy/lang/sv/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ aderton adertonde adjö aldrig alla allas allt alltid alltså än andra andras diff --git a/spacy/lang/ta/examples.py b/spacy/lang/ta/examples.py index 2590163cb..a53227220 100644 --- a/spacy/lang/ta/examples.py +++ b/spacy/lang/ta/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/ta/stop_words.py b/spacy/lang/ta/stop_words.py index 83410d65e..abbff949d 100644 --- a/spacy/lang/ta/stop_words.py +++ b/spacy/lang/ta/stop_words.py @@ -1,4 +1,3 @@ - # Stop words STOP_WORDS = set( diff --git a/spacy/lang/te/examples.py b/spacy/lang/te/examples.py index 6162b231e..cff7d3cb0 100644 --- a/spacy/lang/te/examples.py +++ b/spacy/lang/te/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/th/norm_exceptions.py b/spacy/lang/th/norm_exceptions.py index 98b878308..b8ddbab16 100644 --- a/spacy/lang/th/norm_exceptions.py +++ b/spacy/lang/th/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Conjugation and Diversion invalid to Tonal form (ผันอักษรและเสียงไม่ตรงกับรูปวรรณยุกต์) "สนุ๊กเกอร์": "สนุกเกอร์", diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py index fa4e347fd..ee58a7b09 100644 --- a/spacy/lang/tokenizer_exceptions.py +++ b/spacy/lang/tokenizer_exceptions.py @@ -34,7 +34,7 @@ URL_PATTERN = ( r"|" # host & domain names # mods: match is case-sensitive, so include [A-Z] - "(?:" + "(?:" # noqa: E131 "(?:" "[A-Za-z0-9\u00a1-\uffff]" "[A-Za-z0-9\u00a1-\uffff_-]{0,62}" diff --git a/spacy/lang/tr/examples.py b/spacy/lang/tr/examples.py index a14d87a46..dfb324a4e 100644 --- a/spacy/lang/tr/examples.py +++ b/spacy/lang/tr/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. >>> from spacy.lang.tr.examples import sentences diff --git a/spacy/lang/uk/examples.py b/spacy/lang/uk/examples.py index d17768ea6..f75d44488 100644 --- a/spacy/lang/uk/examples.py +++ b/spacy/lang/uk/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/ur/examples.py b/spacy/lang/ur/examples.py index 7024483b5..e55b337be 100644 --- a/spacy/lang/ur/examples.py +++ b/spacy/lang/ur/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/xx/__init__.py b/spacy/lang/xx/__init__.py index 2af650703..347c624fd 100644 --- a/spacy/lang/xx/__init__.py +++ b/spacy/lang/xx/__init__.py @@ -1,4 +1,3 @@ - from ..tokenizer_exceptions import BASE_EXCEPTIONS from ..norm_exceptions import BASE_NORMS from ...language import Language diff --git a/spacy/lang/xx/examples.py b/spacy/lang/xx/examples.py index 15f5c4ff8..8d63c3c20 100644 --- a/spacy/lang/xx/examples.py +++ b/spacy/lang/xx/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/yo/examples.py b/spacy/lang/yo/examples.py index 9b875d09e..0a610f125 100644 --- a/spacy/lang/yo/examples.py +++ b/spacy/lang/yo/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/zh/examples.py b/spacy/lang/zh/examples.py index d0715eb0d..8be1336d2 100644 --- a/spacy/lang/zh/examples.py +++ b/spacy/lang/zh/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/language.py b/spacy/language.py index 3aaf0b327..1c6014cec 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -4,7 +4,7 @@ import weakref import functools from contextlib import contextmanager from copy import copy, deepcopy -from thinc.backends import get_current_ops +from thinc.api import get_current_ops import srsly import multiprocessing as mp from itertools import chain, cycle diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 1292a46bd..5910ebfe1 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -6,7 +6,7 @@ cimport numpy as np np.import_array() import numpy -from thinc.util import get_array_module +from thinc.api import get_array_module from .typedefs cimport attr_t, flags_t from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE diff --git a/spacy/ml/_character_embed.py b/spacy/ml/_character_embed.py index 2ff67746f..b366f67c6 100644 --- a/spacy/ml/_character_embed.py +++ b/spacy/ml/_character_embed.py @@ -3,18 +3,20 @@ from thinc.api import Model def CharacterEmbed(nM, nC): # nM: Number of dimensions per character. nC: Number of characters. 
- nO = nM*nC if (nM is not None and nC is not None) else None + nO = nM * nC if (nM is not None and nC is not None) else None return Model( "charembed", forward, init=init, dims={"nM": nM, "nC": nC, "nO": nO, "nV": 256}, - params={"E": None} + params={"E": None}, ).initialize() def init(model, X=None, Y=None): - vectors_table = model.ops.alloc3f(model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM")) + vectors_table = model.ops.alloc3f( + model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM") + ) model.set_param("E", vectors_table) diff --git a/spacy/ml/_layers.py b/spacy/ml/_layers.py index e6aa798e7..7e9150d8b 100644 --- a/spacy/ml/_layers.py +++ b/spacy/ml/_layers.py @@ -1,5 +1,4 @@ -from thinc.model import Model -from thinc.api import normal_init +from thinc.api import Model, normal_init def PrecomputableAffine(nO, nI, nF, nP): @@ -20,9 +19,7 @@ def forward(model, X, is_train): nP = model.get_dim("nP") nI = model.get_dim("nI") W = model.get_param("W") - Yf = model.ops.gemm( - X, W.reshape((nF * nO * nP, nI)), trans2=True - ) + Yf = model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True) Yf = Yf.reshape((Yf.shape[0], nF, nO, nP)) Yf = model.ops.xp.vstack((model.get_param("pad"), Yf)) @@ -37,14 +34,14 @@ def forward(model, X, is_train): # for b in range(nB): # for f in range(nF): # dYf[b, ids[b, f]] += dY[b] - # + # # However, we avoid building that array for efficiency -- and just pass # in the indices. dY, ids = dY_ids assert dY.ndim == 3 assert dY.shape[1] == nO, dY.shape assert dY.shape[2] == nP, dY.shape - nB = dY.shape[0] + # nB = dY.shape[0] model.inc_grad("pad", _backprop_precomputable_affine_padding(model, dY, ids)) Xf = X[ids] Xf = Xf.reshape((Xf.shape[0], nF * nI)) @@ -83,12 +80,12 @@ def _backprop_precomputable_affine_padding(model, dY, ids): # for f in range(nF): # if ids[b, f] < 0: # d_padding[0, f] += dY[b] - # + # # Which can be rewritten as: # # for b in range(nB): # d_pad[0, ids[b] < 0] += dY[b] - # + # # I don't know how to avoid the loop without building a whole array :(. # Cursed numpy. 
d_pad = model.ops.alloc((1, nF, nO, nP)) @@ -118,7 +115,7 @@ def init(model, X=None, Y=None): pad = model.ops.alloc4f(1, nF, nO, nP) ops = model.ops - W = normal_init(ops, W.shape, fan_in=nF*nI) + W = normal_init(ops, W.shape, fan_in=nF * nI) model.set_param("W", W) model.set_param("b", b) model.set_param("pad", pad) diff --git a/spacy/ml/component_models.py b/spacy/ml/component_models.py index a24c2bfce..8c694f950 100644 --- a/spacy/ml/component_models.py +++ b/spacy/ml/component_models.py @@ -9,7 +9,7 @@ from thinc.api import Model, Maxout, Linear, residual, reduce_mean, list2ragged from thinc.api import PyTorchLSTM, add, MultiSoftmax, HashEmbed, StaticVectors from thinc.api import expand_window, FeatureExtractor, SparseLinear, chain from thinc.api import clone, concatenate, with_array, Softmax, Logistic, uniqued -from thinc.api import zero_init, glorot_uniform_init +from thinc.api import zero_init def build_text_classifier(arch, config): @@ -33,10 +33,7 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes, **cfg output_layer = Softmax(nO=nr_class, nI=tok2vec.get_dim("nO")) else: # TODO: experiment with init_w=zero_init - output_layer = ( - Linear(nO=nr_class, nI=tok2vec.get_dim("nO")) - >> Logistic() - ) + output_layer = Linear(nO=nr_class, nI=tok2vec.get_dim("nO")) >> Logistic() model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer model.set_ref("tok2vec", tok2vec) model.set_dim("nO", nr_class) @@ -149,13 +146,21 @@ def Tok2Vec( with Model.define_operators({">>": chain, "|": concatenate, "**": clone}): norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=0.0) if subword_features: - prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0) - suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0) - shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0) + prefix = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0 + ) + suffix = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0 + ) + shape = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0 + ) else: prefix, suffix, shape = (None, None, None) if pretrained_vectors is not None: - glove = StaticVectors(vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0) + glove = StaticVectors( + vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0 + ) if subword_features: embed = uniqued( diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py index 1ec5b5fc1..d4195b9a4 100644 --- a/spacy/ml/extract_ngrams.py +++ b/spacy/ml/extract_ngrams.py @@ -1,5 +1,5 @@ import numpy -from thinc.model import Model +from thinc.api import Model from ..attrs import LOWER @@ -26,9 +26,7 @@ def forward(self, docs, is_train: bool): # The dtype here matches what thinc is expecting -- which differs per # platform (by int definition). This should be fixed once the problem # is fixed on Thinc's side. 
- lengths = self.ops.asarray( - [arr.shape[0] for arr in batch_keys], dtype=numpy.int_ - ) + lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_) batch_keys = self.ops.xp.concatenate(batch_keys) batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f") @@ -36,4 +34,3 @@ def forward(self, docs, is_train: bool): return dY return (batch_keys, batch_vals, lengths), backprop - diff --git a/spacy/ml/tok2vec.py b/spacy/ml/tok2vec.py index 102b88604..5e51bc47a 100644 --- a/spacy/ml/tok2vec.py +++ b/spacy/ml/tok2vec.py @@ -1,11 +1,8 @@ -from thinc.layers import chain, clone, concatenate, with_array, uniqued -from thinc.model import Model -from thinc.layers import noop, with_padded -from thinc.layers import Maxout, expand_window -from thinc.layers import HashEmbed, StaticVectors -from thinc.layers import residual, LayerNorm, FeatureExtractor +from thinc.api import Model, chain, clone, concatenate, with_array, uniqued, noop +from thinc.api import with_padded, Maxout, expand_window, HashEmbed, StaticVectors +from thinc.api import residual, LayerNorm, FeatureExtractor -from spacy.ml import _character_embed +from ..ml import _character_embed from ..util import make_layer, registry @@ -93,8 +90,10 @@ def MaxoutWindowEncoder(config): nW = config["window_size"] nP = config["pieces"] depth = config["depth"] - - cnn = expand_window(window_size=nW), Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True) + cnn = ( + expand_window(window_size=nW), + Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True), + ) model = clone(residual(cnn), depth) model.set_dim("nO", nO) model.attrs["receptive_field"] = nW * depth @@ -103,13 +102,16 @@ def MaxoutWindowEncoder(config): @registry.architectures.register("spacy.MishWindowEncoder.v1") def MishWindowEncoder(config): - from thinc.layers import Mish + from thinc.api import Mish nO = config["width"] nW = config["window_size"] depth = config["depth"] - - cnn = chain(expand_window(window_size=nW), Mish(nO=nO, nI=nO * ((nW * 2) + 1)), LayerNorm(nO)) + cnn = chain( + expand_window(window_size=nW), + Mish(nO=nO, nI=nO * ((nW * 2) + 1)), + LayerNorm(nO), + ) model = clone(residual(cnn), depth) model.set_dim("nO", nO) return model @@ -118,14 +120,20 @@ def MishWindowEncoder(config): @registry.architectures.register("spacy.PretrainedVectors.v1") def PretrainedVectors(config): # TODO: actual vectors instead of name - return StaticVectors(vectors=config["vectors_name"], nO=config["width"], column=config["column"], dropout=0.0) + return StaticVectors( + vectors=config["vectors_name"], + nO=config["width"], + column=config["column"], + dropout=0.0, + ) @registry.architectures.register("spacy.TorchBiLSTMEncoder.v1") def TorchBiLSTMEncoder(config): import torch.nn - # TODO FIX - from thinc.layers import PyTorchRNNWrapper + + # TODO: FIX + from thinc.api import PyTorchRNNWrapper width = config["width"] depth = config["depth"] diff --git a/spacy/pipeline/hooks.py b/spacy/pipeline/hooks.py index 00c328e81..d48b04bd1 100644 --- a/spacy/pipeline/hooks.py +++ b/spacy/pipeline/hooks.py @@ -1,4 +1,4 @@ -from thinc.layers import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity +from thinc.api import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity from .pipes import Pipe from ..language import component @@ -63,8 +63,7 @@ class SimilarityHook(Pipe): @classmethod def Model(cls, length): return siamese( - concatenate(reduce_max(), reduce_mean()), - CauchySimilarity(length * 2) + 
concatenate(reduce_max(), reduce_mean()), CauchySimilarity(length * 2) ) def __call__(self, doc): diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 7b9e4b04e..999132b35 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -3,8 +3,8 @@ from collections import defaultdict import numpy cimport numpy as np -from thinc.layers import chain, list2array -from thinc.util import to_categorical, copy_array, get_array_module +from thinc.api import chain, list2array, to_categorical, get_array_module +from thinc.util import copy_array from .. import util from .pipes import Pipe diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index c77281b2c..ad75d2e78 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -3,11 +3,9 @@ import numpy import srsly import random -from thinc.layers import chain, Linear, Maxout, Softmax, LayerNorm, list2array -from thinc.initializers import zero_init -from thinc.loss import CosineDistance -from thinc.util import to_categorical, get_array_module -from thinc.model import set_dropout_rate +from thinc.api import chain, Linear, Maxout, Softmax, LayerNorm, list2array +from thinc.api import zero_init, CosineDistance, to_categorical, get_array_module +from thinc.api import set_dropout_rate from ..tokens.doc cimport Doc from ..syntax.nn_parser cimport Parser diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index 9857c87eb..8290468cf 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -1,3 +1,5 @@ +from thinc.api import Model, set_dropout_rate + from .pipes import Pipe from ..gold import Example from ..tokens import Doc @@ -5,8 +7,6 @@ from ..vocab import Vocab from ..language import component from ..util import link_vectors_to_models, minibatch, registry, eg2doc -from thinc.model import Model, set_dropout_rate - @component("tok2vec", assigns=["doc.tensor"]) class Tok2Vec(Pipe): @@ -39,7 +39,9 @@ class Tok2Vec(Pipe): self.listeners = [] def create_listener(self): - listener = Tok2VecListener(upstream_name="tok2vec", width=self.model.get_dim("nO")) + listener = Tok2VecListener( + upstream_name="tok2vec", width=self.model.get_dim("nO") + ) self.listeners.append(listener) def add_listener(self, listener): @@ -112,10 +114,10 @@ class Tok2Vec(Pipe): docs = [docs] set_dropout_rate(self.model, drop) tokvecs, bp_tokvecs = self.model.begin_update(docs) - + def capture_losses(d_tokvecs): """Accumulate tok2vec loss before doing backprop.""" - l2_loss = sum((d_t2v**2).sum() for d_t2v in d_tokvecs) + l2_loss = sum((d_t2v ** 2).sum() for d_t2v in d_tokvecs) if self.name in losses: losses[self.name] += l2_loss / len(d_tokvecs) else: @@ -133,7 +135,9 @@ class Tok2Vec(Pipe): def get_loss(self, docs, golds, scores): pass - def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs): + def begin_training( + self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs + ): """Allocate models and pre-process training data get_examples (function): Function returning example training data. @@ -151,6 +155,7 @@ class Tok2VecListener(Model): """A layer that gets fed its answers from an upstream connection, for instance from a component earlier in the pipeline. 
""" + name = "tok2vec-listener" def __init__(self, upstream_name, width): diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx index cb8e1d127..442233f19 100644 --- a/spacy/syntax/_parser_model.pyx +++ b/spacy/syntax/_parser_model.pyx @@ -11,9 +11,7 @@ from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free, realloc from cymem.cymem cimport Pool from thinc.extra.search cimport Beam -from thinc.layers import Linear -from thinc.model import Model -from thinc.backends import CupyOps, NumpyOps, use_ops +from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops from thinc.backends.linalg cimport Vec, VecVec cimport blis.cy diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 8e55d3873..cf57e1cf6 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -1,11 +1,8 @@ # cython: infer_types=True # cython: cdivision=True # cython: boundscheck=False -import numpy cimport cython.parallel -import numpy.random cimport numpy as np -from itertools import islice from cpython.ref cimport PyObject, Py_XDECREF from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno from libc.math cimport exp @@ -14,15 +11,16 @@ from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free from cymem.cymem cimport Pool from thinc.extra.search cimport Beam -from thinc.layers import chain, clone, Linear, list2array -from thinc.backends import NumpyOps, CupyOps, use_ops -from thinc.util import get_array_module from thinc.backends.linalg cimport Vec, VecVec -from thinc.initializers import zero_init -from thinc.model import set_dropout_rate -import srsly -from spacy.gold import Example +from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops +from thinc.api import get_array_module, zero_init, set_dropout_rate +from itertools import islice +import srsly +import numpy.random +import numpy + +from ..gold import Example from ..typedefs cimport weight_t, class_t, hash_t from ._parser_model cimport alloc_activations, free_activations from ._parser_model cimport predict_states, arg_max_if_valid diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx index f024c1f05..27516ffd9 100644 --- a/spacy/syntax/nonproj.pyx +++ b/spacy/syntax/nonproj.pyx @@ -6,7 +6,7 @@ scheme. 
""" from copy import copy -from spacy.gold import Example +from ..gold import Example from ..tokens.doc cimport Doc, set_children_from_heads from ..errors import Errors diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index b7627b175..4323bb736 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -1,4 +1,3 @@ - import pytest import numpy from spacy.tokens import Doc, Span @@ -274,7 +273,19 @@ def test_doc_is_nered(en_vocab): def test_doc_from_array_sent_starts(en_vocab): words = ["I", "live", "in", "New", "York", ".", "I", "like", "cats", "."] heads = [0, 0, 0, 0, 0, 0, 6, 6, 6, 6] - deps = ["ROOT", "dep", "dep", "dep", "dep", "dep", "ROOT", "dep", "dep", "dep", "dep"] + deps = [ + "ROOT", + "dep", + "dep", + "dep", + "dep", + "dep", + "ROOT", + "dep", + "dep", + "dep", + "dep", + ] doc = Doc(en_vocab, words=words) for i, (dep, head) in enumerate(zip(deps, heads)): doc[i].dep_ = dep diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py index 82fb549ba..221b6f683 100644 --- a/spacy/tests/doc/test_morphanalysis.py +++ b/spacy/tests/doc/test_morphanalysis.py @@ -29,7 +29,9 @@ def test_morph_props(i_has): def test_morph_iter(i_has): assert set(i_has[0].morph) == set(["PronType=prs"]) - assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"]) + assert set(i_has[1].morph) == set( + ["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"] + ) def test_morph_get(i_has): diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py index 17bcd2c64..5e564d1f2 100644 --- a/spacy/tests/doc/test_retokenize_merge.py +++ b/spacy/tests/doc/test_retokenize_merge.py @@ -8,7 +8,12 @@ from ..util import get_doc def test_doc_retokenize_merge(en_tokenizer): text = "WKRO played songs by the beach boys all night" - attrs = {"tag": "NAMED", "lemma": "LEMMA", "ent_type": "TYPE", "morph": "Number=Plur"} + attrs = { + "tag": "NAMED", + "lemma": "LEMMA", + "ent_type": "TYPE", + "morph": "Number=Plur", + } doc = en_tokenizer(text) assert len(doc) == 9 with doc.retokenize() as retokenizer: diff --git a/spacy/tests/lang/ar/test_text.py b/spacy/tests/lang/ar/test_text.py index f4a8cc1e3..c5ab376f1 100644 --- a/spacy/tests/lang/ar/test_text.py +++ b/spacy/tests/lang/ar/test_text.py @@ -1,4 +1,3 @@ - def test_ar_tokenizer_handles_long_text(ar_tokenizer): text = """نجيب محفوظ مؤلف و كاتب روائي عربي، يعد من أهم الأدباء العرب خلال القرن العشرين. 
ولد نجيب محفوظ في مدينة القاهرة، حيث ترعرع و تلقى تعليمه الجامعي في جامعتها، diff --git a/spacy/tests/lang/en/test_indices.py b/spacy/tests/lang/en/test_indices.py index d50c75fc5..93daeec30 100644 --- a/spacy/tests/lang/en/test_indices.py +++ b/spacy/tests/lang/en/test_indices.py @@ -1,4 +1,3 @@ - def test_en_simple_punct(en_tokenizer): text = "to walk, do foo" tokens = en_tokenizer(text) diff --git a/spacy/tests/lang/fi/test_tokenizer.py b/spacy/tests/lang/fi/test_tokenizer.py index 02aa63207..bcd62f239 100644 --- a/spacy/tests/lang/fi/test_tokenizer.py +++ b/spacy/tests/lang/fi/test_tokenizer.py @@ -19,16 +19,10 @@ HYPHENATED_TESTS = [ ABBREVIATION_INFLECTION_TESTS = [ ( "VTT:ssa ennen v:ta 2010 suoritetut mittaukset", - ["VTT:ssa", "ennen", "v:ta", "2010", "suoritetut", "mittaukset"] + ["VTT:ssa", "ennen", "v:ta", "2010", "suoritetut", "mittaukset"], ), - ( - "ALV:n osuus on 24 %.", - ["ALV:n", "osuus", "on", "24", "%", "."] - ), - ( - "Hiihtäjä oli kilpailun 14:s.", - ["Hiihtäjä", "oli", "kilpailun", "14:s", "."] - ) + ("ALV:n osuus on 24 %.", ["ALV:n", "osuus", "on", "24", "%", "."]), + ("Hiihtäjä oli kilpailun 14:s.", ["Hiihtäjä", "oli", "kilpailun", "14:s", "."]), ] diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py index d0d8c2268..fd3acd0a0 100644 --- a/spacy/tests/lang/hu/test_tokenizer.py +++ b/spacy/tests/lang/hu/test_tokenizer.py @@ -294,12 +294,7 @@ WIKI_TESTS = [ ] EXTRA_TESTS = ( - DOT_TESTS - + QUOTE_TESTS - + NUMBER_TESTS - + HYPHEN_TESTS - + WIKI_TESTS - + TYPO_TESTS + DOT_TESTS + QUOTE_TESTS + NUMBER_TESTS + HYPHEN_TESTS + WIKI_TESTS + TYPO_TESTS ) # normal: default tests + 10% of extra tests @@ -308,7 +303,14 @@ TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0]) # slow: remaining 90% of extra tests SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0] -TESTS.extend([pytest.param(x[0], x[1], marks=pytest.mark.slow()) if not isinstance(x[0], tuple) else x for x in SLOW_TESTS]) +TESTS.extend( + [ + pytest.param(x[0], x[1], marks=pytest.mark.slow()) + if not isinstance(x[0], tuple) + else x + for x in SLOW_TESTS + ] +) @pytest.mark.parametrize("text,expected_tokens", TESTS) diff --git a/spacy/tests/lang/sv/test_text.py b/spacy/tests/lang/sv/test_text.py index dc4911ab6..1e26c45bc 100644 --- a/spacy/tests/lang/sv/test_text.py +++ b/spacy/tests/lang/sv/test_text.py @@ -1,4 +1,3 @@ - def test_sv_tokenizer_handles_long_text(sv_tokenizer): text = """Det var så härligt ute på landet. 
Det var sommar, majsen var gul, havren grön, höet var uppställt i stackar nere vid den gröna ängen, och där gick storken på sina långa, diff --git a/spacy/tests/lang/zh/test_text.py b/spacy/tests/lang/zh/test_text.py index d48feaee5..d9a65732e 100644 --- a/spacy/tests/lang/zh/test_text.py +++ b/spacy/tests/lang/zh/test_text.py @@ -1,4 +1,3 @@ - import pytest diff --git a/spacy/tests/morphology/test_morph_converters.py b/spacy/tests/morphology/test_morph_converters.py index 3bff4f924..9486cad45 100644 --- a/spacy/tests/morphology/test_morph_converters.py +++ b/spacy/tests/morphology/test_morph_converters.py @@ -1,4 +1,3 @@ -import pytest from spacy.morphology import Morphology diff --git a/spacy/tests/morphology/test_morph_features.py b/spacy/tests/morphology/test_morph_features.py index 0d8d7dea9..f644a5867 100644 --- a/spacy/tests/morphology/test_morph_features.py +++ b/spacy/tests/morphology/test_morph_features.py @@ -24,13 +24,20 @@ def test_add_morphology_with_int_ids(morphology): morphology.strings.add("gen") morphology.strings.add("Number") morphology.strings.add("sing") - morphology.add({get_string_id("Case"): get_string_id("gen"), get_string_id("Number"): get_string_id("sing")}) + morphology.add( + { + get_string_id("Case"): get_string_id("gen"), + get_string_id("Number"): get_string_id("sing"), + } + ) def test_add_morphology_with_mix_strings_and_ints(morphology): morphology.strings.add("PunctSide") morphology.strings.add("ini") - morphology.add({get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"}) + morphology.add( + {get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"} + ) def test_morphology_tags_hash_distinctly(morphology): diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py index 25892ac71..fe847a6ae 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -1,6 +1,5 @@ import pytest -from thinc.optimizers import Adam -from thinc.backends import NumpyOps +from thinc.api import Adam, NumpyOps from spacy.attrs import NORM from spacy.gold import GoldParse from spacy.vocab import Vocab diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index 8dda1f406..9a4d21a8d 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -10,7 +10,7 @@ from spacy.tokens import Doc TRAIN_DATA = [ ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}), ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}), - ] +] @pytest.fixture diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py index 5e56442b5..c6c1240a8 100644 --- a/spacy/tests/parser/test_preset_sbd.py +++ b/spacy/tests/parser/test_preset_sbd.py @@ -1,6 +1,5 @@ import pytest -from thinc.optimizers import Adam -from thinc.backends import NumpyOps +from thinc.api import Adam from spacy.attrs import NORM from spacy.gold import GoldParse from spacy.vocab import Vocab diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py index 234603e94..b04569e22 100644 --- a/spacy/tests/pipeline/test_entity_ruler.py +++ b/spacy/tests/pipeline/test_entity_ruler.py @@ -149,10 +149,5 @@ def test_entity_ruler_validate(nlp): def test_entity_ruler_properties(nlp, patterns): ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True) - assert sorted(ruler.labels) == sorted([ - "HELLO", - "BYE", - "COMPLEX", - "TECH_ORG" - ]) + assert sorted(ruler.labels) == sorted(["HELLO", "BYE", "COMPLEX", 
"TECH_ORG"]) assert sorted(ruler.ent_ids) == ["a1", "a2"] diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index 6a6ec8665..366cd4f1a 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -1,5 +1,4 @@ import pytest -import srsly from spacy.language import Language diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index d9e1d663a..2bfdbd7c3 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -270,7 +270,9 @@ def test_issue1963(en_tokenizer): def test_issue1967(label): ner = EntityRecognizer(Vocab()) example = Example(doc=None) - example.set_token_annotation(ids=[0], words=["word"], tags=["tag"], heads=[0], deps=["dep"], entities=[label]) + example.set_token_annotation( + ids=[0], words=["word"], tags=["tag"], heads=[0], deps=["dep"], entities=[label] + ) ner.moves.get_actions(gold_parses=[example]) diff --git a/spacy/tests/regression/test_issue3611.py b/spacy/tests/regression/test_issue3611.py index fca884356..120cea1d2 100644 --- a/spacy/tests/regression/test_issue3611.py +++ b/spacy/tests/regression/test_issue3611.py @@ -39,8 +39,5 @@ def test_issue3611(): for batch in batches: nlp.update( - examples=batch, - sgd=optimizer, - drop=0.1, - losses=losses, + examples=batch, sgd=optimizer, drop=0.1, losses=losses, ) diff --git a/spacy/tests/regression/test_issue4030.py b/spacy/tests/regression/test_issue4030.py index 7153594db..7158d9b21 100644 --- a/spacy/tests/regression/test_issue4030.py +++ b/spacy/tests/regression/test_issue4030.py @@ -39,10 +39,7 @@ def test_issue4030(): for batch in batches: nlp.update( - examples=batch, - sgd=optimizer, - drop=0.1, - losses=losses, + examples=batch, sgd=optimizer, drop=0.1, losses=losses, ) # processing of an empty doc should result in 0.0 for all categories diff --git a/spacy/tests/test_architectures.py b/spacy/tests/test_architectures.py index 786e2cedf..31b2a2d2f 100644 --- a/spacy/tests/test_architectures.py +++ b/spacy/tests/test_architectures.py @@ -1,6 +1,6 @@ import pytest from spacy import registry -from thinc.layers import Linear +from thinc.api import Linear from catalogue import RegistryError diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 049858960..306adc881 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -65,8 +65,9 @@ def test_cli_converters_conllu2json_subtokens(): "5\t.\t$.\tPUNCT\t_\t_\t4\tpunct\t_\tname=O", ] input_data = "\n".join(lines) - converted = conllu2json(input_data, n_sents=1, merge_subtokens=True, - append_morphology=True) + converted = conllu2json( + input_data, n_sents=1, merge_subtokens=True, append_morphology=True + ) assert len(converted) == 1 assert converted[0]["id"] == 0 assert len(converted[0]["paragraphs"]) == 1 @@ -81,11 +82,16 @@ def test_cli_converters_conllu2json_subtokens(): "NOUN__Definite=Ind|Gender=Masc|Number=Sing", "PROPN_X__Gender=Fem,Masc|Tense=past", "VERB__Mood=Ind|Tense=Pres|VerbForm=Fin", - "PUNCT" + "PUNCT", ] - assert [t["pos"] for t in tokens] == ['NOUN', 'PROPN', 'VERB', 'PUNCT'] - assert [t["morph"] for t in tokens] == ['Definite=Ind|Gender=Masc|Number=Sing', 'Gender=Fem,Masc|Tense=past', 'Mood=Ind|Tense=Pres|VerbForm=Fin', ''] - assert [t["lemma"] for t in tokens] == ['dommer', 'Finn Eilertsen', 'avstå', '$.'] + assert [t["pos"] for t in tokens] == ["NOUN", "PROPN", "VERB", "PUNCT"] + assert [t["morph"] for t in tokens] == [ + 
"Definite=Ind|Gender=Masc|Number=Sing", + "Gender=Fem,Masc|Tense=past", + "Mood=Ind|Tense=Pres|VerbForm=Fin", + "", + ] + assert [t["lemma"] for t in tokens] == ["dommer", "Finn Eilertsen", "avstå", "$."] assert [t["head"] for t in tokens] == [1, 1, 0, -1] assert [t["dep"] for t in tokens] == ["appos", "nsubj", "ROOT", "punct"] assert [t["ner"] for t in tokens] == ["O", "U-PER", "O", "O"] diff --git a/spacy/tests/tokenizer/test_exceptions.py b/spacy/tests/tokenizer/test_exceptions.py index 8276d7aea..9a98e049e 100644 --- a/spacy/tests/tokenizer/test_exceptions.py +++ b/spacy/tests/tokenizer/test_exceptions.py @@ -4,7 +4,9 @@ import pytest def test_tokenizer_handles_emoticons(tokenizer): # Tweebo challenge (CMU) - text = """:o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| :> ....""" + text = ( + """:o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| :> ....""" + ) tokens = tokenizer(text) assert tokens[0].text == ":o" assert tokens[1].text == ":/" diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py index 3dce1ae31..c035559b4 100644 --- a/spacy/tests/tokenizer/test_tokenizer.py +++ b/spacy/tests/tokenizer/test_tokenizer.py @@ -130,7 +130,19 @@ def test_tokenizer_special_cases_with_affixes(tokenizer): tokenizer.add_special_case("_SPECIAL_", [{"orth": "_SPECIAL_"}]) tokenizer.add_special_case("A/B", [{"orth": "A/B"}]) doc = tokenizer(text) - assert [token.text for token in doc] == ["(", "(", "(", "_SPECIAL_", "A/B", ",", "A/B", "-", "A/B", '"', ")"] + assert [token.text for token in doc] == [ + "(", + "(", + "(", + "_SPECIAL_", + "A/B", + ",", + "A/B", + "-", + "A/B", + '"', + ")", + ] def test_tokenizer_special_cases_with_period(tokenizer): diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx index ec7e8a9e8..337c154a2 100644 --- a/spacy/tokens/_retokenize.pyx +++ b/spacy/tokens/_retokenize.pyx @@ -4,8 +4,8 @@ from libc.string cimport memcpy, memset from libc.stdlib cimport malloc, free from cymem.cymem cimport Pool -from thinc.util import get_array_module +from thinc.api import get_array_module import numpy from .doc cimport Doc, set_children_from_heads, token_by_start, token_by_end diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 4a18acd77..65b70d1b3 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -1,7 +1,7 @@ import numpy import zlib import srsly -from thinc.backends import NumpyOps +from thinc.api import NumpyOps from ..compat import copy_reg from ..tokens import Doc diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 16ef5f966..54d92f8b1 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -11,7 +11,8 @@ import numpy import numpy.linalg import struct import srsly -from thinc.util import get_array_module, copy_array +from thinc.api import get_array_module +from thinc.util import copy_array from .span cimport Span from .token cimport Token diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 796a5e674..d6b50b5f4 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -4,7 +4,7 @@ from libc.math cimport sqrt import numpy import numpy.linalg -from thinc.util import get_array_module +from thinc.api import get_array_module from collections import defaultdict from .doc cimport token_by_start, token_by_end, get_token_attr, _get_lca_matrix diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index c241cd5ad..379da6c77 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -7,7 +7,7 @@ 
cimport numpy as np np.import_array() import numpy -from thinc.util import get_array_module +from thinc.api import get_array_module from ..typedefs cimport hash_t from ..lexeme cimport Lexeme diff --git a/spacy/util.py b/spacy/util.py index 0cc11cef7..995ff722f 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -5,13 +5,9 @@ import re from pathlib import Path import random from typing import List - import thinc import thinc.config -from thinc.backends import NumpyOps, get_current_ops -from thinc.optimizers import Adam -from thinc.util import require_gpu - +from thinc.api import NumpyOps, get_current_ops, Adam, require_gpu import functools import itertools import numpy.random @@ -797,5 +793,13 @@ def create_default_optimizer(): eps = env_opt("optimizer_eps", 1e-8) L2 = env_opt("L2_penalty", 1e-6) grad_clip = env_opt("grad_norm_clip", 1.0) - optimizer = Adam(learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps, ops=ops, grad_clip=grad_clip) + optimizer = Adam( + learn_rate, + L2=L2, + beta1=beta1, + beta2=beta2, + eps=eps, + ops=ops, + grad_clip=grad_clip, + ) return optimizer diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index f812acac4..0ade8b280 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -5,8 +5,7 @@ from libcpp.set cimport set as cppset import functools import numpy import srsly -from thinc.util import get_array_module -from thinc.backends import get_current_ops +from thinc.api import get_array_module, get_current_ops from .strings cimport StringStore diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 3da9978c4..a1929559f 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -2,7 +2,7 @@ from libc.string cimport memcpy import srsly -from thinc.util import get_array_module +from thinc.api import get_array_module from .lexeme cimport EMPTY_LEXEME from .lexeme cimport Lexeme
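Note on the recurring patterns above (an illustrative sketch, not part of the patch): nearly every hunk applies one of two mechanical rewrites. First, thinc imports are consolidated from internal submodules (thinc.layers, thinc.util, thinc.backends, thinc.optimizers, thinc.loss) onto the public thinc.api surface; second, str.format() calls are replaced with f-strings, alongside black-style reflowing. A minimal Python illustration, using only names that appear in the hunks themselves (the variable value is hypothetical):

    # Before: importing from thinc internals, one submodule at a time.
    # from thinc.layers import Linear, Maxout, chain, list2array
    # from thinc.util import prefer_gpu
    # from thinc.loss import CosineDistance

    # After: the same names via the single public entry point.
    from thinc.api import Linear, Maxout, chain, list2array, prefer_gpu, CosineDistance

    # Before: "Created output directory: {}".format(output_dir)
    # After: an f-string, as in cli/train.py and cli/pretrain.py above.
    output_dir = "training-output"  # hypothetical value for illustration
    print(f"Created output directory: {output_dir}")

Both rewrites are behavior-preserving: thinc.api re-exports the same objects as the internal modules, and each f-string formats identically to the .format() call it replaces.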