" namespace "std" nogil:
void swap[T](T& a, T& b) except + # Only available in Cython 3.
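
For readers unfamiliar with the construct above: the `cdef extern` block exposes C++ `std::swap` so Cython code can exchange two C++ containers in constant time, without copying elements. A minimal sketch of how such a declaration might be used follows; the module name and `demo` function are illustrative assumptions, not part of spaCy, and it assumes Cython 3 with C++ compilation enabled:

# demo.pyx -- illustrative sketch only, not spaCy code
# distutils: language = c++
from libcpp.vector cimport vector

cdef extern from "<algorithm>" namespace "std" nogil:
    void swap[T](T& a, T& b) except +  # Only available in Cython 3.

def demo():
    cdef vector[int] a = [1, 2, 3]
    cdef vector[int] b = [4, 5]
    with nogil:
        swap(a, b)  # constant-time exchange of the vectors' internal buffers
    return list(a), list(b)  # -> ([4, 5], [1, 2, 3])
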
diff --git a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
index 9d18c0334..daab0d204 100644
--- a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
+++ b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
@@ -1,7 +1,6 @@
# cython: infer_types=True, binding=True
from cython.operator cimport dereference as deref
-from libc.stdint cimport uint32_t
-from libc.stdint cimport UINT32_MAX
+from libc.stdint cimport UINT32_MAX, uint32_t
from libc.string cimport memset
from libcpp.pair cimport pair
from libcpp.vector cimport vector
@@ -15,7 +14,6 @@ from ...errors import Errors
from ...strings import StringStore
from .schemas import validate_edit_tree
-
NULL_TREE_ID = UINT32_MAX
cdef LCS find_lcs(str source, str target):
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index c01d0632e..1e307b66c 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,5 +1,6 @@
-from typing import Any, Dict, List, Union
from collections import defaultdict
+from typing import Any, Dict, List, Union
+
from pydantic import BaseModel, Field, ValidationError
from pydantic.types import StrictBool, StrictInt, StrictStr
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pxd b/spacy/pipeline/_parser_internals/_beam_utils.pxd
index de3573fbc..596306b23 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pxd
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pxd
@@ -1,5 +1,6 @@
from ...typedefs cimport class_t, hash_t
+
# These are passed as callbacks to thinc.search.Beam
cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx
index fa7df2056..04dd3f11e 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pyx
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx
@@ -1,15 +1,21 @@
# cython: infer_types=True
# cython: profile=True
cimport numpy as np
+
import numpy
-from cpython.ref cimport PyObject, Py_XDECREF
+
+from cpython.ref cimport Py_XDECREF, PyObject
from thinc.extra.search cimport Beam
+
from thinc.extra.search import MaxViolation
+
from thinc.extra.search cimport MaxViolation
-from ...typedefs cimport hash_t, class_t
-from .transition_system cimport TransitionSystem, Transition
+from ...typedefs cimport class_t, hash_t
+from .transition_system cimport Transition, TransitionSystem
+
from ...errors import Errors
+
from .stateclass cimport StateC, StateClass
diff --git a/spacy/pipeline/_parser_internals/_state.pxd b/spacy/pipeline/_parser_internals/_state.pxd
index a1262bb61..24acc350c 100644
--- a/spacy/pipeline/_parser_internals/_state.pxd
+++ b/spacy/pipeline/_parser_internals/_state.pxd
@@ -1,19 +1,20 @@
-from cython.operator cimport dereference as deref, preincrement as incr
-from libc.string cimport memcpy, memset
-from libc.stdlib cimport calloc, free
-from libc.stdint cimport uint32_t, uint64_t
cimport libcpp
+from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
+from cython.operator cimport dereference as deref
+from cython.operator cimport preincrement as incr
+from libc.stdint cimport uint32_t, uint64_t
+from libc.stdlib cimport calloc, free
+from libc.string cimport memcpy, memset
+from libcpp.set cimport set
from libcpp.unordered_map cimport unordered_map
from libcpp.vector cimport vector
-from libcpp.set cimport set
-from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
from murmurhash.mrmr cimport hash64
-from ...vocab cimport EMPTY_LEXEME
-from ...structs cimport TokenC, SpanC
-from ...lexeme cimport Lexeme
from ...attrs cimport IS_SPACE
+from ...lexeme cimport Lexeme
+from ...structs cimport SpanC, TokenC
from ...typedefs cimport attr_t
+from ...vocab cimport EMPTY_LEXEME
cdef inline bint is_space_token(const TokenC* token) nogil:
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pxd b/spacy/pipeline/_parser_internals/arc_eager.pxd
index b618bc587..2c17e7b26 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pxd
+++ b/spacy/pipeline/_parser_internals/arc_eager.pxd
@@ -1,5 +1,5 @@
+from ...typedefs cimport attr_t, weight_t
from ._state cimport StateC
-from ...typedefs cimport weight_t, attr_t
from .transition_system cimport Transition, TransitionSystem
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 257b5ef8a..2c9eb0ff5 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -1,22 +1,27 @@
# cython: profile=True, cdivision=True, infer_types=True
-from cymem.cymem cimport Pool, Address
+from cymem.cymem cimport Address, Pool
from libc.stdint cimport int32_t
from libcpp.vector cimport vector
-from collections import defaultdict, Counter
+from collections import Counter, defaultdict
-from ...typedefs cimport hash_t, attr_t
from ...strings cimport hash_string
from ...structs cimport TokenC
from ...tokens.doc cimport Doc, set_children_from_heads
from ...tokens.token cimport MISSING_DEP
+from ...typedefs cimport attr_t, hash_t
+
from ...training import split_bilu_label
+
from ...training.example cimport Example
+from ._state cimport ArcC, StateC
from .stateclass cimport StateClass
-from ._state cimport StateC, ArcC
+
from ...errors import Errors
+
from thinc.extra.search cimport Beam
+
cdef weight_t MIN_SCORE = -90000
cdef attr_t SUBTOK_LABEL = hash_string('subtok')
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index fab872f00..e1edb4464 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -1,22 +1,28 @@
import os
import random
-from libc.stdint cimport int32_t
+
from cymem.cymem cimport Pool
+from libc.stdint cimport int32_t
from collections import Counter
+
from thinc.extra.search cimport Beam
from ...tokens.doc cimport Doc
+
from ...tokens.span import Span
-from ...tokens.span cimport Span
-from ...typedefs cimport weight_t, attr_t
-from ...lexeme cimport Lexeme
+
from ...attrs cimport IS_SPACE
-from ...structs cimport TokenC, SpanC
+from ...lexeme cimport Lexeme
+from ...structs cimport SpanC, TokenC
+from ...tokens.span cimport Span
+from ...typedefs cimport attr_t, weight_t
+
from ...training import split_bilu_label
+
from ...training.example cimport Example
-from .stateclass cimport StateClass
from ._state cimport StateC
+from .stateclass cimport StateClass
from .transition_system cimport Transition, do_func_t
from ...errors import Errors
diff --git a/spacy/pipeline/_parser_internals/nonproj.pxd b/spacy/pipeline/_parser_internals/nonproj.pxd
index aabdf7ebe..1a349d56a 100644
--- a/spacy/pipeline/_parser_internals/nonproj.pxd
+++ b/spacy/pipeline/_parser_internals/nonproj.pxd
@@ -1,4 +1,5 @@
from libcpp.string cimport string
+
cdef extern from "nonproj.hh":
cdef void raise_domain_error(const string& msg) nogil except +
diff --git a/spacy/pipeline/_parser_internals/nonproj.pyx b/spacy/pipeline/_parser_internals/nonproj.pyx
index d1b6e7066..66f423b3b 100644
--- a/spacy/pipeline/_parser_internals/nonproj.pyx
+++ b/spacy/pipeline/_parser_internals/nonproj.pyx
@@ -4,19 +4,20 @@ for doing pseudo-projective parsing implementation uses the HEAD decoration
scheme.
"""
from copy import copy
-from cython.operator cimport preincrement as incr, dereference as deref
+
+from cython.operator cimport dereference as deref
+from cython.operator cimport preincrement as incr
from libc.limits cimport INT_MAX
from libc.stdlib cimport abs
from libcpp cimport bool
from libcpp.string cimport string, to_string
-from libcpp.vector cimport vector
from libcpp.unordered_set cimport unordered_set
+from libcpp.vector cimport vector
from ...tokens.doc cimport Doc, set_children_from_heads
from ...errors import Errors
-
DELIMITER = '||'
diff --git a/spacy/pipeline/_parser_internals/stateclass.pxd b/spacy/pipeline/_parser_internals/stateclass.pxd
index 54ff344b9..b8ecc1bbf 100644
--- a/spacy/pipeline/_parser_internals/stateclass.pxd
+++ b/spacy/pipeline/_parser_internals/stateclass.pxd
@@ -1,9 +1,8 @@
from cymem.cymem cimport Pool
-from ...structs cimport TokenC, SpanC
-from ...typedefs cimport attr_t
+from ...structs cimport SpanC, TokenC
from ...tokens.doc cimport Doc
-
+from ...typedefs cimport attr_t
from ._state cimport StateC
diff --git a/spacy/pipeline/_parser_internals/stateclass.pyx b/spacy/pipeline/_parser_internals/stateclass.pyx
index 4eaddd997..0a2657af1 100644
--- a/spacy/pipeline/_parser_internals/stateclass.pyx
+++ b/spacy/pipeline/_parser_internals/stateclass.pyx
@@ -1,9 +1,10 @@
# cython: infer_types=True
import numpy
+
from libcpp.vector cimport vector
-from ._state cimport ArcC
from ...tokens.doc cimport Doc
+from ._state cimport ArcC
cdef class StateClass:
diff --git a/spacy/pipeline/_parser_internals/transition_system.pxd b/spacy/pipeline/_parser_internals/transition_system.pxd
index 52ebd2b8e..ce17480d4 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pxd
+++ b/spacy/pipeline/_parser_internals/transition_system.pxd
@@ -1,11 +1,11 @@
from cymem.cymem cimport Pool
-from ...typedefs cimport attr_t, weight_t
-from ...structs cimport TokenC
from ...strings cimport StringStore
+from ...structs cimport TokenC
from ...training.example cimport Example
-from .stateclass cimport StateClass
+from ...typedefs cimport attr_t, weight_t
from ._state cimport StateC
+from .stateclass cimport StateClass
cdef struct Transition:
diff --git a/spacy/pipeline/_parser_internals/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx
index 18eb745a9..053c87f22 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pyx
+++ b/spacy/pipeline/_parser_internals/transition_system.pyx
@@ -1,18 +1,20 @@
# cython: infer_types=True
from __future__ import print_function
+
from cymem.cymem cimport Pool
from collections import Counter
+
import srsly
-from . cimport _beam_utils
-from ...typedefs cimport weight_t, attr_t
-from ...tokens.doc cimport Doc
from ...structs cimport TokenC
+from ...tokens.doc cimport Doc
+from ...typedefs cimport attr_t, weight_t
+from . cimport _beam_utils
from .stateclass cimport StateClass
-from ...errors import Errors
from ... import util
+from ...errors import Errors
cdef weight_t MIN_SCORE = -90000
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index 0d9494865..8ac74d92b 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -1,21 +1,20 @@
-from typing import List, Dict, Union, Iterable, Any, Optional, Callable
-from typing import Tuple
-import srsly
from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
-from .pipe import Pipe
+import srsly
+
+from .. import util
from ..errors import Errors
-from ..training import Example
from ..language import Language
from ..matcher import Matcher
from ..scorer import Scorer
from ..symbols import IDS
from ..tokens import Doc, Span
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
-from ..vocab import Vocab
+from ..training import Example
from ..util import SimpleFrozenList, registry
-from .. import util
-
+from ..vocab import Vocab
+from .pipe import Pipe
MatcherPatternType = List[Dict[Union[int, str], Any]]
AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index e5f686158..cb896c385 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -1,20 +1,21 @@
# cython: infer_types=True, profile=True, binding=True
from collections import defaultdict
-from typing import Optional, Iterable, Callable
-from thinc.api import Model, Config
+from typing import Callable, Iterable, Optional
+
+from thinc.api import Config, Model
from ._parser_internals.transition_system import TransitionSystem
-from .transition_parser cimport Parser
-from ._parser_internals.arc_eager cimport ArcEager
-from .functions import merge_subtokens
+from ._parser_internals.arc_eager cimport ArcEager
+from .transition_parser cimport Parser
+
from ..language import Language
-from ._parser_internals import nonproj
-from ._parser_internals.nonproj import DELIMITER
from ..scorer import Scorer
from ..training import remove_bilu_prefix
from ..util import registry
-
+from ._parser_internals import nonproj
+from ._parser_internals.nonproj import DELIMITER
+from .functions import merge_subtokens
default_model_config = """
[model]
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 332badd8c..4a6174bc3 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -1,24 +1,22 @@
-from typing import cast, Any, Callable, Dict, Iterable, List, Optional
-from typing import Tuple
from collections import Counter
from itertools import islice
-import numpy as np
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
+import numpy as np
import srsly
-from thinc.api import Config, Model, SequenceCategoricalCrossentropy, NumpyOps
+from thinc.api import Config, Model, NumpyOps, SequenceCategoricalCrossentropy
from thinc.types import Floats2d, Ints2d
-from ._edit_tree_internals.edit_trees import EditTrees
-from ._edit_tree_internals.schemas import validate_edit_tree
-from .lemmatizer import lemmatizer_score
-from .trainable_pipe import TrainablePipe
+from .. import util
from ..errors import Errors
from ..language import Language
from ..tokens import Doc
from ..training import Example, validate_examples, validate_get_examples
from ..vocab import Vocab
-from .. import util
-
+from ._edit_tree_internals.edit_trees import EditTrees
+from ._edit_tree_internals.schemas import validate_edit_tree
+from .lemmatizer import lemmatizer_score
+from .trainable_pipe import TrainablePipe
# The cutoff value of *top_k* above which an alternative method is used to process guesses.
TOP_K_GUARDRAIL = 20
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 76ccc3247..a730ece1b 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -1,25 +1,25 @@
-from typing import Optional, Iterable, Callable, Dict, Union, List, Any
-from thinc.types import Floats2d
-from pathlib import Path
-from itertools import islice
-import srsly
import random
-from thinc.api import CosineDistance, Model, Optimizer, Config
-from thinc.api import set_dropout_rate
+from itertools import islice
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Union
+
+import srsly
+from thinc.api import Config, CosineDistance, Model, Optimizer, set_dropout_rate
+from thinc.types import Floats2d
-from ..kb import KnowledgeBase, Candidate
-from ..ml import empty_kb
-from ..tokens import Doc, Span
-from .pipe import deserialize_config
-from .legacy.entity_linker import EntityLinker_v1
-from .trainable_pipe import TrainablePipe
-from ..language import Language
-from ..vocab import Vocab
-from ..training import Example, validate_examples, validate_get_examples
-from ..errors import Errors
-from ..util import SimpleFrozenList, registry
from .. import util
+from ..errors import Errors
+from ..kb import Candidate, KnowledgeBase
+from ..language import Language
+from ..ml import empty_kb
from ..scorer import Scorer
+from ..tokens import Doc, Span
+from ..training import Example, validate_examples, validate_get_examples
+from ..util import SimpleFrozenList, registry
+from ..vocab import Vocab
+from .legacy.entity_linker import EntityLinker_v1
+from .pipe import deserialize_config
+from .trainable_pipe import TrainablePipe
# See #9050
BACKWARD_OVERWRITE = True
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 6a3755533..3683cfc02 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -1,19 +1,19 @@
-from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
import warnings
from collections import defaultdict
from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+
import srsly
-from .pipe import Pipe
-from ..training import Example
-from ..language import Language
from ..errors import Errors, Warnings
-from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList, registry
-from ..tokens import Doc, Span
+from ..language import Language
from ..matcher import Matcher, PhraseMatcher
from ..matcher.levenshtein import levenshtein_compare
from ..scorer import get_ner_prf
-
+from ..tokens import Doc, Span
+from ..training import Example
+from ..util import SimpleFrozenList, ensure_path, from_disk, registry, to_disk
+from .pipe import Pipe
DEFAULT_ENT_ID_SEP = "||"
PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py
index c005395bf..2bf0437d5 100644
--- a/spacy/pipeline/functions.py
+++ b/spacy/pipeline/functions.py
@@ -1,12 +1,13 @@
-from typing import Dict, Any
-import srsly
import warnings
+from typing import Any, Dict
+import srsly
+
+from .. import util
from ..errors import Warnings
from ..language import Language
from ..matcher import Matcher
from ..tokens import Doc
-from .. import util
@Language.component(
diff --git a/spacy/pipeline/legacy/entity_linker.py b/spacy/pipeline/legacy/entity_linker.py
index c14dfa1db..1e46db019 100644
--- a/spacy/pipeline/legacy/entity_linker.py
+++ b/spacy/pipeline/legacy/entity_linker.py
@@ -1,28 +1,28 @@
# This file is present to provide a prior version of the EntityLinker component
# for backwards compatability. For details see #9669.
-from typing import Optional, Iterable, Callable, Dict, Union, List, Any
-from thinc.types import Floats2d
-from pathlib import Path
-from itertools import islice
-import srsly
import random
-from thinc.api import CosineDistance, Model, Optimizer
-from thinc.api import set_dropout_rate
import warnings
+from itertools import islice
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Union
-from ...kb import KnowledgeBase, Candidate
+import srsly
+from thinc.api import CosineDistance, Model, Optimizer, set_dropout_rate
+from thinc.types import Floats2d
+
+from ... import util
+from ...errors import Errors, Warnings
+from ...kb import Candidate, KnowledgeBase
+from ...language import Language
from ...ml import empty_kb
+from ...scorer import Scorer
from ...tokens import Doc, Span
+from ...training import Example, validate_examples, validate_get_examples
+from ...util import SimpleFrozenList
+from ...vocab import Vocab
from ..pipe import deserialize_config
from ..trainable_pipe import TrainablePipe
-from ...language import Language
-from ...vocab import Vocab
-from ...training import Example, validate_examples, validate_get_examples
-from ...errors import Errors, Warnings
-from ...util import SimpleFrozenList
-from ... import util
-from ...scorer import Scorer
# See #9050
BACKWARD_OVERWRITE = True
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index 9c2fc2f09..09e501595 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -1,19 +1,19 @@
-from typing import Optional, List, Dict, Any, Callable, Iterable, Union, Tuple
-from thinc.api import Model
-from pathlib import Path
-
import warnings
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
-from .pipe import Pipe
+from thinc.api import Model
+
+from .. import util
from ..errors import Errors, Warnings
from ..language import Language
-from ..training import Example
from ..lookups import Lookups, load_lookups
from ..scorer import Scorer
from ..tokens import Doc, Token
+from ..training import Example
+from ..util import SimpleFrozenList, logger, registry
from ..vocab import Vocab
-from ..util import logger, SimpleFrozenList, registry
-from .. import util
+from .pipe import Pipe
@Language.factory(
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index be8f82212..4ca0ce165 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -1,23 +1,24 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, Union, Dict, Callable
-import srsly
-from thinc.api import SequenceCategoricalCrossentropy, Model, Config
from itertools import islice
+from typing import Callable, Dict, Optional, Union
+import srsly
+from thinc.api import Config, Model, SequenceCategoricalCrossentropy
+
+from ..morphology cimport Morphology
from ..tokens.doc cimport Doc
from ..vocab cimport Vocab
-from ..morphology cimport Morphology
-from ..parts_of_speech import IDS as POS_IDS
-from ..symbols import POS
-from ..language import Language
-from ..errors import Errors
-from .pipe import deserialize_config
-from .tagger import Tagger
from .. import util
+from ..errors import Errors
+from ..language import Language
+from ..parts_of_speech import IDS as POS_IDS
from ..scorer import Scorer
+from ..symbols import POS
from ..training import validate_examples, validate_get_examples
from ..util import registry
+from .pipe import deserialize_config
+from .tagger import Tagger
# See #9050
BACKWARD_OVERWRITE = True
diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx
index 8c44061e2..6b62c0811 100644
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@@ -1,19 +1,18 @@
# cython: infer_types=True, profile=True, binding=True
from typing import Optional
+
import numpy
-from thinc.api import CosineDistance, to_categorical, Model, Config
-from thinc.api import set_dropout_rate
+from thinc.api import Config, CosineDistance, Model, set_dropout_rate, to_categorical
from ..tokens.doc cimport Doc
-from .trainable_pipe import TrainablePipe
-from .tagger import Tagger
-from ..training import validate_examples
-from ..language import Language
-from ._parser_internals import nonproj
-from ..attrs import POS, ID
+from ..attrs import ID, POS
from ..errors import Errors
-
+from ..language import Language
+from ..training import validate_examples
+from ._parser_internals import nonproj
+from .tagger import Tagger
+from .trainable_pipe import TrainablePipe
default_model_config = """
[model]
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index 25f48c9f8..8dd6c3c43 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -1,16 +1,18 @@
# cython: infer_types=True, profile=True, binding=True
from collections import defaultdict
-from typing import Optional, Iterable, Callable
-from thinc.api import Model, Config
+from typing import Callable, Iterable, Optional
+
+from thinc.api import Config, Model
from ._parser_internals.transition_system import TransitionSystem
-from .transition_parser cimport Parser
-from ._parser_internals.ner cimport BiluoPushDown
-from ..language import Language
-from ..scorer import get_ner_prf, PRFScore
-from ..util import registry
-from ..training import remove_bilu_prefix
+from ._parser_internals.ner cimport BiluoPushDown
+from .transition_parser cimport Parser
+
+from ..language import Language
+from ..scorer import PRFScore, get_ner_prf
+from ..training import remove_bilu_prefix
+from ..util import registry
default_model_config = """
[model]
diff --git a/spacy/pipeline/pipe.pyi b/spacy/pipeline/pipe.pyi
index 9dd6a9d50..9a1c11cef 100644
--- a/spacy/pipeline/pipe.pyi
+++ b/spacy/pipeline/pipe.pyi
@@ -1,11 +1,20 @@
from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Iterator, List
-from typing import NoReturn, Optional, Tuple, Union
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ NoReturn,
+ Optional,
+ Tuple,
+ Union,
+)
-from ..tokens.doc import Doc
-
-from ..training import Example
from ..language import Language
+from ..tokens.doc import Doc
+from ..training import Example
class Pipe:
def __call__(self, doc: Doc) -> Doc: ...
diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx
index 8407acc45..42f518882 100644
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@@ -1,15 +1,17 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, Tuple, Iterable, Iterator, Callable, Union, Dict
-import srsly
import warnings
+from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple, Union
+
+import srsly
from ..tokens.doc cimport Doc
-from ..training import Example
from ..errors import Errors, Warnings
from ..language import Language
+from ..training import Example
from ..util import raise_error
+
cdef class Pipe:
"""This class is a base class and not instantiated directly. It provides
an interface for pipeline components to implement.
diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index 77f4e8adb..2fe7e1540 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -1,14 +1,15 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, List, Callable
+from typing import Callable, List, Optional
+
import srsly
from ..tokens.doc cimport Doc
-from .pipe import Pipe
-from .senter import senter_score
+from .. import util
from ..language import Language
from ..scorer import Scorer
-from .. import util
+from .pipe import Pipe
+from .senter import senter_score
# see #9050
BACKWARD_OVERWRITE = False
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index 6808fe70e..26f98ba59 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -1,19 +1,19 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, Callable
from itertools import islice
+from typing import Callable, Optional
import srsly
-from thinc.api import Model, SequenceCategoricalCrossentropy, Config
+from thinc.api import Config, Model, SequenceCategoricalCrossentropy
from ..tokens.doc cimport Doc
-from .tagger import Tagger
-from ..language import Language
+from .. import util
from ..errors import Errors
+from ..language import Language
from ..scorer import Scorer
from ..training import validate_examples, validate_get_examples
from ..util import registry
-from .. import util
+from .tagger import Tagger
# See #9050
BACKWARD_OVERWRITE = False
diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index da3c38430..91be2f2ae 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -3,15 +3,14 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from thinc.api import Config, Model, Optimizer, set_dropout_rate
from thinc.types import Floats2d
+from ..errors import Errors
from ..language import Language
-from .trainable_pipe import TrainablePipe
from ..scorer import Scorer
from ..tokens import Doc, Span
from ..training import Example
-from ..errors import Errors
-
from ..util import registry
from .spancat import DEFAULT_SPANS_KEY
+from .trainable_pipe import TrainablePipe
span_finder_default_config = """
[model]
diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py
index b0669c0ef..2a5e2179a 100644
--- a/spacy/pipeline/span_ruler.py
+++ b/spacy/pipeline/span_ruler.py
@@ -1,20 +1,32 @@
-from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable
-from typing import Sequence, Set, cast
import warnings
from functools import partial
from pathlib import Path
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Union,
+ cast,
+)
+
import srsly
-from .pipe import Pipe
-from ..training import Example
-from ..language import Language
+from .. import util
from ..errors import Errors, Warnings
-from ..util import ensure_path, SimpleFrozenList, registry
-from ..tokens import Doc, Span
-from ..scorer import Scorer
+from ..language import Language
from ..matcher import Matcher, PhraseMatcher
from ..matcher.levenshtein import levenshtein_compare
-from .. import util
+from ..scorer import Scorer
+from ..tokens import Doc, Span
+from ..training import Example
+from ..util import SimpleFrozenList, ensure_path, registry
+from .pipe import Pipe
PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
DEFAULT_SPANS_KEY = "ruler"
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 4d5d78035..47aae2bb7 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -1,26 +1,27 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Callable, Optional
-import numpy
-import srsly
-from thinc.api import Model, set_dropout_rate, SequenceCategoricalCrossentropy, Config
-from thinc.types import Floats2d
import warnings
from itertools import islice
+from typing import Callable, Optional
+
+import numpy
+import srsly
+from thinc.api import Config, Model, SequenceCategoricalCrossentropy, set_dropout_rate
+from thinc.types import Floats2d
-from ..tokens.doc cimport Doc
from ..morphology cimport Morphology
+from ..tokens.doc cimport Doc
from ..vocab cimport Vocab
-from .trainable_pipe import TrainablePipe
-from .pipe import deserialize_config
-from ..language import Language
-from ..attrs import POS, ID
-from ..parts_of_speech import X
+from .. import util
+from ..attrs import ID, POS
from ..errors import Errors, Warnings
+from ..language import Language
+from ..parts_of_speech import X
from ..scorer import Scorer
from ..training import validate_examples, validate_get_examples
from ..util import registry
-from .. import util
+from .pipe import deserialize_config
+from .trainable_pipe import TrainablePipe
# See #9050
BACKWARD_OVERWRITE = False
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 650a01949..610ed99b6 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -1,18 +1,18 @@
-from typing import Iterable, Tuple, Optional, Dict, List, Callable, Any
-from thinc.api import get_array_module, Model, Optimizer, set_dropout_rate, Config
-from thinc.types import Floats2d
-import numpy
from itertools import islice
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+
+import numpy
+from thinc.api import Config, Model, Optimizer, get_array_module, set_dropout_rate
+from thinc.types import Floats2d
-from .trainable_pipe import TrainablePipe
-from ..language import Language
-from ..training import Example, validate_examples, validate_get_examples
from ..errors import Errors
+from ..language import Language
from ..scorer import Scorer
from ..tokens import Doc
+from ..training import Example, validate_examples, validate_get_examples
from ..util import registry
from ..vocab import Vocab
-
+from .trainable_pipe import TrainablePipe
single_label_default_config = """
[model]
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 41c0e2f63..364e6f436 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -1,19 +1,18 @@
-from typing import Iterable, Optional, Dict, List, Callable, Any
-from thinc.types import Floats2d
-from thinc.api import Model, Config
-
from itertools import islice
+from typing import Any, Callable, Dict, Iterable, List, Optional
+
+from thinc.api import Config, Model
+from thinc.types import Floats2d
-from ..language import Language
-from ..training import Example, validate_get_examples
from ..errors import Errors
+from ..language import Language
from ..scorer import Scorer
from ..tokens import Doc
+from ..training import Example, validate_get_examples
from ..util import registry
from ..vocab import Vocab
from .textcat import TextCategorizer
-
multi_label_default_config = """
[model]
@architectures = "spacy.TextCatEnsemble.v2"
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index c742aaeaa..677f5eec1 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -1,13 +1,14 @@
-from typing import Sequence, Iterable, Optional, Dict, Callable, List, Any
-from thinc.api import Model, set_dropout_rate, Optimizer, Config
from itertools import islice
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
+
+from thinc.api import Config, Model, Optimizer, set_dropout_rate
-from .trainable_pipe import TrainablePipe
-from ..training import Example, validate_examples, validate_get_examples
-from ..tokens import Doc
-from ..vocab import Vocab
-from ..language import Language
from ..errors import Errors
+from ..language import Language
+from ..tokens import Doc
+from ..training import Example, validate_examples, validate_get_examples
+from ..vocab import Vocab
+from .trainable_pipe import TrainablePipe
default_model_config = """
[model]
diff --git a/spacy/pipeline/trainable_pipe.pxd b/spacy/pipeline/trainable_pipe.pxd
index 65daa8b22..b1d2550a1 100644
--- a/spacy/pipeline/trainable_pipe.pxd
+++ b/spacy/pipeline/trainable_pipe.pxd
@@ -1,5 +1,6 @@
-from .pipe cimport Pipe
from ..vocab cimport Vocab
+from .pipe cimport Pipe
+
cdef class TrainablePipe(Pipe):
cdef public Vocab vocab
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index 3f0507d4b..7aa91ac16 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -1,17 +1,17 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Iterable, Iterator, Optional, Dict, Tuple, Callable
+from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
+
import srsly
-from thinc.api import set_dropout_rate, Model, Optimizer
+from thinc.api import Model, Optimizer, set_dropout_rate
from ..tokens.doc cimport Doc
-from ..training import validate_examples
-from ..errors import Errors
-from .pipe import Pipe, deserialize_config
from .. import util
-from ..vocab import Vocab
+from ..errors import Errors
from ..language import Language
-from ..training import Example
+from ..training import Example, validate_examples
+from ..vocab import Vocab
+from .pipe import Pipe, deserialize_config
cdef class TrainablePipe(Pipe):
diff --git a/spacy/pipeline/transition_parser.pxd b/spacy/pipeline/transition_parser.pxd
index 1521fde60..e5e88d521 100644
--- a/spacy/pipeline/transition_parser.pxd
+++ b/spacy/pipeline/transition_parser.pxd
@@ -1,11 +1,11 @@
from cymem.cymem cimport Pool
from thinc.backends.cblas cimport CBlas
+from ..ml.parser_model cimport ActivationsC, SizesC, WeightsC
from ..vocab cimport Vocab
-from .trainable_pipe cimport TrainablePipe
-from ._parser_internals.transition_system cimport Transition, TransitionSystem
from ._parser_internals._state cimport StateC
-from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC
+from ._parser_internals.transition_system cimport Transition, TransitionSystem
+from .trainable_pipe cimport TrainablePipe
cdef class Parser(TrainablePipe):
diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx
index 1327db2ce..ef4d9b362 100644
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -1,34 +1,50 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
from __future__ import print_function
-from cymem.cymem cimport Pool
+
cimport numpy as np
+from cymem.cymem cimport Pool
+
from itertools import islice
-from libcpp.vector cimport vector
-from libc.string cimport memset, memcpy
+
from libc.stdlib cimport calloc, free
+from libc.string cimport memcpy, memset
+from libcpp.vector cimport vector
+
import random
import srsly
-from thinc.api import get_ops, set_dropout_rate, CupyOps, NumpyOps
+from thinc.api import CupyOps, NumpyOps, get_ops, set_dropout_rate
+
from thinc.extra.search cimport Beam
-import numpy.random
-import numpy
+
import warnings
-from ._parser_internals.stateclass cimport StateClass
-from ..ml.parser_model cimport alloc_activations, free_activations
-from ..ml.parser_model cimport predict_states, arg_max_if_valid
-from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
-from ..ml.parser_model cimport get_c_weights, get_c_sizes
+import numpy
+import numpy.random
+
+from ..ml.parser_model cimport (
+ ActivationsC,
+ SizesC,
+ WeightsC,
+ alloc_activations,
+ arg_max_if_valid,
+ cpu_log_loss,
+ free_activations,
+ get_c_sizes,
+ get_c_weights,
+ predict_states,
+)
from ..tokens.doc cimport Doc
+from ._parser_internals.stateclass cimport StateClass
+
from .trainable_pipe import TrainablePipe
+
from ._parser_internals cimport _beam_utils
-from ._parser_internals import _beam_utils
-from ..training import validate_examples, validate_get_examples
-from ..errors import Errors, Warnings
from .. import util
-
+from ..errors import Errors, Warnings
+from ..training import validate_examples, validate_get_examples
+from ._parser_internals import _beam_utils
NUMPY_OPS = NumpyOps()
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 140592dcd..22f45372c 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -1,17 +1,39 @@
-from typing import Dict, List, Union, Optional, Any, Callable, Type, Tuple
-from typing import Iterable, TypeVar, TYPE_CHECKING
-from .compat import Literal
-from enum import Enum
-from pydantic import BaseModel, Field, ValidationError, validator, create_model
-from pydantic import StrictStr, StrictInt, StrictFloat, StrictBool, ConstrainedStr
-from pydantic.main import ModelMetaclass
-from thinc.api import Optimizer, ConfigValidationError, Model
-from thinc.config import Promise
-from collections import defaultdict
import inspect
import re
+from collections import defaultdict
+from enum import Enum
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+)
+
+from pydantic import (
+ BaseModel,
+ ConstrainedStr,
+ Field,
+ StrictBool,
+ StrictFloat,
+ StrictInt,
+ StrictStr,
+ ValidationError,
+ create_model,
+ validator,
+)
+from pydantic.main import ModelMetaclass
+from thinc.api import ConfigValidationError, Model, Optimizer
+from thinc.config import Promise
from .attrs import NAMES
+from .compat import Literal
from .lookups import Lookups
from .util import is_cython_func
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 86cd00a50..48d9f03ab 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -1,13 +1,23 @@
-from typing import Optional, Iterable, Dict, Set, List, Any, Callable, Tuple
-from typing import TYPE_CHECKING
-import numpy as np
from collections import defaultdict
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Set,
+ Tuple,
+)
+
+import numpy as np
-from .training import Example
-from .tokens import Token, Doc, Span
from .errors import Errors
-from .util import get_lang_class, SimpleFrozenList
from .morphology import Morphology
+from .tokens import Doc, Span, Token
+from .training import Example
+from .util import SimpleFrozenList, get_lang_class
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/strings.pxd b/spacy/strings.pxd
index 5f03a9a28..d22f48ba1 100644
--- a/spacy/strings.pxd
+++ b/spacy/strings.pxd
@@ -1,9 +1,9 @@
-from libc.stdint cimport int64_t
-from libcpp.vector cimport vector
-from libcpp.set cimport set
from cymem.cymem cimport Pool
-from preshed.maps cimport PreshMap
+from libc.stdint cimport int64_t
+from libcpp.set cimport set
+from libcpp.vector cimport vector
from murmurhash.mrmr cimport hash64
+from preshed.maps cimport PreshMap
from .typedefs cimport attr_t, hash_t
diff --git a/spacy/strings.pyi b/spacy/strings.pyi
index b29389b9a..f8fe8381c 100644
--- a/spacy/strings.pyi
+++ b/spacy/strings.pyi
@@ -1,5 +1,5 @@
-from typing import Optional, Iterable, Iterator, Union, Any, overload
from pathlib import Path
+from typing import Any, Iterable, Iterator, Optional, Union, overload
def get_string_id(key: Union[str, int]) -> int: ...
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index c5f218342..16c3e2b5b 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,18 +1,19 @@
# cython: infer_types=True
cimport cython
+from libc.stdint cimport uint32_t
from libc.string cimport memcpy
from libcpp.set cimport set
-from libc.stdint cimport uint32_t
-from murmurhash.mrmr cimport hash64, hash32
+from murmurhash.mrmr cimport hash32, hash64
import srsly
from .typedefs cimport hash_t
+from . import util
+from .errors import Errors
from .symbols import IDS as SYMBOLS_BY_STR
from .symbols import NAMES as SYMBOLS_BY_INT
-from .errors import Errors
-from . import util
+
# Not particularly elegant, but this is faster than `isinstance(key, numbers.Integral)`
cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index 86d5b67ed..9efb068fd 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -1,11 +1,10 @@
-from libc.stdint cimport uint8_t, uint32_t, int32_t, uint64_t
-from libcpp.vector cimport vector
-from libcpp.unordered_set cimport unordered_set
+from libc.stdint cimport int32_t, int64_t, uint8_t, uint32_t, uint64_t
from libcpp.unordered_map cimport unordered_map
-from libc.stdint cimport int32_t, int64_t
+from libcpp.unordered_set cimport unordered_set
+from libcpp.vector cimport vector
-from .typedefs cimport flags_t, attr_t, hash_t
from .parts_of_speech cimport univ_pos_t
+from .typedefs cimport attr_t, flags_t, hash_t
cdef struct LexemeC:
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 00b8f5f1c..4ca741dfc 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.util import get_lang_class
from hypothesis import settings
+from spacy.util import get_lang_class
+
# Functionally disable deadline settings for tests
# to prevent spurious test failures in CI builds.
settings.register_profile("no_deadlines", deadline=2 * 60 * 1000) # in ms
diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py
index 231b7c2a8..259b21fb3 100644
--- a/spacy/tests/doc/test_add_entities.py
+++ b/spacy/tests/doc/test_add_entities.py
@@ -1,10 +1,11 @@
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
-from spacy.training import Example
-from spacy.pipeline import EntityRecognizer
-from spacy.tokens import Span, Doc
-from spacy import registry
import pytest
+from spacy import registry
+from spacy.pipeline import EntityRecognizer
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
+from spacy.tokens import Doc, Span
+from spacy.training import Example
+
def _ner_example(ner):
doc = Doc(
diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py
index 1f2d7d999..757655f55 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -1,8 +1,8 @@
import numpy
import pytest
+from spacy.attrs import DEP, MORPH, ORTH, POS, SHAPE
from spacy.tokens import Doc
-from spacy.attrs import ORTH, SHAPE, POS, DEP, MORPH
@pytest.mark.issue(2203)
diff --git a/spacy/tests/doc/test_creation.py b/spacy/tests/doc/test_creation.py
index 302a9b6ea..4bc1de3e0 100644
--- a/spacy/tests/doc/test_creation.py
+++ b/spacy/tests/doc/test_creation.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.vocab import Vocab
-from spacy.tokens import Doc
+
from spacy import util
+from spacy.tokens import Doc
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index 38003dea9..73544c51a 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -1,13 +1,22 @@
+import warnings
import weakref
import numpy
-from numpy.testing import assert_array_equal
import pytest
-import warnings
+from numpy.testing import assert_array_equal
from thinc.api import NumpyOps, get_current_ops
-from spacy.attrs import DEP, ENT_IOB, ENT_TYPE, HEAD, IS_ALPHA, MORPH, POS
-from spacy.attrs import SENT_START, TAG
+from spacy.attrs import (
+ DEP,
+ ENT_IOB,
+ ENT_TYPE,
+ HEAD,
+ IS_ALPHA,
+ MORPH,
+ POS,
+ SENT_START,
+ TAG,
+)
from spacy.lang.en import English
from spacy.lang.xx import MultiLanguage
from spacy.language import Language
diff --git a/spacy/tests/doc/test_graph.py b/spacy/tests/doc/test_graph.py
index e464b0058..d14a5b057 100644
--- a/spacy/tests/doc/test_graph.py
+++ b/spacy/tests/doc/test_graph.py
@@ -1,6 +1,6 @@
-from spacy.vocab import Vocab
from spacy.tokens.doc import Doc
from spacy.tokens.graph import Graph
+from spacy.vocab import Vocab
def test_graph_init():
diff --git a/spacy/tests/doc/test_json_doc_conversion.py b/spacy/tests/doc/test_json_doc_conversion.py
index 11a1817e6..a76472d07 100644
--- a/spacy/tests/doc/test_json_doc_conversion.py
+++ b/spacy/tests/doc/test_json_doc_conversion.py
@@ -1,8 +1,10 @@
import pytest
+import srsly
+
import spacy
from spacy import schemas
from spacy.tokens import Doc, Span, Token
-import srsly
+
from .test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/doc/test_pickle_doc.py b/spacy/tests/doc/test_pickle_doc.py
index 28cb66714..2e28162d4 100644
--- a/spacy/tests/doc/test_pickle_doc.py
+++ b/spacy/tests/doc/test_pickle_doc.py
@@ -1,5 +1,5 @@
-from spacy.language import Language
from spacy.compat import pickle
+from spacy.language import Language
def test_pickle_single_doc():
diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py
index 20c302da1..45d54346e 100644
--- a/spacy/tests/doc/test_retokenize_merge.py
+++ b/spacy/tests/doc/test_retokenize_merge.py
@@ -1,7 +1,8 @@
import pytest
+
from spacy.attrs import LEMMA
-from spacy.vocab import Vocab
from spacy.tokens import Doc, Token
+from spacy.vocab import Vocab
def test_doc_retokenize_merge(en_tokenizer):
diff --git a/spacy/tests/doc/test_retokenize_split.py b/spacy/tests/doc/test_retokenize_split.py
index ec4deb033..61ef599be 100644
--- a/spacy/tests/doc/test_retokenize_split.py
+++ b/spacy/tests/doc/test_retokenize_split.py
@@ -1,8 +1,8 @@
import numpy
import pytest
-from spacy.vocab import Vocab
from spacy.tokens import Doc, Token
+from spacy.vocab import Vocab
@pytest.mark.issue(3540)
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index a5c512dc0..04dde2bfa 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -1,13 +1,13 @@
-import pytest
import numpy
+import pytest
from numpy.testing import assert_array_equal
+from thinc.api import get_current_ops
-from spacy.attrs import ORTH, LENGTH
+from spacy.attrs import LENGTH, ORTH
from spacy.lang.en import English
from spacy.tokens import Doc, Span, Token
-from spacy.vocab import Vocab
from spacy.util import filter_spans
-from thinc.api import get_current_ops
+from spacy.vocab import Vocab
from ..util import add_vecs_to_vocab
from .test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/doc/test_span_group.py b/spacy/tests/doc/test_span_group.py
index cea2c42ee..ef78172bf 100644
--- a/spacy/tests/doc/test_span_group.py
+++ b/spacy/tests/doc/test_span_group.py
@@ -1,9 +1,10 @@
+from random import Random
from typing import List
import pytest
-from random import Random
+
from spacy.matcher import Matcher
-from spacy.tokens import Span, SpanGroup, Doc
+from spacy.tokens import Doc, Span, SpanGroup
from spacy.util import filter_spans
diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index e715c5e85..782dfd774 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -1,10 +1,11 @@
-import pytest
import numpy
-from spacy.attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
+import pytest
+
+from spacy.attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_STOP, IS_TITLE
from spacy.symbols import VERB
-from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/doc/test_underscore.py b/spacy/tests/doc/test_underscore.py
index b934221af..b79d2f01f 100644
--- a/spacy/tests/doc/test_underscore.py
+++ b/spacy/tests/doc/test_underscore.py
@@ -1,5 +1,6 @@
import pytest
from mock import Mock
+
from spacy.tokens import Doc, Span, Token
from spacy.tokens.underscore import Underscore
diff --git a/spacy/tests/lang/bn/test_tokenizer.py b/spacy/tests/lang/bn/test_tokenizer.py
index 5b18c5269..e9a4d5e54 100644
--- a/spacy/tests/lang/bn/test_tokenizer.py
+++ b/spacy/tests/lang/bn/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# fmt: off
TESTCASES = [
# Punctuation tests
diff --git a/spacy/tests/lang/da/test_noun_chunks.py b/spacy/tests/lang/da/test_noun_chunks.py
index 30df92c0b..b4d389e4b 100644
--- a/spacy/tests/lang/da/test_noun_chunks.py
+++ b/spacy/tests/lang/da/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/lang/da/test_text.py b/spacy/tests/lang/da/test_text.py
index 3c6cca5ac..e1f3b96e2 100644
--- a/spacy/tests/lang/da/test_text.py
+++ b/spacy/tests/lang/da/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.da.lex_attrs import like_num
diff --git a/spacy/tests/lang/en/test_customized_tokenizer.py b/spacy/tests/lang/en/test_customized_tokenizer.py
index f5302cb31..8251306a6 100644
--- a/spacy/tests/lang/en/test_customized_tokenizer.py
+++ b/spacy/tests/lang/en/test_customized_tokenizer.py
@@ -1,9 +1,10 @@
-import pytest
import re
+
+import pytest
+
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
-from spacy.util import compile_prefix_regex, compile_suffix_regex
-from spacy.util import compile_infix_regex
+from spacy.util import compile_infix_regex, compile_prefix_regex, compile_suffix_regex
@pytest.fixture
diff --git a/spacy/tests/lang/en/test_noun_chunks.py b/spacy/tests/lang/en/test_noun_chunks.py
index 0c54ffbb4..bda203b2c 100644
--- a/spacy/tests/lang/en/test_noun_chunks.py
+++ b/spacy/tests/lang/en/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
@pytest.fixture
def doc(en_vocab):
diff --git a/spacy/tests/lang/en/test_punct.py b/spacy/tests/lang/en/test_punct.py
index 1d10478a1..79d03d2db 100644
--- a/spacy/tests/lang/en/test_punct.py
+++ b/spacy/tests/lang/en/test_punct.py
@@ -1,7 +1,7 @@
import pytest
-from spacy.util import compile_prefix_regex
-from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.util import compile_prefix_regex
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
diff --git a/spacy/tests/lang/en/test_sbd.py b/spacy/tests/lang/en/test_sbd.py
index d30c72750..c07c23193 100644
--- a/spacy/tests/lang/en/test_sbd.py
+++ b/spacy/tests/lang/en/test_sbd.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
from ...util import apply_transition_sequence
diff --git a/spacy/tests/lang/en/test_text.py b/spacy/tests/lang/en/test_text.py
index 358f4c0f9..53cf0cc5b 100644
--- a/spacy/tests/lang/en/test_text.py
+++ b/spacy/tests/lang/en/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.en.lex_attrs import like_num
diff --git a/spacy/tests/lang/es/test_noun_chunks.py b/spacy/tests/lang/es/test_noun_chunks.py
index 6118a0458..8e5fe8354 100644
--- a/spacy/tests/lang/es/test_noun_chunks.py
+++ b/spacy/tests/lang/es/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/es/test_text.py b/spacy/tests/lang/es/test_text.py
index d95f6d26b..1d1f7fa6b 100644
--- a/spacy/tests/lang/es/test_text.py
+++ b/spacy/tests/lang/es/test_text.py
@@ -1,6 +1,7 @@
import pytest
-from spacy.lang.es.lex_attrs import like_num
+
from spacy.lang.es import Spanish
+from spacy.lang.es.lex_attrs import like_num
@pytest.mark.issue(3803)
diff --git a/spacy/tests/lang/fi/test_noun_chunks.py b/spacy/tests/lang/fi/test_noun_chunks.py
index cab84b311..37e1b00a0 100644
--- a/spacy/tests/lang/fi/test_noun_chunks.py
+++ b/spacy/tests/lang/fi/test_noun_chunks.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.tokens import Doc
+from spacy.tokens import Doc
FI_NP_TEST_EXAMPLES = [
(
diff --git a/spacy/tests/lang/fi/test_tokenizer.py b/spacy/tests/lang/fi/test_tokenizer.py
index dc40e18a3..2d9f081a7 100644
--- a/spacy/tests/lang/fi/test_tokenizer.py
+++ b/spacy/tests/lang/fi/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
ABBREVIATION_TESTS = [
(
"Hyvää uutta vuotta t. siht. Niemelä!",
diff --git a/spacy/tests/lang/fr/test_noun_chunks.py b/spacy/tests/lang/fr/test_noun_chunks.py
index 25b95f566..436e07b29 100644
--- a/spacy/tests/lang/fr/test_noun_chunks.py
+++ b/spacy/tests/lang/fr/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/fr/test_prefix_suffix_infix.py b/spacy/tests/lang/fr/test_prefix_suffix_infix.py
index 272531b63..b81ccbc0e 100644
--- a/spacy/tests/lang/fr/test_prefix_suffix_infix.py
+++ b/spacy/tests/lang/fr/test_prefix_suffix_infix.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.language import Language, BaseDefaults
-from spacy.lang.punctuation import TOKENIZER_INFIXES
+
from spacy.lang.char_classes import ALPHA
+from spacy.lang.punctuation import TOKENIZER_INFIXES
+from spacy.language import BaseDefaults, Language
@pytest.mark.issue(768)
diff --git a/spacy/tests/lang/fr/test_text.py b/spacy/tests/lang/fr/test_text.py
index 01231f593..2c58a1c4a 100644
--- a/spacy/tests/lang/fr/test_text.py
+++ b/spacy/tests/lang/fr/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.fr.lex_attrs import like_num
diff --git a/spacy/tests/lang/ga/test_tokenizer.py b/spacy/tests/lang/ga/test_tokenizer.py
index 78127ef7c..0c16b27d2 100644
--- a/spacy/tests/lang/ga/test_tokenizer.py
+++ b/spacy/tests/lang/ga/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# fmt: off
GA_TOKEN_EXCEPTION_TESTS = [
("Niall Ó Domhnaill, Rialtas na hÉireann 1977 (lch. 600).", ["Niall", "Ó", "Domhnaill", ",", "Rialtas", "na", "hÉireann", "1977", "(", "lch.", "600", ")", "."]),
diff --git a/spacy/tests/lang/grc/test_tokenizer.py b/spacy/tests/lang/grc/test_tokenizer.py
index 3df5b546b..9f29b9024 100644
--- a/spacy/tests/lang/grc/test_tokenizer.py
+++ b/spacy/tests/lang/grc/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# fmt: off
GRC_TOKEN_EXCEPTION_TESTS = [
("τὸ 〈τῆς〉 φιλοσοφίας ἔργον ἔνιοί φασιν ἀπὸ ⟦βαρβάρων⟧ ἄρξαι.", ["τὸ", "〈", "τῆς", "〉", "φιλοσοφίας", "ἔργον", "ἔνιοί", "φασιν", "ἀπὸ", "⟦", "βαρβάρων", "⟧", "ἄρξαι", "."]),
diff --git a/spacy/tests/lang/he/test_tokenizer.py b/spacy/tests/lang/he/test_tokenizer.py
index 3716f7e3b..15d059328 100644
--- a/spacy/tests/lang/he/test_tokenizer.py
+++ b/spacy/tests/lang/he/test_tokenizer.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.he.lex_attrs import like_num
diff --git a/spacy/tests/lang/hi/test_lex_attrs.py b/spacy/tests/lang/hi/test_lex_attrs.py
index 80a7cc1c4..2d8d4a53e 100644
--- a/spacy/tests/lang/hi/test_lex_attrs.py
+++ b/spacy/tests/lang/hi/test_lex_attrs.py
@@ -1,5 +1,6 @@
import pytest
-from spacy.lang.hi.lex_attrs import norm, like_num
+
+from spacy.lang.hi.lex_attrs import like_num, norm
def test_hi_tokenizer_handles_long_text(hi_tokenizer):
diff --git a/spacy/tests/lang/hi/test_text.py b/spacy/tests/lang/hi/test_text.py
index 791cc3822..837dc3099 100644
--- a/spacy/tests/lang/hi/test_text.py
+++ b/spacy/tests/lang/hi/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.hi import Hindi
diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py
index 0488474ae..fa689c8f3 100644
--- a/spacy/tests/lang/hu/test_tokenizer.py
+++ b/spacy/tests/lang/hu/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
DEFAULT_TESTS = [
("N. kormányzósági\nszékhely.", ["N.", "kormányzósági", "székhely", "."]),
pytest.param(
diff --git a/spacy/tests/lang/hy/test_text.py b/spacy/tests/lang/hy/test_text.py
index ac0f1e128..7a69c2a81 100644
--- a/spacy/tests/lang/hy/test_text.py
+++ b/spacy/tests/lang/hy/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.hy.lex_attrs import like_num
diff --git a/spacy/tests/lang/hy/test_tokenizer.py b/spacy/tests/lang/hy/test_tokenizer.py
index e9efb224a..9423cb4d0 100644
--- a/spacy/tests/lang/hy/test_tokenizer.py
+++ b/spacy/tests/lang/hy/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# TODO add test cases with valid punctuation signs.
hy_tokenize_text_test = [
diff --git a/spacy/tests/lang/id/test_text.py b/spacy/tests/lang/id/test_text.py
index ed6487b68..7397a8c17 100644
--- a/spacy/tests/lang/id/test_text.py
+++ b/spacy/tests/lang/id/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.id.lex_attrs import like_num
diff --git a/spacy/tests/lang/it/test_noun_chunks.py b/spacy/tests/lang/it/test_noun_chunks.py
index 0a8c10e79..7f6659ee7 100644
--- a/spacy/tests/lang/it/test_noun_chunks.py
+++ b/spacy/tests/lang/it/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/ja/test_morphologizer_factory.py b/spacy/tests/lang/ja/test_morphologizer_factory.py
index a4e038d01..d504576d0 100644
--- a/spacy/tests/lang/ja/test_morphologizer_factory.py
+++ b/spacy/tests/lang/ja/test_morphologizer_factory.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ja import Japanese
diff --git a/spacy/tests/lang/ja/test_serialize.py b/spacy/tests/lang/ja/test_serialize.py
index 011eb470f..f48b2570e 100644
--- a/spacy/tests/lang/ja/test_serialize.py
+++ b/spacy/tests/lang/ja/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.ja import Japanese
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/ja/test_tokenizer.py b/spacy/tests/lang/ja/test_tokenizer.py
index ef7bed06d..a26347444 100644
--- a/spacy/tests/lang/ja/test_tokenizer.py
+++ b/spacy/tests/lang/ja/test_tokenizer.py
@@ -1,7 +1,8 @@
import pytest
+from spacy.lang.ja import DetailedToken, Japanese
+
from ...tokenizer.test_naughty_strings import NAUGHTY_STRINGS
-from spacy.lang.ja import Japanese, DetailedToken
# fmt: off
TOKENIZER_TESTS = [
diff --git a/spacy/tests/lang/ko/test_serialize.py b/spacy/tests/lang/ko/test_serialize.py
index 75288fcc5..bba7bce6e 100644
--- a/spacy/tests/lang/ko/test_serialize.py
+++ b/spacy/tests/lang/ko/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.ko import Korean
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/ky/test_tokenizer.py b/spacy/tests/lang/ky/test_tokenizer.py
index 5cf6eb1a6..b089dd9b9 100644
--- a/spacy/tests/lang/ky/test_tokenizer.py
+++ b/spacy/tests/lang/ky/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
INFIX_HYPHEN_TESTS = [
("Бала-чака жакшыбы?", "Бала-чака жакшыбы ?".split()),
("Кыз-келиндер кийими.", "Кыз-келиндер кийими .".split()),
diff --git a/spacy/tests/lang/la/test_noun_chunks.py b/spacy/tests/lang/la/test_noun_chunks.py
index ba8f5658b..70a3392cd 100644
--- a/spacy/tests/lang/la/test_noun_chunks.py
+++ b/spacy/tests/lang/la/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/lang/la/test_text.py b/spacy/tests/lang/la/test_text.py
index 48e7359a4..74606c4e8 100644
--- a/spacy/tests/lang/la/test_text.py
+++ b/spacy/tests/lang/la/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.la.lex_attrs import like_num
diff --git a/spacy/tests/lang/mk/test_text.py b/spacy/tests/lang/mk/test_text.py
index b8881082c..b3a7ff9ee 100644
--- a/spacy/tests/lang/mk/test_text.py
+++ b/spacy/tests/lang/mk/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.mk.lex_attrs import like_num
diff --git a/spacy/tests/lang/ms/test_text.py b/spacy/tests/lang/ms/test_text.py
index d6cd169ce..4b0ac3b2b 100644
--- a/spacy/tests/lang/ms/test_text.py
+++ b/spacy/tests/lang/ms/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ms.lex_attrs import like_num
diff --git a/spacy/tests/lang/nb/test_tokenizer.py b/spacy/tests/lang/nb/test_tokenizer.py
index 2da6e8d40..4f5fd89a3 100644
--- a/spacy/tests/lang/nb/test_tokenizer.py
+++ b/spacy/tests/lang/nb/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
NB_TOKEN_EXCEPTION_TESTS = [
(
"Smørsausen brukes bl.a. til fisk",
diff --git a/spacy/tests/lang/nl/test_noun_chunks.py b/spacy/tests/lang/nl/test_noun_chunks.py
index 8962e3b75..6004ac230 100644
--- a/spacy/tests/lang/nl/test_noun_chunks.py
+++ b/spacy/tests/lang/nl/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
from spacy.util import filter_spans
diff --git a/spacy/tests/lang/nl/test_text.py b/spacy/tests/lang/nl/test_text.py
index 8bc72cc6d..d6413e0d7 100644
--- a/spacy/tests/lang/nl/test_text.py
+++ b/spacy/tests/lang/nl/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.nl.lex_attrs import like_num
diff --git a/spacy/tests/lang/pt/test_noun_chunks.py b/spacy/tests/lang/pt/test_noun_chunks.py
index 9a42ce268..eee96d593 100644
--- a/spacy/tests/lang/pt/test_noun_chunks.py
+++ b/spacy/tests/lang/pt/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/pt/test_text.py b/spacy/tests/lang/pt/test_text.py
index 3a9162b80..cb8723901 100644
--- a/spacy/tests/lang/pt/test_text.py
+++ b/spacy/tests/lang/pt/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.pt.lex_attrs import like_num
diff --git a/spacy/tests/lang/ro/test_tokenizer.py b/spacy/tests/lang/ro/test_tokenizer.py
index 64c072470..d2affd607 100644
--- a/spacy/tests/lang/ro/test_tokenizer.py
+++ b/spacy/tests/lang/ro/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
TEST_CASES = [
(
"Adresa este str. Principală nr. 5.",
diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py
index 9a5a9ad68..66aa7e3a6 100644
--- a/spacy/tests/lang/ru/test_lemmatizer.py
+++ b/spacy/tests/lang/ru/test_lemmatizer.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.tokens import Doc

+from spacy.tokens import Doc

pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
diff --git a/spacy/tests/lang/ru/test_text.py b/spacy/tests/lang/ru/test_text.py
index b0eaf66bb..0bbed2122 100644
--- a/spacy/tests/lang/ru/test_text.py
+++ b/spacy/tests/lang/ru/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ru.lex_attrs import like_num
diff --git a/spacy/tests/lang/ru/test_tokenizer.py b/spacy/tests/lang/ru/test_tokenizer.py
index 083b55a09..c941e21fc 100644
--- a/spacy/tests/lang/ru/test_tokenizer.py
+++ b/spacy/tests/lang/ru/test_tokenizer.py
@@ -1,6 +1,6 @@
from string import punctuation
-import pytest

+import pytest

PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
diff --git a/spacy/tests/lang/sr/test_tokenizer.py b/spacy/tests/lang/sr/test_tokenizer.py
index fdcf790d8..7ecd9596b 100644
--- a/spacy/tests/lang/sr/test_tokenizer.py
+++ b/spacy/tests/lang/sr/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
PUNCT_PAIRED = [("(", ")"), ("[", "]"), ("{", "}"), ("*", "*")]
diff --git a/spacy/tests/lang/sv/test_lex_attrs.py b/spacy/tests/lang/sv/test_lex_attrs.py
index 656c4706b..a47b17b27 100644
--- a/spacy/tests/lang/sv/test_lex_attrs.py
+++ b/spacy/tests/lang/sv/test_lex_attrs.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.sv.lex_attrs import like_num
diff --git a/spacy/tests/lang/sv/test_noun_chunks.py b/spacy/tests/lang/sv/test_noun_chunks.py
index d2410156c..599148384 100644
--- a/spacy/tests/lang/sv/test_noun_chunks.py
+++ b/spacy/tests/lang/sv/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/lang/sv/test_tokenizer.py b/spacy/tests/lang/sv/test_tokenizer.py
index 8871f4414..f19c6b66f 100644
--- a/spacy/tests/lang/sv/test_tokenizer.py
+++ b/spacy/tests/lang/sv/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
SV_TOKEN_EXCEPTION_TESTS = [
(
"Smörsåsen används bl.a. till fisk",
diff --git a/spacy/tests/lang/ta/test_text.py b/spacy/tests/lang/ta/test_text.py
index 228a14c18..2d15e96fc 100644
--- a/spacy/tests/lang/ta/test_text.py
+++ b/spacy/tests/lang/ta/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ta import Tamil
# Wikipedia excerpt: https://en.wikipedia.org/wiki/Chennai (Tamil Language)
diff --git a/spacy/tests/lang/ta/test_tokenizer.py b/spacy/tests/lang/ta/test_tokenizer.py
index 6ba8a2400..e668b5aca 100644
--- a/spacy/tests/lang/ta/test_tokenizer.py
+++ b/spacy/tests/lang/ta/test_tokenizer.py
@@ -1,6 +1,7 @@
import pytest
-from spacy.symbols import ORTH
+
from spacy.lang.ta import Tamil
+from spacy.symbols import ORTH
TA_BASIC_TOKENIZATION_TESTS = [
(
diff --git a/spacy/tests/lang/test_attrs.py b/spacy/tests/lang/test_attrs.py
index 1c27c1744..fd96e8f9b 100644
--- a/spacy/tests/lang/test_attrs.py
+++ b/spacy/tests/lang/test_attrs.py
@@ -1,10 +1,15 @@
import pytest
-from spacy.attrs import intify_attrs, ENT_IOB
-from spacy.attrs import IS_ALPHA, LEMMA, NORM, ORTH, intify_attrs
+from spacy.attrs import ENT_IOB, IS_ALPHA, LEMMA, NORM, ORTH, intify_attrs
from spacy.lang.en.stop_words import STOP_WORDS
-from spacy.lang.lex_attrs import is_ascii, is_currency, is_punct, is_stop
-from spacy.lang.lex_attrs import like_url, word_shape
+from spacy.lang.lex_attrs import (
+ is_ascii,
+ is_currency,
+ is_punct,
+ is_stop,
+ like_url,
+ word_shape,
+)
@pytest.mark.parametrize("word", ["the"])
diff --git a/spacy/tests/lang/test_initialize.py b/spacy/tests/lang/test_initialize.py
index 36f4a75e0..8a158647a 100644
--- a/spacy/tests/lang/test_initialize.py
+++ b/spacy/tests/lang/test_initialize.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.util import get_lang_class

+from spacy.util import get_lang_class

# fmt: off
# Only include languages with no external dependencies
diff --git a/spacy/tests/lang/test_lemmatizers.py b/spacy/tests/lang/test_lemmatizers.py
index e419f0a14..ddb3336ff 100644
--- a/spacy/tests/lang/test_lemmatizers.py
+++ b/spacy/tests/lang/test_lemmatizers.py
@@ -1,9 +1,9 @@
import pytest
+
from spacy import registry
from spacy.lookups import Lookups
from spacy.util import get_lang_class
-
# fmt: off
# Only include languages with no external dependencies
# excluded: ru, uk
diff --git a/spacy/tests/lang/th/test_serialize.py b/spacy/tests/lang/th/test_serialize.py
index a3de4bf54..57d0f1726 100644
--- a/spacy/tests/lang/th/test_serialize.py
+++ b/spacy/tests/lang/th/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.th import Thai
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/tl/test_punct.py b/spacy/tests/lang/tl/test_punct.py
index d6bcf297d..e2c93bf88 100644
--- a/spacy/tests/lang/tl/test_punct.py
+++ b/spacy/tests/lang/tl/test_punct.py
@@ -1,7 +1,7 @@
import pytest
-from spacy.util import compile_prefix_regex
-from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.util import compile_prefix_regex
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
diff --git a/spacy/tests/lang/tl/test_text.py b/spacy/tests/lang/tl/test_text.py
index 17429617c..26635ca90 100644
--- a/spacy/tests/lang/tl/test_text.py
+++ b/spacy/tests/lang/tl/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.tl.lex_attrs import like_num
# https://github.com/explosion/spaCy/blob/master/spacy/tests/lang/en/test_text.py
diff --git a/spacy/tests/lang/tr/test_text.py b/spacy/tests/lang/tr/test_text.py
index 323b11bd1..b4d84daae 100644
--- a/spacy/tests/lang/tr/test_text.py
+++ b/spacy/tests/lang/tr/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.tr.lex_attrs import like_num
diff --git a/spacy/tests/lang/tr/test_tokenizer.py b/spacy/tests/lang/tr/test_tokenizer.py
index 9f988eae9..b07c98535 100644
--- a/spacy/tests/lang/tr/test_tokenizer.py
+++ b/spacy/tests/lang/tr/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
ABBREV_TESTS = [
("Dr. Murat Bey ile görüştüm.", ["Dr.", "Murat", "Bey", "ile", "görüştüm", "."]),
("Dr.la görüştüm.", ["Dr.la", "görüştüm", "."]),
diff --git a/spacy/tests/lang/tt/test_tokenizer.py b/spacy/tests/lang/tt/test_tokenizer.py
index 246d2824d..0bb241f27 100644
--- a/spacy/tests/lang/tt/test_tokenizer.py
+++ b/spacy/tests/lang/tt/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
INFIX_HYPHEN_TESTS = [
("Явым-төшем күләме.", "Явым-төшем күләме .".split()),
("Хатын-кыз киеме.", "Хатын-кыз киеме .".split()),
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
index a65bb25e5..060114cdf 100644
--- a/spacy/tests/lang/uk/test_lemmatizer.py
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.tokens import Doc

+from spacy.tokens import Doc

pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
diff --git a/spacy/tests/lang/uk/test_tokenizer.py b/spacy/tests/lang/uk/test_tokenizer.py
index 6596f490a..7960a30a2 100644
--- a/spacy/tests/lang/uk/test_tokenizer.py
+++ b/spacy/tests/lang/uk/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
PUNCT_PAIRED = [("(", ")"), ("[", "]"), ("{", "}"), ("*", "*")]
diff --git a/spacy/tests/lang/vi/test_serialize.py b/spacy/tests/lang/vi/test_serialize.py
index 55dab799c..20bfd20d5 100644
--- a/spacy/tests/lang/vi/test_serialize.py
+++ b/spacy/tests/lang/vi/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.vi import Vietnamese
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/vi/test_tokenizer.py b/spacy/tests/lang/vi/test_tokenizer.py
index 3d0642d1e..ca6dee985 100644
--- a/spacy/tests/lang/vi/test_tokenizer.py
+++ b/spacy/tests/lang/vi/test_tokenizer.py
@@ -1,8 +1,8 @@
import pytest
-from ...tokenizer.test_naughty_strings import NAUGHTY_STRINGS
from spacy.lang.vi import Vietnamese
+from ...tokenizer.test_naughty_strings import NAUGHTY_STRINGS
# fmt: off
TOKENIZER_TESTS = [
diff --git a/spacy/tests/lang/yo/test_text.py b/spacy/tests/lang/yo/test_text.py
index 48b689f3d..a1bbc38da 100644
--- a/spacy/tests/lang/yo/test_text.py
+++ b/spacy/tests/lang/yo/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.yo.lex_attrs import like_num
diff --git a/spacy/tests/lang/zh/test_serialize.py b/spacy/tests/lang/zh/test_serialize.py
index 03cdbbe24..4b014d713 100644
--- a/spacy/tests/lang/zh/test_serialize.py
+++ b/spacy/tests/lang/zh/test_serialize.py
@@ -1,5 +1,7 @@
import pytest
+
from spacy.lang.zh import Chinese
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/zh/test_tokenizer.py b/spacy/tests/lang/zh/test_tokenizer.py
index 741eb0ace..cdba5e397 100644
--- a/spacy/tests/lang/zh/test_tokenizer.py
+++ b/spacy/tests/lang/zh/test_tokenizer.py
@@ -1,7 +1,7 @@
import pytest
-from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
from thinc.api import ConfigValidationError
+from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
# fmt: off
TEXTS = ("作为语言而言,为世界使用人数最多的语言,目前世界有五分之一人口做为母语。",)
diff --git a/spacy/tests/matcher/test_dependency_matcher.py b/spacy/tests/matcher/test_dependency_matcher.py
index 200384320..44b3bb26b 100644
--- a/spacy/tests/matcher/test_dependency_matcher.py
+++ b/spacy/tests/matcher/test_dependency_matcher.py
@@ -1,8 +1,10 @@
-import pytest
+import copy
import pickle
import re
-import copy
+
+import pytest
from mock import Mock
+
from spacy.matcher import DependencyMatcher
from spacy.tokens import Doc, Token
diff --git a/spacy/tests/matcher/test_levenshtein.py b/spacy/tests/matcher/test_levenshtein.py
index 5afb7e1fc..fd85579ae 100644
--- a/spacy/tests/matcher/test_levenshtein.py
+++ b/spacy/tests/matcher/test_levenshtein.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.matcher import levenshtein
from spacy.matcher.levenshtein import levenshtein_compare
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index 09ab6c7dc..c824ca392 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -1,7 +1,8 @@
import pytest
from mock import Mock
+
from spacy.matcher import Matcher
-from spacy.tokens import Doc, Token, Span
+from spacy.tokens import Doc, Span, Token
from ..doc.test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index e7eced02c..21fa36865 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -1,6 +1,7 @@
import pytest
-from spacy.matcher import Matcher
+
from spacy.errors import MatchPatternError
+from spacy.matcher import Matcher
from spacy.schemas import validate_token_pattern
# (pattern, num errors with validation, num errors identified with minimal
diff --git a/spacy/tests/matcher/test_phrase_matcher.py b/spacy/tests/matcher/test_phrase_matcher.py
index 8a8d9eb84..7335bbdf1 100644
--- a/spacy/tests/matcher/test_phrase_matcher.py
+++ b/spacy/tests/matcher/test_phrase_matcher.py
@@ -1,14 +1,14 @@
-import pytest
import warnings
+
+import pytest
import srsly
from mock import Mock
from spacy.lang.en import English
-from spacy.matcher import PhraseMatcher, Matcher
+from spacy.matcher import Matcher, PhraseMatcher
from spacy.tokens import Doc, Span
from spacy.vocab import Vocab
-
from ..util import make_tempdir
diff --git a/spacy/tests/morphology/test_morph_features.py b/spacy/tests/morphology/test_morph_features.py
index 0693da690..ae20f9ba8 100644
--- a/spacy/tests/morphology/test_morph_features.py
+++ b/spacy/tests/morphology/test_morph_features.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.morphology import Morphology
from spacy.strings import StringStore, get_string_id
diff --git a/spacy/tests/morphology/test_morph_pickle.py b/spacy/tests/morphology/test_morph_pickle.py
index d9b0e3476..5c1a8a31e 100644
--- a/spacy/tests/morphology/test_morph_pickle.py
+++ b/spacy/tests/morphology/test_morph_pickle.py
@@ -1,5 +1,7 @@
-import pytest
import pickle
+
+import pytest
+
from spacy.morphology import Morphology
from spacy.strings import StringStore
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index b403f274f..9e83d5fb1 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -13,6 +13,7 @@ def test_build_dependencies():
"hypothesis",
"pre-commit",
"black",
+ "isort",
"mypy",
"types-dataclasses",
"types-mock",
diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py
index f89e993e9..89626597d 100644
--- a/spacy/tests/parser/test_add_label.py
+++ b/spacy/tests/parser/test_add_label.py
@@ -1,14 +1,15 @@
import pytest
from thinc.api import Adam, fix_random_seed
+
from spacy import registry
-from spacy.language import Language
from spacy.attrs import NORM
-from spacy.vocab import Vocab
-from spacy.training import Example
-from spacy.tokens import Doc
+from spacy.language import Language
from spacy.pipeline import DependencyParser, EntityRecognizer
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py
index bb226f9c5..fafd23268 100644
--- a/spacy/tests/parser/test_arc_eager_oracle.py
+++ b/spacy/tests/parser/test_arc_eager_oracle.py
@@ -1,12 +1,13 @@
import pytest
-from spacy.vocab import Vocab
+
from spacy import registry
-from spacy.training import Example
from spacy.pipeline import DependencyParser
-from spacy.tokens import Doc
-from spacy.pipeline._parser_internals.nonproj import projectivize
from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline._parser_internals.nonproj import projectivize
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
def get_sequence_costs(M, words, heads, deps, transitions):
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 7198859b3..1509c31bb 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -1,21 +1,21 @@
+import logging
import random
import pytest
from numpy.testing import assert_equal
+from spacy import registry, util
from spacy.attrs import ENT_IOB
-from spacy import util, registry
from spacy.lang.en import English
from spacy.lang.it import Italian
from spacy.language import Language
from spacy.lookups import Lookups
from spacy.pipeline import EntityRecognizer
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.pipeline._parser_internals.ner import BiluoPushDown
-from spacy.training import Example, iob_to_biluo, split_bilu_label
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.tokens import Doc, Span
+from spacy.training import Example, iob_to_biluo, split_bilu_label
from spacy.vocab import Vocab
-import logging
from ..util import make_tempdir
diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py
index 1bb5d4aa5..5bef5758f 100644
--- a/spacy/tests/parser/test_neural_parser.py
+++ b/spacy/tests/parser/test_neural_parser.py
@@ -1,14 +1,14 @@
import pytest
+from thinc.api import Model
from spacy import registry
-from spacy.training import Example
-from spacy.vocab import Vocab
from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.pipeline.transition_parser import Parser
from spacy.tokens.doc import Doc
-from thinc.api import Model
-from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
-from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/parser/test_nn_beam.py b/spacy/tests/parser/test_nn_beam.py
index 4ba020ef0..f852e5cda 100644
--- a/spacy/tests/parser/test_nn_beam.py
+++ b/spacy/tests/parser/test_nn_beam.py
@@ -1,16 +1,17 @@
-import pytest
import hypothesis
import hypothesis.strategies
import numpy
-from spacy.vocab import Vocab
-from spacy.language import Language
-from spacy.pipeline._parser_internals.arc_eager import ArcEager
-from spacy.tokens import Doc
-from spacy.pipeline._parser_internals._beam_utils import BeamBatch
-from spacy.pipeline._parser_internals.stateclass import StateClass
-from spacy.training import Example
+import pytest
from thinc.tests.strategies import ndarrays_of_shape
+from spacy.language import Language
+from spacy.pipeline._parser_internals._beam_utils import BeamBatch
+from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline._parser_internals.stateclass import StateClass
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
+
@pytest.fixture(scope="module")
def vocab():
diff --git a/spacy/tests/parser/test_nonproj.py b/spacy/tests/parser/test_nonproj.py
index 051d0ef0c..f4e09fc91 100644
--- a/spacy/tests/parser/test_nonproj.py
+++ b/spacy/tests/parser/test_nonproj.py
@@ -1,7 +1,12 @@
import pytest
-from spacy.pipeline._parser_internals.nonproj import ancestors, contains_cycle
-from spacy.pipeline._parser_internals.nonproj import is_nonproj_tree, is_nonproj_arc
+
from spacy.pipeline._parser_internals import nonproj
+from spacy.pipeline._parser_internals.nonproj import (
+ ancestors,
+ contains_cycle,
+ is_nonproj_arc,
+ is_nonproj_tree,
+)
from spacy.tokens import Doc
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 4b05c6721..3565c62af 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -5,12 +5,12 @@ from thinc.api import Adam
from spacy import registry, util
from spacy.attrs import DEP, NORM
from spacy.lang.en import English
-from spacy.tokens import Doc
-from spacy.training import Example
-from spacy.vocab import Vocab
from spacy.pipeline import DependencyParser
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
from ..util import apply_transition_sequence, make_tempdir
diff --git a/spacy/tests/parser/test_parse_navigate.py b/spacy/tests/parser/test_parse_navigate.py
index 50da60594..d2f684fdc 100644
--- a/spacy/tests/parser/test_parse_navigate.py
+++ b/spacy/tests/parser/test_parse_navigate.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py
index d71388900..dcbb9679d 100644
--- a/spacy/tests/parser/test_preset_sbd.py
+++ b/spacy/tests/parser/test_preset_sbd.py
@@ -1,12 +1,13 @@
import pytest
from thinc.api import Adam
-from spacy.attrs import NORM
-from spacy.vocab import Vocab
+
from spacy import registry
-from spacy.training import Example
+from spacy.attrs import NORM
+from spacy.pipeline import DependencyParser
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
from spacy.tokens import Doc
-from spacy.pipeline import DependencyParser
+from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/parser/test_space_attachment.py b/spacy/tests/parser/test_space_attachment.py
index 2b80272d6..30e66b37a 100644
--- a/spacy/tests/parser/test_space_attachment.py
+++ b/spacy/tests/parser/test_space_attachment.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
from ..util import apply_transition_sequence
diff --git a/spacy/tests/parser/test_state.py b/spacy/tests/parser/test_state.py
index ca1755c48..0febc3d09 100644
--- a/spacy/tests/parser/test_state.py
+++ b/spacy/tests/parser/test_state.py
@@ -1,8 +1,8 @@
import pytest
+from spacy.pipeline._parser_internals.stateclass import StateClass
from spacy.tokens.doc import Doc
from spacy.vocab import Vocab
-from spacy.pipeline._parser_internals.stateclass import StateClass
@pytest.fixture
diff --git a/spacy/tests/pipeline/test_analysis.py b/spacy/tests/pipeline/test_analysis.py
index df3d7dff5..503b501ce 100644
--- a/spacy/tests/pipeline/test_analysis.py
+++ b/spacy/tests/pipeline/test_analysis.py
@@ -1,7 +1,8 @@
+import pytest
+from mock import Mock
+
from spacy.language import Language
from spacy.pipe_analysis import get_attr_info, validate_attrs
-from mock import Mock
-import pytest
def test_component_decorator_assigns():
diff --git a/spacy/tests/pipeline/test_annotates_on_update.py b/spacy/tests/pipeline/test_annotates_on_update.py
index 869b8b874..d4feebd30 100644
--- a/spacy/tests/pipeline/test_annotates_on_update.py
+++ b/spacy/tests/pipeline/test_annotates_on_update.py
@@ -1,12 +1,13 @@
from typing import Callable, Iterable, Iterator
-import pytest

+import pytest
from thinc.api import Config
+
+from spacy.lang.en import English
from spacy.language import Language
from spacy.training import Example
from spacy.training.loop import train
-from spacy.lang.en import English
-from spacy.util import registry, load_model_from_config
+from spacy.util import load_model_from_config, registry
@pytest.fixture
diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py
index dab3ebf57..06587b4be 100644
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@@ -1,10 +1,11 @@
-import pytest
import numpy
-from spacy.training import Example
+import pytest
+
+from spacy import registry, util
from spacy.lang.en import English
from spacy.pipeline import AttributeRuler
-from spacy import util, registry
from spacy.tokens import Doc
+from spacy.training import Example
from ..util import make_tempdir
diff --git a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
index 128d75680..5a8f0aee2 100644
--- a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
+++ b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
@@ -1,16 +1,17 @@
import pickle
+
+import hypothesis.strategies as st
import pytest
from hypothesis import given
-import hypothesis.strategies as st
+
from spacy import util
from spacy.lang.en import English
from spacy.language import Language
from spacy.pipeline._edit_tree_internals.edit_trees import EditTrees
-from spacy.training import Example
from spacy.strings import StringStore
+from spacy.training import Example
from spacy.util import make_tempdir
-
TRAIN_DATA = [
("She likes green eggs", {"lemmas": ["she", "like", "green", "egg"]}),
("Eat blue ham", {"lemmas": ["eat", "blue", "ham"]}),
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index fc960cb01..00771a0f0 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,12 +1,12 @@
-from typing import Callable, Iterable, Dict, Any, Tuple
+from typing import Any, Callable, Dict, Iterable, Tuple
import pytest
from numpy.testing import assert_equal
-from spacy import registry, util, Language
+from spacy import Language, registry, util
from spacy.attrs import ENT_KB_ID
from spacy.compat import pickle
-from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
+from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase, get_candidates
from spacy.lang.en import English
from spacy.ml import load_kb
from spacy.ml.models.entity_linker import build_span_maker
@@ -15,7 +15,7 @@ from spacy.pipeline.legacy import EntityLinker_v1
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.scorer import Scorer
from spacy.tests.util import make_tempdir
-from spacy.tokens import Span, Doc
+from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.util import ensure_path
from spacy.vocab import Vocab
diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py
index 417f930cb..d0ab00391 100644
--- a/spacy/tests/pipeline/test_entity_ruler.py
+++ b/spacy/tests/pipeline/test_entity_ruler.py
@@ -1,16 +1,14 @@
import pytest
+from thinc.api import NumpyOps, get_current_ops
from spacy import registry
-from spacy.tokens import Doc, Span
-from spacy.language import Language
-from spacy.lang.en import English
-from spacy.pipeline import EntityRuler, EntityRecognizer, merge_entities
-from spacy.pipeline import SpanRuler
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.errors import MatchPatternError
+from spacy.lang.en import English
+from spacy.language import Language
+from spacy.pipeline import EntityRecognizer, EntityRuler, SpanRuler, merge_entities
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.tests.util import make_tempdir
-
-from thinc.api import NumpyOps, get_current_ops
+from spacy.tokens import Doc, Span
ENTITY_RULERS = ["entity_ruler", "future_entity_ruler"]
diff --git a/spacy/tests/pipeline/test_functions.py b/spacy/tests/pipeline/test_functions.py
index e4adfe2fe..f4db4ee98 100644
--- a/spacy/tests/pipeline/test_functions.py
+++ b/spacy/tests/pipeline/test_functions.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.pipeline.functions import merge_subtokens
+
from spacy.language import Language
-from spacy.tokens import Span, Doc
+from spacy.pipeline.functions import merge_subtokens
+from spacy.tokens import Doc, Span
from ..doc.test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index c9b514770..6dd4114f1 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,9 +1,10 @@
import pytest
-from spacy.language import Language
-from spacy.lang.en import English
-from spacy.training import Example
-from thinc.api import ConfigValidationError
from pydantic import StrictBool
+from thinc.api import ConfigValidationError
+
+from spacy.lang.en import English
+from spacy.language import Language
+from spacy.training import Example
def test_initialize_arguments():
diff --git a/spacy/tests/pipeline/test_lemmatizer.py b/spacy/tests/pipeline/test_lemmatizer.py
index 0d2d3d6e5..ccc2e0b15 100644
--- a/spacy/tests/pipeline/test_lemmatizer.py
+++ b/spacy/tests/pipeline/test_lemmatizer.py
@@ -1,6 +1,8 @@
-import pytest
import pickle
-from spacy import util, registry
+
+import pytest
+
+from spacy import registry, util
from spacy.lang.en import English
from spacy.lookups import Lookups
diff --git a/spacy/tests/pipeline/test_models.py b/spacy/tests/pipeline/test_models.py
index e3fd28d0f..fef0017a8 100644
--- a/spacy/tests/pipeline/test_models.py
+++ b/spacy/tests/pipeline/test_models.py
@@ -3,7 +3,6 @@ from typing import List
import numpy
import pytest
from numpy.testing import assert_almost_equal
-from spacy.vocab import Vocab
from thinc.api import Model, data_validation, get_current_ops
from thinc.types import Array2d, Ragged
@@ -11,7 +10,7 @@ from spacy.lang.en import English
from spacy.ml import FeatureExtractor, StaticVectors
from spacy.ml._character_embed import CharacterEmbed
from spacy.tokens import Doc
-
+from spacy.vocab import Vocab
OPS = get_current_ops()
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index 74c571ccf..0d895f236 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -1,16 +1,15 @@
import pytest
-from numpy.testing import assert_equal, assert_almost_equal
-
+from numpy.testing import assert_almost_equal, assert_equal
from thinc.api import get_current_ops
from spacy import util
-from spacy.training import Example
+from spacy.attrs import MORPH
from spacy.lang.en import English
from spacy.language import Language
-from spacy.tests.util import make_tempdir
from spacy.morphology import Morphology
-from spacy.attrs import MORPH
+from spacy.tests.util import make_tempdir
from spacy.tokens import Doc
+from spacy.training import Example
def test_label_types():
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index 232b0512e..0f1454b55 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,14 +1,14 @@
import pytest
+from pydantic import StrictInt, StrictStr
+from thinc.api import ConfigValidationError, Linear, Model
import spacy
-from spacy.language import Language
-from spacy.lang.en import English
from spacy.lang.de import German
+from spacy.lang.en import English
+from spacy.language import Language
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.tokens import Doc
-from spacy.util import registry, SimpleFrozenDict, combine_score_weights
-from thinc.api import Model, Linear, ConfigValidationError
-from pydantic import StrictInt, StrictStr
+from spacy.util import SimpleFrozenDict, combine_score_weights, registry
from ..util import make_tempdir
diff --git a/spacy/tests/pipeline/test_sentencizer.py b/spacy/tests/pipeline/test_sentencizer.py
index 5dd0fef43..9b1ddd530 100644
--- a/spacy/tests/pipeline/test_sentencizer.py
+++ b/spacy/tests/pipeline/test_sentencizer.py
@@ -1,8 +1,9 @@
import pytest
+
import spacy
+from spacy.lang.en import English
from spacy.pipeline import Sentencizer
from spacy.tokens import Doc
-from spacy.lang.en import English
def test_sentencizer(en_vocab):
diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py
index 047f59bef..6c7655812 100644
--- a/spacy/tests/pipeline/test_senter.py
+++ b/spacy/tests/pipeline/test_senter.py
@@ -1,12 +1,12 @@
import pytest
from numpy.testing import assert_equal
-from spacy.attrs import SENT_START
from spacy import util
-from spacy.training import Example
+from spacy.attrs import SENT_START
from spacy.lang.en import English
from spacy.language import Language
from spacy.tests.util import make_tempdir
+from spacy.training import Example
def test_label_types():
diff --git a/spacy/tests/pipeline/test_span_finder.py b/spacy/tests/pipeline/test_span_finder.py
index 91b08cabf..1a8789fff 100644
--- a/spacy/tests/pipeline/test_span_finder.py
+++ b/spacy/tests/pipeline/test_span_finder.py
@@ -1,15 +1,13 @@
import pytest
from thinc.api import Config
-from spacy.language import Language
+from spacy import util
from spacy.lang.en import English
+from spacy.language import Language
from spacy.pipeline.span_finder import span_finder_default_config
from spacy.tokens import Doc
from spacy.training import Example
-from spacy import util
-from spacy.util import registry
-from spacy.util import fix_random_seed, make_tempdir
-
+from spacy.util import fix_random_seed, make_tempdir, registry
SPANS_KEY = "pytest"
TRAIN_DATA = [
diff --git a/spacy/tests/pipeline/test_span_ruler.py b/spacy/tests/pipeline/test_span_ruler.py
index 794815359..0a8616f44 100644
--- a/spacy/tests/pipeline/test_span_ruler.py
+++ b/spacy/tests/pipeline/test_span_ruler.py
@@ -1,13 +1,12 @@
import pytest
+from thinc.api import NumpyOps, get_current_ops
import spacy
from spacy import registry
from spacy.errors import MatchPatternError
+from spacy.tests.util import make_tempdir
from spacy.tokens import Span
from spacy.training import Example
-from spacy.tests.util import make_tempdir
-
-from thinc.api import NumpyOps, get_current_ops
@pytest.fixture
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index b7024cf36..9405a78e0 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -1,7 +1,7 @@
-import pytest
import numpy
-from numpy.testing import assert_array_equal, assert_almost_equal
-from thinc.api import get_current_ops, NumpyOps, Ragged
+import pytest
+from numpy.testing import assert_almost_equal, assert_array_equal
+from thinc.api import NumpyOps, Ragged, get_current_ops
from spacy import util
from spacy.lang.en import English
@@ -9,7 +9,7 @@ from spacy.language import Language
from spacy.tokens import SpanGroup
from spacy.tokens._dict_proxies import SpanGroups
from spacy.training import Example
-from spacy.util import fix_random_seed, registry, make_tempdir
+from spacy.util import fix_random_seed, make_tempdir, registry
OPS = get_current_ops()
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 746f32ee3..4b5f1ee99 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,12 +1,12 @@
import pytest
-from numpy.testing import assert_equal, assert_almost_equal
-from spacy.attrs import TAG
+from numpy.testing import assert_almost_equal, assert_equal
+from thinc.api import compounding, get_current_ops
from spacy import util
-from spacy.training import Example
+from spacy.attrs import TAG
from spacy.lang.en import English
from spacy.language import Language
-from thinc.api import compounding, get_current_ops
+from spacy.training import Example
from ..util import make_tempdir
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index d042f3445..9ce5909f1 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -12,12 +12,16 @@ from spacy.cli.evaluate import print_prf_per_type, print_textcats_auc_per_cat
from spacy.lang.en import English
from spacy.language import Language
from spacy.pipeline import TextCategorizer
-from spacy.pipeline.textcat import single_label_bow_config
-from spacy.pipeline.textcat import single_label_cnn_config
-from spacy.pipeline.textcat import single_label_default_config
-from spacy.pipeline.textcat_multilabel import multi_label_bow_config
-from spacy.pipeline.textcat_multilabel import multi_label_cnn_config
-from spacy.pipeline.textcat_multilabel import multi_label_default_config
+from spacy.pipeline.textcat import (
+ single_label_bow_config,
+ single_label_cnn_config,
+ single_label_default_config,
+)
+from spacy.pipeline.textcat_multilabel import (
+ multi_label_bow_config,
+ multi_label_cnn_config,
+ multi_label_default_config,
+)
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.scorer import Scorer
from spacy.tokens import Doc, DocBin
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index e423d9a19..76c7d6f62 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -1,17 +1,21 @@
import pytest
-from spacy.ml.models.tok2vec import build_Tok2Vec_model
-from spacy.ml.models.tok2vec import MultiHashEmbed, MaxoutWindowEncoder
-from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
-from spacy.vocab import Vocab
-from spacy.tokens import Doc
-from spacy.training import Example
+from numpy.testing import assert_array_equal
+from thinc.api import Config, get_current_ops
+
from spacy import util
from spacy.lang.en import English
+from spacy.ml.models.tok2vec import (
+ MaxoutWindowEncoder,
+ MultiHashEmbed,
+ build_Tok2Vec_model,
+)
+from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
+from spacy.tokens import Doc
+from spacy.training import Example
from spacy.util import registry
-from thinc.api import Config, get_current_ops
-from numpy.testing import assert_array_equal
+from spacy.vocab import Vocab
-from ..util import get_batch, make_tempdir, add_vecs_to_vocab
+from ..util import add_vecs_to_vocab, get_batch, make_tempdir
def test_empty_doc():
diff --git a/spacy/tests/serialize/test_resource_warning.py b/spacy/tests/serialize/test_resource_warning.py
index befd05635..ab6e6e9ee 100644
--- a/spacy/tests/serialize/test_resource_warning.py
+++ b/spacy/tests/serialize/test_resource_warning.py
@@ -1,12 +1,14 @@
import warnings
from unittest import TestCase
+
import pytest
import srsly
from numpy import zeros
+
from spacy.kb.kb_in_memory import InMemoryLookupKB, Writer
-from spacy.vectors import Vectors
from spacy.language import Language
from spacy.pipeline import TrainablePipe
+from spacy.vectors import Vectors
from spacy.vocab import Vocab
from ..util import make_tempdir
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 85e6f8b2c..3e158ad8b 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -5,13 +5,20 @@ from thinc.api import Config, ConfigValidationError
import spacy
from spacy.lang.de import German
from spacy.lang.en import English
-from spacy.language import DEFAULT_CONFIG, DEFAULT_CONFIG_PRETRAIN_PATH
-from spacy.language import Language
-from spacy.ml.models import MaxoutWindowEncoder, MultiHashEmbed
-from spacy.ml.models import build_tb_parser_model, build_Tok2Vec_model
+from spacy.language import DEFAULT_CONFIG, DEFAULT_CONFIG_PRETRAIN_PATH, Language
+from spacy.ml.models import (
+ MaxoutWindowEncoder,
+ MultiHashEmbed,
+ build_tb_parser_model,
+ build_Tok2Vec_model,
+)
from spacy.schemas import ConfigSchema, ConfigSchemaPretrain
-from spacy.util import load_config, load_config_from_str
-from spacy.util import load_model_from_config, registry
+from spacy.util import (
+ load_config,
+ load_config_from_str,
+ load_model_from_config,
+ registry,
+)
from ..util import make_tempdir
diff --git a/spacy/tests/serialize/test_serialize_extension_attrs.py b/spacy/tests/serialize/test_serialize_extension_attrs.py
index 9cfa1a552..f3b6cb000 100644
--- a/spacy/tests/serialize/test_serialize_extension_attrs.py
+++ b/spacy/tests/serialize/test_serialize_extension_attrs.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc, Token
from spacy.vocab import Vocab
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index f9d2e226b..99eb8cd86 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -1,16 +1,16 @@
from pathlib import Path
-from typing import Callable, Iterable, Any, Dict
+from typing import Any, Callable, Dict, Iterable
import srsly
-
-from spacy import util, Errors
-from spacy.util import ensure_path, registry, load_model_from_config, SimpleFrozenList
-from spacy.kb.kb_in_memory import InMemoryLookupKB
-from spacy.vocab import Vocab
+from numpy import zeros
from thinc.api import Config
+from spacy import Errors, util
+from spacy.kb.kb_in_memory import InMemoryLookupKB
+from spacy.util import SimpleFrozenList, ensure_path, load_model_from_config, registry
+from spacy.vocab import Vocab
+
from ..util import make_tempdir
-from numpy import zeros
def test_serialize_kb_disk(en_vocab):
diff --git a/spacy/tests/serialize/test_serialize_language.py b/spacy/tests/serialize/test_serialize_language.py
index c03287548..9c36015a9 100644
--- a/spacy/tests/serialize/test_serialize_language.py
+++ b/spacy/tests/serialize/test_serialize_language.py
@@ -1,11 +1,11 @@
-import re
import pickle
+import re
import pytest
-from spacy.language import Language
-from spacy.lang.it import Italian
from spacy.lang.en import English
+from spacy.lang.it import Italian
+from spacy.language import Language
from spacy.tokenizer import Tokenizer
from spacy.training import Example
from spacy.util import load_config_from_str
diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py
index 9fcf18e2d..6bbe743a1 100644
--- a/spacy/tests/serialize/test_serialize_pipeline.py
+++ b/spacy/tests/serialize/test_serialize_pipeline.py
@@ -8,15 +8,21 @@ import spacy
from spacy import Vocab, load, registry
from spacy.lang.en import English
from spacy.language import Language
-from spacy.pipeline import DependencyParser, EntityRecognizer, EntityRuler
-from spacy.pipeline import SentenceRecognizer, Tagger, TextCategorizer
-from spacy.pipeline import TrainablePipe
+from spacy.pipeline import (
+ DependencyParser,
+ EntityRecognizer,
+ EntityRuler,
+ SentenceRecognizer,
+ Tagger,
+ TextCategorizer,
+ TrainablePipe,
+)
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
from spacy.pipeline.senter import DEFAULT_SENTER_MODEL
from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
from spacy.pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
-from spacy.util import ensure_path, load_model
from spacy.tokens import Span
+from spacy.util import ensure_path, load_model
from ..util import make_tempdir
diff --git a/spacy/tests/serialize/test_serialize_tokenizer.py b/spacy/tests/serialize/test_serialize_tokenizer.py
index 9b74d7721..e998a78b4 100644
--- a/spacy/tests/serialize/test_serialize_tokenizer.py
+++ b/spacy/tests/serialize/test_serialize_tokenizer.py
@@ -7,8 +7,13 @@ from spacy.attrs import ENT_IOB, ENT_TYPE
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
from spacy.tokens import Doc
-from spacy.util import compile_infix_regex, compile_prefix_regex
-from spacy.util import compile_suffix_regex, get_lang_class, load_model
+from spacy.util import (
+ compile_infix_regex,
+ compile_prefix_regex,
+ compile_suffix_regex,
+ get_lang_class,
+ load_model,
+)
from ..util import assert_packed_msg_equal, make_tempdir
diff --git a/spacy/tests/test_architectures.py b/spacy/tests/test_architectures.py
index 26eabd4e5..3b5804a69 100644
--- a/spacy/tests/test_architectures.py
+++ b/spacy/tests/test_architectures.py
@@ -1,7 +1,8 @@
import pytest
-from spacy import registry
-from thinc.api import Linear
from catalogue import RegistryError
+from thinc.api import Linear
+
+from spacy import registry
def test_get_architecture():
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 351e6bf11..88d3ffa45 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1,43 +1,51 @@
-import os
import math
-from collections import Counter
-from typing import Tuple, List, Dict, Any
+import os
import time
+from collections import Counter
from pathlib import Path
+from typing import Any, Dict, List, Tuple
-import spacy
import numpy
import pytest
import srsly
from click import NoSuchOption
from packaging.specifiers import SpecifierSet
from thinc.api import Config, ConfigValidationError
-from spacy.tokens import DocBin
+import spacy
from spacy import about
from spacy.cli import info
-from spacy.cli._util import is_subpath_of, load_project_config, walk_directory
-from spacy.cli._util import parse_config_overrides, string_to_list
-from spacy.cli._util import substitute_project_variables
-from spacy.cli._util import validate_project_commands
-from spacy.cli._util import upload_file, download_file
-from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
-from spacy.cli.debug_data import _get_labels_from_spancat
-from spacy.cli.debug_data import _get_distribution, _get_kl_divergence
-from spacy.cli.debug_data import _get_span_characteristics
-from spacy.cli.debug_data import _print_span_characteristics
-from spacy.cli.debug_data import _get_spans_length_freq_dist
+from spacy.cli._util import (
+ download_file,
+ is_subpath_of,
+ load_project_config,
+ parse_config_overrides,
+ string_to_list,
+ substitute_project_variables,
+ upload_file,
+ validate_project_commands,
+ walk_directory,
+)
+from spacy.cli.apply import apply
+from spacy.cli.debug_data import (
+ _compile_gold,
+ _get_distribution,
+ _get_kl_divergence,
+ _get_labels_from_model,
+ _get_labels_from_spancat,
+ _get_span_characteristics,
+ _get_spans_length_freq_dist,
+ _print_span_characteristics,
+)
from spacy.cli.download import get_compatibility, get_version
from spacy.cli.evaluate import render_parses
-from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
+from spacy.cli.find_threshold import find_threshold
+from spacy.cli.init_config import RECOMMENDATIONS, fill_config, init_config
from spacy.cli.init_pipeline import _init_labels
-from spacy.cli.package import get_third_party_dependencies
-from spacy.cli.package import _is_permitted_package_name
+from spacy.cli.package import _is_permitted_package_name, get_third_party_dependencies
from spacy.cli.project.remote_storage import RemoteStorage
from spacy.cli.project.run import _check_requirements
from spacy.cli.validate import get_model_pkgs
-from spacy.cli.apply import apply
-from spacy.cli.find_threshold import find_threshold
from spacy.lang.en import English
from spacy.lang.nl import Dutch
from spacy.language import Language
@@ -45,9 +53,8 @@ from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate
from spacy.tokens import Doc, DocBin
from spacy.tokens.span import Span
from spacy.training import Example, docs_to_json, offsets_to_biluo_tags
-from spacy.training.converters import conll_ner_to_docs, conllu_to_docs
-from spacy.training.converters import iob_to_docs
-from spacy.util import ENV_VARS, get_minor_version, load_model_from_config, load_config
+from spacy.training.converters import conll_ner_to_docs, conllu_to_docs, iob_to_docs
+from spacy.util import ENV_VARS, get_minor_version, load_config, load_model_from_config
from .util import make_tempdir
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 5ff4dfa26..3a426113b 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,11 +1,13 @@
import os
from pathlib import Path
+
import pytest
import srsly
from typer.testing import CliRunner
-from spacy.tokens import DocBin, Doc
from spacy.cli._util import app, get_git_version
+from spacy.tokens import Doc, DocBin
+
from .util import make_tempdir, normalize_whitespace
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index 837a92e02..ce103068a 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -5,7 +5,7 @@ from spacy import displacy
from spacy.displacy.render import DependencyRenderer, EntityRenderer
from spacy.lang.en import English
from spacy.lang.fa import Persian
-from spacy.tokens import Span, Doc
+from spacy.tokens import Doc, Span
@pytest.mark.issue(2361)
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 236856dad..51eec3239 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -1,21 +1,22 @@
import itertools
import logging
from unittest import mock
+
import pytest
+from thinc.api import CupyOps, NumpyOps, get_current_ops
+
+import spacy
+from spacy.lang.de import German
+from spacy.lang.en import English
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
-from spacy.vocab import Vocab
from spacy.training import Example
-from spacy.lang.en import English
-from spacy.lang.de import German
-from spacy.util import registry, ignore_error, raise_error, find_matching_language
-import spacy
-from thinc.api import CupyOps, NumpyOps, get_current_ops
+from spacy.util import find_matching_language, ignore_error, raise_error, registry
+from spacy.vocab import Vocab
from .util import add_vecs_to_vocab, assert_docs_equal
-
try:
import torch
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 618f17334..19163d350 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -1,24 +1,39 @@
-import pytest
-import os
import ctypes
+import os
from pathlib import Path
-from spacy.about import __version__ as spacy_version
-from spacy import util
-from spacy import prefer_gpu, require_gpu, require_cpu
-from spacy.ml._precomputable_affine import PrecomputableAffine
-from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
-from spacy.util import dot_to_object, SimpleFrozenList, import_file
-from spacy.util import to_ternary_int, find_available_port
-from thinc.api import Config, Optimizer, ConfigValidationError
-from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps
+
+import pytest
+from pydantic import ValidationError
+from thinc.api import (
+ Config,
+ ConfigValidationError,
+ CupyOps,
+ MPSOps,
+ NumpyOps,
+ Optimizer,
+ get_current_ops,
+ set_current_ops,
+)
from thinc.compat import has_cupy_gpu, has_torch_mps_gpu
-from spacy.training.batchers import minibatch_by_words
+
+from spacy import prefer_gpu, require_cpu, require_gpu, util
+from spacy.about import __version__ as spacy_version
from spacy.lang.en import English
from spacy.lang.nl import Dutch
from spacy.language import DEFAULT_CONFIG_PATH
+from spacy.ml._precomputable_affine import (
+ PrecomputableAffine,
+ _backprop_precomputable_affine_padding,
+)
from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema
-from pydantic import ValidationError
-
+from spacy.training.batchers import minibatch_by_words
+from spacy.util import (
+ SimpleFrozenList,
+ dot_to_object,
+ find_available_port,
+ import_file,
+ to_ternary_int,
+)
from .util import get_random_doc, make_tempdir
@@ -441,7 +456,7 @@ def test_find_available_port():
port = 5000
assert find_available_port(port, host) == port, "Port 5000 isn't free"
- from wsgiref.simple_server import make_server, demo_app
+ from wsgiref.simple_server import demo_app, make_server
with make_server(host, port, demo_app) as httpd:
with pytest.warns(UserWarning, match="already in use"):
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index d91ed1201..e6692ad92 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -1,16 +1,31 @@
from typing import List
-import pytest
-from thinc.api import fix_random_seed, Adam, set_dropout_rate
-from thinc.api import Ragged, reduce_mean, Logistic, chain, Relu
-from numpy.testing import assert_array_equal, assert_array_almost_equal
+
import numpy
-from spacy.ml.models import build_Tok2Vec_model, MultiHashEmbed, MaxoutWindowEncoder
-from spacy.ml.models import build_bow_text_classifier, build_simple_cnn_text_classifier
-from spacy.ml.models import build_spancat_model
-from spacy.ml.staticvectors import StaticVectors
-from spacy.ml.extract_spans import extract_spans, _get_span_indices
+import pytest
+from numpy.testing import assert_array_almost_equal, assert_array_equal
+from thinc.api import (
+ Adam,
+ Logistic,
+ Ragged,
+ Relu,
+ chain,
+ fix_random_seed,
+ reduce_mean,
+ set_dropout_rate,
+)
+
from spacy.lang.en import English
from spacy.lang.en.examples import sentences as EN_SENTENCES
+from spacy.ml.extract_spans import _get_span_indices, extract_spans
+from spacy.ml.models import (
+ MaxoutWindowEncoder,
+ MultiHashEmbed,
+ build_bow_text_classifier,
+ build_simple_cnn_text_classifier,
+ build_spancat_model,
+ build_Tok2Vec_model,
+)
+from spacy.ml.staticvectors import StaticVectors
def get_textcat_bow_kwargs():
diff --git a/spacy/tests/test_pickles.py b/spacy/tests/test_pickles.py
index 0c56ae0d2..e3acd27a3 100644
--- a/spacy/tests/test_pickles.py
+++ b/spacy/tests/test_pickles.py
@@ -1,11 +1,12 @@
-import pytest
import numpy
+import pytest
import srsly
+
+from spacy.attrs import NORM
from spacy.lang.en import English
from spacy.strings import StringStore
from spacy.tokens import Doc
from spacy.vocab import Vocab
-from spacy.attrs import NORM
@pytest.mark.parametrize("text1,text2", [("hello", "bye")])
diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py
index f95c44149..95daf046c 100644
--- a/spacy/tests/test_scorer.py
+++ b/spacy/tests/test_scorer.py
@@ -1,13 +1,12 @@
-from numpy.testing import assert_almost_equal, assert_array_almost_equal
import pytest
+from numpy.testing import assert_almost_equal, assert_array_almost_equal
from pytest import approx
+
+from spacy.lang.en import English
+from spacy.scorer import PRFScore, ROCAUCScore, Scorer, _roc_auc_score, _roc_curve
+from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.iob_utils import offsets_to_biluo_tags
-from spacy.scorer import Scorer, ROCAUCScore, PRFScore
-from spacy.scorer import _roc_auc_score, _roc_curve
-from spacy.lang.en import English
-from spacy.tokens import Doc, Span
-
test_las_apple = [
[
diff --git a/spacy/tests/tokenizer/test_exceptions.py b/spacy/tests/tokenizer/test_exceptions.py
index 85716377a..1f8f52c79 100644
--- a/spacy/tests/tokenizer/test_exceptions.py
+++ b/spacy/tests/tokenizer/test_exceptions.py
@@ -1,4 +1,5 @@
import sys
+
import pytest
diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index 6af58b344..1ea5f78c9 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -3,15 +3,19 @@ import re
import numpy
import pytest
-from spacy.lang.en import English
from spacy.lang.de import German
+from spacy.lang.en import English
+from spacy.symbols import ORTH
from spacy.tokenizer import Tokenizer
from spacy.tokens import Doc
from spacy.training import Example
-from spacy.util import compile_prefix_regex, compile_suffix_regex, ensure_path
-from spacy.util import compile_infix_regex
+from spacy.util import (
+ compile_infix_regex,
+ compile_prefix_regex,
+ compile_suffix_regex,
+ ensure_path,
+)
from spacy.vocab import Vocab
-from spacy.symbols import ORTH
@pytest.mark.issue(743)
diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py
index 57e970f87..ff8812be1 100644
--- a/spacy/tests/tokenizer/test_urls.py
+++ b/spacy/tests/tokenizer/test_urls.py
@@ -2,7 +2,6 @@ import pytest
from spacy.lang.tokenizer_exceptions import BASE_EXCEPTIONS
-
URLS_BASIC = [
"http://www.nytimes.com/2016/04/20/us/politics/new-york-primary-preview.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=a-lede-package-region®ion=top-news&WT.nav=top-news&_r=0",
"www.red-stars.com",
diff --git a/spacy/tests/training/test_augmenters.py b/spacy/tests/training/test_augmenters.py
index 35860a199..49a83010b 100644
--- a/spacy/tests/training/test_augmenters.py
+++ b/spacy/tests/training/test_augmenters.py
@@ -1,13 +1,17 @@
-import pytest
-from spacy.pipeline._parser_internals.nonproj import contains_cycle
-from spacy.training import Corpus, Example
-from spacy.training.augment import create_orth_variants_augmenter
-from spacy.training.augment import create_lower_casing_augmenter
-from spacy.training.augment import make_whitespace_variant
-from spacy.lang.en import English
-from spacy.tokens import DocBin, Doc, Span
-from contextlib import contextmanager
import random
+from contextlib import contextmanager
+
+import pytest
+
+from spacy.lang.en import English
+from spacy.pipeline._parser_internals.nonproj import contains_cycle
+from spacy.tokens import Doc, DocBin, Span
+from spacy.training import Corpus, Example
+from spacy.training.augment import (
+ create_lower_casing_augmenter,
+ create_orth_variants_augmenter,
+ make_whitespace_variant,
+)
from ..util import make_tempdir
diff --git a/spacy/tests/training/test_corpus.py b/spacy/tests/training/test_corpus.py
index b4f9cc13a..e7cae9893 100644
--- a/spacy/tests/training/test_corpus.py
+++ b/spacy/tests/training/test_corpus.py
@@ -1,8 +1,9 @@
-from typing import IO, Generator, Iterable, List, TextIO, Tuple
+import tempfile
from contextlib import contextmanager
from pathlib import Path
+from typing import IO, Generator, Iterable, List, TextIO, Tuple
+
import pytest
-import tempfile
from spacy.lang.en import English
from spacy.training import Example, PlainTextCorpus
diff --git a/spacy/tests/training/test_logger.py b/spacy/tests/training/test_logger.py
index 0dfd0cbf4..48750026b 100644
--- a/spacy/tests/training/test_logger.py
+++ b/spacy/tests/training/test_logger.py
@@ -1,6 +1,6 @@
import pytest
-import spacy

+import spacy
from spacy.training import loggers
diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py
index 6b15603b3..88f819984 100644
--- a/spacy/tests/training/test_new_example.py
+++ b/spacy/tests/training/test_new_example.py
@@ -1,8 +1,9 @@
import pytest
-from spacy.training.example import Example
+
from spacy.tokens import Doc
-from spacy.vocab import Vocab
+from spacy.training.example import Example
from spacy.util import to_ternary_int
+from spacy.vocab import Vocab
def test_Example_init_requires_doc_objects():
diff --git a/spacy/tests/training/test_pretraining.py b/spacy/tests/training/test_pretraining.py
index 6cfdeed20..5e5f94622 100644
--- a/spacy/tests/training/test_pretraining.py
+++ b/spacy/tests/training/test_pretraining.py
@@ -1,4 +1,5 @@
from pathlib import Path
+
import numpy as np
import pytest
import srsly
@@ -6,14 +7,15 @@ from thinc.api import Config, get_current_ops
from spacy import util
from spacy.lang.en import English
+from spacy.language import DEFAULT_CONFIG_PATH, DEFAULT_CONFIG_PRETRAIN_PATH
+from spacy.ml.models.multi_task import create_pretrain_vectors
+from spacy.tokens import Doc, DocBin
from spacy.training.initialize import init_nlp
from spacy.training.loop import train
from spacy.training.pretrain import pretrain
-from spacy.tokens import Doc, DocBin
-from spacy.language import DEFAULT_CONFIG_PRETRAIN_PATH, DEFAULT_CONFIG_PATH
-from spacy.ml.models.multi_task import create_pretrain_vectors
from spacy.vectors import Vectors
from spacy.vocab import Vocab
+
from ..util import make_tempdir
pretrain_string_listener = """
diff --git a/spacy/tests/training/test_readers.py b/spacy/tests/training/test_readers.py
index 8c5c81625..22cf75272 100644
--- a/spacy/tests/training/test_readers.py
+++ b/spacy/tests/training/test_readers.py
@@ -1,10 +1,12 @@
-from typing import Dict, Iterable, Callable
+from typing import Callable, Dict, Iterable
+
import pytest
from thinc.api import Config, fix_random_seed
+
from spacy import Language
-from spacy.util import load_model_from_config, registry, resolve_dot_names
from spacy.schemas import ConfigSchemaTraining
from spacy.training import Example
+from spacy.util import load_model_from_config, registry, resolve_dot_names
def test_readers():
diff --git a/spacy/tests/training/test_rehearse.py b/spacy/tests/training/test_rehearse.py
index 5ac7fc217..7efe57a36 100644
--- a/spacy/tests/training/test_rehearse.py
+++ b/spacy/tests/training/test_rehearse.py
@@ -1,9 +1,9 @@
-import pytest
-import spacy
-
from typing import List
-from spacy.training import Example
+import pytest
+
+import spacy
+from spacy.training import Example
TRAIN_DATA = [
(
diff --git a/spacy/tests/training/test_training.py b/spacy/tests/training/test_training.py
index 7933ea31f..a492a8be3 100644
--- a/spacy/tests/training/test_training.py
+++ b/spacy/tests/training/test_training.py
@@ -2,20 +2,32 @@ import random
import numpy
import pytest
-import spacy
import srsly
+from thinc.api import Adam, compounding
+
+import spacy
from spacy.lang.en import English
from spacy.tokens import Doc, DocBin
-from spacy.training import Alignment, Corpus, Example, biluo_tags_to_offsets
-from spacy.training import biluo_tags_to_spans, docs_to_json, iob_to_biluo
-from spacy.training import offsets_to_biluo_tags
-from spacy.training.alignment_array import AlignmentArray
+from spacy.training import (
+ Alignment,
+ Corpus,
+ Example,
+ biluo_tags_to_offsets,
+ biluo_tags_to_spans,
+ docs_to_json,
+ iob_to_biluo,
+ offsets_to_biluo_tags,
+)
from spacy.training.align import get_alignments
+from spacy.training.alignment_array import AlignmentArray
from spacy.training.converters import json_to_docs
from spacy.training.loop import train_while_improving
-from spacy.util import get_words_and_spaces, load_model_from_path, minibatch
-from spacy.util import load_config_from_str
-from thinc.api import compounding, Adam
+from spacy.util import (
+ get_words_and_spaces,
+ load_config_from_str,
+ load_model_from_path,
+ minibatch,
+)
from ..util import make_tempdir
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index c2647558d..a5548898c 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -1,14 +1,16 @@
-import numpy
-import tempfile
import contextlib
import re
+import tempfile
+
+import numpy
import srsly
-from spacy.tokens import Doc
-from spacy.vocab import Vocab
-from spacy.util import make_tempdir # noqa: F401
-from spacy.training import split_bilu_label
from thinc.api import get_current_ops
+from spacy.tokens import Doc
+from spacy.training import split_bilu_label
+from spacy.util import make_tempdir # noqa: F401
+from spacy.vocab import Vocab
+
@contextlib.contextmanager
def make_tempfile(mode="r"):
diff --git a/spacy/tests/vocab_vectors/test_lexeme.py b/spacy/tests/vocab_vectors/test_lexeme.py
index d91f41db3..156e3391a 100644
--- a/spacy/tests/vocab_vectors/test_lexeme.py
+++ b/spacy/tests/vocab_vectors/test_lexeme.py
@@ -1,5 +1,6 @@
import numpy
import pytest
+
from spacy.attrs import IS_ALPHA, IS_DIGIT
from spacy.lookups import Lookups
from spacy.tokens import Doc
diff --git a/spacy/tests/vocab_vectors/test_lookups.py b/spacy/tests/vocab_vectors/test_lookups.py
index 94e31a072..addd3fe4f 100644
--- a/spacy/tests/vocab_vectors/test_lookups.py
+++ b/spacy/tests/vocab_vectors/test_lookups.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lookups import Lookups, Table
from spacy.strings import get_string_id
from spacy.vocab import Vocab
diff --git a/spacy/tests/vocab_vectors/test_similarity.py b/spacy/tests/vocab_vectors/test_similarity.py
index 1efcdd81e..5a28f5414 100644
--- a/spacy/tests/vocab_vectors/test_similarity.py
+++ b/spacy/tests/vocab_vectors/test_similarity.py
@@ -1,9 +1,10 @@
-import pytest
import numpy
+import pytest
+
from spacy.tokens import Doc
from spacy.vocab import Vocab
-from ..util import get_cosine, add_vecs_to_vocab
+from ..util import add_vecs_to_vocab, get_cosine
@pytest.fixture
diff --git a/spacy/tests/vocab_vectors/test_stringstore.py b/spacy/tests/vocab_vectors/test_stringstore.py
index a0f8016af..61039fffd 100644
--- a/spacy/tests/vocab_vectors/test_stringstore.py
+++ b/spacy/tests/vocab_vectors/test_stringstore.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.strings import StringStore
diff --git a/spacy/tests/vocab_vectors/test_vocab_api.py b/spacy/tests/vocab_vectors/test_vocab_api.py
index b9c386eb8..e373b9d0b 100644
--- a/spacy/tests/vocab_vectors/test_vocab_api.py
+++ b/spacy/tests/vocab_vectors/test_vocab_api.py
@@ -1,6 +1,7 @@
import os
import pytest
+
from spacy.attrs import IS_ALPHA, LEMMA, ORTH
from spacy.lang.en import English
from spacy.parts_of_speech import NOUN, VERB
diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index e6a072053..f7585b45a 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -1,13 +1,13 @@
+from cymem.cymem cimport Pool
from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
-from cymem.cymem cimport Pool
-from .typedefs cimport hash_t
-from .structs cimport LexemeC, SpanC, TokenC
-from .strings cimport StringStore
-from .tokens.doc cimport Doc
-from .vocab cimport Vocab, LexemesOrTokens, _Cached
from .matcher.phrasematcher cimport PhraseMatcher
+from .strings cimport StringStore
+from .structs cimport LexemeC, SpanC, TokenC
+from .tokens.doc cimport Doc
+from .typedefs cimport hash_t
+from .vocab cimport LexemesOrTokens, Vocab, _Cached
cdef class Tokenizer:
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index a4a68ae8e..3861b1cee 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -1,29 +1,27 @@
# cython: embedsignature=True, profile=True, binding=True
+cimport cython
+from cymem.cymem cimport Pool
from cython.operator cimport dereference as deref
from cython.operator cimport preincrement as preinc
from libc.string cimport memcpy, memset
from libcpp.set cimport set as stdset
-from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
-cimport cython
import re
import warnings
-from .tokens.doc cimport Doc
-from .strings cimport hash_string
from .lexeme cimport EMPTY_LEXEME
+from .strings cimport hash_string
+from .tokens.doc cimport Doc
-from .attrs import intify_attrs
-from .symbols import ORTH, NORM
-from .errors import Errors, Warnings
from . import util
-from .util import registry, get_words_and_spaces
from .attrs import intify_attrs
-from .symbols import ORTH
+from .errors import Errors, Warnings
from .scorer import Scorer
-from .training import validate_examples
+from .symbols import NORM, ORTH
from .tokens import Span
+from .training import validate_examples
+from .util import get_words_and_spaces, registry
cdef class Tokenizer:
diff --git a/spacy/tokens/__init__.py b/spacy/tokens/__init__.py
index 64090925d..f4b2bf022 100644
--- a/spacy/tokens/__init__.py
+++ b/spacy/tokens/__init__.py
@@ -1,8 +1,8 @@
+from ._serialize import DocBin
from .doc import Doc
-from .token import Token
+from .morphanalysis import MorphAnalysis
from .span import Span
from .span_group import SpanGroup
-from ._serialize import DocBin
-from .morphanalysis import MorphAnalysis
+from .token import Token
__all__ = ["Doc", "Token", "Span", "SpanGroup", "DocBin", "MorphAnalysis"]
diff --git a/spacy/tokens/_dict_proxies.py b/spacy/tokens/_dict_proxies.py
index 6edcce13d..b2b496307 100644
--- a/spacy/tokens/_dict_proxies.py
+++ b/spacy/tokens/_dict_proxies.py
@@ -1,12 +1,12 @@
-from typing import Dict, Iterable, List, Tuple, Union, Optional, TYPE_CHECKING
import warnings
import weakref
from collections import UserDict
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
+
import srsly
-from .span_group import SpanGroup
from ..errors import Errors, Warnings
-
+from .span_group import SpanGroup
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/tokens/_retokenize.pyi b/spacy/tokens/_retokenize.pyi
index 8834d38c0..097fbd1a9 100644
--- a/spacy/tokens/_retokenize.pyi
+++ b/spacy/tokens/_retokenize.pyi
@@ -1,8 +1,9 @@
-from typing import Dict, Any, Union, List, Tuple
+from typing import Any, Dict, List, Tuple, Union
+
+from .. import Vocab
from .doc import Doc
from .span import Span
from .token import Token
-from .. import Vocab
class Retokenizer:
def __init__(self, doc: Doc) -> None: ...
diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index 43e6d4aa7..8ed707ab9 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -1,24 +1,24 @@
# cython: infer_types=True, bounds_check=False, profile=True
-from libc.string cimport memcpy, memset
-from libc.stdlib cimport malloc, free
from cymem.cymem cimport Pool
+from libc.stdlib cimport free, malloc
+from libc.string cimport memcpy, memset
-from thinc.api import get_array_module
import numpy
+from thinc.api import get_array_module
-from .doc cimport Doc, set_children_from_heads, token_by_start, token_by_end
+from ..attrs cimport MORPH, NORM
+from ..lexeme cimport EMPTY_LEXEME, Lexeme
+from ..structs cimport LexemeC, TokenC
+from ..vocab cimport Vocab
+from .doc cimport Doc, set_children_from_heads, token_by_end, token_by_start
from .span cimport Span
from .token cimport Token
-from ..lexeme cimport Lexeme, EMPTY_LEXEME
-from ..structs cimport LexemeC, TokenC
-from ..attrs cimport MORPH, NORM
-from ..vocab cimport Vocab
-from .underscore import is_writable_attr
from ..attrs import intify_attrs
-from ..util import SimpleFrozenDict
from ..errors import Errors
from ..strings import get_string_id
+from ..util import SimpleFrozenDict
+from .underscore import is_writable_attr
cdef class Retokenizer:
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 73c857d1f..873d85835 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -1,22 +1,20 @@
-from typing import List, Dict, Set, Iterable, Iterator, Union, Optional
-from pathlib import Path
-import numpy
-from numpy import ndarray
import zlib
+from pathlib import Path
+from typing import Dict, Iterable, Iterator, List, Optional, Set, Union
+
+import numpy
import srsly
+from numpy import ndarray
from thinc.api import NumpyOps
-from .doc import Doc
-from ..vocab import Vocab
+from ..attrs import IDS, ORTH, SPACY, intify_attr
from ..compat import copy_reg
-from ..attrs import SPACY, ORTH, intify_attr, IDS
from ..errors import Errors
-from ..util import ensure_path, SimpleFrozenList
+from ..util import SimpleFrozenList, ensure_path
+from ..vocab import Vocab
from ._dict_proxies import SpanGroups
-
-# fmt: off
-ALL_ATTRS = ("ORTH", "NORM", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "ENT_ID", "LEMMA", "MORPH", "POS", "SENT_START")
-# fmt: on
+from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS
+from .doc import Doc
class DocBin:
diff --git a/spacy/tokens/doc.pxd b/spacy/tokens/doc.pxd
index 57d087958..d7f092c94 100644
--- a/spacy/tokens/doc.pxd
+++ b/spacy/tokens/doc.pxd
@@ -1,10 +1,10 @@
-from cymem.cymem cimport Pool
cimport numpy as np
+from cymem.cymem cimport Pool
-from ..vocab cimport Vocab
-from ..structs cimport TokenC, LexemeC, SpanC
-from ..typedefs cimport attr_t
from ..attrs cimport attr_id_t
+from ..structs cimport LexemeC, SpanC, TokenC
+from ..typedefs cimport attr_t
+from ..vocab cimport Vocab
cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index 9d45960ab..00c7a9d07 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -1,16 +1,31 @@
-from typing import Callable, Protocol, Iterable, Iterator, Optional
-from typing import Union, Tuple, List, Dict, Any, overload
+from pathlib import Path
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Protocol,
+ Tuple,
+ Union,
+ overload,
+)
+
+import numpy as np
from cymem.cymem import Pool
from thinc.types import Floats1d, Floats2d, Ints2d
-from .span import Span
-from .token import Token
-from ._dict_proxies import SpanGroups
-from ._retokenize import Retokenizer
+
from ..lexeme import Lexeme
from ..vocab import Vocab
+from ._dict_proxies import SpanGroups
+from ._retokenize import Retokenizer
+from .span import Span
+from .token import Token
from .underscore import Underscore
-from pathlib import Path
-import numpy as np
+
+DOCBIN_ALL_ATTRS: Tuple[str, ...]
class DocMethod(Protocol):
def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ... # type: ignore[misc]
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 6c196ad78..206253949 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -3,45 +3,67 @@ from typing import Set
cimport cython
cimport numpy as np
-from libc.string cimport memcpy
from libc.math cimport sqrt
from libc.stdint cimport int32_t, uint64_t
+from libc.string cimport memcpy
import copy
+import itertools
+import warnings
from collections import Counter, defaultdict
from enum import Enum
-import itertools
+
import numpy
import srsly
from thinc.api import get_array_module, get_current_ops
from thinc.util import copy_array
-import warnings
from .span cimport Span
from .token cimport MISSING_DEP
-from ._dict_proxies import SpanGroups
-from .token cimport Token
-from ..lexeme cimport Lexeme, EMPTY_LEXEME
-from ..typedefs cimport attr_t, flags_t
-from ..attrs cimport attr_id_t
-from ..attrs cimport LENGTH, POS, LEMMA, TAG, MORPH, DEP, HEAD, SPACY, ENT_IOB
-from ..attrs cimport ENT_TYPE, ENT_ID, ENT_KB_ID, SENT_START, IDX, NORM
-from ..attrs import intify_attr, IDS
+from ._dict_proxies import SpanGroups
+
+from ..attrs cimport (
+ DEP,
+ ENT_ID,
+ ENT_IOB,
+ ENT_KB_ID,
+ ENT_TYPE,
+ HEAD,
+ IDX,
+ LEMMA,
+ LENGTH,
+ MORPH,
+ NORM,
+ POS,
+ SENT_START,
+ SPACY,
+ TAG,
+ attr_id_t,
+)
+from ..lexeme cimport EMPTY_LEXEME, Lexeme
+from ..typedefs cimport attr_t, flags_t
+from .token cimport Token
+
+from .. import parts_of_speech, schemas, util
+from ..attrs import IDS, intify_attr
from ..compat import copy_reg, pickle
from ..errors import Errors, Warnings
from ..morphology import Morphology
-from .. import util
-from .. import parts_of_speech
-from .. import schemas
-from .underscore import Underscore, get_ext_args
-from ._retokenize import Retokenizer
-from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS
from ..util import get_words_and_spaces
+from ._retokenize import Retokenizer
+from .underscore import Underscore, get_ext_args
DEF PADDING = 5
+# We store the docbin attrs here rather than in _serialize to avoid
+# import cycles.
+
+# fmt: off
+DOCBIN_ALL_ATTRS = ("ORTH", "NORM", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "ENT_ID", "LEMMA", "MORPH", "POS", "SENT_START")
+# fmt: on
+
cdef int bounds_check(int i, int length, int padding) except -1:
if (i + padding) < 0:
raise IndexError(Errors.E026.format(i=i, length=length))
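Review note: the `_serialize.py` and `doc.pyx`/`doc.pyi` hunks above complete a small refactor. `DOCBIN_ALL_ATTRS` now lives in `doc.pyx` (declared as `Tuple[str, ...]` in `doc.pyi`), and `_serialize.py` re-imports it under its old `ALL_ATTRS` name. As the new comment says, the motivation is an import cycle: `_serialize` imports `Doc` from `doc`, so `doc` can no longer import `ALL_ATTRS` from `_serialize`. A minimal sketch of the cycle-breaking pattern, using hypothetical modules `low.py` and `high.py`:

# low.py -- the module lower in the import graph owns the shared constant.
SHARED_ATTRS = ("ORTH", "NORM", "LEMMA")

# high.py -- the higher-level module only imports downward, never upward.
from low import SHARED_ATTRS as ALL_ATTRS  # alias keeps old call sites working

def describe():
    return ", ".join(ALL_ATTRS)

With every import edge pointing one way (high -> low), Python can initialize both modules without recursion, and existing `from high import ALL_ATTRS` call sites keep working.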
diff --git a/spacy/tokens/graph.pxd b/spacy/tokens/graph.pxd
index 6f2f80656..083ef6522 100644
--- a/spacy/tokens/graph.pxd
+++ b/spacy/tokens/graph.pxd
@@ -1,7 +1,8 @@
-from libcpp.vector cimport vector
from cymem.cymem cimport Pool
+from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
-from ..structs cimport GraphC, EdgeC
+
+from ..structs cimport EdgeC, GraphC
cdef class Graph:
diff --git a/spacy/tokens/graph.pyx b/spacy/tokens/graph.pyx
index adc4d23c8..47f0a20d4 100644
--- a/spacy/tokens/graph.pyx
+++ b/spacy/tokens/graph.pyx
@@ -1,19 +1,26 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
-from typing import List, Tuple, Generator
+from typing import Generator, List, Tuple
+
+cimport cython
+from cython.operator cimport dereference
from libc.stdint cimport int32_t, int64_t
from libcpp.pair cimport pair
from libcpp.unordered_map cimport unordered_map
from libcpp.unordered_set cimport unordered_set
-from cython.operator cimport dereference
-cimport cython
+
import weakref
-from preshed.maps cimport map_get_unless_missing
+
from murmurhash.mrmr cimport hash64
+from preshed.maps cimport map_get_unless_missing
from .. import Errors
+
from ..typedefs cimport hash_t
+
from ..strings import get_string_id
+
from ..structs cimport EdgeC, GraphC
+
from .token import Token
diff --git a/spacy/tokens/morphanalysis.pxd b/spacy/tokens/morphanalysis.pxd
index 9510875c9..728f0aaf7 100644
--- a/spacy/tokens/morphanalysis.pxd
+++ b/spacy/tokens/morphanalysis.pxd
@@ -1,6 +1,6 @@
-from ..vocab cimport Vocab
-from ..typedefs cimport hash_t
from ..structs cimport MorphAnalysisC
+from ..typedefs cimport hash_t
+from ..vocab cimport Vocab
cdef class MorphAnalysis:
diff --git a/spacy/tokens/morphanalysis.pyi b/spacy/tokens/morphanalysis.pyi
index a5376e80d..b35ff36aa 100644
--- a/spacy/tokens/morphanalysis.pyi
+++ b/spacy/tokens/morphanalysis.pyi
@@ -1,4 +1,5 @@
from typing import Any, Dict, Iterator, List, Optional, Union
+
from ..vocab import Vocab
class MorphAnalysis:
diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx
index baa3800a1..0992a0b66 100644
--- a/spacy/tokens/morphanalysis.pyx
+++ b/spacy/tokens/morphanalysis.pyx
@@ -1,11 +1,12 @@
-from libc.string cimport memset
cimport numpy as np
+from libc.string cimport memset
from ..errors import Errors
from ..morphology import Morphology
+
+from ..morphology cimport check_feature, get_by_field, list_features
+from ..typedefs cimport attr_t, hash_t
from ..vocab cimport Vocab
-from ..typedefs cimport hash_t, attr_t
-from ..morphology cimport list_features, check_feature, get_by_field
cdef class MorphAnalysis:
diff --git a/spacy/tokens/span.pxd b/spacy/tokens/span.pxd
index 78bee0a8c..d77bbea70 100644
--- a/spacy/tokens/span.pxd
+++ b/spacy/tokens/span.pxd
@@ -1,8 +1,8 @@
cimport numpy as np
-from .doc cimport Doc
-from ..typedefs cimport attr_t
from ..structs cimport SpanC
+from ..typedefs cimport attr_t
+from .doc cimport Doc
cdef class Span:
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 29b8ce703..73192b760 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -1,22 +1,23 @@
cimport numpy as np
from libc.math cimport sqrt
+import copy
+import warnings
+
import numpy
from thinc.api import get_array_module
-import warnings
-import copy
-from .doc cimport token_by_start, token_by_end, get_token_attr, _get_lca_matrix
-from ..structs cimport TokenC, LexemeC
-from ..typedefs cimport flags_t, attr_t, hash_t
-from ..attrs cimport attr_id_t
-from ..parts_of_speech cimport univ_pos_t
from ..attrs cimport *
+from ..attrs cimport attr_id_t
from ..lexeme cimport Lexeme
+from ..parts_of_speech cimport univ_pos_t
+from ..structs cimport LexemeC, TokenC
from ..symbols cimport dep
+from ..typedefs cimport attr_t, flags_t, hash_t
+from .doc cimport _get_lca_matrix, get_token_attr, token_by_end, token_by_start
-from ..util import normalize_slice
from ..errors import Errors, Warnings
+from ..util import normalize_slice
from .underscore import Underscore, get_ext_args
diff --git a/spacy/tokens/span_group.pxd b/spacy/tokens/span_group.pxd
index 5074aa275..7f4145682 100644
--- a/spacy/tokens/span_group.pxd
+++ b/spacy/tokens/span_group.pxd
@@ -1,6 +1,8 @@
from libcpp.vector cimport vector
+
from ..structs cimport SpanC
+
cdef class SpanGroup:
cdef public object _doc_ref
cdef public str name
diff --git a/spacy/tokens/span_group.pyx b/spacy/tokens/span_group.pyx
index c748fa256..48ad4a516 100644
--- a/spacy/tokens/span_group.pyx
+++ b/spacy/tokens/span_group.pyx
@@ -1,10 +1,12 @@
-from typing import Iterable, Tuple, Union, Optional, TYPE_CHECKING
-import weakref
import struct
+import weakref
from copy import deepcopy
+from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Union
+
import srsly
from spacy.errors import Errors
+
from .span cimport Span
diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd
index 58b727764..fc02ff624 100644
--- a/spacy/tokens/token.pxd
+++ b/spacy/tokens/token.pxd
@@ -1,14 +1,16 @@
from numpy cimport ndarray
-from ..vocab cimport Vocab
-from ..structs cimport TokenC
+
from ..attrs cimport *
-from ..typedefs cimport attr_t, flags_t
-from ..parts_of_speech cimport univ_pos_t
-from .doc cimport Doc
from ..lexeme cimport Lexeme
+from ..parts_of_speech cimport univ_pos_t
+from ..structs cimport TokenC
+from ..typedefs cimport attr_t, flags_t
+from ..vocab cimport Vocab
+from .doc cimport Doc
from ..errors import Errors
+
cdef int MISSING_DEP = 0
cdef class Token:
diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi
index bd585d034..e7863fd16 100644
--- a/spacy/tokens/token.pyi
+++ b/spacy/tokens/token.pyi
@@ -1,18 +1,12 @@
-from typing import (
- Callable,
- Protocol,
- Iterator,
- Optional,
- Union,
- Tuple,
- Any,
-)
+from typing import Any, Callable, Iterator, Optional, Protocol, Tuple, Union
+
from thinc.types import Floats1d, FloatsXd
-from .doc import Doc
-from .span import Span
-from .morphanalysis import MorphAnalysis
+
from ..lexeme import Lexeme
from ..vocab import Vocab
+from .doc import Doc
+from .morphanalysis import MorphAnalysis
+from .span import Span
from .underscore import Underscore
class TokenMethod(Protocol):
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 7fff6b162..8c384f417 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -1,26 +1,43 @@
# cython: infer_types=True
# Compiler crashes on memory view coercion without this. Should report bug.
-from cython.view cimport array as cvarray
cimport numpy as np
+from cython.view cimport array as cvarray
+
np.import_array()
+import warnings
+
import numpy
from thinc.api import get_array_module
-import warnings
-from ..typedefs cimport hash_t
+from ..attrs cimport (
+ IS_ALPHA,
+ IS_ASCII,
+ IS_BRACKET,
+ IS_CURRENCY,
+ IS_DIGIT,
+ IS_LEFT_PUNCT,
+ IS_LOWER,
+ IS_PUNCT,
+ IS_QUOTE,
+ IS_RIGHT_PUNCT,
+ IS_SPACE,
+ IS_STOP,
+ IS_TITLE,
+ IS_UPPER,
+ LIKE_EMAIL,
+ LIKE_NUM,
+ LIKE_URL,
+)
from ..lexeme cimport Lexeme
-from ..attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
-from ..attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT
-from ..attrs cimport IS_TITLE, IS_UPPER, IS_CURRENCY, IS_STOP
-from ..attrs cimport LIKE_URL, LIKE_NUM, LIKE_EMAIL
from ..symbols cimport conj
-from .morphanalysis cimport MorphAnalysis
+from ..typedefs cimport hash_t
from .doc cimport set_children_from_heads
+from .morphanalysis cimport MorphAnalysis
from .. import parts_of_speech
-from ..errors import Errors, Warnings
from ..attrs import IOB_STRINGS
+from ..errors import Errors, Warnings
from .underscore import Underscore, get_ext_args
diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py
index e9a4e1862..0aa0c1e6d 100644
--- a/spacy/tokens/underscore.py
+++ b/spacy/tokens/underscore.py
@@ -1,6 +1,7 @@
-from typing import Dict, Any, List, Optional, Tuple, Union, TYPE_CHECKING
-import functools
import copy
+import functools
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+
from ..errors import Errors
if TYPE_CHECKING:
diff --git a/spacy/training/__init__.py b/spacy/training/__init__.py
index a6f873f05..b8c0792f0 100644
--- a/spacy/training/__init__.py
+++ b/spacy/training/__init__.py
@@ -1,12 +1,18 @@
-from .corpus import Corpus, JsonlCorpus, PlainTextCorpus # noqa: F401
-from .example import Example, validate_examples, validate_get_examples # noqa: F401
from .alignment import Alignment # noqa: F401
from .augment import dont_augment, orth_variants_augmenter # noqa: F401
-from .iob_utils import iob_to_biluo, biluo_to_iob # noqa: F401
-from .iob_utils import offsets_to_biluo_tags, biluo_tags_to_offsets # noqa: F401
-from .iob_utils import biluo_tags_to_spans, tags_to_entities # noqa: F401
-from .iob_utils import split_bilu_label, remove_bilu_prefix # noqa: F401
-from .gold_io import docs_to_json, read_json_file # noqa: F401
from .batchers import minibatch_by_padded_size, minibatch_by_words # noqa: F401
-from .loggers import console_logger # noqa: F401
from .callbacks import create_copy_from_base_model # noqa: F401
+from .corpus import Corpus, JsonlCorpus, PlainTextCorpus # noqa: F401
+from .example import Example, validate_examples, validate_get_examples # noqa: F401
+from .gold_io import docs_to_json, read_json_file # noqa: F401
+from .iob_utils import ( # noqa: F401
+ biluo_tags_to_offsets,
+ biluo_tags_to_spans,
+ biluo_to_iob,
+ iob_to_biluo,
+ offsets_to_biluo_tags,
+ remove_bilu_prefix,
+ split_bilu_label,
+ tags_to_entities,
+)
+from .loggers import console_logger # noqa: F401
diff --git a/spacy/training/align.pyx b/spacy/training/align.pyx
index 0ef1fd35d..8bd43b048 100644
--- a/spacy/training/align.pyx
+++ b/spacy/training/align.pyx
@@ -1,6 +1,6 @@
-from typing import List, Tuple
-from itertools import chain
import re
+from itertools import chain
+from typing import List, Tuple
from ..errors import Errors
diff --git a/spacy/training/alignment.py b/spacy/training/alignment.py
index 6d24714bf..3f615d10b 100644
--- a/spacy/training/alignment.py
+++ b/spacy/training/alignment.py
@@ -1,5 +1,5 @@
-from typing import List
from dataclasses import dataclass
+from typing import List
from .align import get_alignments
from .alignment_array import AlignmentArray
diff --git a/spacy/training/alignment_array.pxd b/spacy/training/alignment_array.pxd
index 056f5bef3..bb28f3ac6 100644
--- a/spacy/training/alignment_array.pxd
+++ b/spacy/training/alignment_array.pxd
@@ -1,5 +1,6 @@
-from libcpp.vector cimport vector
cimport numpy as np
+from libcpp.vector cimport vector
+
cdef class AlignmentArray:
cdef np.ndarray _data
diff --git a/spacy/training/alignment_array.pyx b/spacy/training/alignment_array.pyx
index 01e9d9bf8..b0be1512b 100644
--- a/spacy/training/alignment_array.pyx
+++ b/spacy/training/alignment_array.pyx
@@ -1,6 +1,9 @@
from typing import List
-from ..errors import Errors
+
import numpy
+
+from ..errors import Errors
+
from libc.stdint cimport int32_t
diff --git a/spacy/training/augment.py b/spacy/training/augment.py
index 2fe8c24fb..1ebd3313c 100644
--- a/spacy/training/augment.py
+++ b/spacy/training/augment.py
@@ -1,12 +1,11 @@
-from typing import Callable, Iterator, Dict, List, Tuple, TYPE_CHECKING
-from typing import Optional
-import random
import itertools
+import random
from functools import partial
+from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Optional, Tuple
from ..util import registry
from .example import Example
-from .iob_utils import split_bilu_label, _doc_to_biluo_tags_with_partial
+from .iob_utils import _doc_to_biluo_tags_with_partial, split_bilu_label
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index f0b6c3123..050c3351b 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -1,10 +1,18 @@
-from typing import Union, Iterable, Sequence, TypeVar, List, Callable, Iterator
-from typing import Optional, Any
-from functools import partial
import itertools
+from functools import partial
+from typing import (
+ Any,
+ Callable,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Sequence,
+ TypeVar,
+ Union,
+)
-from ..util import registry, minibatch
-
+from ..util import minibatch, registry
Sizing = Union[Sequence[int], int]
ItemT = TypeVar("ItemT")
diff --git a/spacy/training/callbacks.py b/spacy/training/callbacks.py
index 7e2494f5b..21c3d56a1 100644
--- a/spacy/training/callbacks.py
+++ b/spacy/training/callbacks.py
@@ -1,14 +1,17 @@
-from typing import Callable, Optional
+from typing import TYPE_CHECKING, Callable, Optional
+
from ..errors import Errors
-from ..language import Language
-from ..util import load_model, registry, logger
+from ..util import load_model, logger, registry
+
+if TYPE_CHECKING:
+ from ..language import Language
@registry.callbacks("spacy.copy_from_base_model.v1")
def create_copy_from_base_model(
tokenizer: Optional[str] = None,
vocab: Optional[str] = None,
-) -> Callable[[Language], Language]:
+) -> Callable[["Language"], "Language"]:
def copy_from_base_model(nlp):
if tokenizer:
logger.info("Copying tokenizer from: %s", tokenizer)
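Review note: the `callbacks.py` hunk above replaces a runtime `from ..language import Language` with a `TYPE_CHECKING`-guarded import and string ("forward reference") annotations. `typing.TYPE_CHECKING` is `False` at runtime, so the circular import never executes, while static type checkers (which treat it as `True`) still resolve `"Language"`. A minimal, self-contained sketch of the same pattern (the `language` module name is hypothetical):

from typing import TYPE_CHECKING, Callable

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime, so this is
    # safe even if `language` imports this module back.
    from language import Language

def create_noop_callback() -> Callable[["Language"], "Language"]:
    # String annotations defer evaluation until a type checker needs them.
    def noop(nlp: "Language") -> "Language":
        return nlp
    return noop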
diff --git a/spacy/training/converters/__init__.py b/spacy/training/converters/__init__.py
index e91b6aaa6..8173da64c 100644
--- a/spacy/training/converters/__init__.py
+++ b/spacy/training/converters/__init__.py
@@ -1,4 +1,4 @@
-from .iob_to_docs import iob_to_docs # noqa: F401
from .conll_ner_to_docs import conll_ner_to_docs # noqa: F401
-from .json_to_docs import json_to_docs # noqa: F401
from .conllu_to_docs import conllu_to_docs # noqa: F401
+from .iob_to_docs import iob_to_docs # noqa: F401
+from .json_to_docs import json_to_docs # noqa: F401
diff --git a/spacy/training/converters/conll_ner_to_docs.py b/spacy/training/converters/conll_ner_to_docs.py
index 28b21c5f0..b19d1791b 100644
--- a/spacy/training/converters/conll_ner_to_docs.py
+++ b/spacy/training/converters/conll_ner_to_docs.py
@@ -1,10 +1,10 @@
from wasabi import Printer
-from .. import tags_to_entities
-from ...training import iob_to_biluo
-from ...tokens import Doc, Span
from ...errors import Errors
-from ...util import load_model, get_lang_class
+from ...tokens import Doc, Span
+from ...training import iob_to_biluo
+from ...util import get_lang_class, load_model
+from .. import tags_to_entities
def conll_ner_to_docs(
diff --git a/spacy/training/converters/conllu_to_docs.py b/spacy/training/converters/conllu_to_docs.py
index 7052504cc..bda5c88c3 100644
--- a/spacy/training/converters/conllu_to_docs.py
+++ b/spacy/training/converters/conllu_to_docs.py
@@ -1,11 +1,12 @@
import re
-from .conll_ner_to_docs import n_sents_info
-from ...training import iob_to_biluo, biluo_tags_to_spans
-from ...tokens import Doc, Token, Span
-from ...vocab import Vocab
from wasabi import Printer
+from ...tokens import Doc, Span, Token
+from ...training import biluo_tags_to_spans, iob_to_biluo
+from ...vocab import Vocab
+from .conll_ner_to_docs import n_sents_info
+
def conllu_to_docs(
input_data,
diff --git a/spacy/training/converters/iob_to_docs.py b/spacy/training/converters/iob_to_docs.py
index 60fb7df61..45bb65692 100644
--- a/spacy/training/converters/iob_to_docs.py
+++ b/spacy/training/converters/iob_to_docs.py
@@ -1,11 +1,11 @@
from wasabi import Printer
-from .conll_ner_to_docs import n_sents_info
-from ...vocab import Vocab
-from ...training import iob_to_biluo, tags_to_entities
-from ...tokens import Doc, Span
from ...errors import Errors
+from ...tokens import Doc, Span
+from ...training import iob_to_biluo, tags_to_entities
from ...util import minibatch
+from ...vocab import Vocab
+from .conll_ner_to_docs import n_sents_info
def iob_to_docs(input_data, n_sents=10, no_print=False, *args, **kwargs):
diff --git a/spacy/training/converters/json_to_docs.py b/spacy/training/converters/json_to_docs.py
index 4123839f2..b4beedd2f 100644
--- a/spacy/training/converters/json_to_docs.py
+++ b/spacy/training/converters/json_to_docs.py
@@ -1,9 +1,13 @@
import srsly
-from ..gold_io import json_iterate, json_to_annotations
-from ..example import annotations_to_doc
-from ..example import _fix_legacy_dict_data, _parse_example_dict_data
-from ...util import load_model
+
from ...lang.xx import MultiLanguage
+from ...util import load_model
+from ..example import (
+ _fix_legacy_dict_data,
+ _parse_example_dict_data,
+ annotations_to_doc,
+)
+from ..gold_io import json_iterate, json_to_annotations
def json_to_docs(input_data, model=None, **kwargs):
diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py
index 086ad831c..6037c15e3 100644
--- a/spacy/training/corpus.py
+++ b/spacy/training/corpus.py
@@ -1,16 +1,16 @@
-import warnings
-from typing import Union, List, Iterable, Iterator, TYPE_CHECKING, Callable
-from typing import Optional
-from pathlib import Path
import random
+import warnings
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Optional, Union
+
import srsly
from .. import util
+from ..errors import Errors, Warnings
+from ..tokens import Doc, DocBin
+from ..vocab import Vocab
from .augment import dont_augment
from .example import Example
-from ..errors import Warnings, Errors
-from ..tokens import DocBin, Doc
-from ..vocab import Vocab
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/training/example.pxd b/spacy/training/example.pxd
index 49e239757..a7c71fa88 100644
--- a/spacy/training/example.pxd
+++ b/spacy/training/example.pxd
@@ -1,6 +1,7 @@
-from ..tokens.doc cimport Doc
from libc.stdint cimport uint64_t
+from ..tokens.doc cimport Doc
+
cdef class Example:
cdef readonly Doc x
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 95b0f0de9..abdac23ea 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -1,19 +1,29 @@
-from collections.abc import Iterable as IterableInstance
import warnings
+from collections.abc import Iterable as IterableInstance
+
import numpy
+
from murmurhash.mrmr cimport hash64
from ..tokens.doc cimport Doc
from ..tokens.span cimport Span
-from ..tokens.span import Span
+
from ..attrs import IDS
-from .alignment import Alignment
-from .iob_utils import biluo_to_iob, offsets_to_biluo_tags, doc_to_biluo_tags
-from .iob_utils import biluo_tags_to_spans, remove_bilu_prefix
from ..errors import Errors, Warnings
from ..pipeline._parser_internals import nonproj
+from ..tokens.span import Span
+from .alignment import Alignment
+from .iob_utils import (
+ biluo_tags_to_spans,
+ biluo_to_iob,
+ doc_to_biluo_tags,
+ offsets_to_biluo_tags,
+ remove_bilu_prefix,
+)
+
from ..tokens.token cimport MISSING_DEP
-from ..util import logger, to_ternary_int, all_equal
+
+from ..util import all_equal, logger, to_ternary_int
cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
diff --git a/spacy/training/gold_io.pyx b/spacy/training/gold_io.pyx
index 69654e2c7..1e7b3681d 100644
--- a/spacy/training/gold_io.pyx
+++ b/spacy/training/gold_io.pyx
@@ -1,10 +1,12 @@
+import json
import warnings
+
import srsly
+
from .. import util
from ..errors import Warnings
from ..tokens import Doc
from .iob_utils import offsets_to_biluo_tags, tags_to_entities
-import json
def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 9cf759c55..39dc06b9e 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -1,24 +1,33 @@
-from typing import Union, Dict, Optional, Any, IO, TYPE_CHECKING
-from thinc.api import Config, fix_random_seed, set_gpu_allocator
-from thinc.api import ConfigValidationError
-from pathlib import Path
-import srsly
-import numpy
-import tarfile
import gzip
-import zipfile
-import tqdm
-from itertools import islice
+import tarfile
import warnings
+import zipfile
+from itertools import islice
+from pathlib import Path
+from typing import IO, TYPE_CHECKING, Any, Dict, Optional, Union
+
+import numpy
+import srsly
+import tqdm
+from thinc.api import Config, ConfigValidationError, fix_random_seed, set_gpu_allocator
-from .pretrain import get_tok2vec_ref
-from ..lookups import Lookups
-from ..vectors import Vectors, Mode as VectorsMode
from ..errors import Errors, Warnings
+from ..lookups import Lookups
from ..schemas import ConfigSchemaTraining
-from ..util import registry, load_model_from_config, resolve_dot_names, logger
-from ..util import load_model, ensure_path, get_sourced_components
-from ..util import OOV_RANK, DEFAULT_OOV_PROB
+from ..util import (
+ DEFAULT_OOV_PROB,
+ OOV_RANK,
+ ensure_path,
+ get_sourced_components,
+ load_model,
+ load_model_from_config,
+ logger,
+ registry,
+ resolve_dot_names,
+)
+from ..vectors import Mode as VectorsMode
+from ..vectors import Vectors
+from .pretrain import get_tok2vec_ref
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/iob_utils.py b/spacy/training/iob_utils.py
index 0d4d246b0..64d02a1e2 100644
--- a/spacy/training/iob_utils.py
+++ b/spacy/training/iob_utils.py
@@ -1,8 +1,8 @@
-from typing import List, Dict, Tuple, Iterable, Union, Iterator, cast
import warnings
+from typing import Dict, Iterable, Iterator, List, Tuple, Union, cast
from ..errors import Errors, Warnings
-from ..tokens import Span, Doc
+from ..tokens import Doc, Span
def iob_to_biluo(tags: Iterable[str]) -> List[str]:
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index 7de31822e..1ec0b7b25 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -1,13 +1,14 @@
-from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
-from wasabi import Printer
-from pathlib import Path
-import tqdm
import sys
-import srsly
+from pathlib import Path
+from typing import IO, TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
+
+import srsly
+import tqdm
+from wasabi import Printer
-from ..util import registry
-from ..errors import Errors
from .. import util
+from ..errors import Errors
+from ..util import registry
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index eca40e3d9..56df53957 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -1,17 +1,28 @@
-from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
-from typing import Optional, TYPE_CHECKING
+import random
+import shutil
+import sys
from pathlib import Path
from timeit import default_timer as timer
-from thinc.api import Optimizer, Config, constant, fix_random_seed, set_gpu_allocator
-from wasabi import Printer
-import random
-import sys
-import shutil
+from typing import (
+ IO,
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ Union,
+)
+
+from thinc.api import Config, Optimizer, constant, fix_random_seed, set_gpu_allocator
+from wasabi import Printer
-from .example import Example
-from ..schemas import ConfigSchemaTraining
from ..errors import Errors
-from ..util import resolve_dot_names, registry, logger
+from ..schemas import ConfigSchemaTraining
+from ..util import logger, registry, resolve_dot_names
+from .example import Example
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/pretrain.py b/spacy/training/pretrain.py
index ebbc5d837..14a813a09 100644
--- a/spacy/training/pretrain.py
+++ b/spacy/training/pretrain.py
@@ -1,20 +1,26 @@
-from typing import Optional, Callable, Iterable, Union, List
-from thinc.api import Config, fix_random_seed, set_gpu_allocator, Model, Optimizer
-from thinc.api import set_dropout_rate
-from pathlib import Path
-from collections import Counter
-import srsly
-import time
import re
+import time
+from collections import Counter
+from pathlib import Path
+from typing import Callable, Iterable, List, Optional, Union
+import srsly
+from thinc.api import (
+ Config,
+ Model,
+ Optimizer,
+ fix_random_seed,
+ set_dropout_rate,
+ set_gpu_allocator,
+)
from thinc.config import ConfigValidationError
from wasabi import Printer
-from .example import Example
from ..errors import Errors
-from ..tokens import Doc
from ..schemas import ConfigSchemaPretrain
-from ..util import registry, load_model_from_config, dot_to_object
+from ..tokens import Doc
+from ..util import dot_to_object, load_model_from_config, registry
+from .example import Example
def pretrain(
diff --git a/spacy/ty.py b/spacy/ty.py
index 7e79a3d4d..f389456c0 100644
--- a/spacy/ty.py
+++ b/spacy/ty.py
@@ -1,13 +1,21 @@
-from typing import TYPE_CHECKING
-from typing import Optional, Any, Iterable, Dict, Callable, Sequence, List
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+)
+
+from thinc.api import Model, Optimizer
from .compat import Protocol, runtime_checkable
-from thinc.api import Optimizer, Model
-
if TYPE_CHECKING:
- from .training import Example
from .language import Language
+ from .training import Example
@runtime_checkable
diff --git a/spacy/typedefs.pxd b/spacy/typedefs.pxd
index 8cdc70e42..72d4d99ac 100644
--- a/spacy/typedefs.pxd
+++ b/spacy/typedefs.pxd
@@ -1,6 +1,4 @@
-from libc.stdint cimport uint16_t, uint32_t, uint64_t, uintptr_t, int32_t
-from libc.stdint cimport uint8_t
-
+from libc.stdint cimport int32_t, uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t
ctypedef float weight_t
ctypedef uint64_t hash_t
diff --git a/spacy/util.py b/spacy/util.py
index 8cc89217d..ec6ab47c0 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -1,38 +1,62 @@
-from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
-from typing import Optional, Iterable, Callable, Tuple, Type
-from typing import Iterator, Pattern, Generator, TYPE_CHECKING
-from types import ModuleType
-import os
+import functools
import importlib
import importlib.util
-import re
-from pathlib import Path
-import thinc
-from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer
-from thinc.api import ConfigValidationError, Model
-import functools
-import itertools
-import numpy
-import srsly
-import catalogue
-from catalogue import RegistryError, Registry
-import langcodes
-import sys
-import warnings
-from packaging.specifiers import SpecifierSet, InvalidSpecifier
-from packaging.version import Version, InvalidVersion
-from packaging.requirements import Requirement
-import subprocess
-from contextlib import contextmanager
-from collections import defaultdict
-import tempfile
-import shutil
-import shlex
import inspect
-import pkgutil
+import itertools
import logging
+import os
+import pkgutil
+import re
+import shlex
+import shutil
import socket
import stat
+import subprocess
+import sys
+import tempfile
+import warnings
+from collections import defaultdict
+from contextlib import contextmanager
+from pathlib import Path
+from types import ModuleType
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Generator,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ NoReturn,
+ Optional,
+ Pattern,
+ Set,
+ Tuple,
+ Type,
+ Union,
+ cast,
+)
+
+import catalogue
+import langcodes
+import numpy
+import srsly
+import thinc
+from catalogue import Registry, RegistryError
+from packaging.requirements import Requirement
+from packaging.specifiers import InvalidSpecifier, SpecifierSet
+from packaging.version import InvalidVersion, Version
+from thinc.api import (
+ Adam,
+ Config,
+ ConfigValidationError,
+ Model,
+ NumpyOps,
+ Optimizer,
+ get_current_ops,
+)
try:
import cupy.random
@@ -43,13 +67,12 @@ except ImportError:
# and have since moved to Thinc. We're importing them here so people's code
# doesn't break, but they should always be imported from Thinc from now on,
# not from spacy.util.
-from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
+from thinc.api import compounding, decaying, fix_random_seed # noqa: F401
-
-from .symbols import ORTH
-from .compat import cupy, CudaStream, is_windows, importlib_metadata
-from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
from . import about
+from .compat import CudaStream, cupy, importlib_metadata, is_windows
+from .errors import OLD_MODEL_SHORTCUTS, Errors, Warnings
+from .symbols import ORTH
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
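Review note: the comment retained in the `util.py` hunk above documents a deliberate compatibility shim. `fix_random_seed`, `compounding` and `decaying` now live in Thinc, but `spacy.util` keeps re-exporting them so existing `from spacy.util import fix_random_seed` code does not break; the `# noqa: F401` marker suppresses the unused-import lint warning such re-exports would otherwise trigger. A minimal sketch of the pattern, assuming a hypothetical `newlib` that took over a helper:

# oldpkg/util.py -- backwards-compatible re-export of a relocated helper.
from newlib import fast_sum  # noqa: F401  (kept so legacy imports keep working)

New code should import `fast_sum` from `newlib` directly; the alias exists only so already-released callers continue to run.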
diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index be0f6db09..bc654252a 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -1,14 +1,15 @@
cimport numpy as np
-from libc.stdint cimport uint32_t, uint64_t
from cython.operator cimport dereference as deref
+from libc.stdint cimport uint32_t, uint64_t
from libcpp.set cimport set as cppset
from murmurhash.mrmr cimport hash128_x64
import functools
-import numpy
-from typing import cast
import warnings
from enum import Enum
+from typing import cast
+
+import numpy
import srsly
from thinc.api import Ops, get_array_module, get_current_ops
from thinc.backends import get_array_ops
@@ -16,9 +17,9 @@ from thinc.types import Floats2d
from .strings cimport StringStore
-from .strings import get_string_id
-from .errors import Errors, Warnings
from . import util
+from .errors import Errors, Warnings
+from .strings import get_string_id
def unpickle_vectors(bytes_data):
diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd
index 9c951b2b7..3b0173e3e 100644
--- a/spacy/vocab.pxd
+++ b/spacy/vocab.pxd
@@ -1,12 +1,12 @@
-from libcpp.vector cimport vector
-from preshed.maps cimport PreshMap
from cymem.cymem cimport Pool
+from libcpp.vector cimport vector
from murmurhash.mrmr cimport hash64
+from preshed.maps cimport PreshMap
+from .morphology cimport Morphology
+from .strings cimport StringStore
from .structs cimport LexemeC, TokenC
from .typedefs cimport attr_t, hash_t
-from .strings cimport StringStore
-from .morphology cimport Morphology
cdef LexemeC EMPTY_LEXEME
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
index 4cc359c47..b7ff20348 100644
--- a/spacy/vocab.pyi
+++ b/spacy/vocab.pyi
@@ -1,14 +1,15 @@
-from typing import Callable, Iterator, Optional, Union, List, Dict
-from typing import Any, Iterable
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union
+
from thinc.types import Floats1d, FloatsXd
+
from . import Language
-from .strings import StringStore
from .lexeme import Lexeme
from .lookups import Lookups
from .morphology import Morphology
+from .strings import StringStore
from .tokens import Doc, Span
from .vectors import Vectors
-from pathlib import Path
def create_vocab(
lang: Optional[str], defaults: Any, vectors_name: Optional[str] = ...
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 27f8e5f98..d47122d08 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -1,26 +1,27 @@
# cython: profile=True
from libc.string cimport memcpy
+import functools
+
import numpy
import srsly
from thinc.api import get_array_module, get_current_ops
-import functools
-from .lexeme cimport EMPTY_LEXEME, OOV_RANK
-from .lexeme cimport Lexeme
-from .typedefs cimport attr_t
-from .tokens.token cimport Token
from .attrs cimport LANG, ORTH
+from .lexeme cimport EMPTY_LEXEME, OOV_RANK, Lexeme
+from .tokens.token cimport Token
+from .typedefs cimport attr_t
+from . import util
+from .attrs import IS_STOP, NORM, intify_attrs
from .compat import copy_reg
from .errors import Errors
-from .attrs import intify_attrs, NORM, IS_STOP
-from .vectors import Vectors, Mode as VectorsMode
-from .util import registry
-from .lookups import Lookups
-from . import util
+from .lang.lex_attrs import LEX_ATTRS, get_lang, is_stop
from .lang.norm_exceptions import BASE_NORMS
-from .lang.lex_attrs import LEX_ATTRS, is_stop, get_lang
+from .lookups import Lookups
+from .util import registry
+from .vectors import Mode as VectorsMode
+from .vectors import Vectors
def create_vocab(lang, defaults, vectors_name=None):
diff --git a/website/meta/universe.json b/website/meta/universe.json
index 5d4eb0f14..39884c46f 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -114,26 +114,30 @@
"id": "grecy",
"title": "greCy",
"slogan": "Ancient Greek pipelines for spaCy",
- "description": "greCy offers state-of-the-art pipelines for ancient Greek NLP. The repository makes language models available in various sizes, some of them containing floret word vectors and a BERT transformer layer.",
+ "description": "greCy offers state-of-the-art pipelines for ancient Greek NLP. It installs language models available in various sizes, some of them containing either word vectors or the aristoBERTo transformer.",
"github": "jmyerston/greCy",
+ "pip": "grecy",
"code_example": [
- "import spacy",
- "#After installing the grc_ud_proiel_trf wheel package from the greCy repository",
+ "python -m grecy install grc_proiel_trf",
"",
- "nlp = spacy.load('grc_ud_proiel_trf')",
- "doc = nlp('δοκῶ μοι περὶ ὧν πυνθάνεσθε οὐκ ἀμελέτητος εἶναι.')",
+      "# After installing grc_proiel_trf or any other model",
+ "import spacy",
+ "",
+ "nlp = spacy.load('grc_proiel_trf')",
+ "doc = nlp('δοκῶ μοι περὶ ὧν πυνθάνεσθε οὐκ ἀμελέτητος εἶναι')",
"",
"for token in doc:",
- " print(token.text, token.norm_, token.lemma_, token.pos_, token.tag_)"
+ " print(f'{token.text}, lemma: {token.lemma_}, pos: {token.pos_}, dep: {token.dep_}')"
],
"code_language": "python",
+ "thumb": "https://jacobo-syntax.hf.space/media/03a5317fa660c142e41dd2870b4273ce4e668e6fcdee0a276891f563.png",
"author": "Jacobo Myerston",
"author_links": {
"twitter": "@jcbmyrstn",
"github": "jmyerston",
"website": "https://huggingface.co/spaces/Jacobo/syntax"
},
- "category": ["pipeline", "research"],
+    "category": ["pipeline", "research", "models"],
"tags": ["ancient Greek"]
},
{
@@ -2735,10 +2739,9 @@
"description": "Have you ever struggled with needing a [spaCy TextCategorizer](https://spacy.io/api/textcategorizer) but didn't have the time to train one from scratch? Classy Classification is the way to go! For few-shot classification using [sentence-transformers](https://github.com/UKPLab/sentence-transformers) or [spaCy models](https://spacy.io/usage/models), provide a dictionary with labels and examples, or just provide a list of labels for zero shot-classification with [Huggingface zero-shot classifiers](https://huggingface.co/models?pipeline_tag=zero-shot-classification).",
"github": "davidberenstein1957/classy-classification",
"pip": "classy-classification",
- "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/classy-classification/master/logo.png",
+ "thumb": "https://raw.githubusercontent.com/davidberenstein1957/classy-classification/master/logo.png",
"code_example": [
"import spacy",
- "import classy_classification",
"",
"data = {",
" \"furniture\": [\"This text is about chairs.\",",
@@ -2783,14 +2786,13 @@
"title": "Concise Concepts",
"slogan": "Concise Concepts uses few-shot NER based on word embedding similarity to get you going with easy!",
"description": "When wanting to apply NER to concise concepts, it is really easy to come up with examples, but it takes some effort to train an entire pipeline. Concise Concepts uses few-shot NER based on word embedding similarity to get you going with easy!",
- "github": "pandora-intelligence/concise-concepts",
+ "github": "davidberenstein1957/concise-concepts",
"pip": "concise-concepts",
- "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/concise-concepts/master/img/logo.png",
- "image": "https://raw.githubusercontent.com/Pandora-Intelligence/concise-concepts/master/img/example.png",
+ "thumb": "https://raw.githubusercontent.com/davidberenstein1957/concise-concepts/master/img/logo.png",
+ "image": "https://raw.githubusercontent.com/davidberenstein1957/concise-concepts/master/img/example.png",
"code_example": [
"import spacy",
"from spacy import displacy",
- "import concise_concepts",
"",
"data = {",
" \"fruit\": [\"apple\", \"pear\", \"orange\"],",
@@ -2830,13 +2832,12 @@
"title": "Crosslingual Coreference",
"slogan": "One multi-lingual coreference model to rule them all!",
"description": "Coreference is amazing but the data required for training a model is very scarce. In our case, the available training for non-English languages also data proved to be poorly annotated. Crosslingual Coreference therefore uses the assumption a trained model with English data and cross-lingual embeddings should work for other languages with similar sentence structure. Verified to work quite well for at least (EN, NL, DK, FR, DE).",
- "github": "pandora-intelligence/crosslingual-coreference",
+ "github": "davidberenstein1957/crosslingual-coreference",
"pip": "crosslingual-coreference",
- "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/logo.png",
- "image": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/example_total.png",
+ "thumb": "https://raw.githubusercontent.com/davidberenstein1957/crosslingual-coreference/master/img/logo.png",
+ "image": "https://raw.githubusercontent.com/davidberenstein1957/crosslingual-coreference/master/img/example_total.png",
"code_example": [
"import spacy",
- "import crosslingual_coreference",
"",
"text = \"\"\"",
" Do not forget about Momofuku Ando!",
@@ -2929,6 +2930,54 @@
"tags": ["ner", "few-shot", "augmentation", "datasets", "training"],
"spacy_version": 3
},
+ {
+ "id": "spacysetfit",
+ "title": "spaCy-SetFit",
+ "slogan": "An an easy and intuitive approach to use SetFit in combination with spaCy.",
+ "description": "spaCy-SetFit is a Python library that extends spaCy's text categorization capabilities by incorporating SetFit for few-shot classification. It allows you to train a text categorizer using a intuitive dictionary. \n\nThe library integrates with spaCy's pipeline architecture, enabling easy integration and configuration of the text categorizer component. You can provide a training dataset containing inlier and outlier examples, and spaCy-SetFit will use the paraphrase-MiniLM-L3-v2 model for training the text categorizer with SetFit. Once trained, you can use the categorizer to classify new text and obtain category probabilities.",
+ "github": "davidberenstein1957/spacy-setfit",
+ "pip": "spacy-setfit",
+ "thumb": "https://raw.githubusercontent.com/davidberenstein1957/spacy-setfit/main/logo.png",
+ "code_example": [
+ "import spacy",
+ "",
+ "# Create some example data",
+ "train_dataset = {",
+ " \"inlier\": [",
+ " \"Text about furniture\",",
+ " \"Couches, benches and televisions.\",",
+ " \"I really need to get a new sofa.\"",
+ " ],",
+ " \"outlier\": [",
+ " \"Text about kitchen equipment\",",
+ " \"This text is about politics\",",
+ " \"Comments about AI and stuff.\"",
+ " ]",
+ "}",
+ "",
+ "# Load the spaCy language model:",
+ "nlp = spacy.load(\"en_core_web_sm\")",
+ "",
+ "# Add the \"text_categorizer\" pipeline component to the spaCy model, and configure it with SetFit parameters:",
+ "nlp.add_pipe(\"text_categorizer\", config={",
+ " \"pretrained_model_name_or_path\": \"paraphrase-MiniLM-L3-v2\",",
+ " \"setfit_trainer_args\": {",
+ " \"train_dataset\": train_dataset",
+ " }",
+ "})",
+ "doc = nlp(\"I really need to get a new sofa.\")",
+ "doc.cats",
+ "# {'inlier': 0.902350975129, 'outlier': 0.097649024871}"
+ ],
+ "author": "David Berenstein",
+ "author_links": {
+ "github": "davidberenstein1957",
+ "website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
+ },
+ "category": ["pipeline"],
+ "tags": ["few-shot", "SetFit", "training"],
+ "spacy_version": 3
+ },
{
"id": "blackstone",
"title": "Blackstone",
@@ -4339,6 +4388,37 @@
},
"category": ["apis", "standalone"],
"tags": ["apis", "deployment"]
+ },
+ {
+ "id": "span_marker",
+ "title": "SpanMarker",
+ "slogan": "Effortless state-of-the-art NER in spaCy",
+ "description": "The SpanMarker integration with spaCy allows you to seamlessly replace the default spaCy `\"ner\"` pipeline component with any [SpanMarker model available on the Hugging Face Hub](https://huggingface.co/models?library=span-marker). Through this, you can take advantage of the advanced Named Entity Recognition capabilities of SpanMarker within the familiar and powerful spaCy framework.\n\nBy default, the `span_marker` pipeline component uses a [SpanMarker model using RoBERTa-large trained on OntoNotes v5.0](https://huggingface.co/tomaarsen/span-marker-roberta-large-ontonotes5). This model reaches a competitive 91.54 F1, notably higher than the [85.5 and 89.8 F1](https://spacy.io/usage/facts-figures#section-benchmarks) from `en_core_web_lg` and `en_core_web_trf`, respectively. A short head-to-head between this SpanMarker model and the `trf` spaCy model has been posted [here](https://github.com/tomaarsen/SpanMarkerNER/pull/12).\n\nAdditionally, see [here](https://tomaarsen.github.io/SpanMarkerNER/notebooks/spacy_integration.html) for documentation on using SpanMarker with spaCy.",
+ "github": "tomaarsen/SpanMarkerNER",
+ "pip": "span_marker",
+ "code_example": [
+ "import spacy",
+ "",
+ "nlp = spacy.load(\"en_core_web_sm\", disable=[\"ner\"])",
+ "nlp.add_pipe(\"span_marker\", config={\"model\": \"tomaarsen/span-marker-roberta-large-ontonotes5\"})",
+ "",
+ "text = \"\"\"Cleopatra VII, also known as Cleopatra the Great, was the last active ruler of the \\",
+ "Ptolemaic Kingdom of Egypt. She was born in 69 BCE and ruled Egypt from 51 BCE until her \\",
+ "death in 30 BCE.\"\"\"",
+ "doc = nlp(text)",
+ "print([(entity, entity.label_) for entity in doc.ents])",
+ "# [(Cleopatra VII, \"PERSON\"), (Cleopatra the Great, \"PERSON\"), (the Ptolemaic Kingdom of Egypt, \"GPE\"),",
+ "# (69 BCE, \"DATE\"), (Egypt, \"GPE\"), (51 BCE, \"DATE\"), (30 BCE, \"DATE\")]"
+ ],
+ "code_language": "python",
+ "url": "https://tomaarsen.github.io/SpanMarkerNER",
+ "author": "Tom Aarsen",
+ "author_links": {
+ "github": "tomaarsen",
+ "website": "https://www.linkedin.com/in/tomaarsen"
+ },
+ "category": ["pipeline", "standalone", "scientific"],
+ "tags": ["ner"]
}
],
diff --git a/website/src/components/quickstart.js b/website/src/components/quickstart.js
index 160e5a778..2b5bfb5ba 100644
--- a/website/src/components/quickstart.js
+++ b/website/src/components/quickstart.js
@@ -215,15 +215,17 @@ const Quickstart = ({
}
)}
-
- {Children.toArray(children).flat().filter(isRelevant)}
+
+
+ {Children.toArray(children).flat().filter(isRelevant)}
+