diff --git a/pyproject.toml b/pyproject.toml
index 9cd96ac2d..dcb5cf10d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,3 +9,6 @@ requires = [
"numpy>=1.15.0",
]
build-backend = "setuptools.build_meta"
+
+[tool.isort]
+profile = "black"
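
The `black` profile is shorthand for isort settings that match Black's output (per isort's documentation, roughly `multi_line_output = 3`, `include_trailing_comma = true`, `force_grid_wrap = 0`, `use_parentheses = true`, and `line_length = 88`). This is what produces the parenthesized, trailing-comma import blocks throughout the diff below. Taking the `spacy/cli/_util.py` hunk further down as an example: two imports from the same module are merged and, because the merged line exceeds the 88-character limit, wrapped in Black's vertical-hanging-indent style:

    # before
    from ..util import import_file, run_command, make_tempdir, registry, logger
    from ..util import is_compatible_version, SimpleFrozenDict, ENV_VARS

    # after isort with profile = "black"
    from ..util import (
        ENV_VARS,
        SimpleFrozenDict,
        import_file,
        is_compatible_version,
        logger,
        make_tempdir,
        registry,
        run_command,
    )
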
diff --git a/requirements.txt b/requirements.txt
index b979929c5..a007f495e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,3 +38,4 @@ types-setuptools>=57.0.0
types-requests
types-setuptools>=57.0.0
black==22.3.0
+isort>=5.0,<6.0
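
Pinning `isort>=5.0,<6.0` alongside `black` keeps the formatting reproducible for contributors and CI: isort 5 is the first series to support the `profile` option, and the `<6.0` cap presumably guards against a future major release changing the sort order. With the pin in place, the whole tree can be re-sorted with `python -m isort .` from the repository root.
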
diff --git a/spacy/__init__.py b/spacy/__init__.py
index c3568bc5c..1a18ad0d5 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -1,6 +1,6 @@
-from typing import Union, Iterable, Dict, Any
-from pathlib import Path
import sys
+from pathlib import Path
+from typing import Any, Dict, Iterable, Union
# set library-specific custom warning handling before doing anything else
from .errors import setup_default_warnings
@@ -8,20 +8,17 @@ from .errors import setup_default_warnings
setup_default_warnings() # noqa: E402
# These are imported as part of the API
-from thinc.api import prefer_gpu, require_gpu, require_cpu # noqa: F401
-from thinc.api import Config
+from thinc.api import Config, prefer_gpu, require_cpu, require_gpu # noqa: F401
from . import pipeline # noqa: F401
-from .cli.info import info # noqa: F401
-from .glossary import explain # noqa: F401
-from .about import __version__ # noqa: F401
-from .util import registry, logger # noqa: F401
-
-from .errors import Errors
-from .language import Language
-from .vocab import Vocab
from . import util
-
+from .about import __version__ # noqa: F401
+from .cli.info import info # noqa: F401
+from .errors import Errors
+from .glossary import explain # noqa: F401
+from .language import Language
+from .util import logger, registry # noqa: F401
+from .vocab import Vocab
if sys.maxunicode == 65535:
raise SystemError(Errors.E130)
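
Besides alphabetizing names, isort separates imports into sections — by default standard library, third party, then first-party/local, each set off by a blank line — which is why many hunks in this diff only reorder lines or insert a blank line. A minimal sketch of the ordering applied in the hunk above:

    import sys  # standard library

    from thinc.api import Config  # third party

    from . import pipeline  # local package
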
diff --git a/spacy/attrs.pxd b/spacy/attrs.pxd
index 33d5372de..6dc9ecaee 100644
--- a/spacy/attrs.pxd
+++ b/spacy/attrs.pxd
@@ -1,6 +1,7 @@
# Reserve 64 values for flag features
from . cimport symbols
+
cdef enum attr_id_t:
NULL_ATTR
IS_ALPHA
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 868526b42..549a27616 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,35 +1,35 @@
from wasabi import msg
from ._util import app, setup_cli # noqa: F401
+from .apply import apply # noqa: F401
+from .assemble import assemble_cli # noqa: F401
# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
# are registered automatically and won't have to be imported here.
from .benchmark_speed import benchmark_speed_cli # noqa: F401
-from .download import download # noqa: F401
-from .info import info # noqa: F401
-from .package import package # noqa: F401
-from .profile import profile # noqa: F401
-from .train import train_cli # noqa: F401
-from .assemble import assemble_cli # noqa: F401
-from .pretrain import pretrain # noqa: F401
-from .debug_data import debug_data # noqa: F401
-from .debug_config import debug_config # noqa: F401
-from .debug_model import debug_model # noqa: F401
-from .debug_diff import debug_diff # noqa: F401
-from .evaluate import evaluate # noqa: F401
-from .apply import apply # noqa: F401
from .convert import convert # noqa: F401
-from .init_pipeline import init_pipeline_cli # noqa: F401
-from .init_config import init_config, fill_config # noqa: F401
-from .validate import validate # noqa: F401
-from .project.clone import project_clone # noqa: F401
-from .project.assets import project_assets # noqa: F401
-from .project.run import project_run # noqa: F401
-from .project.dvc import project_update_dvc # noqa: F401
-from .project.push import project_push # noqa: F401
-from .project.pull import project_pull # noqa: F401
-from .project.document import project_document # noqa: F401
+from .debug_config import debug_config # noqa: F401
+from .debug_data import debug_data # noqa: F401
+from .debug_diff import debug_diff # noqa: F401
+from .debug_model import debug_model # noqa: F401
+from .download import download # noqa: F401
+from .evaluate import evaluate # noqa: F401
from .find_threshold import find_threshold # noqa: F401
+from .info import info # noqa: F401
+from .init_config import fill_config, init_config # noqa: F401
+from .init_pipeline import init_pipeline_cli # noqa: F401
+from .package import package # noqa: F401
+from .pretrain import pretrain # noqa: F401
+from .profile import profile # noqa: F401
+from .project.assets import project_assets # noqa: F401
+from .project.clone import project_clone # noqa: F401
+from .project.document import project_document # noqa: F401
+from .project.dvc import project_update_dvc # noqa: F401
+from .project.pull import project_pull # noqa: F401
+from .project.push import project_push # noqa: F401
+from .project.run import project_run # noqa: F401
+from .train import train_cli # noqa: F401
+from .validate import validate # noqa: F401
@app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index f104feff9..eff897316 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -1,26 +1,44 @@
-from typing import Dict, Any, Union, List, Optional, Tuple, Iterable
-from typing import TYPE_CHECKING, overload
-import sys
-import shutil
-from pathlib import Path
-from wasabi import msg, Printer
-import srsly
import hashlib
+import os
+import shutil
+import sys
+from configparser import InterpolationError
+from contextlib import contextmanager
+from pathlib import Path
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ Union,
+ overload,
+)
+
+import srsly
import typer
from click import NoSuchOption
from click.parser import split_arg_string
-from typer.main import get_command
-from contextlib import contextmanager
from thinc.api import Config, ConfigValidationError, require_gpu
from thinc.util import gpu_is_available
-from configparser import InterpolationError
-import os
+from typer.main import get_command
+from wasabi import Printer, msg
+from .. import about
from ..compat import Literal
from ..schemas import ProjectConfigSchema, validate
-from ..util import import_file, run_command, make_tempdir, registry, logger
-from ..util import is_compatible_version, SimpleFrozenDict, ENV_VARS
-from .. import about
+from ..util import (
+ ENV_VARS,
+ SimpleFrozenDict,
+ import_file,
+ is_compatible_version,
+ logger,
+ make_tempdir,
+ registry,
+ run_command,
+)
if TYPE_CHECKING:
from pathy import FluidPath # noqa: F401
diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py
index f0df4e757..8c4b4c8bf 100644
--- a/spacy/cli/apply.py
+++ b/spacy/cli/apply.py
@@ -1,18 +1,15 @@
-import tqdm
-import srsly
-
from itertools import chain
from pathlib import Path
-from typing import Optional, List, Iterable, cast, Union
+from typing import Iterable, List, Optional, Union, cast
+import srsly
+import tqdm
from wasabi import msg
-from ._util import app, Arg, Opt, setup_gpu, import_code, walk_directory
-
from ..tokens import Doc, DocBin
-from ..vocab import Vocab
from ..util import ensure_path, load_model
-
+from ..vocab import Vocab
+from ._util import Arg, Opt, app, import_code, setup_gpu, walk_directory
path_help = """Location of the documents to predict on.
Can be a single file in .spacy format or a .jsonl file.
diff --git a/spacy/cli/assemble.py b/spacy/cli/assemble.py
index 1cfa290a3..ee2500b27 100644
--- a/spacy/cli/assemble.py
+++ b/spacy/cli/assemble.py
@@ -1,13 +1,20 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import typer
import logging
+from pathlib import Path
+from typing import Optional
+
+import typer
+from wasabi import msg
-from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code
from .. import util
from ..util import get_sourced_components, load_model_from_config
+from ._util import (
+ Arg,
+ Opt,
+ app,
+ import_code,
+ parse_config_overrides,
+ show_validation_error,
+)
@app.command(
diff --git a/spacy/cli/benchmark_speed.py b/spacy/cli/benchmark_speed.py
index 4eb20a5fa..a683d1591 100644
--- a/spacy/cli/benchmark_speed.py
+++ b/spacy/cli/benchmark_speed.py
@@ -1,11 +1,12 @@
-from typing import Iterable, List, Optional
import random
-from itertools import islice
-import numpy
-from pathlib import Path
import time
-from tqdm import tqdm
+from itertools import islice
+from pathlib import Path
+from typing import Iterable, List, Optional
+
+import numpy
import typer
+from tqdm import tqdm
from wasabi import msg
from .. import util
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index 68d454b3e..a66a68133 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -1,18 +1,22 @@
-from typing import Callable, Iterable, Mapping, Optional, Any, Union
-from enum import Enum
-from pathlib import Path
-from wasabi import Printer
-import srsly
+import itertools
import re
import sys
-import itertools
+from enum import Enum
+from pathlib import Path
+from typing import Any, Callable, Iterable, Mapping, Optional, Union
+
+import srsly
+from wasabi import Printer
-from ._util import app, Arg, Opt, walk_directory
-from ..training import docs_to_json
from ..tokens import Doc, DocBin
-from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
-from ..training.converters import conllu_to_docs
-
+from ..training import docs_to_json
+from ..training.converters import (
+ conll_ner_to_docs,
+ conllu_to_docs,
+ iob_to_docs,
+ json_to_docs,
+)
+from ._util import Arg, Opt, app, walk_directory
# Converters are matched by file extension except for ner/iob, which are
# matched by file extension and content. To add a converter, add a new
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index 409fac4ed..0e5382cd9 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -1,15 +1,22 @@
-from typing import Optional, Dict, Any, Union, List
from pathlib import Path
-from wasabi import msg, table
+from typing import Any, Dict, List, Optional, Union
+
+import typer
from thinc.api import Config
from thinc.config import VARIABLE_RE
-import typer
+from wasabi import msg, table
-from ._util import Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli
+from .. import util
from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
from ..util import registry
-from .. import util
+from ._util import (
+ Arg,
+ Opt,
+ debug_cli,
+ import_code,
+ parse_config_overrides,
+ show_validation_error,
+)
@debug_cli.command(
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 2826cd084..e3d0a102f 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -1,31 +1,49 @@
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
-from typing import cast, overload
-from pathlib import Path
-from collections import Counter
-import sys
-import srsly
-from wasabi import Printer, MESSAGES, msg
-import typer
import math
-import numpy
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import (
+ Any,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Union,
+ cast,
+ overload,
+)
-from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli, _format_number
-from ..training import Example, remove_bilu_prefix
-from ..training.initialize import get_sourced_components
-from ..schemas import ConfigSchemaTraining
-from ..pipeline import TrainablePipe
+import numpy
+import srsly
+import typer
+from wasabi import MESSAGES, Printer, msg
+
+from .. import util
+from ..compat import Literal
+from ..language import Language
+from ..morphology import Morphology
+from ..pipeline import Morphologizer, SpanCategorizer, TrainablePipe
+from ..pipeline._edit_tree_internals.edit_trees import EditTrees
from ..pipeline._parser_internals import nonproj
from ..pipeline._parser_internals.nonproj import DELIMITER
-from ..pipeline import Morphologizer, SpanCategorizer
-from ..pipeline._edit_tree_internals.edit_trees import EditTrees
-from ..morphology import Morphology
-from ..language import Language
+from ..schemas import ConfigSchemaTraining
+from ..training import Example, remove_bilu_prefix
+from ..training.initialize import get_sourced_components
from ..util import registry, resolve_dot_names
-from ..compat import Literal
from ..vectors import Mode as VectorsMode
-from .. import util
-
+from ._util import (
+ Arg,
+ Opt,
+ _format_number,
+ app,
+ debug_cli,
+ import_code,
+ parse_config_overrides,
+ show_validation_error,
+)
# Minimum number of expected occurrences of NER label in data to train new label
NEW_LABEL_THRESHOLD = 50
diff --git a/spacy/cli/debug_diff.py b/spacy/cli/debug_diff.py
index 6697c38ae..c53b0acab 100644
--- a/spacy/cli/debug_diff.py
+++ b/spacy/cli/debug_diff.py
@@ -1,13 +1,13 @@
+from pathlib import Path
from typing import Optional
import typer
-from wasabi import Printer, diff_strings, MarkdownRenderer
-from pathlib import Path
from thinc.api import Config
+from wasabi import MarkdownRenderer, Printer, diff_strings
-from ._util import debug_cli, Arg, Opt, show_validation_error, parse_config_overrides
from ..util import load_config
-from .init_config import init_config, Optimizations
+from ._util import Arg, Opt, debug_cli, parse_config_overrides, show_validation_error
+from .init_config import Optimizations, init_config
@debug_cli.command(
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 190094d81..8a0fd4889 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -1,19 +1,32 @@
-from typing import Dict, Any, Optional
-from pathlib import Path
import itertools
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import typer
+from thinc.api import (
+ Model,
+ data_validation,
+ fix_random_seed,
+ set_dropout_rate,
+ set_gpu_allocator,
+)
+from wasabi import msg
from spacy.training import Example
from spacy.util import resolve_dot_names
-from wasabi import msg
-from thinc.api import fix_random_seed, set_dropout_rate
-from thinc.api import Model, data_validation, set_gpu_allocator
-import typer
-from ._util import Arg, Opt, debug_cli, show_validation_error
-from ._util import parse_config_overrides, string_to_list, setup_gpu
+from .. import util
from ..schemas import ConfigSchemaTraining
from ..util import registry
-from .. import util
+from ._util import (
+ Arg,
+ Opt,
+ debug_cli,
+ parse_config_overrides,
+ setup_gpu,
+ show_validation_error,
+ string_to_list,
+)
@debug_cli.command(
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index df4bca53d..de731b0fd 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -1,14 +1,14 @@
-from typing import Optional, Sequence
-import requests
import sys
-from wasabi import msg
-import typer
+from typing import Optional, Sequence
+
+import requests
+import typer
+from wasabi import msg
-from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX
from .. import about
-from ..util import is_package, get_minor_version, run_command
-from ..util import is_prerelease_version
from ..errors import OLD_MODEL_SHORTCUTS
+from ..util import get_minor_version, is_package, is_prerelease_version, run_command
+from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app
@app.command(
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index 9fcdd18be..6235b658d 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -1,16 +1,16 @@
-from typing import Optional, List, Dict, Any, Union
-from wasabi import Printer
-from pathlib import Path
import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
import srsly
from thinc.api import fix_random_seed
+from wasabi import Printer
-from ..training import Corpus
-from ..tokens import Doc
-from ._util import app, Arg, Opt, setup_gpu, import_code, benchmark_cli
+from .. import displacy, util
from ..scorer import Scorer
-from .. import util
-from .. import displacy
+from ..tokens import Doc
+from ..training import Corpus
+from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu
@benchmark_cli.command(
diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index 6d591053d..7aa32c0c6 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -1,17 +1,17 @@
import functools
+import logging
import operator
from pathlib import Path
-import logging
-from typing import Optional, Tuple, Any, Dict, List
+from typing import Any, Dict, List, Optional, Tuple
import numpy
import wasabi.tables
-from ..pipeline import TextCategorizer, MultiLabel_TextCategorizer
-from ..errors import Errors
-from ..training import Corpus
-from ._util import app, Arg, Opt, import_code, setup_gpu
from .. import util
+from ..errors import Errors
+from ..pipeline import MultiLabel_TextCategorizer, TextCategorizer
+from ..training import Corpus
+from ._util import Arg, Opt, app, import_code, setup_gpu
_DEFAULTS = {
"n_trials": 11,
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index d82bf3fbc..8bfc6b54f 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -1,15 +1,15 @@
-from typing import Optional, Dict, Any, Union, List
-import platform
import json
+import platform
from pathlib import Path
-from wasabi import Printer, MarkdownRenderer
-import srsly
+from typing import Any, Dict, List, Optional, Union
-from ._util import app, Arg, Opt, string_to_list
-from .download import get_model_filename, get_latest_version
-from .. import util
-from .. import about
+import srsly
+from wasabi import MarkdownRenderer, Printer
+
+from .. import about, util
from ..compat import importlib_metadata
+from ._util import Arg, Opt, app, string_to_list
+from .download import get_latest_version, get_model_filename
@app.command("info")
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index b634caa4c..a7c03d00f 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -1,19 +1,26 @@
-from typing import Optional, List, Tuple
+import re
from enum import Enum
from pathlib import Path
-from wasabi import Printer, diff_strings
-from thinc.api import Config
+from typing import List, Optional, Tuple
+
import srsly
-import re
from jinja2 import Template
+from thinc.api import Config
+from wasabi import Printer, diff_strings
from .. import util
from ..language import DEFAULT_CONFIG_PRETRAIN_PATH
from ..schemas import RecommendationSchema
from ..util import SimpleFrozenList
-from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
-from ._util import string_to_list, import_code
-
+from ._util import (
+ COMMAND,
+ Arg,
+ Opt,
+ import_code,
+ init_cli,
+ show_validation_error,
+ string_to_list,
+)
ROOT = Path(__file__).parent / "templates"
TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
diff --git a/spacy/cli/init_pipeline.py b/spacy/cli/init_pipeline.py
index d53a61b8e..e0d048c69 100644
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@@ -1,15 +1,23 @@
-from typing import Optional
import logging
from pathlib import Path
-from wasabi import msg
-import typer
+from typing import Optional
+
import srsly
+import typer
+from wasabi import msg
from .. import util
-from ..training.initialize import init_nlp, convert_vectors
from ..language import Language
-from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code, setup_gpu
+from ..training.initialize import convert_vectors, init_nlp
+from ._util import (
+ Arg,
+ Opt,
+ import_code,
+ init_cli,
+ parse_config_overrides,
+ setup_gpu,
+ show_validation_error,
+)
@init_cli.command("vectors")
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 6351f28eb..4545578e6 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -1,18 +1,18 @@
-from typing import Optional, Union, Any, Dict, List, Tuple, cast
-import shutil
-from pathlib import Path
-from wasabi import Printer, MarkdownRenderer, get_raw_input
-from thinc.api import Config
-from collections import defaultdict
-from catalogue import RegistryError
-import srsly
-import sys
import re
+import shutil
+import sys
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
-from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
-from ..schemas import validate, ModelMetaSchema
-from .. import util
-from .. import about
+import srsly
+from catalogue import RegistryError
+from thinc.api import Config
+from wasabi import MarkdownRenderer, Printer, get_raw_input
+
+from .. import about, util
+from ..schemas import ModelMetaSchema, validate
+from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app, string_to_list
@app.command("package")
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 45042e605..446c40510 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -1,13 +1,21 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import typer
import re
+from pathlib import Path
+from typing import Optional
+
+import typer
+from wasabi import msg
-from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code, setup_gpu
from ..training.pretrain import pretrain
from ..util import load_config
+from ._util import (
+ Arg,
+ Opt,
+ app,
+ import_code,
+ parse_config_overrides,
+ setup_gpu,
+ show_validation_error,
+)
@app.command(
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index 3c282c73d..e1f720327 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -1,17 +1,18 @@
-from typing import Optional, Sequence, Union, Iterator
-import tqdm
-from pathlib import Path
-import srsly
import cProfile
+import itertools
import pstats
import sys
-import itertools
-from wasabi import msg, Printer
-import typer
+from pathlib import Path
+from typing import Iterator, Optional, Sequence, Union
+
+import srsly
+import tqdm
+import typer
+from wasabi import Printer, msg
-from ._util import app, debug_cli, Arg, Opt, NAME
from ..language import Language
from ..util import load_model
+from ._util import NAME, Arg, Opt, app, debug_cli
@debug_cli.command("profile")
diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py
index 8f35b2d23..aa2705986 100644
--- a/spacy/cli/project/assets.py
+++ b/spacy/cli/project/assets.py
@@ -1,16 +1,27 @@
-from typing import Any, Dict, Optional
-from pathlib import Path
-from wasabi import msg
import os
import re
import shutil
+from pathlib import Path
+from typing import Any, Dict, Optional
+
import requests
import typer
+from wasabi import msg
from ...util import ensure_path, working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
-from .._util import get_checksum, download_file, git_checkout, get_git_version
-from .._util import SimpleFrozenDict, parse_config_overrides
+from .._util import (
+ PROJECT_FILE,
+ Arg,
+ Opt,
+ SimpleFrozenDict,
+ download_file,
+ get_checksum,
+ get_git_version,
+ git_checkout,
+ load_project_config,
+ parse_config_overrides,
+ project_cli,
+)
# Whether assets are extra if `extra` is not set.
EXTRA_DEFAULT = False
diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py
index 14b4ed9b5..2ee27c92a 100644
--- a/spacy/cli/project/clone.py
+++ b/spacy/cli/project/clone.py
@@ -1,13 +1,22 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import subprocess
import re
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+from wasabi import msg
from ... import about
from ...util import ensure_path
-from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
-from .._util import git_checkout, get_git_version, git_repo_branch_exists
+from .._util import (
+ COMMAND,
+ PROJECT_FILE,
+ Arg,
+ Opt,
+ get_git_version,
+ git_checkout,
+ git_repo_branch_exists,
+ project_cli,
+)
DEFAULT_REPO = about.__projects__
DEFAULT_PROJECTS_BRANCH = about.__projects_branch__
diff --git a/spacy/cli/project/document.py b/spacy/cli/project/document.py
index 1ba43a958..80107d27a 100644
--- a/spacy/cli/project/document.py
+++ b/spacy/cli/project/document.py
@@ -1,9 +1,9 @@
from pathlib import Path
-from wasabi import msg, MarkdownRenderer
+
+from wasabi import MarkdownRenderer, msg
from ...util import working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
-
+from .._util import PROJECT_FILE, Arg, Opt, load_project_config, project_cli
DOCS_URL = "https://spacy.io"
INTRO_PROJECT = f"""The [`{PROJECT_FILE}`]({PROJECT_FILE}) defines the data assets required by the
diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py
index a15353855..9ad55c433 100644
--- a/spacy/cli/project/dvc.py
+++ b/spacy/cli/project/dvc.py
@@ -1,15 +1,28 @@
"""This module contains helpers and subcommands for integrating spaCy projects
with Data Version Control (DVC). https://dvc.org"""
-from typing import Dict, Any, List, Optional, Iterable
import subprocess
from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional
+
from wasabi import msg
-from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
-from .._util import Arg, Opt, NAME, COMMAND
-from ...util import working_dir, split_command, join_command, run_command
-from ...util import SimpleFrozenList
-
+from ...util import (
+ SimpleFrozenList,
+ join_command,
+ run_command,
+ split_command,
+ working_dir,
+)
+from .._util import (
+ COMMAND,
+ NAME,
+ PROJECT_FILE,
+ Arg,
+ Opt,
+ get_hash,
+ load_project_config,
+ project_cli,
+)
DVC_CONFIG = "dvc.yaml"
DVC_DIR = ".dvc"
diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py
index 8894baa50..e9be74df7 100644
--- a/spacy/cli/project/pull.py
+++ b/spacy/cli/project/pull.py
@@ -1,9 +1,9 @@
from pathlib import Path
+
from wasabi import msg
-from .remote_storage import RemoteStorage
-from .remote_storage import get_command_hash
-from .._util import project_cli, Arg, logger
-from .._util import load_project_config
+
+from .._util import Arg, load_project_config, logger, project_cli
+from .remote_storage import RemoteStorage, get_command_hash
from .run import update_lockfile
diff --git a/spacy/cli/project/push.py b/spacy/cli/project/push.py
index a8178de21..a7915e547 100644
--- a/spacy/cli/project/push.py
+++ b/spacy/cli/project/push.py
@@ -1,9 +1,9 @@
from pathlib import Path
+
from wasabi import msg
-from .remote_storage import RemoteStorage
-from .remote_storage import get_content_hash, get_command_hash
-from .._util import load_project_config
-from .._util import project_cli, Arg, logger
+
+from .._util import Arg, load_project_config, logger, project_cli
+from .remote_storage import RemoteStorage, get_command_hash, get_content_hash
@project_cli.command("push")
diff --git a/spacy/cli/project/remote_storage.py b/spacy/cli/project/remote_storage.py
index 076541580..84235a90d 100644
--- a/spacy/cli/project/remote_storage.py
+++ b/spacy/cli/project/remote_storage.py
@@ -1,18 +1,25 @@
-from typing import Optional, List, Dict, TYPE_CHECKING
+import hashlib
import os
import site
-import hashlib
-import urllib.parse
import tarfile
+import urllib.parse
from pathlib import Path
+from typing import TYPE_CHECKING, Dict, List, Optional
+
from wasabi import msg
-from .._util import get_hash, get_checksum, upload_file, download_file
-from .._util import ensure_pathy, make_tempdir
-from ...util import get_minor_version, ENV_VARS, check_bool_env_var
-from ...git_info import GIT_VERSION
from ... import about
from ...errors import Errors
+from ...git_info import GIT_VERSION
+from ...util import ENV_VARS, check_bool_env_var, get_minor_version
+from .._util import (
+ download_file,
+ ensure_pathy,
+ get_checksum,
+ get_hash,
+ make_tempdir,
+ upload_file,
+)
if TYPE_CHECKING:
from pathy import FluidPath # noqa: F401
diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py
index 0f4858a99..43972a202 100644
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@@ -1,20 +1,39 @@
-from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
import os.path
-from pathlib import Path
-
-from wasabi import msg
-from wasabi.util import locale_escape
import sys
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+
import srsly
import typer
+from wasabi import msg
+from wasabi.util import locale_escape
from ... import about
from ...git_info import GIT_VERSION
-from ...util import working_dir, run_command, split_command, is_cwd, join_command
-from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
-from ...util import check_bool_env_var, SimpleFrozenDict
-from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
-from .._util import get_checksum, project_cli, Arg, Opt, COMMAND, parse_config_overrides
+from ...util import (
+ ENV_VARS,
+ SimpleFrozenDict,
+ SimpleFrozenList,
+ check_bool_env_var,
+ is_cwd,
+ is_minor_version_match,
+ join_command,
+ run_command,
+ split_command,
+ working_dir,
+)
+from .._util import (
+ COMMAND,
+ PROJECT_FILE,
+ PROJECT_LOCK,
+ Arg,
+ Opt,
+ get_checksum,
+ get_hash,
+ load_project_config,
+ parse_config_overrides,
+ project_cli,
+)
@project_cli.command(
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index cc22cbba6..8bdabd39c 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -1,15 +1,23 @@
-from typing import Optional, Dict, Any, Union
-from pathlib import Path
-from wasabi import msg
-import typer
import logging
import sys
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+import typer
+from wasabi import msg
-from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code, setup_gpu
-from ..training.loop import train as train_nlp
-from ..training.initialize import init_nlp
from .. import util
+from ..training.initialize import init_nlp
+from ..training.loop import train as train_nlp
+from ._util import (
+ Arg,
+ Opt,
+ app,
+ import_code,
+ parse_config_overrides,
+ setup_gpu,
+ show_validation_error,
+)
@app.command(
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index a918e9a39..0426f05fd 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -1,14 +1,21 @@
-from typing import Tuple
-from pathlib import Path
import sys
-import requests
-from wasabi import msg, Printer
import warnings
+from pathlib import Path
+from typing import Tuple
+
+import requests
+from wasabi import Printer, msg
-from ._util import app
from .. import about
-from ..util import get_package_version, get_installed_models, get_minor_version
-from ..util import get_package_path, get_model_meta, is_compatible_version
+from ..util import (
+ get_installed_models,
+ get_minor_version,
+ get_model_meta,
+ get_package_path,
+ get_package_version,
+ is_compatible_version,
+)
+from ._util import app
@app.command("validate")
diff --git a/spacy/compat.py b/spacy/compat.py
index 89132735d..522fa30dd 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -1,5 +1,6 @@
"""Helpers for Python and platform compatibility."""
import sys
+
from thinc.util import copy_array
try:
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index f42dad0c9..bde2d04fe 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -4,15 +4,13 @@ spaCy's built in visualization suite for dependencies and named entities.
DOCS: https://spacy.io/api/top-level#displacy
USAGE: https://spacy.io/usage/visualizers
"""
-from typing import Union, Iterable, Optional, Dict, Any, Callable
import warnings
+from typing import Any, Callable, Dict, Iterable, Optional, Union
-from .render import DependencyRenderer, EntityRenderer, SpanRenderer
-from ..tokens import Doc, Span
from ..errors import Errors, Warnings
-from ..util import is_in_jupyter
-from ..util import find_available_port
-
+from ..tokens import Doc, Span
+from ..util import find_available_port, is_in_jupyter
+from .render import DependencyRenderer, EntityRenderer, SpanRenderer
_html = {}
RENDER_WRAPPER = None
@@ -68,7 +66,7 @@ def render(
if jupyter or (jupyter is None and is_in_jupyter()):
# return HTML rendered by IPython display()
# See #4840 for details on span wrapper to disable mathjax
- from IPython.core.display import display, HTML
+ from IPython.core.display import HTML, display
return display(HTML('{}'.format(html)))
return html
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index f74222dc2..86869e3b8 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -1,15 +1,29 @@
-from typing import Any, Dict, List, Optional, Tuple, Union
-import uuid
import itertools
+import uuid
+from typing import Any, Dict, List, Optional, Tuple, Union
from ..errors import Errors
from ..util import escape_html, minify_html, registry
-from .templates import TPL_DEP_ARCS, TPL_DEP_SVG, TPL_DEP_WORDS
-from .templates import TPL_DEP_WORDS_LEMMA, TPL_ENT, TPL_ENT_RTL, TPL_ENTS
-from .templates import TPL_FIGURE, TPL_KB_LINK, TPL_PAGE, TPL_SPAN
-from .templates import TPL_SPAN_RTL, TPL_SPAN_SLICE, TPL_SPAN_SLICE_RTL
-from .templates import TPL_SPAN_START, TPL_SPAN_START_RTL, TPL_SPANS
-from .templates import TPL_TITLE
+from .templates import (
+ TPL_DEP_ARCS,
+ TPL_DEP_SVG,
+ TPL_DEP_WORDS,
+ TPL_DEP_WORDS_LEMMA,
+ TPL_ENT,
+ TPL_ENT_RTL,
+ TPL_ENTS,
+ TPL_FIGURE,
+ TPL_KB_LINK,
+ TPL_PAGE,
+ TPL_SPAN,
+ TPL_SPAN_RTL,
+ TPL_SPAN_SLICE,
+ TPL_SPAN_SLICE_RTL,
+ TPL_SPAN_START,
+ TPL_SPAN_START_RTL,
+ TPL_SPANS,
+ TPL_TITLE,
+)
DEFAULT_LANG = "en"
DEFAULT_DIR = "ltr"
diff --git a/spacy/errors.py b/spacy/errors.py
index 928c3be90..987754bd2 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -1,4 +1,5 @@
import warnings
+
from .compat import Literal
diff --git a/spacy/glossary.py b/spacy/glossary.py
index d2240fbba..1f628698b 100644
--- a/spacy/glossary.py
+++ b/spacy/glossary.py
@@ -1,4 +1,5 @@
import warnings
+
from .errors import Warnings
diff --git a/spacy/kb/__init__.py b/spacy/kb/__init__.py
index 1d70a9b34..3ce3e4c33 100644
--- a/spacy/kb/__init__.py
+++ b/spacy/kb/__init__.py
@@ -1,3 +1,3 @@
+from .candidate import Candidate, get_candidates, get_candidates_batch
from .kb import KnowledgeBase
from .kb_in_memory import InMemoryLookupKB
-from .candidate import Candidate, get_candidates, get_candidates_batch
diff --git a/spacy/kb/candidate.pxd b/spacy/kb/candidate.pxd
index 942ce9dd0..9fc4c4e9d 100644
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@@ -1,6 +1,8 @@
-from .kb cimport KnowledgeBase
from libcpp.vector cimport vector
+
from ..typedefs cimport hash_t
+from .kb cimport KnowledgeBase
+
# Object used by the Entity Linker that summarizes one entity-alias candidate combination.
cdef class Candidate:
diff --git a/spacy/kb/candidate.pyx b/spacy/kb/candidate.pyx
index c89efeb03..4cd734f43 100644
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@@ -1,9 +1,12 @@
# cython: infer_types=True, profile=True
from typing import Iterable
+
from .kb cimport KnowledgeBase
+
from ..tokens import Span
+
cdef class Candidate:
"""A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
to a specific `entity` from a Knowledge Base. This will be used as input for the entity linking
diff --git a/spacy/kb/kb.pxd b/spacy/kb/kb.pxd
index 1adeef8ae..263469546 100644
--- a/spacy/kb/kb.pxd
+++ b/spacy/kb/kb.pxd
@@ -2,8 +2,10 @@
from cymem.cymem cimport Pool
from libc.stdint cimport int64_t
+
from ..vocab cimport Vocab
+
cdef class KnowledgeBase:
cdef Pool mem
cdef readonly Vocab vocab
diff --git a/spacy/kb/kb.pyx b/spacy/kb/kb.pyx
index ce4bc0138..a88e18e1f 100644
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@@ -2,12 +2,13 @@
from pathlib import Path
from typing import Iterable, Tuple, Union
+
from cymem.cymem cimport Pool
-from .candidate import Candidate
+from ..errors import Errors
from ..tokens import Span
from ..util import SimpleFrozenList
-from ..errors import Errors
+from .candidate import Candidate
cdef class KnowledgeBase:
diff --git a/spacy/kb/kb_in_memory.pxd b/spacy/kb/kb_in_memory.pxd
index 825a6bde9..08ec6b2a3 100644
--- a/spacy/kb/kb_in_memory.pxd
+++ b/spacy/kb/kb_in_memory.pxd
@@ -1,11 +1,11 @@
"""Knowledge-base for entity or concept linking."""
-from preshed.maps cimport PreshMap
-from libcpp.vector cimport vector
from libc.stdint cimport int32_t, int64_t
from libc.stdio cimport FILE
+from libcpp.vector cimport vector
+from preshed.maps cimport PreshMap
+from ..structs cimport AliasC, KBEntryC
from ..typedefs cimport hash_t
-from ..structs cimport KBEntryC, AliasC
from .kb cimport KnowledgeBase
ctypedef vector[KBEntryC] entry_vec
diff --git a/spacy/kb/kb_in_memory.pyx b/spacy/kb/kb_in_memory.pyx
index 2a74d047b..e991f7720 100644
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@@ -1,23 +1,28 @@
# cython: infer_types=True, profile=True
-from typing import Iterable, Callable, Dict, Any, Union
+from typing import Any, Callable, Dict, Iterable, Union
import srsly
-from preshed.maps cimport PreshMap
-from cpython.exc cimport PyErr_SetFromErrno
-from libc.stdio cimport fopen, fclose, fread, fwrite, feof, fseek
-from libc.stdint cimport int32_t, int64_t
-from libcpp.vector cimport vector
-from pathlib import Path
+from cpython.exc cimport PyErr_SetFromErrno
+from libc.stdint cimport int32_t, int64_t
+from libc.stdio cimport fclose, feof, fopen, fread, fseek, fwrite
+from libcpp.vector cimport vector
+from preshed.maps cimport PreshMap
+
import warnings
+from pathlib import Path
from ..tokens import Span
+
from ..typedefs cimport hash_t
-from ..errors import Errors, Warnings
+
from .. import util
+from ..errors import Errors, Warnings
from ..util import SimpleFrozenList, ensure_path
+
from ..vocab cimport Vocab
from .kb cimport KnowledgeBase
+
from .candidate import Candidate as Candidate
diff --git a/spacy/lang/af/__init__.py b/spacy/lang/af/__init__.py
index 553fcbf4c..8bd73c7ad 100644
--- a/spacy/lang/af/__init__.py
+++ b/spacy/lang/af/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class AfrikaansDefaults(BaseDefaults):
diff --git a/spacy/lang/am/__init__.py b/spacy/lang/am/__init__.py
index ddae556d6..284823eaa 100644
--- a/spacy/lang/am/__init__.py
+++ b/spacy/lang/am/__init__.py
@@ -1,12 +1,11 @@
-from .stop_words import STOP_WORDS
+from ...attrs import LANG
+from ...language import BaseDefaults, Language
+from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
-
+from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...language import Language, BaseDefaults
-from ...attrs import LANG
-from ...util import update_exc
class AmharicDefaults(BaseDefaults):
diff --git a/spacy/lang/am/punctuation.py b/spacy/lang/am/punctuation.py
index 555a179fa..87447b054 100644
--- a/spacy/lang/am/punctuation.py
+++ b/spacy/lang/am/punctuation.py
@@ -1,5 +1,11 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
-from ..char_classes import UNITS, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA_UPPER,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+)
_list_punct = LIST_PUNCT + "፡ ። ፣ ፤ ፥ ፦ ፧ ፠ ፨".strip().split()
diff --git a/spacy/lang/am/tokenizer_exceptions.py b/spacy/lang/am/tokenizer_exceptions.py
index 9472fe918..1ccf996ca 100644
--- a/spacy/lang/am/tokenizer_exceptions.py
+++ b/spacy/lang/am/tokenizer_exceptions.py
@@ -1,5 +1,4 @@
-from ...symbols import ORTH, NORM
-
+from ...symbols import NORM, ORTH
_exc = {}
diff --git a/spacy/lang/ar/__init__.py b/spacy/lang/ar/__init__.py
index 18c1f90ed..d50b0722c 100644
--- a/spacy/lang/ar/__init__.py
+++ b/spacy/lang/ar/__init__.py
@@ -1,8 +1,8 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ...language import Language, BaseDefaults
class ArabicDefaults(BaseDefaults):
diff --git a/spacy/lang/ar/punctuation.py b/spacy/lang/ar/punctuation.py
index f30204c02..cf03fc68e 100644
--- a/spacy/lang/ar/punctuation.py
+++ b/spacy/lang/ar/punctuation.py
@@ -1,5 +1,11 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
-from ..char_classes import UNITS, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA_UPPER,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+)
_suffixes = (
LIST_PUNCT
diff --git a/spacy/lang/ar/tokenizer_exceptions.py b/spacy/lang/ar/tokenizer_exceptions.py
index 7c385bef8..eb16876f5 100644
--- a/spacy/lang/ar/tokenizer_exceptions.py
+++ b/spacy/lang/ar/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/az/__init__.py b/spacy/lang/az/__init__.py
index 476898364..32949aa3e 100644
--- a/spacy/lang/az/__init__.py
+++ b/spacy/lang/az/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class AzerbaijaniDefaults(BaseDefaults):
diff --git a/spacy/lang/az/lex_attrs.py b/spacy/lang/az/lex_attrs.py
index 73a5e2762..96fb7f020 100644
--- a/spacy/lang/az/lex_attrs.py
+++ b/spacy/lang/az/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
# Eleven, twelve, etc. are written separately: on bir, on iki
_num_words = [
diff --git a/spacy/lang/bg/__init__.py b/spacy/lang/bg/__init__.py
index c9176b946..acca63ba1 100644
--- a/spacy/lang/bg/__init__.py
+++ b/spacy/lang/bg/__init__.py
@@ -1,12 +1,14 @@
+from ...attrs import LANG
+from ...language import BaseDefaults, Language
+from ...util import update_exc
+from ..punctuation import (
+ COMBINING_DIACRITICS_TOKENIZER_INFIXES,
+ COMBINING_DIACRITICS_TOKENIZER_SUFFIXES,
+)
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .lex_attrs import LEX_ATTRS
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_INFIXES
-from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
-from ...attrs import LANG
-from ...util import update_exc
class BulgarianDefaults(BaseDefaults):
diff --git a/spacy/lang/bg/lex_attrs.py b/spacy/lang/bg/lex_attrs.py
index bba3c74cd..0b7942aec 100644
--- a/spacy/lang/bg/lex_attrs.py
+++ b/spacy/lang/bg/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"нула",
"едно",
diff --git a/spacy/lang/bg/tokenizer_exceptions.py b/spacy/lang/bg/tokenizer_exceptions.py
index 0f484b778..89d466daf 100644
--- a/spacy/lang/bg/tokenizer_exceptions.py
+++ b/spacy/lang/bg/tokenizer_exceptions.py
@@ -4,8 +4,7 @@ References:
(countries, occupations, fields of study and more).
"""
-from ...symbols import ORTH, NORM
-
+from ...symbols import NORM, ORTH
_exc = {}
diff --git a/spacy/lang/bn/__init__.py b/spacy/lang/bn/__init__.py
index 6d0331e00..6a5d37f5b 100644
--- a/spacy/lang/bn/__init__.py
+++ b/spacy/lang/bn/__init__.py
@@ -1,10 +1,12 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
-from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
+
+from ...language import BaseDefaults, Language
from ...pipeline import Lemmatizer
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class BengaliDefaults(BaseDefaults):
diff --git a/spacy/lang/bn/punctuation.py b/spacy/lang/bn/punctuation.py
index becfe8d2a..ddb91cef1 100644
--- a/spacy/lang/bn/punctuation.py
+++ b/spacy/lang/bn/punctuation.py
@@ -1,6 +1,14 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
-from ..char_classes import ALPHA_LOWER, ALPHA, HYPHENS, CONCAT_QUOTES, UNITS
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+)
_currency = r"\$¢£€¥฿৳"
_quotes = CONCAT_QUOTES.replace("'", "")
diff --git a/spacy/lang/bn/tokenizer_exceptions.py b/spacy/lang/bn/tokenizer_exceptions.py
index e666522b8..016bf0fc5 100644
--- a/spacy/lang/bn/tokenizer_exceptions.py
+++ b/spacy/lang/bn/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/ca/__init__.py b/spacy/lang/ca/__init__.py
index a3def660d..8b2f3e85a 100755
--- a/spacy/lang/ca/__init__.py
+++ b/spacy/lang/ca/__init__.py
@@ -1,14 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from ...language import BaseDefaults, Language
from .lemmatizer import CatalanLemmatizer
+from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class CatalanDefaults(BaseDefaults):
diff --git a/spacy/lang/ca/lex_attrs.py b/spacy/lang/ca/lex_attrs.py
index be8b7a6ea..3e99da0e0 100644
--- a/spacy/lang/ca/lex_attrs.py
+++ b/spacy/lang/ca/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"zero",
"un",
diff --git a/spacy/lang/ca/punctuation.py b/spacy/lang/ca/punctuation.py
index 8e2f09828..6914f67a7 100755
--- a/spacy/lang/ca/punctuation.py
+++ b/spacy/lang/ca/punctuation.py
@@ -1,9 +1,18 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
-from ..char_classes import LIST_CURRENCY
-from ..char_classes import CURRENCY
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT
-from ..char_classes import merge_chars, _units
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+ _units,
+ merge_chars,
+)
ELISION = " ' ’ ".strip().replace(" ", "").replace("\n", "")
diff --git a/spacy/lang/ca/syntax_iterators.py b/spacy/lang/ca/syntax_iterators.py
index 917e07c93..16a4c6a81 100644
--- a/spacy/lang/ca/syntax_iterators.py
+++ b/spacy/lang/ca/syntax_iterators.py
@@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
-from ...tokens import Doc, Span
-from ...symbols import NOUN, PROPN
+from typing import Iterator, Tuple, Union
+
from ...errors import Errors
+from ...symbols import NOUN, PROPN
+from ...tokens import Doc, Span
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
diff --git a/spacy/lang/ca/tokenizer_exceptions.py b/spacy/lang/ca/tokenizer_exceptions.py
index b261b3498..67165780e 100755
--- a/spacy/lang/ca/tokenizer_exceptions.py
+++ b/spacy/lang/ca/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/cs/__init__.py b/spacy/lang/cs/__init__.py
index 3e70e4078..9ea60afdf 100644
--- a/spacy/lang/cs/__init__.py
+++ b/spacy/lang/cs/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class CzechDefaults(BaseDefaults):
diff --git a/spacy/lang/da/__init__.py b/spacy/lang/da/__init__.py
index e148a7b4f..372f372dd 100644
--- a/spacy/lang/da/__init__.py
+++ b/spacy/lang/da/__init__.py
@@ -1,9 +1,9 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
+from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class DanishDefaults(BaseDefaults):
diff --git a/spacy/lang/da/lex_attrs.py b/spacy/lang/da/lex_attrs.py
index 403af686c..8e0420912 100644
--- a/spacy/lang/da/lex_attrs.py
+++ b/spacy/lang/da/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
# Source http://fjern-uv.dk/tal.php
_num_words = """nul
en et to tre fire fem seks syv otte ni ti
diff --git a/spacy/lang/da/punctuation.py b/spacy/lang/da/punctuation.py
index e050ab7aa..f70fe3d64 100644
--- a/spacy/lang/da/punctuation.py
+++ b/spacy/lang/da/punctuation.py
@@ -1,8 +1,13 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
from ..punctuation import TOKENIZER_SUFFIXES
-
_quotes = CONCAT_QUOTES.replace("'", "")
_infixes = (
diff --git a/spacy/lang/da/syntax_iterators.py b/spacy/lang/da/syntax_iterators.py
index a0b70f004..60224f0b1 100644
--- a/spacy/lang/da/syntax_iterators.py
+++ b/spacy/lang/da/syntax_iterators.py
@@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
-from ...tokens import Doc, Span
-from ...symbols import NOUN, PROPN, PRON, VERB, AUX
+from typing import Iterator, Tuple, Union
+
from ...errors import Errors
+from ...symbols import AUX, NOUN, PRON, PROPN, VERB
+from ...tokens import Doc, Span
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py
index ce25c546b..649d12022 100644
--- a/spacy/lang/da/tokenizer_exceptions.py
+++ b/spacy/lang/da/tokenizer_exceptions.py
@@ -2,10 +2,9 @@
Tokenizer Exceptions.
Source: https://forkortelse.dk/ and various others.
"""
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/de/__init__.py b/spacy/lang/de/__init__.py
index 65863c098..4f45b2357 100644
--- a/spacy/lang/de/__init__.py
+++ b/spacy/lang/de/__init__.py
@@ -1,8 +1,8 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
+from ...language import BaseDefaults, Language
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class GermanDefaults(BaseDefaults):
diff --git a/spacy/lang/de/punctuation.py b/spacy/lang/de/punctuation.py
index 69d402237..862207649 100644
--- a/spacy/lang/de/punctuation.py
+++ b/spacy/lang/de/punctuation.py
@@ -1,9 +1,18 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
-from ..char_classes import CURRENCY, UNITS, PUNCT
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+ UNITS,
+)
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
-
_prefixes = ["``"] + BASE_TOKENIZER_PREFIXES
_suffixes = (
diff --git a/spacy/lang/de/syntax_iterators.py b/spacy/lang/de/syntax_iterators.py
index e80504998..544fe299c 100644
--- a/spacy/lang/de/syntax_iterators.py
+++ b/spacy/lang/de/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/de/tokenizer_exceptions.py b/spacy/lang/de/tokenizer_exceptions.py
index 21d99cffe..3f1aeeccd 100644
--- a/spacy/lang/de/tokenizer_exceptions.py
+++ b/spacy/lang/de/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {
"auf'm": [{ORTH: "auf"}, {ORTH: "'m", NORM: "dem"}],
diff --git a/spacy/lang/dsb/__init__.py b/spacy/lang/dsb/__init__.py
index c66092a0c..096eced19 100644
--- a/spacy/lang/dsb/__init__.py
+++ b/spacy/lang/dsb/__init__.py
@@ -1,6 +1,6 @@
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class LowerSorbianDefaults(BaseDefaults):
diff --git a/spacy/lang/el/__init__.py b/spacy/lang/el/__init__.py
index 53dd9be8e..00e52bd97 100644
--- a/spacy/lang/el/__init__.py
+++ b/spacy/lang/el/__init__.py
@@ -1,13 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .syntax_iterators import SYNTAX_ITERATORS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
+from ...language import BaseDefaults, Language
from .lemmatizer import GreekLemmatizer
-from ...language import Language, BaseDefaults
+from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class GreekDefaults(BaseDefaults):
diff --git a/spacy/lang/el/get_pos_from_wiktionary.py b/spacy/lang/el/get_pos_from_wiktionary.py
index 369973cc0..10b54d112 100644
--- a/spacy/lang/el/get_pos_from_wiktionary.py
+++ b/spacy/lang/el/get_pos_from_wiktionary.py
@@ -1,5 +1,6 @@
def get_pos_from_wiktionary():
import re
+
from gensim.corpora.wikicorpus import extract_pages
regex = re.compile(r"==={{(\w+)\|el}}===")
diff --git a/spacy/lang/el/punctuation.py b/spacy/lang/el/punctuation.py
index 2d5690407..b8b717bac 100644
--- a/spacy/lang/el/punctuation.py
+++ b/spacy/lang/el/punctuation.py
@@ -1,6 +1,16 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
-from ..char_classes import LIST_ICONS, ALPHA_LOWER, ALPHA_UPPER, ALPHA, HYPHENS
-from ..char_classes import CONCAT_QUOTES, CURRENCY
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ HYPHENS,
+ LIST_CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+)
_units = (
"km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm nm yd in ft "
diff --git a/spacy/lang/el/syntax_iterators.py b/spacy/lang/el/syntax_iterators.py
index 18fa46695..31c7dccf7 100644
--- a/spacy/lang/el/syntax_iterators.py
+++ b/spacy/lang/el/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/el/tokenizer_exceptions.py b/spacy/lang/el/tokenizer_exceptions.py
index 0a36d5d2b..41317ba97 100644
--- a/spacy/lang/el/tokenizer_exceptions.py
+++ b/spacy/lang/el/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py
index 876186979..c4bcfb938 100644
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -1,13 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .syntax_iterators import SYNTAX_ITERATORS
-from .punctuation import TOKENIZER_INFIXES
+from ...language import BaseDefaults, Language
from .lemmatizer import EnglishLemmatizer
-from ...language import Language, BaseDefaults
+from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class EnglishDefaults(BaseDefaults):
diff --git a/spacy/lang/en/punctuation.py b/spacy/lang/en/punctuation.py
index 5d3eb792e..775c6b001 100644
--- a/spacy/lang/en/punctuation.py
+++ b/spacy/lang/en/punctuation.py
@@ -1,5 +1,12 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
_infixes = (
LIST_ELLIPSES
diff --git a/spacy/lang/en/syntax_iterators.py b/spacy/lang/en/syntax_iterators.py
index 7904e5621..140ae0a5c 100644
--- a/spacy/lang/en/syntax_iterators.py
+++ b/spacy/lang/en/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 7886e28cb..dd3650c18 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -1,8 +1,8 @@
from typing import Dict, List
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
-from ...util import update_exc
+from ...symbols import NORM, ORTH
+from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc: Dict[str, List[Dict]] = {}
_exclude = [
diff --git a/spacy/lang/es/__init__.py b/spacy/lang/es/__init__.py
index e75955202..bcaed8672 100644
--- a/spacy/lang/es/__init__.py
+++ b/spacy/lang/es/__init__.py
@@ -1,12 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
+
+from ...language import BaseDefaults, Language
from .lemmatizer import SpanishLemmatizer
-from .syntax_iterators import SYNTAX_ITERATORS
+from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class SpanishDefaults(BaseDefaults):
diff --git a/spacy/lang/es/lemmatizer.py b/spacy/lang/es/lemmatizer.py
index ca5fc08c8..44f968347 100644
--- a/spacy/lang/es/lemmatizer.py
+++ b/spacy/lang/es/lemmatizer.py
@@ -1,5 +1,5 @@
-from typing import List, Optional, Tuple
import re
+from typing import List, Optional, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
diff --git a/spacy/lang/es/lex_attrs.py b/spacy/lang/es/lex_attrs.py
index 9d1fa93b8..4c477eaee 100644
--- a/spacy/lang/es/lex_attrs.py
+++ b/spacy/lang/es/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"cero",
"uno",
diff --git a/spacy/lang/es/punctuation.py b/spacy/lang/es/punctuation.py
index e9552371e..3d20518cd 100644
--- a/spacy/lang/es/punctuation.py
+++ b/spacy/lang/es/punctuation.py
@@ -1,8 +1,17 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES
-from ..char_classes import LIST_ICONS, CURRENCY, LIST_UNITS, PUNCT
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
-from ..char_classes import merge_chars
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ LIST_UNITS,
+ PUNCT,
+ merge_chars,
+)
_list_units = [u for u in LIST_UNITS if u != "%"]
_units = merge_chars(" ".join(_list_units))
diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py
index f2ca2a678..96df444a3 100644
--- a/spacy/lang/es/syntax_iterators.py
+++ b/spacy/lang/es/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/es/tokenizer_exceptions.py b/spacy/lang/es/tokenizer_exceptions.py
index 74cdc143d..2ea0ed8b7 100644
--- a/spacy/lang/es/tokenizer_exceptions.py
+++ b/spacy/lang/es/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {
"pal": [{ORTH: "pa"}, {ORTH: "l", NORM: "el"}],
diff --git a/spacy/lang/et/__init__.py b/spacy/lang/et/__init__.py
index 274bc1309..9ec7e6006 100644
--- a/spacy/lang/et/__init__.py
+++ b/spacy/lang/et/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class EstonianDefaults(BaseDefaults):
diff --git a/spacy/lang/eu/__init__.py b/spacy/lang/eu/__init__.py
index 3346468bd..81f9c4a18 100644
--- a/spacy/lang/eu/__init__.py
+++ b/spacy/lang/eu/__init__.py
@@ -1,7 +1,7 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class BasqueDefaults(BaseDefaults):
diff --git a/spacy/lang/eu/punctuation.py b/spacy/lang/eu/punctuation.py
index 5d35d0a25..382bfc75c 100644
--- a/spacy/lang/eu/punctuation.py
+++ b/spacy/lang/eu/punctuation.py
@@ -1,4 +1,3 @@
from ..punctuation import TOKENIZER_SUFFIXES
-
_suffixes = TOKENIZER_SUFFIXES
diff --git a/spacy/lang/fa/__init__.py b/spacy/lang/fa/__init__.py
index 914e4c27d..e5baa8b4a 100644
--- a/spacy/lang/fa/__init__.py
+++ b/spacy/lang/fa/__init__.py
@@ -1,12 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_SUFFIXES
-from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+
+from ...language import BaseDefaults, Language
from ...pipeline import Lemmatizer
+from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class PersianDefaults(BaseDefaults):
diff --git a/spacy/lang/fa/lex_attrs.py b/spacy/lang/fa/lex_attrs.py
index 99b8e2787..065e81bd6 100644
--- a/spacy/lang/fa/lex_attrs.py
+++ b/spacy/lang/fa/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
MIM = "م"
ZWNJ_O_MIM = "ام"
YE_NUN = "ین"
diff --git a/spacy/lang/fa/punctuation.py b/spacy/lang/fa/punctuation.py
index 4b258c13d..c1ee570ce 100644
--- a/spacy/lang/fa/punctuation.py
+++ b/spacy/lang/fa/punctuation.py
@@ -1,5 +1,11 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
-from ..char_classes import UNITS, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA_UPPER,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+)
_suffixes = (
LIST_PUNCT
diff --git a/spacy/lang/fa/syntax_iterators.py b/spacy/lang/fa/syntax_iterators.py
index 8207884b0..3052369a7 100644
--- a/spacy/lang/fa/syntax_iterators.py
+++ b/spacy/lang/fa/syntax_iterators.py
@@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
-from ...tokens import Doc, Span
-from ...symbols import NOUN, PROPN, PRON
+from typing import Iterator, Tuple, Union
+
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
+from ...tokens import Doc, Span
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
diff --git a/spacy/lang/fa/tokenizer_exceptions.py b/spacy/lang/fa/tokenizer_exceptions.py
index 30df798ab..3b31b7f67 100644
--- a/spacy/lang/fa/tokenizer_exceptions.py
+++ b/spacy/lang/fa/tokenizer_exceptions.py
@@ -1,5 +1,4 @@
-from ...symbols import ORTH, NORM
-
+from ...symbols import NORM, ORTH
TOKENIZER_EXCEPTIONS = {
".ق ": [{ORTH: ".ق "}],
diff --git a/spacy/lang/fi/__init__.py b/spacy/lang/fi/__init__.py
index c3a0cf451..3e371b9b5 100644
--- a/spacy/lang/fi/__init__.py
+++ b/spacy/lang/fi/__init__.py
@@ -1,9 +1,9 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class FinnishDefaults(BaseDefaults):
diff --git a/spacy/lang/fi/lex_attrs.py b/spacy/lang/fi/lex_attrs.py
index 4d500cead..9eec41b3d 100644
--- a/spacy/lang/fi/lex_attrs.py
+++ b/spacy/lang/fi/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"nolla",
"yksi",
diff --git a/spacy/lang/fi/punctuation.py b/spacy/lang/fi/punctuation.py
index 6e14dde38..29ddc3111 100644
--- a/spacy/lang/fi/punctuation.py
+++ b/spacy/lang/fi/punctuation.py
@@ -1,8 +1,14 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_HYPHENS
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ LIST_ELLIPSES,
+ LIST_HYPHENS,
+ LIST_ICONS,
+)
from ..punctuation import TOKENIZER_SUFFIXES
-
_quotes = CONCAT_QUOTES.replace("'", "")
DASHES = "|".join(x for x in LIST_HYPHENS if x != "-")
diff --git a/spacy/lang/fi/syntax_iterators.py b/spacy/lang/fi/syntax_iterators.py
index 6b481e51f..6e2216713 100644
--- a/spacy/lang/fi/syntax_iterators.py
+++ b/spacy/lang/fi/syntax_iterators.py
@@ -1,7 +1,8 @@
from typing import Iterator, Tuple, Union
-from ...tokens import Doc, Span
-from ...symbols import NOUN, PROPN, PRON
+
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
+from ...tokens import Doc, Span
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py
index 465333b0a..881d5b91d 100644
--- a/spacy/lang/fi/tokenizer_exceptions.py
+++ b/spacy/lang/fi/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/fr/__init__.py b/spacy/lang/fr/__init__.py
index 27d2a915e..a8bc7f53e 100644
--- a/spacy/lang/fr/__init__.py
+++ b/spacy/lang/fr/__init__.py
@@ -1,15 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .punctuation import TOKENIZER_SUFFIXES
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .syntax_iterators import SYNTAX_ITERATORS
+from ...language import BaseDefaults, Language
from .lemmatizer import FrenchLemmatizer
-from ...language import Language, BaseDefaults
+from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKEN_MATCH, TOKENIZER_EXCEPTIONS
class FrenchDefaults(BaseDefaults):
diff --git a/spacy/lang/fr/lex_attrs.py b/spacy/lang/fr/lex_attrs.py
index 811312ad7..9cf508a07 100644
--- a/spacy/lang/fr/lex_attrs.py
+++ b/spacy/lang/fr/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = set(
"""
zero un une deux trois quatre cinq six sept huit neuf dix
diff --git a/spacy/lang/fr/punctuation.py b/spacy/lang/fr/punctuation.py
index 873d01d87..a3b178a2f 100644
--- a/spacy/lang/fr/punctuation.py
+++ b/spacy/lang/fr/punctuation.py
@@ -1,8 +1,16 @@
-from ..punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
-from ..char_classes import CONCAT_QUOTES, UNITS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
-from ..char_classes import merge_chars
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+ merge_chars,
+)
+from ..punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES
ELISION = "' ’".replace(" ", "")
HYPHENS = r"- – — ‐ ‑".replace(" ", "")
diff --git a/spacy/lang/fr/syntax_iterators.py b/spacy/lang/fr/syntax_iterators.py
index 5849c40b3..a6bf3d3ca 100644
--- a/spacy/lang/fr/syntax_iterators.py
+++ b/spacy/lang/fr/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/fr/tokenizer_exceptions.py b/spacy/lang/fr/tokenizer_exceptions.py
index 2e88b58cf..fa2062ef9 100644
--- a/spacy/lang/fr/tokenizer_exceptions.py
+++ b/spacy/lang/fr/tokenizer_exceptions.py
@@ -1,11 +1,10 @@
import re
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from .punctuation import ELISION, HYPHENS
-from ..char_classes import ALPHA_LOWER, ALPHA
from ...symbols import ORTH
from ...util import update_exc
-
+from ..char_classes import ALPHA, ALPHA_LOWER
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from .punctuation import ELISION, HYPHENS
# not using the large _tokenizer_exceptions_list by default as it slows down the tokenizer
# from ._tokenizer_exceptions_list import FR_BASE_EXCEPTIONS
diff --git a/spacy/lang/ga/__init__.py b/spacy/lang/ga/__init__.py
index 3be53bc7a..6f9a27a14 100644
--- a/spacy/lang/ga/__init__.py
+++ b/spacy/lang/ga/__init__.py
@@ -2,10 +2,10 @@ from typing import Optional
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
+from ...language import BaseDefaults, Language
from .lemmatizer import IrishLemmatizer
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class IrishDefaults(BaseDefaults):
diff --git a/spacy/lang/ga/lemmatizer.py b/spacy/lang/ga/lemmatizer.py
index 47aec8fd4..c9fbfbc19 100644
--- a/spacy/lang/ga/lemmatizer.py
+++ b/spacy/lang/ga/lemmatizer.py
@@ -1,4 +1,4 @@
-from typing import List, Dict, Tuple
+from typing import Dict, List, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
diff --git a/spacy/lang/ga/tokenizer_exceptions.py b/spacy/lang/ga/tokenizer_exceptions.py
index 63af65fe9..eb4b413fb 100644
--- a/spacy/lang/ga/tokenizer_exceptions.py
+++ b/spacy/lang/ga/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {
"'acha'n": [{ORTH: "'ach", NORM: "gach"}, {ORTH: "a'n", NORM: "aon"}],
diff --git a/spacy/lang/grc/__init__.py b/spacy/lang/grc/__init__.py
index 019b3802e..ed742f4c5 100644
--- a/spacy/lang/grc/__init__.py
+++ b/spacy/lang/grc/__init__.py
@@ -1,8 +1,8 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
-from ...language import Language, BaseDefaults
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class AncientGreekDefaults(BaseDefaults):
diff --git a/spacy/lang/grc/lex_attrs.py b/spacy/lang/grc/lex_attrs.py
index 0ab15e6fd..33cfca05b 100644
--- a/spacy/lang/grc/lex_attrs.py
+++ b/spacy/lang/grc/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
# CARDINALS
"εἷς",
diff --git a/spacy/lang/grc/punctuation.py b/spacy/lang/grc/punctuation.py
index 8f3589e9a..8e9fc8bf2 100644
--- a/spacy/lang/grc/punctuation.py
+++ b/spacy/lang/grc/punctuation.py
@@ -1,6 +1,15 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
-from ..char_classes import LIST_ICONS, ALPHA_LOWER, ALPHA_UPPER, ALPHA, HYPHENS
-from ..char_classes import CONCAT_QUOTES
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+)
_prefixes = (
[
diff --git a/spacy/lang/grc/tokenizer_exceptions.py b/spacy/lang/grc/tokenizer_exceptions.py
index bcee70f32..86527ff61 100644
--- a/spacy/lang/grc/tokenizer_exceptions.py
+++ b/spacy/lang/grc/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/gu/__init__.py b/spacy/lang/gu/__init__.py
index e6fbc9d18..2f22034c1 100644
--- a/spacy/lang/gu/__init__.py
+++ b/spacy/lang/gu/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class GujaratiDefaults(BaseDefaults):
diff --git a/spacy/lang/he/__init__.py b/spacy/lang/he/__init__.py
index dd2ee478d..07084acf1 100644
--- a/spacy/lang/he/__init__.py
+++ b/spacy/lang/he/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class HebrewDefaults(BaseDefaults):
diff --git a/spacy/lang/hi/__init__.py b/spacy/lang/hi/__init__.py
index 4c8ae446d..980dc31c1 100644
--- a/spacy/lang/hi/__init__.py
+++ b/spacy/lang/hi/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class HindiDefaults(BaseDefaults):
diff --git a/spacy/lang/hi/lex_attrs.py b/spacy/lang/hi/lex_attrs.py
index ee845e8b1..4ecd1db66 100644
--- a/spacy/lang/hi/lex_attrs.py
+++ b/spacy/lang/hi/lex_attrs.py
@@ -1,6 +1,5 @@
+from ...attrs import LIKE_NUM, NORM
from ..norm_exceptions import BASE_NORMS
-from ...attrs import NORM, LIKE_NUM
-
# fmt: off
_stem_suffixes = [
diff --git a/spacy/lang/hr/__init__.py b/spacy/lang/hr/__init__.py
index 30870b522..fd7622a3d 100644
--- a/spacy/lang/hr/__init__.py
+++ b/spacy/lang/hr/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class CroatianDefaults(BaseDefaults):
diff --git a/spacy/lang/hsb/__init__.py b/spacy/lang/hsb/__init__.py
index 034d82319..e8b2ffc9f 100644
--- a/spacy/lang/hsb/__init__.py
+++ b/spacy/lang/hsb/__init__.py
@@ -1,7 +1,7 @@
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ...language import Language, BaseDefaults
class UpperSorbianDefaults(BaseDefaults):
diff --git a/spacy/lang/hsb/tokenizer_exceptions.py b/spacy/lang/hsb/tokenizer_exceptions.py
index 4b9a4f98a..cd3bac913 100644
--- a/spacy/lang/hsb/tokenizer_exceptions.py
+++ b/spacy/lang/hsb/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = dict()
for exc_data in [
diff --git a/spacy/lang/hu/__init__.py b/spacy/lang/hu/__init__.py
index 9426bacea..799e6d230 100644
--- a/spacy/lang/hu/__init__.py
+++ b/spacy/lang/hu/__init__.py
@@ -1,7 +1,7 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
+from ...language import BaseDefaults, Language
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKEN_MATCH, TOKENIZER_EXCEPTIONS
class HungarianDefaults(BaseDefaults):
diff --git a/spacy/lang/hu/punctuation.py b/spacy/lang/hu/punctuation.py
index f827cd677..dbf93c622 100644
--- a/spacy/lang/hu/punctuation.py
+++ b/spacy/lang/hu/punctuation.py
@@ -1,6 +1,14 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CONCAT_QUOTES
-from ..char_classes import CONCAT_ICONS, UNITS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_ICONS,
+ CONCAT_QUOTES,
+ LIST_ELLIPSES,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+)
# removing ° from the special icons to keep e.g. 99° as one token
_concat_icons = CONCAT_ICONS.replace("\u00B0", "")
diff --git a/spacy/lang/hu/tokenizer_exceptions.py b/spacy/lang/hu/tokenizer_exceptions.py
index ffaa74f50..3f79b02d2 100644
--- a/spacy/lang/hu/tokenizer_exceptions.py
+++ b/spacy/lang/hu/tokenizer_exceptions.py
@@ -1,10 +1,9 @@
import re
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..punctuation import ALPHA_LOWER, CURRENCY
from ...symbols import ORTH
from ...util import update_exc
-
+from ..punctuation import ALPHA_LOWER, CURRENCY
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/hy/__init__.py b/spacy/lang/hy/__init__.py
index 481eaae0a..e00d4fd11 100644
--- a/spacy/lang/hy/__init__.py
+++ b/spacy/lang/hy/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class ArmenianDefaults(BaseDefaults):
diff --git a/spacy/lang/hy/lex_attrs.py b/spacy/lang/hy/lex_attrs.py
index 9c9c0380c..4c96b8ab5 100644
--- a/spacy/lang/hy/lex_attrs.py
+++ b/spacy/lang/hy/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"զրո",
"մեկ",
diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py
index 0d72cfa9d..93eb3214a 100644
--- a/spacy/lang/id/__init__.py
+++ b/spacy/lang/id/__init__.py
@@ -1,9 +1,9 @@
-from .stop_words import STOP_WORDS
-from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class IndonesianDefaults(BaseDefaults):
diff --git a/spacy/lang/id/lex_attrs.py b/spacy/lang/id/lex_attrs.py
index 3167f4659..5952c4d06 100644
--- a/spacy/lang/id/lex_attrs.py
+++ b/spacy/lang/id/lex_attrs.py
@@ -1,8 +1,7 @@
import unicodedata
-from .punctuation import LIST_CURRENCY
from ...attrs import IS_CURRENCY, LIKE_NUM
-
+from .punctuation import LIST_CURRENCY
_num_words = [
"nol",
diff --git a/spacy/lang/id/punctuation.py b/spacy/lang/id/punctuation.py
index f6c2387d8..8303b8eaa 100644
--- a/spacy/lang/id/punctuation.py
+++ b/spacy/lang/id/punctuation.py
@@ -1,6 +1,5 @@
-from ..punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
-from ..char_classes import ALPHA, merge_chars, split_chars, _currency, _units
-
+from ..char_classes import ALPHA, _currency, _units, merge_chars, split_chars
+from ..punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
_units = (
_units + "s bit Gbps Mbps mbps Kbps kbps ƒ ppi px "
diff --git a/spacy/lang/id/syntax_iterators.py b/spacy/lang/id/syntax_iterators.py
index fa984d411..027798687 100644
--- a/spacy/lang/id/syntax_iterators.py
+++ b/spacy/lang/id/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py
index ff77ede9f..8dea4e97f 100644
--- a/spacy/lang/id/tokenizer_exceptions.py
+++ b/spacy/lang/id/tokenizer_exceptions.py
@@ -1,8 +1,7 @@
+from ...symbols import NORM, ORTH
+from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ._tokenizer_exceptions_list import ID_BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
-from ...util import update_exc
-
# List of abbreviations and acronyms from:
# https://id.wiktionary.org/wiki/Wiktionary:Daftar_singkatan_dan_akronim_bahasa_Indonesia#A
diff --git a/spacy/lang/is/__init__.py b/spacy/lang/is/__init__.py
index 318363beb..af1260045 100644
--- a/spacy/lang/is/__init__.py
+++ b/spacy/lang/is/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class IcelandicDefaults(BaseDefaults):
diff --git a/spacy/lang/it/__init__.py b/spacy/lang/it/__init__.py
index ecf322bd7..14458d811 100644
--- a/spacy/lang/it/__init__.py
+++ b/spacy/lang/it/__init__.py
@@ -1,12 +1,13 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .stop_words import STOP_WORDS
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from ...language import Language, BaseDefaults
+from ...language import BaseDefaults, Language
from .lemmatizer import ItalianLemmatizer
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES
+from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class ItalianDefaults(BaseDefaults):
diff --git a/spacy/lang/it/lemmatizer.py b/spacy/lang/it/lemmatizer.py
index e44e64e3a..bf869166d 100644
--- a/spacy/lang/it/lemmatizer.py
+++ b/spacy/lang/it/lemmatizer.py
@@ -1,4 +1,4 @@
-from typing import List, Dict, Tuple
+from typing import Dict, List, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
diff --git a/spacy/lang/it/punctuation.py b/spacy/lang/it/punctuation.py
index f01ab4f0d..51318b22d 100644
--- a/spacy/lang/it/punctuation.py
+++ b/spacy/lang/it/punctuation.py
@@ -1,8 +1,13 @@
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS
-from ..char_classes import ALPHA, HYPHENS, CONCAT_QUOTES
-from ..char_classes import ALPHA_LOWER, ALPHA_UPPER
-
ELISION = "'’"
diff --git a/spacy/lang/it/syntax_iterators.py b/spacy/lang/it/syntax_iterators.py
index f63df3fad..924627648 100644
--- a/spacy/lang/it/syntax_iterators.py
+++ b/spacy/lang/it/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/it/tokenizer_exceptions.py b/spacy/lang/it/tokenizer_exceptions.py
index 42883863b..2e7a5a1a3 100644
--- a/spacy/lang/it/tokenizer_exceptions.py
+++ b/spacy/lang/it/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {
"all'art.": [{ORTH: "all'"}, {ORTH: "art."}],
diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index bf86305fb..0d5f97ac8 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -1,27 +1,27 @@
-from typing import Optional, Union, Dict, Any, Callable
-from pathlib import Path
-import srsly
-from collections import namedtuple
-from thinc.api import Model
import re
+from collections import namedtuple
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Union
-from .stop_words import STOP_WORDS
-from .syntax_iterators import SYNTAX_ITERATORS
-from .tag_map import TAG_MAP
-from .tag_orth_map import TAG_ORTH_MAP
-from .tag_bigram_map import TAG_BIGRAM_MAP
+import srsly
+from thinc.api import Model
+
+from ... import util
from ...errors import Errors
-from ...language import Language, BaseDefaults
+from ...language import BaseDefaults, Language
from ...pipeline import Morphologizer
from ...pipeline.morphologizer import DEFAULT_MORPH_MODEL
from ...scorer import Scorer
from ...symbols import POS
from ...tokens import Doc, MorphAnalysis
from ...training import validate_examples
-from ...util import DummyTokenizer, registry, load_config_from_str
+from ...util import DummyTokenizer, load_config_from_str, registry
from ...vocab import Vocab
-from ... import util
-
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tag_bigram_map import TAG_BIGRAM_MAP
+from .tag_map import TAG_MAP
+from .tag_orth_map import TAG_ORTH_MAP
DEFAULT_CONFIG = """
[nlp]
diff --git a/spacy/lang/ja/syntax_iterators.py b/spacy/lang/ja/syntax_iterators.py
index 588a9ba03..34670083e 100644
--- a/spacy/lang/ja/syntax_iterators.py
+++ b/spacy/lang/ja/syntax_iterators.py
@@ -1,9 +1,8 @@
-from typing import Union, Iterator, Tuple, Set
+from typing import Iterator, Set, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON, VERB
+from ...symbols import NOUN, PRON, PROPN, VERB
from ...tokens import Doc, Span
-
# TODO: this can probably be pruned a bit
# fmt: off
labels = ["nsubj", "nmod", "ddoclike", "nsubjpass", "pcomp", "pdoclike", "doclike", "obl", "dative", "appos", "attr", "ROOT"]
diff --git a/spacy/lang/ja/tag_map.py b/spacy/lang/ja/tag_map.py
index c6de3831a..5c14f41bf 100644
--- a/spacy/lang/ja/tag_map.py
+++ b/spacy/lang/ja/tag_map.py
@@ -1,6 +1,23 @@
-from ...symbols import POS, PUNCT, INTJ, ADJ, AUX, ADP, PART, SCONJ, NOUN
-from ...symbols import SYM, PRON, VERB, ADV, PROPN, NUM, DET, SPACE, CCONJ
-
+from ...symbols import (
+ ADJ,
+ ADP,
+ ADV,
+ AUX,
+ CCONJ,
+ DET,
+ INTJ,
+ NOUN,
+ NUM,
+ PART,
+ POS,
+ PRON,
+ PROPN,
+ PUNCT,
+ SCONJ,
+ SPACE,
+ SYM,
+ VERB,
+)
TAG_MAP = {
# Explanation of Unidic tags:
diff --git a/spacy/lang/kn/__init__.py b/spacy/lang/kn/__init__.py
index ccd46a394..44d53f6b7 100644
--- a/spacy/lang/kn/__init__.py
+++ b/spacy/lang/kn/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class KannadaDefaults(BaseDefaults):
diff --git a/spacy/lang/ko/__init__.py b/spacy/lang/ko/__init__.py
index 0e02e4a2d..e2c860f7d 100644
--- a/spacy/lang/ko/__init__.py
+++ b/spacy/lang/ko/__init__.py
@@ -1,17 +1,16 @@
-from typing import Iterator, Any, Dict
+from typing import Any, Dict, Iterator
+from ...language import BaseDefaults, Language
+from ...scorer import Scorer
+from ...symbols import POS, X
+from ...tokens import Doc
+from ...training import validate_examples
+from ...util import DummyTokenizer, load_config_from_str, registry
+from ...vocab import Vocab
+from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .tag_map import TAG_MAP
-from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
-from ...tokens import Doc
-from ...scorer import Scorer
-from ...symbols import POS, X
-from ...training import validate_examples
-from ...util import DummyTokenizer, registry, load_config_from_str
-from ...vocab import Vocab
-
DEFAULT_CONFIG = """
[nlp]
diff --git a/spacy/lang/ko/lex_attrs.py b/spacy/lang/ko/lex_attrs.py
index ac5bc7e48..2c49aa389 100644
--- a/spacy/lang/ko/lex_attrs.py
+++ b/spacy/lang/ko/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"영",
"공",
diff --git a/spacy/lang/ko/punctuation.py b/spacy/lang/ko/punctuation.py
index f5f1c51da..c3c32ea1f 100644
--- a/spacy/lang/ko/punctuation.py
+++ b/spacy/lang/ko/punctuation.py
@@ -1,7 +1,6 @@
from ..char_classes import LIST_QUOTES
from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
-
_infixes = (
["·", "ㆍ", r"\(", r"\)"]
+ [r"(?<=[0-9])~(?=[0-9-])"]
diff --git a/spacy/lang/ko/tag_map.py b/spacy/lang/ko/tag_map.py
index 26a8c56b9..85598c3ef 100644
--- a/spacy/lang/ko/tag_map.py
+++ b/spacy/lang/ko/tag_map.py
@@ -1,5 +1,21 @@
-from ...symbols import POS, PUNCT, INTJ, X, SYM, ADJ, AUX, ADP, CONJ, NOUN, PRON
-from ...symbols import VERB, ADV, PROPN, NUM, DET
+from ...symbols import (
+ ADJ,
+ ADP,
+ ADV,
+ AUX,
+ CONJ,
+ DET,
+ INTJ,
+ NOUN,
+ NUM,
+ POS,
+ PRON,
+ PROPN,
+ PUNCT,
+ SYM,
+ VERB,
+ X,
+)
# Maps 은전한닢 (mecab-ko-dic) part-of-speech tags to Universal POS tags
# https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY/edit#gid=589544265
diff --git a/spacy/lang/ky/__init__.py b/spacy/lang/ky/__init__.py
index ccca384bd..fafc0f020 100644
--- a/spacy/lang/ky/__init__.py
+++ b/spacy/lang/ky/__init__.py
@@ -1,8 +1,8 @@
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ...language import Language, BaseDefaults
class KyrgyzDefaults(BaseDefaults):
diff --git a/spacy/lang/ky/punctuation.py b/spacy/lang/ky/punctuation.py
index fa9819f80..6d89da2f7 100644
--- a/spacy/lang/ky/punctuation.py
+++ b/spacy/lang/ky/punctuation.py
@@ -1,5 +1,12 @@
-from ..char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER, CONCAT_QUOTES, HYPHENS
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
_hyphens_no_dash = HYPHENS.replace("-", "").strip("|").replace("||", "")
_infixes = (
diff --git a/spacy/lang/ky/tokenizer_exceptions.py b/spacy/lang/ky/tokenizer_exceptions.py
index 8ec727ac1..c93e3dac3 100644
--- a/spacy/lang/ky/tokenizer_exceptions.py
+++ b/spacy/lang/ky/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/la/__init__.py b/spacy/lang/la/__init__.py
index 37164c3f3..d77ae267e 100644
--- a/spacy/lang/la/__init__.py
+++ b/spacy/lang/la/__init__.py
@@ -1,8 +1,8 @@
-from ...language import Language, BaseDefaults
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
+from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class LatinDefaults(BaseDefaults):
diff --git a/spacy/lang/la/lex_attrs.py b/spacy/lang/la/lex_attrs.py
index 9db1218a4..fcb35defc 100644
--- a/spacy/lang/la/lex_attrs.py
+++ b/spacy/lang/la/lex_attrs.py
@@ -1,6 +1,7 @@
-from ...attrs import LIKE_NUM
import re
+from ...attrs import LIKE_NUM
+
# cf. Goyvaerts/Levithan 2009; case-insensitive, allow 4
roman_numerals_compile = re.compile(
r"(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$"
diff --git a/spacy/lang/la/syntax_iterators.py b/spacy/lang/la/syntax_iterators.py
index 7093bacf9..39b4fb39d 100644
--- a/spacy/lang/la/syntax_iterators.py
+++ b/spacy/lang/la/syntax_iterators.py
@@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
-from ...tokens import Doc, Span
-from ...symbols import NOUN, PROPN, PRON, VERB, AUX
+from typing import Iterator, Tuple, Union
+
from ...errors import Errors
+from ...symbols import AUX, NOUN, PRON, PROPN, VERB
+from ...tokens import Doc, Span
# NB: Modified from da on suggestion from https://github.com/explosion/spaCy/issues/7457#issuecomment-800349751 [PJB]
diff --git a/spacy/lang/la/tokenizer_exceptions.py b/spacy/lang/la/tokenizer_exceptions.py
index 6d14b92c5..c0b98116f 100644
--- a/spacy/lang/la/tokenizer_exceptions.py
+++ b/spacy/lang/la/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
## TODO: Look into systematically handling u/v
_exc = {
diff --git a/spacy/lang/lb/__init__.py b/spacy/lang/lb/__init__.py
index 7827e7762..2386b4356 100644
--- a/spacy/lang/lb/__init__.py
+++ b/spacy/lang/lb/__init__.py
@@ -1,8 +1,8 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_INFIXES
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class LuxembourgishDefaults(BaseDefaults):
diff --git a/spacy/lang/lb/lex_attrs.py b/spacy/lang/lb/lex_attrs.py
index d2d50d9dc..119231374 100644
--- a/spacy/lang/lb/lex_attrs.py
+++ b/spacy/lang/lb/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = set(
"""
null eent zwee dräi véier fënnef sechs ziwen aacht néng zéng eelef zwielef dräizéng
diff --git a/spacy/lang/lb/punctuation.py b/spacy/lang/lb/punctuation.py
index e382c56c5..8bdbf9713 100644
--- a/spacy/lang/lb/punctuation.py
+++ b/spacy/lang/lb/punctuation.py
@@ -1,4 +1,4 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+from ..char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER, LIST_ELLIPSES, LIST_ICONS
ELISION = " ' ’ ".strip().replace(" ", "")
diff --git a/spacy/lang/lb/tokenizer_exceptions.py b/spacy/lang/lb/tokenizer_exceptions.py
index d00dc9610..844826e27 100644
--- a/spacy/lang/lb/tokenizer_exceptions.py
+++ b/spacy/lang/lb/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
# TODO
# treat other apostrophes within words as part of the word: [op d'mannst], [fir d'éischt] (= exceptions)
diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py
index 6ed981a06..3ac20420d 100644
--- a/spacy/lang/lex_attrs.py
+++ b/spacy/lang/lex_attrs.py
@@ -1,11 +1,10 @@
-from typing import Set
-import unicodedata
import re
+import unicodedata
+from typing import Set
from .. import attrs
from .tokenizer_exceptions import URL_MATCH
-
_like_email = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)").match
_tlds = set(
"com|org|edu|gov|net|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|"
diff --git a/spacy/lang/lg/__init__.py b/spacy/lang/lg/__init__.py
index 6f7153fce..a87685375 100644
--- a/spacy/lang/lg/__init__.py
+++ b/spacy/lang/lg/__init__.py
@@ -1,7 +1,7 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class LugandaDefaults(BaseDefaults):
diff --git a/spacy/lang/lg/punctuation.py b/spacy/lang/lg/punctuation.py
index 5d3eb792e..775c6b001 100644
--- a/spacy/lang/lg/punctuation.py
+++ b/spacy/lang/lg/punctuation.py
@@ -1,5 +1,12 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
_infixes = (
LIST_ELLIPSES
diff --git a/spacy/lang/lij/__init__.py b/spacy/lang/lij/__init__.py
index b7e11f77e..3b8e972c6 100644
--- a/spacy/lang/lij/__init__.py
+++ b/spacy/lang/lij/__init__.py
@@ -1,7 +1,7 @@
+from ...language import BaseDefaults, Language
+from .punctuation import TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_INFIXES
-from ...language import Language, BaseDefaults
class LigurianDefaults(BaseDefaults):
diff --git a/spacy/lang/lij/punctuation.py b/spacy/lang/lij/punctuation.py
index d50b75589..c5c150d0a 100644
--- a/spacy/lang/lij/punctuation.py
+++ b/spacy/lang/lij/punctuation.py
@@ -1,6 +1,5 @@
-from ..punctuation import TOKENIZER_INFIXES
from ..char_classes import ALPHA
-
+from ..punctuation import TOKENIZER_INFIXES
ELISION = " ' ’ ".strip().replace(" ", "").replace("\n", "")
diff --git a/spacy/lang/lij/tokenizer_exceptions.py b/spacy/lang/lij/tokenizer_exceptions.py
index 52eae2c89..cf5a1af66 100644
--- a/spacy/lang/lij/tokenizer_exceptions.py
+++ b/spacy/lang/lij/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/lt/__init__.py b/spacy/lang/lt/__init__.py
index 3ae000e5f..f3ea257b1 100644
--- a/spacy/lang/lt/__init__.py
+++ b/spacy/lang/lt/__init__.py
@@ -1,8 +1,8 @@
-from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class LithuanianDefaults(BaseDefaults):
diff --git a/spacy/lang/lt/punctuation.py b/spacy/lang/lt/punctuation.py
index 22aee0941..deef24854 100644
--- a/spacy/lang/lt/punctuation.py
+++ b/spacy/lang/lt/punctuation.py
@@ -1,9 +1,14 @@
-from ..char_classes import LIST_ICONS, LIST_ELLIPSES
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
-from ..char_classes import HYPHENS
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
from ..punctuation import TOKENIZER_SUFFIXES
-
_infixes = (
LIST_ELLIPSES
+ LIST_ICONS
diff --git a/spacy/lang/lt/tokenizer_exceptions.py b/spacy/lang/lt/tokenizer_exceptions.py
index 118fb2190..d39b86dfc 100644
--- a/spacy/lang/lt/tokenizer_exceptions.py
+++ b/spacy/lang/lt/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/lv/__init__.py b/spacy/lang/lv/__init__.py
index a05e5b939..fdfca5e97 100644
--- a/spacy/lang/lv/__init__.py
+++ b/spacy/lang/lv/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class LatvianDefaults(BaseDefaults):
diff --git a/spacy/lang/mk/__init__.py b/spacy/lang/mk/__init__.py
index fa07cfef9..413f0038d 100644
--- a/spacy/lang/mk/__init__.py
+++ b/spacy/lang/mk/__init__.py
@@ -1,15 +1,16 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
+
+from ...attrs import LANG
+from ...language import BaseDefaults, Language
+from ...lookups import Lookups
+from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .lemmatizer import MacedonianLemmatizer
+from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .lex_attrs import LEX_ATTRS
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-
-from ...language import Language, BaseDefaults
-from ...attrs import LANG
-from ...util import update_exc
-from ...lookups import Lookups
class MacedonianDefaults(BaseDefaults):
diff --git a/spacy/lang/mk/lemmatizer.py b/spacy/lang/mk/lemmatizer.py
index a792095e7..f5a5eca85 100644
--- a/spacy/lang/mk/lemmatizer.py
+++ b/spacy/lang/mk/lemmatizer.py
@@ -1,5 +1,5 @@
-from typing import List
from collections import OrderedDict
+from typing import List
from ...pipeline import Lemmatizer
from ...tokens import Token
diff --git a/spacy/lang/mk/tokenizer_exceptions.py b/spacy/lang/mk/tokenizer_exceptions.py
index 3b589b2a9..40f2c1d80 100644
--- a/spacy/lang/mk/tokenizer_exceptions.py
+++ b/spacy/lang/mk/tokenizer_exceptions.py
@@ -1,5 +1,4 @@
-from ...symbols import ORTH, NORM
-
+from ...symbols import NORM, ORTH
_exc = {}
diff --git a/spacy/lang/ml/__init__.py b/spacy/lang/ml/__init__.py
index 9f90605f0..0b17b8a7a 100644
--- a/spacy/lang/ml/__init__.py
+++ b/spacy/lang/ml/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class MalayalamDefaults(BaseDefaults):
diff --git a/spacy/lang/ml/lex_attrs.py b/spacy/lang/ml/lex_attrs.py
index 9ac19b6a7..33a144f6b 100644
--- a/spacy/lang/ml/lex_attrs.py
+++ b/spacy/lang/ml/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
# reference 2: https://www.omniglot.com/language/numbers/malayalam.htm
_num_words = [
diff --git a/spacy/lang/mr/__init__.py b/spacy/lang/mr/__init__.py
index 3e172fa60..f980efbd0 100644
--- a/spacy/lang/mr/__init__.py
+++ b/spacy/lang/mr/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class MarathiDefaults(BaseDefaults):
diff --git a/spacy/lang/ms/__init__.py b/spacy/lang/ms/__init__.py
index 31a58a7e6..f53ebfcf2 100644
--- a/spacy/lang/ms/__init__.py
+++ b/spacy/lang/ms/__init__.py
@@ -1,9 +1,9 @@
-from .stop_words import STOP_WORDS
-from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class MalayDefaults(BaseDefaults):
diff --git a/spacy/lang/ms/lex_attrs.py b/spacy/lang/ms/lex_attrs.py
index 42759fa4f..2088c9955 100644
--- a/spacy/lang/ms/lex_attrs.py
+++ b/spacy/lang/ms/lex_attrs.py
@@ -1,8 +1,7 @@
import unicodedata
-from .punctuation import LIST_CURRENCY
from ...attrs import IS_CURRENCY, LIKE_NUM
-
+from .punctuation import LIST_CURRENCY
_num_words = [
"kosong",
diff --git a/spacy/lang/ms/punctuation.py b/spacy/lang/ms/punctuation.py
index 9fff72576..a8d6c2e8e 100644
--- a/spacy/lang/ms/punctuation.py
+++ b/spacy/lang/ms/punctuation.py
@@ -1,6 +1,5 @@
-from ..punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
-from ..char_classes import ALPHA, merge_chars, split_chars, _currency, _units
-
+from ..char_classes import ALPHA, _currency, _units, merge_chars, split_chars
+from ..punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
_units = (
_units + "s bit Gbps Mbps mbps Kbps kbps ƒ ppi px "
diff --git a/spacy/lang/ms/syntax_iterators.py b/spacy/lang/ms/syntax_iterators.py
index fa984d411..027798687 100644
--- a/spacy/lang/ms/syntax_iterators.py
+++ b/spacy/lang/ms/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/ms/tokenizer_exceptions.py b/spacy/lang/ms/tokenizer_exceptions.py
index 6b6cf3b15..e8b53fed8 100644
--- a/spacy/lang/ms/tokenizer_exceptions.py
+++ b/spacy/lang/ms/tokenizer_exceptions.py
@@ -1,8 +1,7 @@
+from ...symbols import NORM, ORTH
+from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ._tokenizer_exceptions_list import MS_BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
-from ...util import update_exc
-
# List of abbreviations and acronyms from:
# https://ms.wiktionary.org/wiki/Wiktionary:Senarai_akronim_dan_singkatan
diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py
index e079236fd..ef4665ccc 100644
--- a/spacy/lang/nb/__init__.py
+++ b/spacy/lang/nb/__init__.py
@@ -1,12 +1,13 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .punctuation import TOKENIZER_SUFFIXES
+
+from ...language import BaseDefaults, Language
+from ...pipeline import Lemmatizer
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
-from ...pipeline import Lemmatizer
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class NorwegianDefaults(BaseDefaults):
diff --git a/spacy/lang/nb/punctuation.py b/spacy/lang/nb/punctuation.py
index 8f2933670..a1fdb872a 100644
--- a/spacy/lang/nb/punctuation.py
+++ b/spacy/lang/nb/punctuation.py
@@ -1,7 +1,17 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
-from ..char_classes import CURRENCY, PUNCT, UNITS, LIST_CURRENCY
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+ UNITS,
+)
# Punctuation adapted from Danish
_quotes = CONCAT_QUOTES.replace("'", "")
diff --git a/spacy/lang/nb/syntax_iterators.py b/spacy/lang/nb/syntax_iterators.py
index d86662693..89a8f5edf 100644
--- a/spacy/lang/nb/syntax_iterators.py
+++ b/spacy/lang/nb/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/nb/tokenizer_exceptions.py b/spacy/lang/nb/tokenizer_exceptions.py
index 0be436ae4..9b99a1d65 100644
--- a/spacy/lang/nb/tokenizer_exceptions.py
+++ b/spacy/lang/nb/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/ne/__init__.py b/spacy/lang/ne/__init__.py
index 0028d1b0b..5c9e6870e 100644
--- a/spacy/lang/ne/__init__.py
+++ b/spacy/lang/ne/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class NepaliDefaults(BaseDefaults):
diff --git a/spacy/lang/ne/lex_attrs.py b/spacy/lang/ne/lex_attrs.py
index 7cb01c515..91d5b0eb5 100644
--- a/spacy/lang/ne/lex_attrs.py
+++ b/spacy/lang/ne/lex_attrs.py
@@ -1,6 +1,5 @@
+from ...attrs import LIKE_NUM, NORM
from ..norm_exceptions import BASE_NORMS
-from ...attrs import NORM, LIKE_NUM
-
# fmt: off
_stem_suffixes = [
diff --git a/spacy/lang/nl/__init__.py b/spacy/lang/nl/__init__.py
index ad2205a0b..213041a85 100644
--- a/spacy/lang/nl/__init__.py
+++ b/spacy/lang/nl/__init__.py
@@ -1,15 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
from thinc.api import Model
+from ...language import BaseDefaults, Language
from .lemmatizer import DutchLemmatizer
from .lex_attrs import LEX_ATTRS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .punctuation import TOKENIZER_SUFFIXES
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ...language import Language, BaseDefaults
class DutchDefaults(BaseDefaults):
diff --git a/spacy/lang/nl/lex_attrs.py b/spacy/lang/nl/lex_attrs.py
index f1acaefeb..488224c2f 100644
--- a/spacy/lang/nl/lex_attrs.py
+++ b/spacy/lang/nl/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = set(
"""
nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien
diff --git a/spacy/lang/nl/punctuation.py b/spacy/lang/nl/punctuation.py
index d9dd2a6e3..c9a4c9eeb 100644
--- a/spacy/lang/nl/punctuation.py
+++ b/spacy/lang/nl/punctuation.py
@@ -1,10 +1,19 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_UNITS, merge_chars
-from ..char_classes import LIST_PUNCT, LIST_QUOTES, CURRENCY, PUNCT
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ LIST_UNITS,
+ PUNCT,
+ merge_chars,
+)
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
-
_prefixes = [",,"] + BASE_TOKENIZER_PREFIXES
diff --git a/spacy/lang/nl/syntax_iterators.py b/spacy/lang/nl/syntax_iterators.py
index be9beabe6..d7388a333 100644
--- a/spacy/lang/nl/syntax_iterators.py
+++ b/spacy/lang/nl/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...symbols import NOUN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON
from ...tokens import Doc, Span
diff --git a/spacy/lang/nl/tokenizer_exceptions.py b/spacy/lang/nl/tokenizer_exceptions.py
index 489d10d71..85ad49f14 100644
--- a/spacy/lang/nl/tokenizer_exceptions.py
+++ b/spacy/lang/nl/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
# Extensive list of both common and uncommon dutch abbreviations copied from
# github.com/diasks2/pragmatic_segmenter, a Ruby library for rule-based
diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py
index 02c96799b..50a3a8e4c 100644
--- a/spacy/lang/pl/__init__.py
+++ b/spacy/lang/pl/__init__.py
@@ -1,15 +1,13 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
from thinc.api import Model
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .punctuation import TOKENIZER_SUFFIXES
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .lemmatizer import PolishLemmatizer
+from ...language import BaseDefaults, Language
from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...language import Language, BaseDefaults
-
+from .lemmatizer import PolishLemmatizer
+from .lex_attrs import LEX_ATTRS
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
TOKENIZER_EXCEPTIONS = {
exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
diff --git a/spacy/lang/pl/lemmatizer.py b/spacy/lang/pl/lemmatizer.py
index 059d0609a..d1d2a9c54 100644
--- a/spacy/lang/pl/lemmatizer.py
+++ b/spacy/lang/pl/lemmatizer.py
@@ -1,4 +1,4 @@
-from typing import List, Dict, Tuple
+from typing import Dict, List, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
diff --git a/spacy/lang/pl/lex_attrs.py b/spacy/lang/pl/lex_attrs.py
index ce56e28a8..398f52a3c 100644
--- a/spacy/lang/pl/lex_attrs.py
+++ b/spacy/lang/pl/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"zero",
"jeden",
diff --git a/spacy/lang/pl/punctuation.py b/spacy/lang/pl/punctuation.py
index 31e56b9ae..84ff239ed 100644
--- a/spacy/lang/pl/punctuation.py
+++ b/spacy/lang/pl/punctuation.py
@@ -1,6 +1,17 @@
-from ..char_classes import LIST_ELLIPSES, LIST_PUNCT, LIST_HYPHENS
-from ..char_classes import LIST_ICONS, LIST_QUOTES, CURRENCY, UNITS, PUNCT
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_HYPHENS,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+ UNITS,
+)
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
_quotes = CONCAT_QUOTES.replace("'", "")
diff --git a/spacy/lang/pt/__init__.py b/spacy/lang/pt/__init__.py
index 454002491..be4041f8e 100644
--- a/spacy/lang/pt/__init__.py
+++ b/spacy/lang/pt/__init__.py
@@ -1,9 +1,9 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from .syntax_iterators import SYNTAX_ITERATORS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class PortugueseDefaults(BaseDefaults):
diff --git a/spacy/lang/pt/lex_attrs.py b/spacy/lang/pt/lex_attrs.py
index 3c6979ab4..de6a67f14 100644
--- a/spacy/lang/pt/lex_attrs.py
+++ b/spacy/lang/pt/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"zero",
"um",
diff --git a/spacy/lang/pt/punctuation.py b/spacy/lang/pt/punctuation.py
index 08e31f9d0..b2d63cb3d 100644
--- a/spacy/lang/pt/punctuation.py
+++ b/spacy/lang/pt/punctuation.py
@@ -1,6 +1,6 @@
+from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
from ..punctuation import TOKENIZER_SUFFIXES as BASE_TOKENIZER_SUFFIXES
-from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
_prefixes = [r"\w{1,3}\$"] + BASE_TOKENIZER_PREFIXES
diff --git a/spacy/lang/pt/syntax_iterators.py b/spacy/lang/pt/syntax_iterators.py
index 62661f5e4..11017aace 100644
--- a/spacy/lang/pt/syntax_iterators.py
+++ b/spacy/lang/pt/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union

-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/pt/tokenizer_exceptions.py b/spacy/lang/pt/tokenizer_exceptions.py
index 187fc65ea..e369eda80 100644
--- a/spacy/lang/pt/tokenizer_exceptions.py
+++ b/spacy/lang/pt/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/punctuation.py b/spacy/lang/punctuation.py
index a1cfe6224..e4a6392c8 100644
--- a/spacy/lang/punctuation.py
+++ b/spacy/lang/punctuation.py
@@ -1,7 +1,19 @@
-from .char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
-from .char_classes import LIST_ICONS, HYPHENS, CURRENCY, UNITS, COMBINING_DIACRITICS
-from .char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT
-
+from .char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ COMBINING_DIACRITICS,
+ CONCAT_QUOTES,
+ CURRENCY,
+ HYPHENS,
+ LIST_CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+ UNITS,
+)
TOKENIZER_PREFIXES = (
["§", "%", "=", "—", "–", r"\+(?![0-9])"]
diff --git a/spacy/lang/ro/__init__.py b/spacy/lang/ro/__init__.py
index 50027ffd2..441fefbb6 100644
--- a/spacy/lang/ro/__init__.py
+++ b/spacy/lang/ro/__init__.py
@@ -1,9 +1,8 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-from .punctuation import TOKENIZER_SUFFIXES
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
# Lemma data note:
# Original pairs downloaded from http://www.lexiconista.com/datasets/lemmatization/
diff --git a/spacy/lang/ro/lex_attrs.py b/spacy/lang/ro/lex_attrs.py
index 0f86f53cd..736aa911a 100644
--- a/spacy/lang/ro/lex_attrs.py
+++ b/spacy/lang/ro/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = set(
"""
zero unu doi două trei patru cinci șase șapte opt nouă zece
diff --git a/spacy/lang/ro/punctuation.py b/spacy/lang/ro/punctuation.py
index 529e1c977..7259f9ae7 100644
--- a/spacy/lang/ro/punctuation.py
+++ b/spacy/lang/ro/punctuation.py
@@ -1,9 +1,18 @@
import itertools
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
-from ..char_classes import LIST_ICONS, CURRENCY
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+)
_list_icons = [x for x in LIST_ICONS if x != "°"]
_list_icons = [x.replace("\\u00B0", "") for x in _list_icons]
diff --git a/spacy/lang/ro/tokenizer_exceptions.py b/spacy/lang/ro/tokenizer_exceptions.py
index b8af0b1d6..a397b2754 100644
--- a/spacy/lang/ro/tokenizer_exceptions.py
+++ b/spacy/lang/ro/tokenizer_exceptions.py
@@ -1,9 +1,8 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .punctuation import _make_ro_variants
-
_exc = {}
diff --git a/spacy/lang/ru/__init__.py b/spacy/lang/ru/__init__.py
index 7d17628c4..880965b70 100644
--- a/spacy/lang/ru/__init__.py
+++ b/spacy/lang/ru/__init__.py
@@ -1,13 +1,16 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
+from ...language import BaseDefaults, Language
+from ..punctuation import (
+ COMBINING_DIACRITICS_TOKENIZER_INFIXES,
+ COMBINING_DIACRITICS_TOKENIZER_SUFFIXES,
+)
+from .lemmatizer import RussianLemmatizer
+from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .lex_attrs import LEX_ATTRS
-from .lemmatizer import RussianLemmatizer
-from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_INFIXES
-from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
class RussianDefaults(BaseDefaults):
diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py
index f4a35de38..1e41220f3 100644
--- a/spacy/lang/ru/lemmatizer.py
+++ b/spacy/lang/ru/lemmatizer.py
@@ -1,4 +1,4 @@
-from typing import Optional, List, Dict, Tuple, Callable
+from typing import Callable, Dict, List, Optional, Tuple
from thinc.api import Model
@@ -8,7 +8,6 @@ from ...symbols import POS
from ...tokens import Token
from ...vocab import Vocab
-
PUNCT_RULES = {"«": '"', "»": '"'}
diff --git a/spacy/lang/ru/lex_attrs.py b/spacy/lang/ru/lex_attrs.py
index 2afe47623..e0b35bdc0 100644
--- a/spacy/lang/ru/lex_attrs.py
+++ b/spacy/lang/ru/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = list(
set(
"""
diff --git a/spacy/lang/ru/tokenizer_exceptions.py b/spacy/lang/ru/tokenizer_exceptions.py
index e1889f785..0a8c476b1 100644
--- a/spacy/lang/ru/tokenizer_exceptions.py
+++ b/spacy/lang/ru/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/sa/__init__.py b/spacy/lang/sa/__init__.py
index 61398af6c..c7c0e98e6 100644
--- a/spacy/lang/sa/__init__.py
+++ b/spacy/lang/sa/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class SanskritDefaults(BaseDefaults):
diff --git a/spacy/lang/si/__init__.py b/spacy/lang/si/__init__.py
index 971cee3c6..08d0937b1 100644
--- a/spacy/lang/si/__init__.py
+++ b/spacy/lang/si/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class SinhalaDefaults(BaseDefaults):
diff --git a/spacy/lang/sk/__init__.py b/spacy/lang/sk/__init__.py
index da6e3048e..2ed7448d2 100644
--- a/spacy/lang/sk/__init__.py
+++ b/spacy/lang/sk/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class SlovakDefaults(BaseDefaults):
diff --git a/spacy/lang/sl/__init__.py b/spacy/lang/sl/__init__.py
index 0070e9fa1..cd3d70fc9 100644
--- a/spacy/lang/sl/__init__.py
+++ b/spacy/lang/sl/__init__.py
@@ -1,8 +1,8 @@
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ...language import Language, BaseDefaults
class SlovenianDefaults(BaseDefaults):
diff --git a/spacy/lang/sl/lex_attrs.py b/spacy/lang/sl/lex_attrs.py
index 958152e37..3c1493050 100644
--- a/spacy/lang/sl/lex_attrs.py
+++ b/spacy/lang/sl/lex_attrs.py
@@ -1,7 +1,6 @@
-from ...attrs import LIKE_NUM
-from ...attrs import IS_CURRENCY
import unicodedata
+from ...attrs import IS_CURRENCY, LIKE_NUM
_num_words = set(
"""
diff --git a/spacy/lang/sl/punctuation.py b/spacy/lang/sl/punctuation.py
index b6ca1830e..dadb54d31 100644
--- a/spacy/lang/sl/punctuation.py
+++ b/spacy/lang/sl/punctuation.py
@@ -1,20 +1,21 @@
from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ HYPHENS,
+ LIST_CURRENCY,
LIST_ELLIPSES,
LIST_ICONS,
- HYPHENS,
LIST_PUNCT,
LIST_QUOTES,
- CURRENCY,
- UNITS,
PUNCT,
- LIST_CURRENCY,
- CONCAT_QUOTES,
+ UNITS,
+ merge_chars,
)
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
-from ..char_classes import merge_chars
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
-
INCLUDE_SPECIAL = ["\\+", "\\/", "\\•", "\\¯", "\\=", "\\×"] + HYPHENS.split("|")
_prefixes = INCLUDE_SPECIAL + BASE_TOKENIZER_PREFIXES
diff --git a/spacy/lang/sl/tokenizer_exceptions.py b/spacy/lang/sl/tokenizer_exceptions.py
index 3d4109228..ec4ea9e41 100644
--- a/spacy/lang/sl/tokenizer_exceptions.py
+++ b/spacy/lang/sl/tokenizer_exceptions.py
@@ -1,7 +1,8 @@
from typing import Dict, List
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+
+from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc: Dict[str, List[Dict]] = {}
diff --git a/spacy/lang/sq/__init__.py b/spacy/lang/sq/__init__.py
index 5e32a0cbe..1c8a5acf8 100644
--- a/spacy/lang/sq/__init__.py
+++ b/spacy/lang/sq/__init__.py
@@ -1,5 +1,5 @@
+from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
-from ...language import Language, BaseDefaults
class AlbanianDefaults(BaseDefaults):
diff --git a/spacy/lang/sr/__init__.py b/spacy/lang/sr/__init__.py
index b99ce96ec..5f121d79e 100644
--- a/spacy/lang/sr/__init__.py
+++ b/spacy/lang/sr/__init__.py
@@ -1,8 +1,8 @@
-from .stop_words import STOP_WORDS
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class SerbianDefaults(BaseDefaults):
diff --git a/spacy/lang/sr/lex_attrs.py b/spacy/lang/sr/lex_attrs.py
index dc48909bc..696b9fd74 100644
--- a/spacy/lang/sr/lex_attrs.py
+++ b/spacy/lang/sr/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"нула",
"један",
diff --git a/spacy/lang/sr/punctuation.py b/spacy/lang/sr/punctuation.py
index 793a20ec2..cafb0f68f 100644
--- a/spacy/lang/sr/punctuation.py
+++ b/spacy/lang/sr/punctuation.py
@@ -1,7 +1,16 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
-from ..char_classes import CURRENCY, UNITS, PUNCT
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
-
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ PUNCT,
+ UNITS,
+)
_infixes = (
LIST_ELLIPSES
diff --git a/spacy/lang/sr/tokenizer_exceptions.py b/spacy/lang/sr/tokenizer_exceptions.py
index dcaa3e239..b7db0aadc 100755
--- a/spacy/lang/sr/tokenizer_exceptions.py
+++ b/spacy/lang/sr/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/sv/__init__.py b/spacy/lang/sv/__init__.py
index 28e5085a8..bb4ee1702 100644
--- a/spacy/lang/sv/__init__.py
+++ b/spacy/lang/sv/__init__.py
@@ -1,12 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
+
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+
+from ...language import BaseDefaults, Language
from ...pipeline import Lemmatizer
+from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
+from .stop_words import STOP_WORDS
+from .syntax_iterators import SYNTAX_ITERATORS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class SwedishDefaults(BaseDefaults):
diff --git a/spacy/lang/sv/lex_attrs.py b/spacy/lang/sv/lex_attrs.py
index f8ada9e2e..8eeafede8 100644
--- a/spacy/lang/sv/lex_attrs.py
+++ b/spacy/lang/sv/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"noll",
"en",
diff --git a/spacy/lang/sv/punctuation.py b/spacy/lang/sv/punctuation.py
index 67f1bcdc4..64f1da989 100644
--- a/spacy/lang/sv/punctuation.py
+++ b/spacy/lang/sv/punctuation.py
@@ -1,8 +1,13 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
from ..punctuation import TOKENIZER_SUFFIXES
-
_quotes = CONCAT_QUOTES.replace("'", "")
_infixes = (
diff --git a/spacy/lang/sv/syntax_iterators.py b/spacy/lang/sv/syntax_iterators.py
index 06ad016ac..09153a8ec 100644
--- a/spacy/lang/sv/syntax_iterators.py
+++ b/spacy/lang/sv/syntax_iterators.py
@@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union

-from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
diff --git a/spacy/lang/sv/tokenizer_exceptions.py b/spacy/lang/sv/tokenizer_exceptions.py
index ce7db895a..8fd3afbe3 100644
--- a/spacy/lang/sv/tokenizer_exceptions.py
+++ b/spacy/lang/sv/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import NORM, ORTH
from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/ta/__init__.py b/spacy/lang/ta/__init__.py
index 4929a4b97..7fd29371a 100644
--- a/spacy/lang/ta/__init__.py
+++ b/spacy/lang/ta/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class TamilDefaults(BaseDefaults):
diff --git a/spacy/lang/ta/lex_attrs.py b/spacy/lang/ta/lex_attrs.py
index f830f4ac9..d66125552 100644
--- a/spacy/lang/ta/lex_attrs.py
+++ b/spacy/lang/ta/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_numeral_suffixes = {"பத்து": "பது", "ற்று": "று", "ரத்து": "ரம்", "சத்து": "சம்"}
_num_words = [
"பூச்சியம்",
diff --git a/spacy/lang/te/__init__.py b/spacy/lang/te/__init__.py
index 77cc2fe9b..611e9746a 100644
--- a/spacy/lang/te/__init__.py
+++ b/spacy/lang/te/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class TeluguDefaults(BaseDefaults):
diff --git a/spacy/lang/th/__init__.py b/spacy/lang/th/__init__.py
index 12b1527e0..bd29d32a4 100644
--- a/spacy/lang/th/__init__.py
+++ b/spacy/lang/th/__init__.py
@@ -1,10 +1,9 @@
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from ...language import BaseDefaults, Language
from ...tokens import Doc
-from ...util import DummyTokenizer, registry, load_config_from_str
+from ...util import DummyTokenizer, load_config_from_str, registry
from ...vocab import Vocab
-
+from .lex_attrs import LEX_ATTRS
+from .stop_words import STOP_WORDS
DEFAULT_CONFIG = """
[nlp]
diff --git a/spacy/lang/th/lex_attrs.py b/spacy/lang/th/lex_attrs.py
index bc4e5293e..80f6ccbe8 100644
--- a/spacy/lang/th/lex_attrs.py
+++ b/spacy/lang/th/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"ศูนย์",
"หนึ่ง",
diff --git a/spacy/lang/th/tokenizer_exceptions.py b/spacy/lang/th/tokenizer_exceptions.py
index 92116d474..954766d28 100644
--- a/spacy/lang/th/tokenizer_exceptions.py
+++ b/spacy/lang/th/tokenizer_exceptions.py
@@ -1,6 +1,5 @@
from ...symbols import ORTH
-
_exc = {
# หน่วยงานรัฐ / government agency
"กกต.": [{ORTH: "กกต."}],
diff --git a/spacy/lang/ti/__init__.py b/spacy/lang/ti/__init__.py
index c74c081b5..510999f67 100644
--- a/spacy/lang/ti/__init__.py
+++ b/spacy/lang/ti/__init__.py
@@ -1,12 +1,11 @@
-from .stop_words import STOP_WORDS
+from ...attrs import LANG
+from ...language import BaseDefaults, Language
+from ...util import update_exc
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
-
+from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...language import Language, BaseDefaults
-from ...attrs import LANG
-from ...util import update_exc
class TigrinyaDefaults(BaseDefaults):
diff --git a/spacy/lang/ti/punctuation.py b/spacy/lang/ti/punctuation.py
index aa884c2ba..f29f30e26 100644
--- a/spacy/lang/ti/punctuation.py
+++ b/spacy/lang/ti/punctuation.py
@@ -1,5 +1,11 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
-from ..char_classes import UNITS, ALPHA_UPPER
+from ..char_classes import (
+ ALPHA_UPPER,
+ CURRENCY,
+ LIST_ELLIPSES,
+ LIST_PUNCT,
+ LIST_QUOTES,
+ UNITS,
+)
_list_punct = LIST_PUNCT + "፡ ። ፣ ፤ ፥ ፦ ፧ ፠ ፨".strip().split()
diff --git a/spacy/lang/ti/tokenizer_exceptions.py b/spacy/lang/ti/tokenizer_exceptions.py
index 3d79cd84b..711e4b406 100644
--- a/spacy/lang/ti/tokenizer_exceptions.py
+++ b/spacy/lang/ti/tokenizer_exceptions.py
@@ -1,5 +1,4 @@
-from ...symbols import ORTH, NORM
-
+from ...symbols import NORM, ORTH
_exc = {}
diff --git a/spacy/lang/tl/__init__.py b/spacy/lang/tl/__init__.py
index 30838890a..6849810ef 100644
--- a/spacy/lang/tl/__init__.py
+++ b/spacy/lang/tl/__init__.py
@@ -1,7 +1,7 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class TagalogDefaults(BaseDefaults):
diff --git a/spacy/lang/tl/lex_attrs.py b/spacy/lang/tl/lex_attrs.py
index 60bdc923b..8866453a0 100644
--- a/spacy/lang/tl/lex_attrs.py
+++ b/spacy/lang/tl/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"sero",
"isa",
diff --git a/spacy/lang/tl/tokenizer_exceptions.py b/spacy/lang/tl/tokenizer_exceptions.py
index 51ad12d9f..b10c90437 100644
--- a/spacy/lang/tl/tokenizer_exceptions.py
+++ b/spacy/lang/tl/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {
"tayo'y": [{ORTH: "tayo"}, {ORTH: "'y", NORM: "ay"}],
diff --git a/spacy/lang/tn/__init__.py b/spacy/lang/tn/__init__.py
index 28e887eea..4cb8a1635 100644
--- a/spacy/lang/tn/__init__.py
+++ b/spacy/lang/tn/__init__.py
@@ -1,7 +1,7 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class SetswanaDefaults(BaseDefaults):
diff --git a/spacy/lang/tn/punctuation.py b/spacy/lang/tn/punctuation.py
index a52755564..54d76fbaf 100644
--- a/spacy/lang/tn/punctuation.py
+++ b/spacy/lang/tn/punctuation.py
@@ -1,5 +1,12 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
_infixes = (
LIST_ELLIPSES
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index d76fe4262..dbf9aab49 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -1,8 +1,7 @@
import re
+from ..symbols import NORM, ORTH
from .char_classes import ALPHA_LOWER
-from ..symbols import ORTH, NORM
-
# URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex
# and https://gist.github.com/dperini/729294 (Diego Perini, MIT License)
diff --git a/spacy/lang/tr/__init__.py b/spacy/lang/tr/__init__.py
index 02b5c7bf4..9aa752168 100644
--- a/spacy/lang/tr/__init__.py
+++ b/spacy/lang/tr/__init__.py
@@ -1,8 +1,8 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
+from ...language import BaseDefaults, Language
+from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKEN_MATCH, TOKENIZER_EXCEPTIONS
class TurkishDefaults(BaseDefaults):
diff --git a/spacy/lang/tr/lex_attrs.py b/spacy/lang/tr/lex_attrs.py
index 6d9f4f388..2189932b6 100644
--- a/spacy/lang/tr/lex_attrs.py
+++ b/spacy/lang/tr/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
# Thirteen, fifteen etc. are written separate: on üç
_num_words = [
diff --git a/spacy/lang/tr/syntax_iterators.py b/spacy/lang/tr/syntax_iterators.py
index 769af1223..ed588424a 100644
--- a/spacy/lang/tr/syntax_iterators.py
+++ b/spacy/lang/tr/syntax_iterators.py
@@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
-from ...tokens import Doc, Span
-from ...symbols import NOUN, PROPN, PRON
+from typing import Iterator, Tuple, Union
+
from ...errors import Errors
+from ...symbols import NOUN, PRON, PROPN
+from ...tokens import Doc, Span
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
diff --git a/spacy/lang/tr/tokenizer_exceptions.py b/spacy/lang/tr/tokenizer_exceptions.py
index 22fa9f09e..d095a3d0e 100644
--- a/spacy/lang/tr/tokenizer_exceptions.py
+++ b/spacy/lang/tr/tokenizer_exceptions.py
@@ -1,8 +1,7 @@
import re
-from ..punctuation import ALPHA_LOWER, ALPHA
-from ...symbols import ORTH, NORM
-
+from ...symbols import NORM, ORTH
+from ..punctuation import ALPHA, ALPHA_LOWER
_exc = {}
diff --git a/spacy/lang/tt/__init__.py b/spacy/lang/tt/__init__.py
index d5e1e87ef..ce04d09c2 100644
--- a/spacy/lang/tt/__init__.py
+++ b/spacy/lang/tt/__init__.py
@@ -1,8 +1,8 @@
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ...language import Language, BaseDefaults
class TatarDefaults(BaseDefaults):
diff --git a/spacy/lang/tt/punctuation.py b/spacy/lang/tt/punctuation.py
index f644a8ccb..5c233df7c 100644
--- a/spacy/lang/tt/punctuation.py
+++ b/spacy/lang/tt/punctuation.py
@@ -1,5 +1,12 @@
-from ..char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER, CONCAT_QUOTES, HYPHENS
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS
+from ..char_classes import (
+ ALPHA,
+ ALPHA_LOWER,
+ ALPHA_UPPER,
+ CONCAT_QUOTES,
+ HYPHENS,
+ LIST_ELLIPSES,
+ LIST_ICONS,
+)
_hyphens_no_dash = HYPHENS.replace("-", "").strip("|").replace("||", "")
_infixes = (
diff --git a/spacy/lang/tt/tokenizer_exceptions.py b/spacy/lang/tt/tokenizer_exceptions.py
index 3b8cc86b5..280b9f866 100644
--- a/spacy/lang/tt/tokenizer_exceptions.py
+++ b/spacy/lang/tt/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/uk/__init__.py b/spacy/lang/uk/__init__.py
index bfea9ff69..5dd75a2a4 100644
--- a/spacy/lang/uk/__init__.py
+++ b/spacy/lang/uk/__init__.py
@@ -1,14 +1,16 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
+from ...language import BaseDefaults, Language
+from ..punctuation import (
+ COMBINING_DIACRITICS_TOKENIZER_INFIXES,
+ COMBINING_DIACRITICS_TOKENIZER_SUFFIXES,
+)
from .lemmatizer import UkrainianLemmatizer
-from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_INFIXES
-from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
+from .lex_attrs import LEX_ATTRS
+from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class UkrainianDefaults(BaseDefaults):
diff --git a/spacy/lang/uk/lemmatizer.py b/spacy/lang/uk/lemmatizer.py
index 37015cc2a..9ec582b76 100644
--- a/spacy/lang/uk/lemmatizer.py
+++ b/spacy/lang/uk/lemmatizer.py
@@ -1,10 +1,10 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
from thinc.api import Model
-from ..ru.lemmatizer import RussianLemmatizer
from ...pipeline.lemmatizer import lemmatizer_score
from ...vocab import Vocab
+from ..ru.lemmatizer import RussianLemmatizer
class UkrainianLemmatizer(RussianLemmatizer):
diff --git a/spacy/lang/uk/tokenizer_exceptions.py b/spacy/lang/uk/tokenizer_exceptions.py
index 7e168a27c..07dd941af 100644
--- a/spacy/lang/uk/tokenizer_exceptions.py
+++ b/spacy/lang/uk/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
diff --git a/spacy/lang/ur/__init__.py b/spacy/lang/ur/__init__.py
index 266c5a73d..4f20ac92f 100644
--- a/spacy/lang/ur/__init__.py
+++ b/spacy/lang/ur/__init__.py
@@ -1,7 +1,7 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class UrduDefaults(BaseDefaults):
diff --git a/spacy/lang/ur/punctuation.py b/spacy/lang/ur/punctuation.py
index 5d35d0a25..382bfc75c 100644
--- a/spacy/lang/ur/punctuation.py
+++ b/spacy/lang/ur/punctuation.py
@@ -1,4 +1,3 @@
from ..punctuation import TOKENIZER_SUFFIXES
-
_suffixes = TOKENIZER_SUFFIXES
diff --git a/spacy/lang/vi/__init__.py b/spacy/lang/vi/__init__.py
index 822dc348c..a621b8bfe 100644
--- a/spacy/lang/vi/__init__.py
+++ b/spacy/lang/vi/__init__.py
@@ -1,17 +1,17 @@
-from typing import Any, Dict, Union
-from pathlib import Path
import re
-import srsly
import string
+from pathlib import Path
+from typing import Any, Dict, Union
+
+import srsly

-from .stop_words import STOP_WORDS
-from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
-from ...tokens import Doc
-from ...util import DummyTokenizer, registry, load_config_from_str
-from ...vocab import Vocab
from ... import util
-
+from ...language import BaseDefaults, Language
+from ...tokens import Doc
+from ...util import DummyTokenizer, load_config_from_str, registry
+from ...vocab import Vocab
+from .lex_attrs import LEX_ATTRS
+from .stop_words import STOP_WORDS
DEFAULT_CONFIG = """
[nlp]
diff --git a/spacy/lang/vi/lex_attrs.py b/spacy/lang/vi/lex_attrs.py
index 0cbda4ffb..82997a133 100644
--- a/spacy/lang/vi/lex_attrs.py
+++ b/spacy/lang/vi/lex_attrs.py
@@ -1,6 +1,5 @@
from ...attrs import LIKE_NUM
-
_num_words = [
"không", # Zero
"một", # One
diff --git a/spacy/lang/yo/__init__.py b/spacy/lang/yo/__init__.py
index 6c38ec8af..93c4ca493 100644
--- a/spacy/lang/yo/__init__.py
+++ b/spacy/lang/yo/__init__.py
@@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
class YorubaDefaults(BaseDefaults):
diff --git a/spacy/lang/yo/lex_attrs.py b/spacy/lang/yo/lex_attrs.py
index ead68ced2..5f33e06a5 100644
--- a/spacy/lang/yo/lex_attrs.py
+++ b/spacy/lang/yo/lex_attrs.py
@@ -2,7 +2,6 @@ import unicodedata
from ...attrs import LIKE_NUM
-
_num_words = [
"ení",
"oókàn",
diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py
index fdf6776e2..f7bb09277 100644
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -1,21 +1,21 @@
-from typing import Optional, List, Dict, Any, Callable, Iterable
-from enum import Enum
import tempfile
-import srsly
import warnings
+from enum import Enum
from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional

-from ...errors import Warnings, Errors
-from ...language import Language, BaseDefaults
+import srsly
+
+from ... import util
+from ...errors import Errors, Warnings
+from ...language import BaseDefaults, Language
from ...scorer import Scorer
from ...tokens import Doc
-from ...training import validate_examples, Example
-from ...util import DummyTokenizer, registry, load_config_from_str
+from ...training import Example, validate_examples
+from ...util import DummyTokenizer, load_config_from_str, registry
from ...vocab import Vocab
from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
-from ... import util
-
# fmt: off
_PKUSEG_INSTALL_MSG = "install spacy-pkuseg with `pip install \"spacy-pkuseg>=0.0.27,<0.1.0\"` or `conda install -c conda-forge \"spacy-pkuseg>=0.0.27,<0.1.0\"`"
diff --git a/spacy/lang/zh/lex_attrs.py b/spacy/lang/zh/lex_attrs.py
index 08c8e3160..36fa7310a 100644
--- a/spacy/lang/zh/lex_attrs.py
+++ b/spacy/lang/zh/lex_attrs.py
@@ -2,7 +2,6 @@ import re
from ...attrs import LIKE_NUM
-
_single_num_words = [
"〇",
"一",
diff --git a/spacy/language.py b/spacy/language.py
index 0e9ff6893..80077bf69 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1,47 +1,70 @@
-from typing import Iterator, Optional, Any, Dict, Callable, Iterable
-from typing import Union, Tuple, List, Set, Pattern, Sequence
-from typing import NoReturn, TypeVar, cast, overload
-
-from dataclasses import dataclass
-import random
-import itertools
import functools
+import itertools
+import multiprocessing as mp
+import random
+import traceback
+import warnings
from contextlib import contextmanager
from copy import deepcopy
-from pathlib import Path
-import warnings
-
-from thinc.api import get_current_ops, Config, CupyOps, Optimizer
-import srsly
-import multiprocessing as mp
+from dataclasses import dataclass
from itertools import chain, cycle
+from pathlib import Path
from timeit import default_timer as timer
-import traceback
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ NoReturn,
+ Optional,
+ Pattern,
+ Sequence,
+ Set,
+ Tuple,
+ TypeVar,
+ Union,
+ cast,
+ overload,
+)

-from . import ty
-from .tokens.underscore import Underscore
-from .vocab import Vocab, create_vocab
-from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis
-from .training import Example, validate_examples
-from .training.initialize import init_vocab, init_tok2vec
-from .scorer import Scorer
-from .util import registry, SimpleFrozenList, _pipe, raise_error, _DEFAULT_EMPTY_PIPES
-from .util import SimpleFrozenDict, combine_score_weights, CONFIG_SECTION_ORDER
-from .util import warn_if_jupyter_cupy
-from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
-from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
-from .lang.punctuation import TOKENIZER_INFIXES
-from .tokens import Doc
-from .tokenizer import Tokenizer
-from .errors import Errors, Warnings
-from .schemas import ConfigSchema, ConfigSchemaNlp, ConfigSchemaInit
-from .schemas import ConfigSchemaPretrain, validate_init_settings
-from .git_info import GIT_VERSION
-from . import util
-from . import about
-from .lookups import load_lookups
+import srsly
+from thinc.api import Config, CupyOps, Optimizer, get_current_ops
+
+from . import about, ty, util
from .compat import Literal
-
+from .errors import Errors, Warnings
+from .git_info import GIT_VERSION
+from .lang.punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
+from .lang.tokenizer_exceptions import BASE_EXCEPTIONS, URL_MATCH
+from .lookups import load_lookups
+from .pipe_analysis import analyze_pipes, print_pipe_analysis, validate_attrs
+from .schemas import (
+ ConfigSchema,
+ ConfigSchemaInit,
+ ConfigSchemaNlp,
+ ConfigSchemaPretrain,
+ validate_init_settings,
+)
+from .scorer import Scorer
+from .tokenizer import Tokenizer
+from .tokens import Doc
+from .tokens.underscore import Underscore
+from .training import Example, validate_examples
+from .training.initialize import init_tok2vec, init_vocab
+from .util import (
+ _DEFAULT_EMPTY_PIPES,
+ CONFIG_SECTION_ORDER,
+ SimpleFrozenDict,
+ SimpleFrozenList,
+ _pipe,
+ combine_score_weights,
+ raise_error,
+ registry,
+ warn_if_jupyter_cupy,
+)
+from .vocab import Vocab, create_vocab
PipeCallable = Callable[[Doc], Doc]
diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd
index 8dea0d6a2..ff2e4f92e 100644
--- a/spacy/lexeme.pxd
+++ b/spacy/lexeme.pxd
@@ -1,11 +1,20 @@
from numpy cimport ndarray
-from .typedefs cimport attr_t, hash_t, flags_t, len_t, tag_t
-from .attrs cimport attr_id_t
-from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, LANG
-
-from .structs cimport LexemeC
+from .attrs cimport (
+ ID,
+ LANG,
+ LENGTH,
+ LOWER,
+ NORM,
+ ORTH,
+ PREFIX,
+ SHAPE,
+ SUFFIX,
+ attr_id_t,
+)
from .strings cimport StringStore
+from .structs cimport LexemeC
+from .typedefs cimport attr_t, flags_t, hash_t, len_t, tag_t
from .vocab cimport Vocab
diff --git a/spacy/lexeme.pyi b/spacy/lexeme.pyi
index 9b7a6156a..9980b9fce 100644
--- a/spacy/lexeme.pyi
+++ b/spacy/lexeme.pyi
@@ -1,8 +1,7 @@
-from typing import (
- Union,
- Any,
-)
+from typing import Any, Union
+
from thinc.types import Floats1d
+
from .tokens import Doc, Span, Token
from .vocab import Vocab
diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index e70feaf9a..00e2c6258 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -1,24 +1,40 @@
# cython: embedsignature=True
# Compiler crashes on memory view coercion without this. Should report bug.
+cimport numpy as np
from cython.view cimport array as cvarray
from libc.string cimport memset
-cimport numpy as np
+
np.import_array()
+import warnings
+
import numpy
from thinc.api import get_array_module
-import warnings
+from .attrs cimport (
+ IS_ALPHA,
+ IS_ASCII,
+ IS_BRACKET,
+ IS_CURRENCY,
+ IS_DIGIT,
+ IS_LEFT_PUNCT,
+ IS_LOWER,
+ IS_PUNCT,
+ IS_QUOTE,
+ IS_RIGHT_PUNCT,
+ IS_SPACE,
+ IS_STOP,
+ IS_TITLE,
+ IS_UPPER,
+ LIKE_EMAIL,
+ LIKE_NUM,
+ LIKE_URL,
+)
from .typedefs cimport attr_t, flags_t
-from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
-from .attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP
-from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT
-from .attrs cimport IS_CURRENCY
from .attrs import intify_attrs
from .errors import Errors, Warnings
-
OOV_RANK = 0xffffffffffffffff # UINT64_MAX
memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
EMPTY_LEXEME.id = OOV_RANK
diff --git a/spacy/lookups.py b/spacy/lookups.py
index d7cc44fb3..1a2c44bfa 100644
--- a/spacy/lookups.py
+++ b/spacy/lookups.py
@@ -1,13 +1,13 @@
-from typing import Any, List, Union, Optional, Dict
+from collections import OrderedDict
from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
import srsly
from preshed.bloom import BloomFilter
-from collections import OrderedDict
from .errors import Errors
-from .util import SimpleFrozenDict, ensure_path, registry, load_language_data
from .strings import get_string_id
-
+from .util import SimpleFrozenDict, ensure_path, load_language_data, registry
UNSET = object()
diff --git a/spacy/matcher/__init__.py b/spacy/matcher/__init__.py
index a4f164847..f671f2e35 100644
--- a/spacy/matcher/__init__.py
+++ b/spacy/matcher/__init__.py
@@ -1,6 +1,6 @@
-from .matcher import Matcher
-from .phrasematcher import PhraseMatcher
from .dependencymatcher import DependencyMatcher
from .levenshtein import levenshtein
+from .matcher import Matcher
+from .phrasematcher import PhraseMatcher
__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein"]
diff --git a/spacy/matcher/dependencymatcher.pyi b/spacy/matcher/dependencymatcher.pyi
index c19d3a71c..b9fbabda7 100644
--- a/spacy/matcher/dependencymatcher.pyi
+++ b/spacy/matcher/dependencymatcher.pyi
@@ -1,8 +1,9 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
-from .matcher import Matcher
-from ..vocab import Vocab
+
from ..tokens.doc import Doc
from ..tokens.span import Span
+from ..vocab import Vocab
+from .matcher import Matcher
class DependencyMatcher:
"""Match dependency parse tree based on pattern rules."""
diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx
index 48fb3eb2a..a214c0668 100644
--- a/spacy/matcher/dependencymatcher.pyx
+++ b/spacy/matcher/dependencymatcher.pyx
@@ -1,18 +1,16 @@
# cython: infer_types=True, profile=True
-from typing import List
+import warnings
from collections import defaultdict
from itertools import product
+from typing import List
-import warnings
-
-from .matcher cimport Matcher
-from ..vocab cimport Vocab
from ..tokens.doc cimport Doc
+from ..vocab cimport Vocab
+from .matcher cimport Matcher
from ..errors import Errors, Warnings
from ..tokens import Span
-
DELIMITER = "||"
INDEX_HEAD = 1
INDEX_RELOP = 0
diff --git a/spacy/matcher/matcher.pxd b/spacy/matcher/matcher.pxd
index 51854d562..2c82cea1d 100644
--- a/spacy/matcher/matcher.pxd
+++ b/spacy/matcher/matcher.pxd
@@ -1,11 +1,11 @@
+from cymem.cymem cimport Pool
from libc.stdint cimport int32_t
from libcpp.vector cimport vector
-from cymem.cymem cimport Pool
-from ..vocab cimport Vocab
-from ..typedefs cimport attr_t, hash_t
-from ..structs cimport TokenC
from ..lexeme cimport attr_id_t
+from ..structs cimport TokenC
+from ..typedefs cimport attr_t, hash_t
+from ..vocab cimport Vocab
cdef enum action_t:
diff --git a/spacy/matcher/matcher.pyi b/spacy/matcher/matcher.pyi
index 48922865b..c33b534cb 100644
--- a/spacy/matcher/matcher.pyi
+++ b/spacy/matcher/matcher.pyi
@@ -1,8 +1,19 @@
-from typing import Any, List, Dict, Tuple, Optional, Callable, Union
-from typing import Iterator, Iterable, overload
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Tuple,
+ Union,
+ overload,
+)
+
from ..compat import Literal
-from ..vocab import Vocab
from ..tokens import Doc, Span
+from ..vocab import Vocab
class Matcher:
def __init__(
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index b886bd2ec..3d03f37ae 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -1,32 +1,43 @@
# cython: binding=True, infer_types=True, profile=True
-from typing import List, Iterable
+from typing import Iterable, List

-from libcpp.vector cimport vector
-from libc.stdint cimport int32_t, int8_t
-from libc.string cimport memset, memcmp
from cymem.cymem cimport Pool
+from libc.stdint cimport int8_t, int32_t
+from libc.string cimport memcmp, memset
+from libcpp.vector cimport vector
from murmurhash.mrmr cimport hash64
import re
-import srsly
import warnings
-from ..typedefs cimport attr_t
+import srsly
+
+from ..attrs cimport (
+ DEP,
+ ENT_IOB,
+ ID,
+ LEMMA,
+ MORPH,
+ NULL_ATTR,
+ ORTH,
+ POS,
+ TAG,
+ attr_id_t,
+)
from ..structs cimport TokenC
-from ..vocab cimport Vocab
from ..tokens.doc cimport Doc, get_token_attr_for_matcher
+from ..tokens.morphanalysis cimport MorphAnalysis
from ..tokens.span cimport Span
from ..tokens.token cimport Token
-from ..tokens.morphanalysis cimport MorphAnalysis
-from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA, MORPH, ENT_IOB
+from ..typedefs cimport attr_t
+from ..vocab cimport Vocab

-from .levenshtein import levenshtein_compare
-from ..schemas import validate_token_pattern
-from ..errors import Errors, MatchPatternError, Warnings
-from ..strings import get_string_id
from ..attrs import IDS
+from ..errors import Errors, MatchPatternError, Warnings
+from ..schemas import validate_token_pattern
+from ..strings import get_string_id
from ..util import registry
-
+from .levenshtein import levenshtein_compare
DEF PADDING = 5
diff --git a/spacy/matcher/phrasematcher.pxd b/spacy/matcher/phrasematcher.pxd
index 1bdc19012..bffc1ac97 100644
--- a/spacy/matcher/phrasematcher.pxd
+++ b/spacy/matcher/phrasematcher.pxd
@@ -1,6 +1,6 @@
-from libcpp.vector cimport vector
from cymem.cymem cimport Pool
-from preshed.maps cimport key_t, MapStruct
+from libcpp.vector cimport vector
+from preshed.maps cimport MapStruct, key_t
from ..attrs cimport attr_id_t
from ..structs cimport SpanC
diff --git a/spacy/matcher/phrasematcher.pyi b/spacy/matcher/phrasematcher.pyi
index 68e3386e4..459b3bb24 100644
--- a/spacy/matcher/phrasematcher.pyi
+++ b/spacy/matcher/phrasematcher.pyi
@@ -1,8 +1,9 @@
-from typing import List, Tuple, Union, Optional, Callable, Any, Dict, overload
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, overload
+
from ..compat import Literal
-from .matcher import Matcher
-from ..vocab import Vocab
from ..tokens import Doc, Span
+from ..vocab import Vocab
+from .matcher import Matcher
class PhraseMatcher:
def __init__(
diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx
index 382029872..c407cf1cc 100644
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -1,18 +1,20 @@
# cython: infer_types=True, profile=True
from libc.stdint cimport uintptr_t
-from preshed.maps cimport map_init, map_set, map_get, map_clear, map_iter
+from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
import warnings
-from ..attrs cimport ORTH, POS, TAG, DEP, LEMMA, MORPH
+from ..attrs cimport DEP, LEMMA, MORPH, ORTH, POS, TAG
+
from ..attrs import IDS
+
from ..structs cimport TokenC
-from ..tokens.token cimport Token
from ..tokens.span cimport Span
+from ..tokens.token cimport Token
from ..typedefs cimport attr_t
-from ..schemas import TokenPattern
from ..errors import Errors, Warnings
+from ..schemas import TokenPattern
cdef class PhraseMatcher:
diff --git a/spacy/ml/_character_embed.py b/spacy/ml/_character_embed.py
index e46735102..89c836144 100644
--- a/spacy/ml/_character_embed.py
+++ b/spacy/ml/_character_embed.py
@@ -1,4 +1,5 @@
from typing import List
+
from thinc.api import Model
from thinc.types import Floats2d
diff --git a/spacy/ml/callbacks.py b/spacy/ml/callbacks.py
index 3b60ec2ab..e2378a7ba 100644
--- a/spacy/ml/callbacks.py
+++ b/spacy/ml/callbacks.py
@@ -1,8 +1,8 @@
-from typing import Type, Callable, Dict, TYPE_CHECKING, List, Optional, Set
import functools
import inspect
import types
import warnings
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set, Type
from thinc.layers import with_nvtx_range
from thinc.model import Model, wrap_model_recursive
diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py
index c9c82f369..ce7c585cc 100644
--- a/spacy/ml/extract_ngrams.py
+++ b/spacy/ml/extract_ngrams.py
@@ -1,7 +1,7 @@
from thinc.api import Model
-from ..util import registry
from ..attrs import LOWER
+from ..util import registry
@registry.layers("spacy.extract_ngrams.v1")
diff --git a/spacy/ml/extract_spans.py b/spacy/ml/extract_spans.py
index af6be78db..ac0f5fa1b 100644
--- a/spacy/ml/extract_spans.py
+++ b/spacy/ml/extract_spans.py
@@ -1,6 +1,7 @@
-from typing import List, Tuple, Callable
+from typing import Callable, List, Tuple
+
from thinc.api import Model, to_numpy
-from thinc.types import Ragged, Ints1d
+from thinc.types import Ints1d, Ragged
from ..util import registry
diff --git a/spacy/ml/featureextractor.py b/spacy/ml/featureextractor.py
index ed2918f02..06f1ff51a 100644
--- a/spacy/ml/featureextractor.py
+++ b/spacy/ml/featureextractor.py
@@ -1,6 +1,7 @@
-from typing import List, Union, Callable, Tuple
-from thinc.types import Ints2d
+from typing import Callable, List, Tuple, Union
+
from thinc.api import Model, registry
+from thinc.types import Ints2d
from ..tokens import Doc
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 7332ca199..b7100c00a 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -1,16 +1,31 @@
from pathlib import Path
-from typing import Optional, Callable, Iterable, List, Tuple
-from thinc.types import Floats2d
-from thinc.api import chain, list2ragged, reduce_mean, residual
-from thinc.api import Model, Maxout, Linear, tuplify, Ragged
+from typing import Callable, Iterable, List, Optional, Tuple
+
+from thinc.api import (
+ Linear,
+ Maxout,
+ Model,
+ Ragged,
+ chain,
+ list2ragged,
+ reduce_mean,
+ residual,
+ tuplify,
+)
+from thinc.types import Floats2d

-from ...util import registry
-from ...kb import KnowledgeBase, InMemoryLookupKB
-from ...kb import Candidate, get_candidates, get_candidates_batch
-from ...vocab import Vocab
-from ...tokens import Span, Doc
-from ..extract_spans import extract_spans
from ...errors import Errors
+from ...kb import (
+ Candidate,
+ InMemoryLookupKB,
+ KnowledgeBase,
+ get_candidates,
+ get_candidates_batch,
+)
+from ...tokens import Doc, Span
+from ...util import registry
+from ...vocab import Vocab
+from ..extract_spans import extract_spans
@registry.architectures("spacy.EntityLinker.v2")
diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py
index 7eb13b608..b7faf1cd7 100644
--- a/spacy/ml/models/multi_task.py
+++ b/spacy/ml/models/multi_task.py
@@ -1,22 +1,33 @@
-from typing import Any, Optional, Iterable, Tuple, List, Callable, TYPE_CHECKING, cast
-from thinc.types import Floats2d, Ints1d
-from thinc.api import chain, Maxout, LayerNorm, Softmax, Linear, zero_init, Model
-from thinc.api import MultiSoftmax, list2array
-from thinc.api import to_categorical, CosineDistance, L2Distance
-from thinc.loss import Loss
-
-from ...util import registry, OOV_RANK
-from ...errors import Errors
-from ...attrs import ID, ORTH
-from ...vectors import Mode as VectorsMode
+from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, Iterable, List, Optional, Tuple, cast
import numpy
-from functools import partial
+from thinc.api import (
+ CosineDistance,
+ L2Distance,
+ LayerNorm,
+ Linear,
+ Maxout,
+ Model,
+ MultiSoftmax,
+ Softmax,
+ chain,
+ list2array,
+ to_categorical,
+ zero_init,
+)
+from thinc.loss import Loss
+from thinc.types import Floats2d, Ints1d
+
+from ...attrs import ID, ORTH
+from ...errors import Errors
+from ...util import OOV_RANK, registry
+from ...vectors import Mode as VectorsMode
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
- from ...vocab import Vocab # noqa: F401
from ...tokens.doc import Doc # noqa: F401
+ from ...vocab import Vocab # noqa: F401
@registry.architectures("spacy.PretrainVectors.v1")
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index a70d84dea..f6c0e565d 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -1,13 +1,14 @@
-from typing import Optional, List, cast
-from thinc.api import Model, chain, list2array, Linear, zero_init, use_ops
+from typing import List, Optional, cast
+
+from thinc.api import Linear, Model, chain, list2array, use_ops, zero_init
from thinc.types import Floats2d
-from ...errors import Errors
from ...compat import Literal
+from ...errors import Errors
+from ...tokens import Doc
from ...util import registry
from .._precomputable_affine import PrecomputableAffine
from ..tb_framework import TransitionModel
-from ...tokens import Doc
@registry.architectures("spacy.TransitionBasedParser.v2")
diff --git a/spacy/ml/models/span_finder.py b/spacy/ml/models/span_finder.py
index a805e2086..d327fc761 100644
--- a/spacy/ml/models/span_finder.py
+++ b/spacy/ml/models/span_finder.py
@@ -4,7 +4,6 @@ from thinc.api import Model, chain, with_array
from thinc.types import Floats1d, Floats2d
from ...tokens import Doc
-
from ...util import registry
InT = List[Doc]
diff --git a/spacy/ml/models/spancat.py b/spacy/ml/models/spancat.py
index 893db2e6d..140ec553a 100644
--- a/spacy/ml/models/spancat.py
+++ b/spacy/ml/models/spancat.py
@@ -1,11 +1,24 @@
from typing import List, Tuple, cast
-from thinc.api import Model, with_getitem, chain, list2ragged, Logistic
-from thinc.api import Maxout, Linear, concatenate, glorot_uniform_init
-from thinc.api import reduce_mean, reduce_max, reduce_first, reduce_last
-from thinc.types import Ragged, Floats2d
-from ...util import registry
+from thinc.api import (
+ Linear,
+ Logistic,
+ Maxout,
+ Model,
+ chain,
+ concatenate,
+ glorot_uniform_init,
+ list2ragged,
+ reduce_first,
+ reduce_last,
+ reduce_max,
+ reduce_mean,
+ with_getitem,
+)
+from thinc.types import Floats2d, Ragged
+
from ...tokens import Doc
+from ...util import registry
from ..extract_spans import extract_spans
diff --git a/spacy/ml/models/tagger.py b/spacy/ml/models/tagger.py
index 9f8ef7b2b..8f1554fab 100644
--- a/spacy/ml/models/tagger.py
+++ b/spacy/ml/models/tagger.py
@@ -1,9 +1,10 @@
-from typing import Optional, List
-from thinc.api import zero_init, with_array, Softmax_v2, chain, Model
+from typing import List, Optional
+
+from thinc.api import Model, Softmax_v2, chain, with_array, zero_init
from thinc.types import Floats2d
-from ...util import registry
from ...tokens import Doc
+from ...util import registry
@registry.architectures("spacy.Tagger.v2")
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index 9c7e607fe..ab14110d2 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -1,22 +1,39 @@
-from typing import Optional, List, cast
from functools import partial
+from typing import List, Optional, cast

-from thinc.types import Floats2d
-from thinc.api import Model, reduce_mean, Linear, list2ragged, Logistic
-from thinc.api import chain, concatenate, clone, Dropout, ParametricAttention
-from thinc.api import SparseLinear, Softmax, softmax_activation, Maxout, reduce_sum
-from thinc.api import with_cpu, Relu, residual, LayerNorm, resizable
+from thinc.api import (
+ Dropout,
+ LayerNorm,
+ Linear,
+ Logistic,
+ Maxout,
+ Model,
+ ParametricAttention,
+ Relu,
+ Softmax,
+ SparseLinear,
+ chain,
+ clone,
+ concatenate,
+ list2ragged,
+ reduce_mean,
+ reduce_sum,
+ residual,
+ resizable,
+ softmax_activation,
+ with_cpu,
+)
from thinc.layers.chain import init as init_chain
-from thinc.layers.resizable import resize_model, resize_linear_weighted
+from thinc.layers.resizable import resize_linear_weighted, resize_model
+from thinc.types import Floats2d
from ...attrs import ORTH
+from ...tokens import Doc
from ...util import registry
from ..extract_ngrams import extract_ngrams
from ..staticvectors import StaticVectors
-from ...tokens import Doc
from .tok2vec import get_tok2vec_width
-
NEG_VALUE = -5000
diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 30c7360ff..2e9d21ef4 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -1,17 +1,32 @@
-from typing import Optional, List, Union, cast
-from thinc.types import Floats2d, Ints2d, Ragged, Ints1d
-from thinc.api import chain, clone, concatenate, with_array, with_padded
-from thinc.api import Model, noop, list2ragged, ragged2list, HashEmbed
-from thinc.api import expand_window, residual, Maxout, Mish, PyTorchLSTM
+from typing import List, Optional, Union, cast

-from ...tokens import Doc
-from ...util import registry
+from thinc.api import (
+ HashEmbed,
+ Maxout,
+ Mish,
+ Model,
+ PyTorchLSTM,
+ chain,
+ clone,
+ concatenate,
+ expand_window,
+ list2ragged,
+ noop,
+ ragged2list,
+ residual,
+ with_array,
+ with_padded,
+)
+from thinc.types import Floats2d, Ints1d, Ints2d, Ragged
+
+from ...attrs import intify_attr
from ...errors import Errors
from ...ml import _character_embed
-from ..staticvectors import StaticVectors
-from ..featureextractor import FeatureExtractor
from ...pipeline.tok2vec import Tok2VecListener
-from ...attrs import intify_attr
+from ...tokens import Doc
+from ...util import registry
+from ..featureextractor import FeatureExtractor
+from ..staticvectors import StaticVectors
@registry.architectures("spacy.Tok2VecListener.v1")
diff --git a/spacy/ml/parser_model.pxd b/spacy/ml/parser_model.pxd
index 8def6cea5..ca31c1699 100644
--- a/spacy/ml/parser_model.pxd
+++ b/spacy/ml/parser_model.pxd
@@ -1,7 +1,8 @@
-from libc.string cimport memset, memcpy
+from libc.string cimport memcpy, memset
from thinc.backends.cblas cimport CBlas
-from ..typedefs cimport weight_t, hash_t
+
from ..pipeline._parser_internals._state cimport StateC
+from ..typedefs cimport hash_t, weight_t
cdef struct SizesC:
diff --git a/spacy/ml/parser_model.pyx b/spacy/ml/parser_model.pyx
index 961bf4d70..5cffc4c2d 100644
--- a/spacy/ml/parser_model.pyx
+++ b/spacy/ml/parser_model.pyx
@@ -1,19 +1,20 @@
# cython: infer_types=True, cdivision=True, boundscheck=False
cimport numpy as np
from libc.math cimport exp
-from libc.string cimport memset, memcpy
from libc.stdlib cimport calloc, free, realloc
-from thinc.backends.linalg cimport Vec, VecVec
+from libc.string cimport memcpy, memset
from thinc.backends.cblas cimport saxpy, sgemm
+from thinc.backends.linalg cimport Vec, VecVec
import numpy
import numpy.random
-from thinc.api import Model, CupyOps, NumpyOps, get_ops
+from thinc.api import CupyOps, Model, NumpyOps, get_ops
from .. import util
from ..errors import Errors
-from ..typedefs cimport weight_t, class_t, hash_t
+
from ..pipeline._parser_internals.stateclass cimport StateClass
+from ..typedefs cimport class_t, hash_t, weight_t
cdef WeightsC get_c_weights(model) except *:
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index 04cfe912d..6fcb13ad0 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -1,11 +1,12 @@
-from typing import List, Tuple, Callable, Optional, Sequence, cast
-from thinc.initializers import glorot_uniform_init
-from thinc.util import partial
-from thinc.types import Ragged, Floats2d, Floats1d, Ints1d
-from thinc.api import Model, Ops, registry
+from typing import Callable, List, Optional, Sequence, Tuple, cast
+
+from thinc.api import Model, Ops, registry
+from thinc.initializers import glorot_uniform_init
+from thinc.types import Floats1d, Floats2d, Ints1d, Ragged
+from thinc.util import partial

-from ..tokens import Doc
from ..errors import Errors
+from ..tokens import Doc
from ..vectors import Mode
from ..vocab import Vocab
diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py
index ab4a969e2..e351ad4e5 100644
--- a/spacy/ml/tb_framework.py
+++ b/spacy/ml/tb_framework.py
@@ -1,6 +1,7 @@
from thinc.api import Model, noop
-from .parser_model import ParserStepModel
+
from ..util import registry
+from .parser_model import ParserStepModel
@registry.layers("spacy.TransitionModel.v1")
diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd
index 8d449d065..968764b82 100644
--- a/spacy/morphology.pxd
+++ b/spacy/morphology.pxd
@@ -1,10 +1,10 @@
-from cymem.cymem cimport Pool
-from preshed.maps cimport PreshMap
cimport numpy as np
+from cymem.cymem cimport Pool
from libc.stdint cimport uint64_t
+from preshed.maps cimport PreshMap

-from .structs cimport MorphAnalysisC
from .strings cimport StringStore
+from .structs cimport MorphAnalysisC
from .typedefs cimport attr_t, hash_t
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index c3ffc46a1..1062fff09 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -1,12 +1,13 @@
# cython: infer_types
-import numpy
import warnings
+import numpy
+
from .attrs cimport POS
-from .parts_of_speech import IDS as POS_IDS
-from .errors import Warnings
from . import symbols
+from .errors import Warnings
+from .parts_of_speech import IDS as POS_IDS
cdef class Morphology:
diff --git a/spacy/parts_of_speech.pxd b/spacy/parts_of_speech.pxd
index 0bf5b4789..a0b2567f1 100644
--- a/spacy/parts_of_speech.pxd
+++ b/spacy/parts_of_speech.pxd
@@ -1,5 +1,6 @@
from . cimport symbols
+
cpdef enum univ_pos_t:
NO_TAG = 0
ADJ = symbols.ADJ
diff --git a/spacy/pipe_analysis.py b/spacy/pipe_analysis.py
index 245747061..d26884487 100644
--- a/spacy/pipe_analysis.py
+++ b/spacy/pipe_analysis.py
@@ -1,8 +1,9 @@
-from typing import List, Set, Dict, Iterable, ItemsView, Union, TYPE_CHECKING
+from typing import TYPE_CHECKING, Dict, ItemsView, Iterable, List, Set, Union
+
from wasabi import msg
-from .tokens import Doc, Token, Span
from .errors import Errors
+from .tokens import Doc, Span, Token
from .util import dot_to_dict
if TYPE_CHECKING:
diff --git a/spacy/pipeline/_edit_tree_internals/edit_trees.pxd b/spacy/pipeline/_edit_tree_internals/edit_trees.pxd
index dc4289f37..3d63af921 100644
--- a/spacy/pipeline/_edit_tree_internals/edit_trees.pxd
+++ b/spacy/pipeline/_edit_tree_internals/edit_trees.pxd
@@ -2,8 +2,9 @@ from libc.stdint cimport uint32_t, uint64_t
from libcpp.unordered_map cimport unordered_map
from libcpp.vector cimport vector
-from ...typedefs cimport attr_t, hash_t, len_t
from ...strings cimport StringStore
+from ...typedefs cimport attr_t, hash_t, len_t
+
cdef extern from "<algorithm>" namespace "std" nogil:
void swap[T](T& a, T& b) except + # Only available in Cython 3.
diff --git a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
index 9d18c0334..daab0d204 100644
--- a/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
+++ b/spacy/pipeline/_edit_tree_internals/edit_trees.pyx
@@ -1,7 +1,6 @@
# cython: infer_types=True, binding=True
from cython.operator cimport dereference as deref
-from libc.stdint cimport uint32_t
-from libc.stdint cimport UINT32_MAX
+from libc.stdint cimport UINT32_MAX, uint32_t
from libc.string cimport memset
from libcpp.pair cimport pair
from libcpp.vector cimport vector
@@ -15,7 +14,6 @@ from ...errors import Errors
from ...strings import StringStore
from .schemas import validate_edit_tree
-
NULL_TREE_ID = UINT32_MAX
cdef LCS find_lcs(str source, str target):
diff --git a/spacy/pipeline/_edit_tree_internals/schemas.py b/spacy/pipeline/_edit_tree_internals/schemas.py
index c01d0632e..1e307b66c 100644
--- a/spacy/pipeline/_edit_tree_internals/schemas.py
+++ b/spacy/pipeline/_edit_tree_internals/schemas.py
@@ -1,5 +1,6 @@
-from typing import Any, Dict, List, Union
from collections import defaultdict
+from typing import Any, Dict, List, Union
+
from pydantic import BaseModel, Field, ValidationError
from pydantic.types import StrictBool, StrictInt, StrictStr
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pxd b/spacy/pipeline/_parser_internals/_beam_utils.pxd
index de3573fbc..596306b23 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pxd
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pxd
@@ -1,5 +1,6 @@
from ...typedefs cimport class_t, hash_t
+
# These are passed as callbacks to thinc.search.Beam
cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx
index fa7df2056..04dd3f11e 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pyx
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx
@@ -1,15 +1,21 @@
# cython: infer_types=True
# cython: profile=True
cimport numpy as np
+
import numpy
-from cpython.ref cimport PyObject, Py_XDECREF
+
+from cpython.ref cimport Py_XDECREF, PyObject
from thinc.extra.search cimport Beam
+
from thinc.extra.search import MaxViolation
+
from thinc.extra.search cimport MaxViolation
-from ...typedefs cimport hash_t, class_t
-from .transition_system cimport TransitionSystem, Transition
+from ...typedefs cimport class_t, hash_t
+from .transition_system cimport Transition, TransitionSystem
+
from ...errors import Errors
+
from .stateclass cimport StateC, StateClass
diff --git a/spacy/pipeline/_parser_internals/_state.pxd b/spacy/pipeline/_parser_internals/_state.pxd
index a1262bb61..24acc350c 100644
--- a/spacy/pipeline/_parser_internals/_state.pxd
+++ b/spacy/pipeline/_parser_internals/_state.pxd
@@ -1,19 +1,20 @@
-from cython.operator cimport dereference as deref, preincrement as incr
-from libc.string cimport memcpy, memset
-from libc.stdlib cimport calloc, free
-from libc.stdint cimport uint32_t, uint64_t
cimport libcpp
+from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
+from cython.operator cimport dereference as deref
+from cython.operator cimport preincrement as incr
+from libc.stdint cimport uint32_t, uint64_t
+from libc.stdlib cimport calloc, free
+from libc.string cimport memcpy, memset
+from libcpp.set cimport set
from libcpp.unordered_map cimport unordered_map
from libcpp.vector cimport vector
-from libcpp.set cimport set
-from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
from murmurhash.mrmr cimport hash64
-from ...vocab cimport EMPTY_LEXEME
-from ...structs cimport TokenC, SpanC
-from ...lexeme cimport Lexeme
from ...attrs cimport IS_SPACE
+from ...lexeme cimport Lexeme
+from ...structs cimport SpanC, TokenC
from ...typedefs cimport attr_t
+from ...vocab cimport EMPTY_LEXEME
cdef inline bint is_space_token(const TokenC* token) nogil:
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pxd b/spacy/pipeline/_parser_internals/arc_eager.pxd
index b618bc587..2c17e7b26 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pxd
+++ b/spacy/pipeline/_parser_internals/arc_eager.pxd
@@ -1,5 +1,5 @@
+from ...typedefs cimport attr_t, weight_t
from ._state cimport StateC
-from ...typedefs cimport weight_t, attr_t
from .transition_system cimport Transition, TransitionSystem
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 257b5ef8a..2c9eb0ff5 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -1,22 +1,27 @@
# cython: profile=True, cdivision=True, infer_types=True
-from cymem.cymem cimport Pool, Address
+from cymem.cymem cimport Address, Pool
from libc.stdint cimport int32_t
from libcpp.vector cimport vector
-from collections import defaultdict, Counter
+from collections import Counter, defaultdict
-from ...typedefs cimport hash_t, attr_t
from ...strings cimport hash_string
from ...structs cimport TokenC
from ...tokens.doc cimport Doc, set_children_from_heads
from ...tokens.token cimport MISSING_DEP
+from ...typedefs cimport attr_t, hash_t
+
from ...training import split_bilu_label
+
from ...training.example cimport Example
+from ._state cimport ArcC, StateC
from .stateclass cimport StateClass
-from ._state cimport StateC, ArcC
+
from ...errors import Errors
+
from thinc.extra.search cimport Beam
+
cdef weight_t MIN_SCORE = -90000
cdef attr_t SUBTOK_LABEL = hash_string('subtok')
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index fab872f00..e1edb4464 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -1,22 +1,28 @@
import os
import random
-from libc.stdint cimport int32_t
+
from cymem.cymem cimport Pool
+from libc.stdint cimport int32_t
from collections import Counter
+
from thinc.extra.search cimport Beam
from ...tokens.doc cimport Doc
+
from ...tokens.span import Span
-from ...tokens.span cimport Span
-from ...typedefs cimport weight_t, attr_t
-from ...lexeme cimport Lexeme
+
from ...attrs cimport IS_SPACE
-from ...structs cimport TokenC, SpanC
+from ...lexeme cimport Lexeme
+from ...structs cimport SpanC, TokenC
+from ...tokens.span cimport Span
+from ...typedefs cimport attr_t, weight_t
+
from ...training import split_bilu_label
+
from ...training.example cimport Example
-from .stateclass cimport StateClass
from ._state cimport StateC
+from .stateclass cimport StateClass
from .transition_system cimport Transition, do_func_t
from ...errors import Errors
diff --git a/spacy/pipeline/_parser_internals/nonproj.pxd b/spacy/pipeline/_parser_internals/nonproj.pxd
index aabdf7ebe..1a349d56a 100644
--- a/spacy/pipeline/_parser_internals/nonproj.pxd
+++ b/spacy/pipeline/_parser_internals/nonproj.pxd
@@ -1,4 +1,5 @@
from libcpp.string cimport string
+
cdef extern from "nonproj.hh":
cdef void raise_domain_error(const string& msg) nogil except +
diff --git a/spacy/pipeline/_parser_internals/nonproj.pyx b/spacy/pipeline/_parser_internals/nonproj.pyx
index d1b6e7066..66f423b3b 100644
--- a/spacy/pipeline/_parser_internals/nonproj.pyx
+++ b/spacy/pipeline/_parser_internals/nonproj.pyx
@@ -4,19 +4,20 @@ for doing pseudo-projective parsing implementation uses the HEAD decoration
scheme.
"""
from copy import copy
-from cython.operator cimport preincrement as incr, dereference as deref
+
+from cython.operator cimport dereference as deref
+from cython.operator cimport preincrement as incr
from libc.limits cimport INT_MAX
from libc.stdlib cimport abs
from libcpp cimport bool
from libcpp.string cimport string, to_string
-from libcpp.vector cimport vector
from libcpp.unordered_set cimport unordered_set
+from libcpp.vector cimport vector
from ...tokens.doc cimport Doc, set_children_from_heads
from ...errors import Errors
-
DELIMITER = '||'
diff --git a/spacy/pipeline/_parser_internals/stateclass.pxd b/spacy/pipeline/_parser_internals/stateclass.pxd
index 54ff344b9..b8ecc1bbf 100644
--- a/spacy/pipeline/_parser_internals/stateclass.pxd
+++ b/spacy/pipeline/_parser_internals/stateclass.pxd
@@ -1,9 +1,8 @@
from cymem.cymem cimport Pool
-from ...structs cimport TokenC, SpanC
-from ...typedefs cimport attr_t
+from ...structs cimport SpanC, TokenC
from ...tokens.doc cimport Doc
-
+from ...typedefs cimport attr_t
from ._state cimport StateC
diff --git a/spacy/pipeline/_parser_internals/stateclass.pyx b/spacy/pipeline/_parser_internals/stateclass.pyx
index 4eaddd997..0a2657af1 100644
--- a/spacy/pipeline/_parser_internals/stateclass.pyx
+++ b/spacy/pipeline/_parser_internals/stateclass.pyx
@@ -1,9 +1,10 @@
# cython: infer_types=True
import numpy
+
from libcpp.vector cimport vector
-from ._state cimport ArcC
from ...tokens.doc cimport Doc
+from ._state cimport ArcC
cdef class StateClass:
diff --git a/spacy/pipeline/_parser_internals/transition_system.pxd b/spacy/pipeline/_parser_internals/transition_system.pxd
index 52ebd2b8e..ce17480d4 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pxd
+++ b/spacy/pipeline/_parser_internals/transition_system.pxd
@@ -1,11 +1,11 @@
from cymem.cymem cimport Pool
-from ...typedefs cimport attr_t, weight_t
-from ...structs cimport TokenC
from ...strings cimport StringStore
+from ...structs cimport TokenC
from ...training.example cimport Example
-from .stateclass cimport StateClass
+from ...typedefs cimport attr_t, weight_t
from ._state cimport StateC
+from .stateclass cimport StateClass
cdef struct Transition:
diff --git a/spacy/pipeline/_parser_internals/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx
index 18eb745a9..053c87f22 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pyx
+++ b/spacy/pipeline/_parser_internals/transition_system.pyx
@@ -1,18 +1,20 @@
# cython: infer_types=True
from __future__ import print_function
+
from cymem.cymem cimport Pool
from collections import Counter
+
import srsly
-from . cimport _beam_utils
-from ...typedefs cimport weight_t, attr_t
-from ...tokens.doc cimport Doc
from ...structs cimport TokenC
+from ...tokens.doc cimport Doc
+from ...typedefs cimport attr_t, weight_t
+from . cimport _beam_utils
from .stateclass cimport StateClass
-from ...errors import Errors
from ... import util
+from ...errors import Errors
cdef weight_t MIN_SCORE = -90000
diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index 0d9494865..8ac74d92b 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -1,21 +1,20 @@
-from typing import List, Dict, Union, Iterable, Any, Optional, Callable
-from typing import Tuple
-import srsly
from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

-from .pipe import Pipe
+import srsly
+
+from .. import util
from ..errors import Errors
-from ..training import Example
from ..language import Language
from ..matcher import Matcher
from ..scorer import Scorer
from ..symbols import IDS
from ..tokens import Doc, Span
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
-from ..vocab import Vocab
+from ..training import Example
from ..util import SimpleFrozenList, registry
-from .. import util
-
+from ..vocab import Vocab
+from .pipe import Pipe
MatcherPatternType = List[Dict[Union[int, str], Any]]
AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index e5f686158..cb896c385 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -1,20 +1,21 @@
# cython: infer_types=True, profile=True, binding=True
from collections import defaultdict
-from typing import Optional, Iterable, Callable
-from thinc.api import Model, Config
+from typing import Callable, Iterable, Optional
+
+from thinc.api import Config, Model
from ._parser_internals.transition_system import TransitionSystem
-from .transition_parser cimport Parser
-from ._parser_internals.arc_eager cimport ArcEager
-from .functions import merge_subtokens
+from ._parser_internals.arc_eager cimport ArcEager
+from .transition_parser cimport Parser
+
from ..language import Language
-from ._parser_internals import nonproj
-from ._parser_internals.nonproj import DELIMITER
from ..scorer import Scorer
from ..training import remove_bilu_prefix
from ..util import registry
-
+from ._parser_internals import nonproj
+from ._parser_internals.nonproj import DELIMITER
+from .functions import merge_subtokens
default_model_config = """
[model]
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 332badd8c..4a6174bc3 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -1,24 +1,22 @@
-from typing import cast, Any, Callable, Dict, Iterable, List, Optional
-from typing import Tuple
from collections import Counter
from itertools import islice
-import numpy as np
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast
+import numpy as np
import srsly
-from thinc.api import Config, Model, SequenceCategoricalCrossentropy, NumpyOps
+from thinc.api import Config, Model, NumpyOps, SequenceCategoricalCrossentropy
from thinc.types import Floats2d, Ints2d
-from ._edit_tree_internals.edit_trees import EditTrees
-from ._edit_tree_internals.schemas import validate_edit_tree
-from .lemmatizer import lemmatizer_score
-from .trainable_pipe import TrainablePipe
+from .. import util
from ..errors import Errors
from ..language import Language
from ..tokens import Doc
from ..training import Example, validate_examples, validate_get_examples
from ..vocab import Vocab
-from .. import util
-
+from ._edit_tree_internals.edit_trees import EditTrees
+from ._edit_tree_internals.schemas import validate_edit_tree
+from .lemmatizer import lemmatizer_score
+from .trainable_pipe import TrainablePipe
# The cutoff value of *top_k* above which an alternative method is used to process guesses.
TOP_K_GUARDRAIL = 20
diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index 76ccc3247..a730ece1b 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -1,25 +1,25 @@
-from typing import Optional, Iterable, Callable, Dict, Union, List, Any
-from thinc.types import Floats2d
-from pathlib import Path
-from itertools import islice
-import srsly
import random
-from thinc.api import CosineDistance, Model, Optimizer, Config
-from thinc.api import set_dropout_rate
+from itertools import islice
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Union
+
+import srsly
+from thinc.api import Config, CosineDistance, Model, Optimizer, set_dropout_rate
+from thinc.types import Floats2d

-from ..kb import KnowledgeBase, Candidate
-from ..ml import empty_kb
-from ..tokens import Doc, Span
-from .pipe import deserialize_config
-from .legacy.entity_linker import EntityLinker_v1
-from .trainable_pipe import TrainablePipe
-from ..language import Language
-from ..vocab import Vocab
-from ..training import Example, validate_examples, validate_get_examples
-from ..errors import Errors
-from ..util import SimpleFrozenList, registry
from .. import util
+from ..errors import Errors
+from ..kb import Candidate, KnowledgeBase
+from ..language import Language
+from ..ml import empty_kb
from ..scorer import Scorer
+from ..tokens import Doc, Span
+from ..training import Example, validate_examples, validate_get_examples
+from ..util import SimpleFrozenList, registry
+from ..vocab import Vocab
+from .legacy.entity_linker import EntityLinker_v1
+from .pipe import deserialize_config
+from .trainable_pipe import TrainablePipe
# See #9050
BACKWARD_OVERWRITE = True
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 6a3755533..3683cfc02 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -1,19 +1,19 @@
-from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
import warnings
from collections import defaultdict
from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+
import srsly
-from .pipe import Pipe
-from ..training import Example
-from ..language import Language
from ..errors import Errors, Warnings
-from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList, registry
-from ..tokens import Doc, Span
+from ..language import Language
from ..matcher import Matcher, PhraseMatcher
from ..matcher.levenshtein import levenshtein_compare
from ..scorer import get_ner_prf
-
+from ..tokens import Doc, Span
+from ..training import Example
+from ..util import SimpleFrozenList, ensure_path, from_disk, registry, to_disk
+from .pipe import Pipe
DEFAULT_ENT_ID_SEP = "||"
PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py
index c005395bf..2bf0437d5 100644
--- a/spacy/pipeline/functions.py
+++ b/spacy/pipeline/functions.py
@@ -1,12 +1,13 @@
-from typing import Dict, Any
-import srsly
import warnings
+from typing import Any, Dict
+import srsly
+
+from .. import util
from ..errors import Warnings
from ..language import Language
from ..matcher import Matcher
from ..tokens import Doc
-from .. import util
@Language.component(
diff --git a/spacy/pipeline/legacy/entity_linker.py b/spacy/pipeline/legacy/entity_linker.py
index c14dfa1db..1e46db019 100644
--- a/spacy/pipeline/legacy/entity_linker.py
+++ b/spacy/pipeline/legacy/entity_linker.py
@@ -1,28 +1,28 @@
# This file is present to provide a prior version of the EntityLinker component
# for backwards compatability. For details see #9669.
-from typing import Optional, Iterable, Callable, Dict, Union, List, Any
-from thinc.types import Floats2d
-from pathlib import Path
-from itertools import islice
-import srsly
import random
-from thinc.api import CosineDistance, Model, Optimizer
-from thinc.api import set_dropout_rate
import warnings
+from itertools import islice
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Union

-from ...kb import KnowledgeBase, Candidate
+import srsly
+from thinc.api import CosineDistance, Model, Optimizer, set_dropout_rate
+from thinc.types import Floats2d
+
+from ... import util
+from ...errors import Errors, Warnings
+from ...kb import Candidate, KnowledgeBase
+from ...language import Language
from ...ml import empty_kb
+from ...scorer import Scorer
from ...tokens import Doc, Span
+from ...training import Example, validate_examples, validate_get_examples
+from ...util import SimpleFrozenList
+from ...vocab import Vocab
from ..pipe import deserialize_config
from ..trainable_pipe import TrainablePipe
-from ...language import Language
-from ...vocab import Vocab
-from ...training import Example, validate_examples, validate_get_examples
-from ...errors import Errors, Warnings
-from ...util import SimpleFrozenList
-from ... import util
-from ...scorer import Scorer
# See #9050
BACKWARD_OVERWRITE = True
diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py
index 9c2fc2f09..09e501595 100644
--- a/spacy/pipeline/lemmatizer.py
+++ b/spacy/pipeline/lemmatizer.py
@@ -1,19 +1,19 @@
-from typing import Optional, List, Dict, Any, Callable, Iterable, Union, Tuple
-from thinc.api import Model
-from pathlib import Path
-
import warnings
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

-from .pipe import Pipe
+from thinc.api import Model
+
+from .. import util
from ..errors import Errors, Warnings
from ..language import Language
-from ..training import Example
from ..lookups import Lookups, load_lookups
from ..scorer import Scorer
from ..tokens import Doc, Token
+from ..training import Example
+from ..util import SimpleFrozenList, logger, registry
from ..vocab import Vocab
-from ..util import logger, SimpleFrozenList, registry
-from .. import util
+from .pipe import Pipe
@Language.factory(
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index be8f82212..4ca0ce165 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -1,23 +1,24 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, Union, Dict, Callable
-import srsly
-from thinc.api import SequenceCategoricalCrossentropy, Model, Config
from itertools import islice
+from typing import Callable, Dict, Optional, Union
+import srsly
+from thinc.api import Config, Model, SequenceCategoricalCrossentropy
+
+from ..morphology cimport Morphology
from ..tokens.doc cimport Doc
from ..vocab cimport Vocab
-from ..morphology cimport Morphology
-from ..parts_of_speech import IDS as POS_IDS
-from ..symbols import POS
-from ..language import Language
-from ..errors import Errors
-from .pipe import deserialize_config
-from .tagger import Tagger
from .. import util
+from ..errors import Errors
+from ..language import Language
+from ..parts_of_speech import IDS as POS_IDS
from ..scorer import Scorer
+from ..symbols import POS
from ..training import validate_examples, validate_get_examples
from ..util import registry
+from .pipe import deserialize_config
+from .tagger import Tagger
# See #9050
BACKWARD_OVERWRITE = True
diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx
index 8c44061e2..6b62c0811 100644
--- a/spacy/pipeline/multitask.pyx
+++ b/spacy/pipeline/multitask.pyx
@@ -1,19 +1,18 @@
# cython: infer_types=True, profile=True, binding=True
from typing import Optional
+
import numpy
-from thinc.api import CosineDistance, to_categorical, Model, Config
-from thinc.api import set_dropout_rate
+from thinc.api import Config, CosineDistance, Model, set_dropout_rate, to_categorical
from ..tokens.doc cimport Doc
-from .trainable_pipe import TrainablePipe
-from .tagger import Tagger
-from ..training import validate_examples
-from ..language import Language
-from ._parser_internals import nonproj
-from ..attrs import POS, ID
+from ..attrs import ID, POS
from ..errors import Errors
-
+from ..language import Language
+from ..training import validate_examples
+from ._parser_internals import nonproj
+from .tagger import Tagger
+from .trainable_pipe import TrainablePipe
default_model_config = """
[model]
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index 25f48c9f8..8dd6c3c43 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -1,16 +1,18 @@
# cython: infer_types=True, profile=True, binding=True
from collections import defaultdict
-from typing import Optional, Iterable, Callable
-from thinc.api import Model, Config
+from typing import Callable, Iterable, Optional
+
+from thinc.api import Config, Model
from ._parser_internals.transition_system import TransitionSystem
-from .transition_parser cimport Parser
-from ._parser_internals.ner cimport BiluoPushDown
-from ..language import Language
-from ..scorer import get_ner_prf, PRFScore
-from ..util import registry
-from ..training import remove_bilu_prefix
+from ._parser_internals.ner cimport BiluoPushDown
+from .transition_parser cimport Parser
+
+from ..language import Language
+from ..scorer import PRFScore, get_ner_prf
+from ..training import remove_bilu_prefix
+from ..util import registry
default_model_config = """
[model]
diff --git a/spacy/pipeline/pipe.pyi b/spacy/pipeline/pipe.pyi
index 9dd6a9d50..9a1c11cef 100644
--- a/spacy/pipeline/pipe.pyi
+++ b/spacy/pipeline/pipe.pyi
@@ -1,11 +1,20 @@
from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Iterator, List
-from typing import NoReturn, Optional, Tuple, Union
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ NoReturn,
+ Optional,
+ Tuple,
+ Union,
+)

-from ..tokens.doc import Doc
-
-from ..training import Example
from ..language import Language
+from ..tokens.doc import Doc
+from ..training import Example
class Pipe:
def __call__(self, doc: Doc) -> Doc: ...
diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx
index 8407acc45..42f518882 100644
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@@ -1,15 +1,17 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, Tuple, Iterable, Iterator, Callable, Union, Dict
-import srsly
import warnings
+from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple, Union
+
+import srsly
from ..tokens.doc cimport Doc
-from ..training import Example
from ..errors import Errors, Warnings
from ..language import Language
+from ..training import Example
from ..util import raise_error
+
cdef class Pipe:
"""This class is a base class and not instantiated directly. It provides
an interface for pipeline components to implement.
diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx
index 77f4e8adb..2fe7e1540 100644
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -1,14 +1,15 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, List, Callable
+from typing import Callable, List, Optional
+
import srsly
from ..tokens.doc cimport Doc
-from .pipe import Pipe
-from .senter import senter_score
+from .. import util
from ..language import Language
from ..scorer import Scorer
-from .. import util
+from .pipe import Pipe
+from .senter import senter_score
# see #9050
BACKWARD_OVERWRITE = False
diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx
index 6808fe70e..26f98ba59 100644
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@@ -1,19 +1,19 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Optional, Callable
from itertools import islice
+from typing import Callable, Optional
import srsly
-from thinc.api import Model, SequenceCategoricalCrossentropy, Config
+from thinc.api import Config, Model, SequenceCategoricalCrossentropy
from ..tokens.doc cimport Doc
-from .tagger import Tagger
-from ..language import Language
+from .. import util
from ..errors import Errors
+from ..language import Language
from ..scorer import Scorer
from ..training import validate_examples, validate_get_examples
from ..util import registry
-from .. import util
+from .tagger import Tagger
# See #9050
BACKWARD_OVERWRITE = False
diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py
index da3c38430..91be2f2ae 100644
--- a/spacy/pipeline/span_finder.py
+++ b/spacy/pipeline/span_finder.py
@@ -3,15 +3,14 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from thinc.api import Config, Model, Optimizer, set_dropout_rate
from thinc.types import Floats2d
+from ..errors import Errors
from ..language import Language
-from .trainable_pipe import TrainablePipe
from ..scorer import Scorer
from ..tokens import Doc, Span
from ..training import Example
-from ..errors import Errors
-
from ..util import registry
from .spancat import DEFAULT_SPANS_KEY
+from .trainable_pipe import TrainablePipe
span_finder_default_config = """
[model]
diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py
index b0669c0ef..2a5e2179a 100644
--- a/spacy/pipeline/span_ruler.py
+++ b/spacy/pipeline/span_ruler.py
@@ -1,20 +1,32 @@
-from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable
-from typing import Sequence, Set, cast
import warnings
from functools import partial
from pathlib import Path
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Union,
+ cast,
+)
+
import srsly
-from .pipe import Pipe
-from ..training import Example
-from ..language import Language
+from .. import util
from ..errors import Errors, Warnings
-from ..util import ensure_path, SimpleFrozenList, registry
-from ..tokens import Doc, Span
-from ..scorer import Scorer
+from ..language import Language
from ..matcher import Matcher, PhraseMatcher
from ..matcher.levenshtein import levenshtein_compare
-from .. import util
+from ..scorer import Scorer
+from ..tokens import Doc, Span
+from ..training import Example
+from ..util import SimpleFrozenList, ensure_path, registry
+from .pipe import Pipe
PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
DEFAULT_SPANS_KEY = "ruler"
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index 4d5d78035..47aae2bb7 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -1,26 +1,27 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Callable, Optional
-import numpy
-import srsly
-from thinc.api import Model, set_dropout_rate, SequenceCategoricalCrossentropy, Config
-from thinc.types import Floats2d
import warnings
from itertools import islice
+from typing import Callable, Optional
+
+import numpy
+import srsly
+from thinc.api import Config, Model, SequenceCategoricalCrossentropy, set_dropout_rate
+from thinc.types import Floats2d

-from ..tokens.doc cimport Doc
from ..morphology cimport Morphology
+from ..tokens.doc cimport Doc
from ..vocab cimport Vocab
-from .trainable_pipe import TrainablePipe
-from .pipe import deserialize_config
-from ..language import Language
-from ..attrs import POS, ID
-from ..parts_of_speech import X
+from .. import util
+from ..attrs import ID, POS
from ..errors import Errors, Warnings
+from ..language import Language
+from ..parts_of_speech import X
from ..scorer import Scorer
from ..training import validate_examples, validate_get_examples
from ..util import registry
-from .. import util
+from .pipe import deserialize_config
+from .trainable_pipe import TrainablePipe
# See #9050
BACKWARD_OVERWRITE = False
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 650a01949..610ed99b6 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -1,18 +1,18 @@
-from typing import Iterable, Tuple, Optional, Dict, List, Callable, Any
-from thinc.api import get_array_module, Model, Optimizer, set_dropout_rate, Config
-from thinc.types import Floats2d
-import numpy
from itertools import islice
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+
+import numpy
+from thinc.api import Config, Model, Optimizer, get_array_module, set_dropout_rate
+from thinc.types import Floats2d

-from .trainable_pipe import TrainablePipe
-from ..language import Language
-from ..training import Example, validate_examples, validate_get_examples
from ..errors import Errors
+from ..language import Language
from ..scorer import Scorer
from ..tokens import Doc
+from ..training import Example, validate_examples, validate_get_examples
from ..util import registry
from ..vocab import Vocab
-
+from .trainable_pipe import TrainablePipe
single_label_default_config = """
[model]
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 41c0e2f63..364e6f436 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -1,19 +1,18 @@
-from typing import Iterable, Optional, Dict, List, Callable, Any
-from thinc.types import Floats2d
-from thinc.api import Model, Config
-
from itertools import islice
+from typing import Any, Callable, Dict, Iterable, List, Optional
+
+from thinc.api import Config, Model
+from thinc.types import Floats2d

-from ..language import Language
-from ..training import Example, validate_get_examples
from ..errors import Errors
+from ..language import Language
from ..scorer import Scorer
from ..tokens import Doc
+from ..training import Example, validate_get_examples
from ..util import registry
from ..vocab import Vocab
from .textcat import TextCategorizer
-
multi_label_default_config = """
[model]
@architectures = "spacy.TextCatEnsemble.v2"
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index c742aaeaa..677f5eec1 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -1,13 +1,14 @@
-from typing import Sequence, Iterable, Optional, Dict, Callable, List, Any
-from thinc.api import Model, set_dropout_rate, Optimizer, Config
from itertools import islice
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
+
+from thinc.api import Config, Model, Optimizer, set_dropout_rate

-from .trainable_pipe import TrainablePipe
-from ..training import Example, validate_examples, validate_get_examples
-from ..tokens import Doc
-from ..vocab import Vocab
-from ..language import Language
from ..errors import Errors
+from ..language import Language
+from ..tokens import Doc
+from ..training import Example, validate_examples, validate_get_examples
+from ..vocab import Vocab
+from .trainable_pipe import TrainablePipe
default_model_config = """
[model]
diff --git a/spacy/pipeline/trainable_pipe.pxd b/spacy/pipeline/trainable_pipe.pxd
index 65daa8b22..b1d2550a1 100644
--- a/spacy/pipeline/trainable_pipe.pxd
+++ b/spacy/pipeline/trainable_pipe.pxd
@@ -1,5 +1,6 @@
-from .pipe cimport Pipe
from ..vocab cimport Vocab
+from .pipe cimport Pipe
+
cdef class TrainablePipe(Pipe):
cdef public Vocab vocab
diff --git a/spacy/pipeline/trainable_pipe.pyx b/spacy/pipeline/trainable_pipe.pyx
index 3f0507d4b..7aa91ac16 100644
--- a/spacy/pipeline/trainable_pipe.pyx
+++ b/spacy/pipeline/trainable_pipe.pyx
@@ -1,17 +1,17 @@
# cython: infer_types=True, profile=True, binding=True
-from typing import Iterable, Iterator, Optional, Dict, Tuple, Callable
+from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
+
import srsly
-from thinc.api import set_dropout_rate, Model, Optimizer
+from thinc.api import Model, Optimizer, set_dropout_rate
from ..tokens.doc cimport Doc
-from ..training import validate_examples
-from ..errors import Errors
-from .pipe import Pipe, deserialize_config
from .. import util
-from ..vocab import Vocab
+from ..errors import Errors
from ..language import Language
-from ..training import Example
+from ..training import Example, validate_examples
+from ..vocab import Vocab
+from .pipe import Pipe, deserialize_config
cdef class TrainablePipe(Pipe):
diff --git a/spacy/pipeline/transition_parser.pxd b/spacy/pipeline/transition_parser.pxd
index 1521fde60..e5e88d521 100644
--- a/spacy/pipeline/transition_parser.pxd
+++ b/spacy/pipeline/transition_parser.pxd
@@ -1,11 +1,11 @@
from cymem.cymem cimport Pool
from thinc.backends.cblas cimport CBlas
+from ..ml.parser_model cimport ActivationsC, SizesC, WeightsC
from ..vocab cimport Vocab
-from .trainable_pipe cimport TrainablePipe
-from ._parser_internals.transition_system cimport Transition, TransitionSystem
from ._parser_internals._state cimport StateC
-from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC
+from ._parser_internals.transition_system cimport Transition, TransitionSystem
+from .trainable_pipe cimport TrainablePipe
cdef class Parser(TrainablePipe):
diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx
index 1327db2ce..ef4d9b362 100644
--- a/spacy/pipeline/transition_parser.pyx
+++ b/spacy/pipeline/transition_parser.pyx
@@ -1,34 +1,50 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
from __future__ import print_function
-from cymem.cymem cimport Pool
+
cimport numpy as np
+from cymem.cymem cimport Pool
+
from itertools import islice
-from libcpp.vector cimport vector
-from libc.string cimport memset, memcpy
+
from libc.stdlib cimport calloc, free
+from libc.string cimport memcpy, memset
+from libcpp.vector cimport vector
+
import random
import srsly
-from thinc.api import get_ops, set_dropout_rate, CupyOps, NumpyOps
+from thinc.api import CupyOps, NumpyOps, get_ops, set_dropout_rate
+
from thinc.extra.search cimport Beam
-import numpy.random
-import numpy
+
import warnings
-from ._parser_internals.stateclass cimport StateClass
-from ..ml.parser_model cimport alloc_activations, free_activations
-from ..ml.parser_model cimport predict_states, arg_max_if_valid
-from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
-from ..ml.parser_model cimport get_c_weights, get_c_sizes
+import numpy
+import numpy.random
+
+from ..ml.parser_model cimport (
+ ActivationsC,
+ SizesC,
+ WeightsC,
+ alloc_activations,
+ arg_max_if_valid,
+ cpu_log_loss,
+ free_activations,
+ get_c_sizes,
+ get_c_weights,
+ predict_states,
+)
from ..tokens.doc cimport Doc
+from ._parser_internals.stateclass cimport StateClass
+
from .trainable_pipe import TrainablePipe
+
from ._parser_internals cimport _beam_utils
-from ._parser_internals import _beam_utils
-from ..training import validate_examples, validate_get_examples
-from ..errors import Errors, Warnings
from .. import util
-
+from ..errors import Errors, Warnings
+from ..training import validate_examples, validate_get_examples
+from ._parser_internals import _beam_utils
NUMPY_OPS = NumpyOps()
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 140592dcd..22f45372c 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -1,17 +1,39 @@
-from typing import Dict, List, Union, Optional, Any, Callable, Type, Tuple
-from typing import Iterable, TypeVar, TYPE_CHECKING
-from .compat import Literal
-from enum import Enum
-from pydantic import BaseModel, Field, ValidationError, validator, create_model
-from pydantic import StrictStr, StrictInt, StrictFloat, StrictBool, ConstrainedStr
-from pydantic.main import ModelMetaclass
-from thinc.api import Optimizer, ConfigValidationError, Model
-from thinc.config import Promise
-from collections import defaultdict
import inspect
import re
+from collections import defaultdict
+from enum import Enum
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+)
+
+from pydantic import (
+ BaseModel,
+ ConstrainedStr,
+ Field,
+ StrictBool,
+ StrictFloat,
+ StrictInt,
+ StrictStr,
+ ValidationError,
+ create_model,
+ validator,
+)
+from pydantic.main import ModelMetaclass
+from thinc.api import ConfigValidationError, Model, Optimizer
+from thinc.config import Promise
from .attrs import NAMES
+from .compat import Literal
from .lookups import Lookups
from .util import is_cython_func
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 86cd00a50..48d9f03ab 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -1,13 +1,23 @@
-from typing import Optional, Iterable, Dict, Set, List, Any, Callable, Tuple
-from typing import TYPE_CHECKING
-import numpy as np
from collections import defaultdict
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Set,
+ Tuple,
+)
+
+import numpy as np

-from .training import Example
-from .tokens import Token, Doc, Span
from .errors import Errors
-from .util import get_lang_class, SimpleFrozenList
from .morphology import Morphology
+from .tokens import Doc, Span, Token
+from .training import Example
+from .util import SimpleFrozenList, get_lang_class
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/strings.pxd b/spacy/strings.pxd
index 5f03a9a28..d22f48ba1 100644
--- a/spacy/strings.pxd
+++ b/spacy/strings.pxd
@@ -1,9 +1,9 @@
-from libc.stdint cimport int64_t
-from libcpp.vector cimport vector
-from libcpp.set cimport set
from cymem.cymem cimport Pool
-from preshed.maps cimport PreshMap
+from libc.stdint cimport int64_t
+from libcpp.set cimport set
+from libcpp.vector cimport vector
from murmurhash.mrmr cimport hash64
+from preshed.maps cimport PreshMap
from .typedefs cimport attr_t, hash_t
diff --git a/spacy/strings.pyi b/spacy/strings.pyi
index b29389b9a..f8fe8381c 100644
--- a/spacy/strings.pyi
+++ b/spacy/strings.pyi
@@ -1,5 +1,5 @@
-from typing import Optional, Iterable, Iterator, Union, Any, overload
from pathlib import Path
+from typing import Any, Iterable, Iterator, Optional, Union, overload
def get_string_id(key: Union[str, int]) -> int: ...
diff --git a/spacy/strings.pyx b/spacy/strings.pyx
index c5f218342..16c3e2b5b 100644
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,18 +1,19 @@
# cython: infer_types=True
cimport cython
+from libc.stdint cimport uint32_t
from libc.string cimport memcpy
from libcpp.set cimport set
-from libc.stdint cimport uint32_t
-from murmurhash.mrmr cimport hash64, hash32
+from murmurhash.mrmr cimport hash32, hash64
import srsly
from .typedefs cimport hash_t
+from . import util
+from .errors import Errors
from .symbols import IDS as SYMBOLS_BY_STR
from .symbols import NAMES as SYMBOLS_BY_INT
-from .errors import Errors
-from . import util
+
# Not particularly elegant, but this is faster than `isinstance(key, numbers.Integral)`
cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
diff --git a/spacy/structs.pxd b/spacy/structs.pxd
index 86d5b67ed..9efb068fd 100644
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -1,11 +1,10 @@
-from libc.stdint cimport uint8_t, uint32_t, int32_t, uint64_t
-from libcpp.vector cimport vector
-from libcpp.unordered_set cimport unordered_set
+from libc.stdint cimport int32_t, int64_t, uint8_t, uint32_t, uint64_t
from libcpp.unordered_map cimport unordered_map
-from libc.stdint cimport int32_t, int64_t
+from libcpp.unordered_set cimport unordered_set
+from libcpp.vector cimport vector

-from .typedefs cimport flags_t, attr_t, hash_t
from .parts_of_speech cimport univ_pos_t
+from .typedefs cimport attr_t, flags_t, hash_t
cdef struct LexemeC:
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 00b8f5f1c..4ca741dfc 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.util import get_lang_class
from hypothesis import settings
+from spacy.util import get_lang_class
+
# Functionally disable deadline settings for tests
# to prevent spurious test failures in CI builds.
settings.register_profile("no_deadlines", deadline=2 * 60 * 1000) # in ms
diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py
index 231b7c2a8..259b21fb3 100644
--- a/spacy/tests/doc/test_add_entities.py
+++ b/spacy/tests/doc/test_add_entities.py
@@ -1,10 +1,11 @@
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
-from spacy.training import Example
-from spacy.pipeline import EntityRecognizer
-from spacy.tokens import Span, Doc
-from spacy import registry
import pytest
+from spacy import registry
+from spacy.pipeline import EntityRecognizer
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
+from spacy.tokens import Doc, Span
+from spacy.training import Example
+
def _ner_example(ner):
doc = Doc(
diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py
index 1f2d7d999..757655f55 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -1,8 +1,8 @@
import numpy
import pytest
+from spacy.attrs import DEP, MORPH, ORTH, POS, SHAPE
from spacy.tokens import Doc
-from spacy.attrs import ORTH, SHAPE, POS, DEP, MORPH
@pytest.mark.issue(2203)
diff --git a/spacy/tests/doc/test_creation.py b/spacy/tests/doc/test_creation.py
index 302a9b6ea..4bc1de3e0 100644
--- a/spacy/tests/doc/test_creation.py
+++ b/spacy/tests/doc/test_creation.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.vocab import Vocab
-from spacy.tokens import Doc
+
from spacy import util
+from spacy.tokens import Doc
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index 38003dea9..73544c51a 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -1,13 +1,22 @@
+import warnings
import weakref
import numpy
-from numpy.testing import assert_array_equal
import pytest
-import warnings
+from numpy.testing import assert_array_equal
from thinc.api import NumpyOps, get_current_ops
-from spacy.attrs import DEP, ENT_IOB, ENT_TYPE, HEAD, IS_ALPHA, MORPH, POS
-from spacy.attrs import SENT_START, TAG
+from spacy.attrs import (
+ DEP,
+ ENT_IOB,
+ ENT_TYPE,
+ HEAD,
+ IS_ALPHA,
+ MORPH,
+ POS,
+ SENT_START,
+ TAG,
+)
from spacy.lang.en import English
from spacy.lang.xx import MultiLanguage
from spacy.language import Language
diff --git a/spacy/tests/doc/test_graph.py b/spacy/tests/doc/test_graph.py
index e464b0058..d14a5b057 100644
--- a/spacy/tests/doc/test_graph.py
+++ b/spacy/tests/doc/test_graph.py
@@ -1,6 +1,6 @@
-from spacy.vocab import Vocab
from spacy.tokens.doc import Doc
from spacy.tokens.graph import Graph
+from spacy.vocab import Vocab
def test_graph_init():
diff --git a/spacy/tests/doc/test_json_doc_conversion.py b/spacy/tests/doc/test_json_doc_conversion.py
index 11a1817e6..a76472d07 100644
--- a/spacy/tests/doc/test_json_doc_conversion.py
+++ b/spacy/tests/doc/test_json_doc_conversion.py
@@ -1,8 +1,10 @@
import pytest
+import srsly
+
import spacy
from spacy import schemas
from spacy.tokens import Doc, Span, Token
-import srsly
+
from .test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/doc/test_pickle_doc.py b/spacy/tests/doc/test_pickle_doc.py
index 28cb66714..2e28162d4 100644
--- a/spacy/tests/doc/test_pickle_doc.py
+++ b/spacy/tests/doc/test_pickle_doc.py
@@ -1,5 +1,5 @@
-from spacy.language import Language
from spacy.compat import pickle
+from spacy.language import Language
def test_pickle_single_doc():
diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py
index 20c302da1..45d54346e 100644
--- a/spacy/tests/doc/test_retokenize_merge.py
+++ b/spacy/tests/doc/test_retokenize_merge.py
@@ -1,7 +1,8 @@
import pytest
+
from spacy.attrs import LEMMA
-from spacy.vocab import Vocab
from spacy.tokens import Doc, Token
+from spacy.vocab import Vocab
def test_doc_retokenize_merge(en_tokenizer):
diff --git a/spacy/tests/doc/test_retokenize_split.py b/spacy/tests/doc/test_retokenize_split.py
index ec4deb033..61ef599be 100644
--- a/spacy/tests/doc/test_retokenize_split.py
+++ b/spacy/tests/doc/test_retokenize_split.py
@@ -1,8 +1,8 @@
import numpy
import pytest
-from spacy.vocab import Vocab
from spacy.tokens import Doc, Token
+from spacy.vocab import Vocab
@pytest.mark.issue(3540)
diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index a5c512dc0..04dde2bfa 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -1,13 +1,13 @@
-import pytest
import numpy
+import pytest
from numpy.testing import assert_array_equal
+from thinc.api import get_current_ops

-from spacy.attrs import ORTH, LENGTH
+from spacy.attrs import LENGTH, ORTH
from spacy.lang.en import English
from spacy.tokens import Doc, Span, Token
-from spacy.vocab import Vocab
from spacy.util import filter_spans
-from thinc.api import get_current_ops
+from spacy.vocab import Vocab
from ..util import add_vecs_to_vocab
from .test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/doc/test_span_group.py b/spacy/tests/doc/test_span_group.py
index cea2c42ee..ef78172bf 100644
--- a/spacy/tests/doc/test_span_group.py
+++ b/spacy/tests/doc/test_span_group.py
@@ -1,9 +1,10 @@
+from random import Random
from typing import List
import pytest
-from random import Random
+
from spacy.matcher import Matcher
-from spacy.tokens import Span, SpanGroup, Doc
+from spacy.tokens import Doc, Span, SpanGroup
from spacy.util import filter_spans
diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index e715c5e85..782dfd774 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -1,10 +1,11 @@
-import pytest
import numpy
-from spacy.attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
+import pytest
+
+from spacy.attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_STOP, IS_TITLE
from spacy.symbols import VERB
-from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/doc/test_underscore.py b/spacy/tests/doc/test_underscore.py
index b934221af..b79d2f01f 100644
--- a/spacy/tests/doc/test_underscore.py
+++ b/spacy/tests/doc/test_underscore.py
@@ -1,5 +1,6 @@
import pytest
from mock import Mock
+
from spacy.tokens import Doc, Span, Token
from spacy.tokens.underscore import Underscore
diff --git a/spacy/tests/lang/bn/test_tokenizer.py b/spacy/tests/lang/bn/test_tokenizer.py
index 5b18c5269..e9a4d5e54 100644
--- a/spacy/tests/lang/bn/test_tokenizer.py
+++ b/spacy/tests/lang/bn/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# fmt: off
TESTCASES = [
# Punctuation tests
diff --git a/spacy/tests/lang/da/test_noun_chunks.py b/spacy/tests/lang/da/test_noun_chunks.py
index 30df92c0b..b4d389e4b 100644
--- a/spacy/tests/lang/da/test_noun_chunks.py
+++ b/spacy/tests/lang/da/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/lang/da/test_text.py b/spacy/tests/lang/da/test_text.py
index 3c6cca5ac..e1f3b96e2 100644
--- a/spacy/tests/lang/da/test_text.py
+++ b/spacy/tests/lang/da/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.da.lex_attrs import like_num
diff --git a/spacy/tests/lang/en/test_customized_tokenizer.py b/spacy/tests/lang/en/test_customized_tokenizer.py
index f5302cb31..8251306a6 100644
--- a/spacy/tests/lang/en/test_customized_tokenizer.py
+++ b/spacy/tests/lang/en/test_customized_tokenizer.py
@@ -1,9 +1,10 @@
-import pytest
import re
+
+import pytest
+
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
-from spacy.util import compile_prefix_regex, compile_suffix_regex
-from spacy.util import compile_infix_regex
+from spacy.util import compile_infix_regex, compile_prefix_regex, compile_suffix_regex
@pytest.fixture
diff --git a/spacy/tests/lang/en/test_noun_chunks.py b/spacy/tests/lang/en/test_noun_chunks.py
index 0c54ffbb4..bda203b2c 100644
--- a/spacy/tests/lang/en/test_noun_chunks.py
+++ b/spacy/tests/lang/en/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
@pytest.fixture
def doc(en_vocab):
diff --git a/spacy/tests/lang/en/test_punct.py b/spacy/tests/lang/en/test_punct.py
index 1d10478a1..79d03d2db 100644
--- a/spacy/tests/lang/en/test_punct.py
+++ b/spacy/tests/lang/en/test_punct.py
@@ -1,7 +1,7 @@
import pytest
-from spacy.util import compile_prefix_regex
-from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.util import compile_prefix_regex
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
diff --git a/spacy/tests/lang/en/test_sbd.py b/spacy/tests/lang/en/test_sbd.py
index d30c72750..c07c23193 100644
--- a/spacy/tests/lang/en/test_sbd.py
+++ b/spacy/tests/lang/en/test_sbd.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
from ...util import apply_transition_sequence
diff --git a/spacy/tests/lang/en/test_text.py b/spacy/tests/lang/en/test_text.py
index 358f4c0f9..53cf0cc5b 100644
--- a/spacy/tests/lang/en/test_text.py
+++ b/spacy/tests/lang/en/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.en.lex_attrs import like_num
diff --git a/spacy/tests/lang/es/test_noun_chunks.py b/spacy/tests/lang/es/test_noun_chunks.py
index 6118a0458..8e5fe8354 100644
--- a/spacy/tests/lang/es/test_noun_chunks.py
+++ b/spacy/tests/lang/es/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/es/test_text.py b/spacy/tests/lang/es/test_text.py
index d95f6d26b..1d1f7fa6b 100644
--- a/spacy/tests/lang/es/test_text.py
+++ b/spacy/tests/lang/es/test_text.py
@@ -1,6 +1,7 @@
import pytest
-from spacy.lang.es.lex_attrs import like_num
+
from spacy.lang.es import Spanish
+from spacy.lang.es.lex_attrs import like_num
@pytest.mark.issue(3803)
diff --git a/spacy/tests/lang/fi/test_noun_chunks.py b/spacy/tests/lang/fi/test_noun_chunks.py
index cab84b311..37e1b00a0 100644
--- a/spacy/tests/lang/fi/test_noun_chunks.py
+++ b/spacy/tests/lang/fi/test_noun_chunks.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.tokens import Doc

+from spacy.tokens import Doc
FI_NP_TEST_EXAMPLES = [
(
diff --git a/spacy/tests/lang/fi/test_tokenizer.py b/spacy/tests/lang/fi/test_tokenizer.py
index dc40e18a3..2d9f081a7 100644
--- a/spacy/tests/lang/fi/test_tokenizer.py
+++ b/spacy/tests/lang/fi/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
ABBREVIATION_TESTS = [
(
"Hyvää uutta vuotta t. siht. Niemelä!",
diff --git a/spacy/tests/lang/fr/test_noun_chunks.py b/spacy/tests/lang/fr/test_noun_chunks.py
index 25b95f566..436e07b29 100644
--- a/spacy/tests/lang/fr/test_noun_chunks.py
+++ b/spacy/tests/lang/fr/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/fr/test_prefix_suffix_infix.py b/spacy/tests/lang/fr/test_prefix_suffix_infix.py
index 272531b63..b81ccbc0e 100644
--- a/spacy/tests/lang/fr/test_prefix_suffix_infix.py
+++ b/spacy/tests/lang/fr/test_prefix_suffix_infix.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.language import Language, BaseDefaults
-from spacy.lang.punctuation import TOKENIZER_INFIXES
+
from spacy.lang.char_classes import ALPHA
+from spacy.lang.punctuation import TOKENIZER_INFIXES
+from spacy.language import BaseDefaults, Language
@pytest.mark.issue(768)
diff --git a/spacy/tests/lang/fr/test_text.py b/spacy/tests/lang/fr/test_text.py
index 01231f593..2c58a1c4a 100644
--- a/spacy/tests/lang/fr/test_text.py
+++ b/spacy/tests/lang/fr/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.fr.lex_attrs import like_num
diff --git a/spacy/tests/lang/ga/test_tokenizer.py b/spacy/tests/lang/ga/test_tokenizer.py
index 78127ef7c..0c16b27d2 100644
--- a/spacy/tests/lang/ga/test_tokenizer.py
+++ b/spacy/tests/lang/ga/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# fmt: off
GA_TOKEN_EXCEPTION_TESTS = [
("Niall Ó Domhnaill, Rialtas na hÉireann 1977 (lch. 600).", ["Niall", "Ó", "Domhnaill", ",", "Rialtas", "na", "hÉireann", "1977", "(", "lch.", "600", ")", "."]),
diff --git a/spacy/tests/lang/grc/test_tokenizer.py b/spacy/tests/lang/grc/test_tokenizer.py
index 3df5b546b..9f29b9024 100644
--- a/spacy/tests/lang/grc/test_tokenizer.py
+++ b/spacy/tests/lang/grc/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# fmt: off
GRC_TOKEN_EXCEPTION_TESTS = [
("τὸ 〈τῆς〉 φιλοσοφίας ἔργον ἔνιοί φασιν ἀπὸ ⟦βαρβάρων⟧ ἄρξαι.", ["τὸ", "〈", "τῆς", "〉", "φιλοσοφίας", "ἔργον", "ἔνιοί", "φασιν", "ἀπὸ", "⟦", "βαρβάρων", "⟧", "ἄρξαι", "."]),
diff --git a/spacy/tests/lang/he/test_tokenizer.py b/spacy/tests/lang/he/test_tokenizer.py
index 3716f7e3b..15d059328 100644
--- a/spacy/tests/lang/he/test_tokenizer.py
+++ b/spacy/tests/lang/he/test_tokenizer.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.he.lex_attrs import like_num
diff --git a/spacy/tests/lang/hi/test_lex_attrs.py b/spacy/tests/lang/hi/test_lex_attrs.py
index 80a7cc1c4..2d8d4a53e 100644
--- a/spacy/tests/lang/hi/test_lex_attrs.py
+++ b/spacy/tests/lang/hi/test_lex_attrs.py
@@ -1,5 +1,6 @@
import pytest
-from spacy.lang.hi.lex_attrs import norm, like_num
+
+from spacy.lang.hi.lex_attrs import like_num, norm
def test_hi_tokenizer_handles_long_text(hi_tokenizer):
diff --git a/spacy/tests/lang/hi/test_text.py b/spacy/tests/lang/hi/test_text.py
index 791cc3822..837dc3099 100644
--- a/spacy/tests/lang/hi/test_text.py
+++ b/spacy/tests/lang/hi/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.hi import Hindi
diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py
index 0488474ae..fa689c8f3 100644
--- a/spacy/tests/lang/hu/test_tokenizer.py
+++ b/spacy/tests/lang/hu/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
DEFAULT_TESTS = [
("N. kormányzósági\nszékhely.", ["N.", "kormányzósági", "székhely", "."]),
pytest.param(
diff --git a/spacy/tests/lang/hy/test_text.py b/spacy/tests/lang/hy/test_text.py
index ac0f1e128..7a69c2a81 100644
--- a/spacy/tests/lang/hy/test_text.py
+++ b/spacy/tests/lang/hy/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.hy.lex_attrs import like_num
diff --git a/spacy/tests/lang/hy/test_tokenizer.py b/spacy/tests/lang/hy/test_tokenizer.py
index e9efb224a..9423cb4d0 100644
--- a/spacy/tests/lang/hy/test_tokenizer.py
+++ b/spacy/tests/lang/hy/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
# TODO add test cases with valid punctuation signs.
hy_tokenize_text_test = [
diff --git a/spacy/tests/lang/id/test_text.py b/spacy/tests/lang/id/test_text.py
index ed6487b68..7397a8c17 100644
--- a/spacy/tests/lang/id/test_text.py
+++ b/spacy/tests/lang/id/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.id.lex_attrs import like_num
diff --git a/spacy/tests/lang/it/test_noun_chunks.py b/spacy/tests/lang/it/test_noun_chunks.py
index 0a8c10e79..7f6659ee7 100644
--- a/spacy/tests/lang/it/test_noun_chunks.py
+++ b/spacy/tests/lang/it/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/ja/test_morphologizer_factory.py b/spacy/tests/lang/ja/test_morphologizer_factory.py
index a4e038d01..d504576d0 100644
--- a/spacy/tests/lang/ja/test_morphologizer_factory.py
+++ b/spacy/tests/lang/ja/test_morphologizer_factory.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ja import Japanese
diff --git a/spacy/tests/lang/ja/test_serialize.py b/spacy/tests/lang/ja/test_serialize.py
index 011eb470f..f48b2570e 100644
--- a/spacy/tests/lang/ja/test_serialize.py
+++ b/spacy/tests/lang/ja/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.ja import Japanese
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/ja/test_tokenizer.py b/spacy/tests/lang/ja/test_tokenizer.py
index ef7bed06d..a26347444 100644
--- a/spacy/tests/lang/ja/test_tokenizer.py
+++ b/spacy/tests/lang/ja/test_tokenizer.py
@@ -1,7 +1,8 @@
import pytest
+from spacy.lang.ja import DetailedToken, Japanese
+
from ...tokenizer.test_naughty_strings import NAUGHTY_STRINGS
-from spacy.lang.ja import Japanese, DetailedToken
# fmt: off
TOKENIZER_TESTS = [
diff --git a/spacy/tests/lang/ko/test_serialize.py b/spacy/tests/lang/ko/test_serialize.py
index 75288fcc5..bba7bce6e 100644
--- a/spacy/tests/lang/ko/test_serialize.py
+++ b/spacy/tests/lang/ko/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.ko import Korean
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/ky/test_tokenizer.py b/spacy/tests/lang/ky/test_tokenizer.py
index 5cf6eb1a6..b089dd9b9 100644
--- a/spacy/tests/lang/ky/test_tokenizer.py
+++ b/spacy/tests/lang/ky/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
INFIX_HYPHEN_TESTS = [
("Бала-чака жакшыбы?", "Бала-чака жакшыбы ?".split()),
("Кыз-келиндер кийими.", "Кыз-келиндер кийими .".split()),
diff --git a/spacy/tests/lang/la/test_noun_chunks.py b/spacy/tests/lang/la/test_noun_chunks.py
index ba8f5658b..70a3392cd 100644
--- a/spacy/tests/lang/la/test_noun_chunks.py
+++ b/spacy/tests/lang/la/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/lang/la/test_text.py b/spacy/tests/lang/la/test_text.py
index 48e7359a4..74606c4e8 100644
--- a/spacy/tests/lang/la/test_text.py
+++ b/spacy/tests/lang/la/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.la.lex_attrs import like_num
diff --git a/spacy/tests/lang/mk/test_text.py b/spacy/tests/lang/mk/test_text.py
index b8881082c..b3a7ff9ee 100644
--- a/spacy/tests/lang/mk/test_text.py
+++ b/spacy/tests/lang/mk/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.mk.lex_attrs import like_num
diff --git a/spacy/tests/lang/ms/test_text.py b/spacy/tests/lang/ms/test_text.py
index d6cd169ce..4b0ac3b2b 100644
--- a/spacy/tests/lang/ms/test_text.py
+++ b/spacy/tests/lang/ms/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ms.lex_attrs import like_num
diff --git a/spacy/tests/lang/nb/test_tokenizer.py b/spacy/tests/lang/nb/test_tokenizer.py
index 2da6e8d40..4f5fd89a3 100644
--- a/spacy/tests/lang/nb/test_tokenizer.py
+++ b/spacy/tests/lang/nb/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
NB_TOKEN_EXCEPTION_TESTS = [
(
"Smørsausen brukes bl.a. til fisk",
diff --git a/spacy/tests/lang/nl/test_noun_chunks.py b/spacy/tests/lang/nl/test_noun_chunks.py
index 8962e3b75..6004ac230 100644
--- a/spacy/tests/lang/nl/test_noun_chunks.py
+++ b/spacy/tests/lang/nl/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
from spacy.util import filter_spans
diff --git a/spacy/tests/lang/nl/test_text.py b/spacy/tests/lang/nl/test_text.py
index 8bc72cc6d..d6413e0d7 100644
--- a/spacy/tests/lang/nl/test_text.py
+++ b/spacy/tests/lang/nl/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.nl.lex_attrs import like_num
diff --git a/spacy/tests/lang/pt/test_noun_chunks.py b/spacy/tests/lang/pt/test_noun_chunks.py
index 9a42ce268..eee96d593 100644
--- a/spacy/tests/lang/pt/test_noun_chunks.py
+++ b/spacy/tests/lang/pt/test_noun_chunks.py
@@ -1,6 +1,7 @@
-from spacy.tokens import Doc
import pytest
+from spacy.tokens import Doc
+
# fmt: off
@pytest.mark.parametrize(
diff --git a/spacy/tests/lang/pt/test_text.py b/spacy/tests/lang/pt/test_text.py
index 3a9162b80..cb8723901 100644
--- a/spacy/tests/lang/pt/test_text.py
+++ b/spacy/tests/lang/pt/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.pt.lex_attrs import like_num
diff --git a/spacy/tests/lang/ro/test_tokenizer.py b/spacy/tests/lang/ro/test_tokenizer.py
index 64c072470..d2affd607 100644
--- a/spacy/tests/lang/ro/test_tokenizer.py
+++ b/spacy/tests/lang/ro/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
TEST_CASES = [
(
"Adresa este str. Principală nr. 5.",
diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py
index 9a5a9ad68..66aa7e3a6 100644
--- a/spacy/tests/lang/ru/test_lemmatizer.py
+++ b/spacy/tests/lang/ru/test_lemmatizer.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.tokens import Doc

+from spacy.tokens import Doc
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
diff --git a/spacy/tests/lang/ru/test_text.py b/spacy/tests/lang/ru/test_text.py
index b0eaf66bb..0bbed2122 100644
--- a/spacy/tests/lang/ru/test_text.py
+++ b/spacy/tests/lang/ru/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ru.lex_attrs import like_num
diff --git a/spacy/tests/lang/ru/test_tokenizer.py b/spacy/tests/lang/ru/test_tokenizer.py
index 083b55a09..c941e21fc 100644
--- a/spacy/tests/lang/ru/test_tokenizer.py
+++ b/spacy/tests/lang/ru/test_tokenizer.py
@@ -1,6 +1,6 @@
from string import punctuation
-import pytest

+import pytest
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
diff --git a/spacy/tests/lang/sr/test_tokenizer.py b/spacy/tests/lang/sr/test_tokenizer.py
index fdcf790d8..7ecd9596b 100644
--- a/spacy/tests/lang/sr/test_tokenizer.py
+++ b/spacy/tests/lang/sr/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
PUNCT_PAIRED = [("(", ")"), ("[", "]"), ("{", "}"), ("*", "*")]
diff --git a/spacy/tests/lang/sv/test_lex_attrs.py b/spacy/tests/lang/sv/test_lex_attrs.py
index 656c4706b..a47b17b27 100644
--- a/spacy/tests/lang/sv/test_lex_attrs.py
+++ b/spacy/tests/lang/sv/test_lex_attrs.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.sv.lex_attrs import like_num
diff --git a/spacy/tests/lang/sv/test_noun_chunks.py b/spacy/tests/lang/sv/test_noun_chunks.py
index d2410156c..599148384 100644
--- a/spacy/tests/lang/sv/test_noun_chunks.py
+++ b/spacy/tests/lang/sv/test_noun_chunks.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/lang/sv/test_tokenizer.py b/spacy/tests/lang/sv/test_tokenizer.py
index 8871f4414..f19c6b66f 100644
--- a/spacy/tests/lang/sv/test_tokenizer.py
+++ b/spacy/tests/lang/sv/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
SV_TOKEN_EXCEPTION_TESTS = [
(
"Smörsåsen används bl.a. till fisk",
diff --git a/spacy/tests/lang/ta/test_text.py b/spacy/tests/lang/ta/test_text.py
index 228a14c18..2d15e96fc 100644
--- a/spacy/tests/lang/ta/test_text.py
+++ b/spacy/tests/lang/ta/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.ta import Tamil
# Wikipedia excerpt: https://en.wikipedia.org/wiki/Chennai (Tamil Language)
diff --git a/spacy/tests/lang/ta/test_tokenizer.py b/spacy/tests/lang/ta/test_tokenizer.py
index 6ba8a2400..e668b5aca 100644
--- a/spacy/tests/lang/ta/test_tokenizer.py
+++ b/spacy/tests/lang/ta/test_tokenizer.py
@@ -1,6 +1,7 @@
import pytest
-from spacy.symbols import ORTH
+
from spacy.lang.ta import Tamil
+from spacy.symbols import ORTH
TA_BASIC_TOKENIZATION_TESTS = [
(
diff --git a/spacy/tests/lang/test_attrs.py b/spacy/tests/lang/test_attrs.py
index 1c27c1744..fd96e8f9b 100644
--- a/spacy/tests/lang/test_attrs.py
+++ b/spacy/tests/lang/test_attrs.py
@@ -1,10 +1,15 @@
import pytest
-from spacy.attrs import intify_attrs, ENT_IOB
-from spacy.attrs import IS_ALPHA, LEMMA, NORM, ORTH, intify_attrs
+from spacy.attrs import ENT_IOB, IS_ALPHA, LEMMA, NORM, ORTH, intify_attrs
from spacy.lang.en.stop_words import STOP_WORDS
-from spacy.lang.lex_attrs import is_ascii, is_currency, is_punct, is_stop
-from spacy.lang.lex_attrs import like_url, word_shape
+from spacy.lang.lex_attrs import (
+ is_ascii,
+ is_currency,
+ is_punct,
+ is_stop,
+ like_url,
+ word_shape,
+)
@pytest.mark.parametrize("word", ["the"])
diff --git a/spacy/tests/lang/test_initialize.py b/spacy/tests/lang/test_initialize.py
index 36f4a75e0..8a158647a 100644
--- a/spacy/tests/lang/test_initialize.py
+++ b/spacy/tests/lang/test_initialize.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.util import get_lang_class

+from spacy.util import get_lang_class
# fmt: off
# Only include languages with no external dependencies
diff --git a/spacy/tests/lang/test_lemmatizers.py b/spacy/tests/lang/test_lemmatizers.py
index e419f0a14..ddb3336ff 100644
--- a/spacy/tests/lang/test_lemmatizers.py
+++ b/spacy/tests/lang/test_lemmatizers.py
@@ -1,9 +1,9 @@
import pytest
+
from spacy import registry
from spacy.lookups import Lookups
from spacy.util import get_lang_class
-
# fmt: off
# Only include languages with no external dependencies
# excluded: ru, uk
diff --git a/spacy/tests/lang/th/test_serialize.py b/spacy/tests/lang/th/test_serialize.py
index a3de4bf54..57d0f1726 100644
--- a/spacy/tests/lang/th/test_serialize.py
+++ b/spacy/tests/lang/th/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.th import Thai
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/tl/test_punct.py b/spacy/tests/lang/tl/test_punct.py
index d6bcf297d..e2c93bf88 100644
--- a/spacy/tests/lang/tl/test_punct.py
+++ b/spacy/tests/lang/tl/test_punct.py
@@ -1,7 +1,7 @@
import pytest
-from spacy.util import compile_prefix_regex
-from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.lang.punctuation import TOKENIZER_PREFIXES
+from spacy.util import compile_prefix_regex
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
diff --git a/spacy/tests/lang/tl/test_text.py b/spacy/tests/lang/tl/test_text.py
index 17429617c..26635ca90 100644
--- a/spacy/tests/lang/tl/test_text.py
+++ b/spacy/tests/lang/tl/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.tl.lex_attrs import like_num
# https://github.com/explosion/spaCy/blob/master/spacy/tests/lang/en/test_text.py
diff --git a/spacy/tests/lang/tr/test_text.py b/spacy/tests/lang/tr/test_text.py
index 323b11bd1..b4d84daae 100644
--- a/spacy/tests/lang/tr/test_text.py
+++ b/spacy/tests/lang/tr/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.tr.lex_attrs import like_num
diff --git a/spacy/tests/lang/tr/test_tokenizer.py b/spacy/tests/lang/tr/test_tokenizer.py
index 9f988eae9..b07c98535 100644
--- a/spacy/tests/lang/tr/test_tokenizer.py
+++ b/spacy/tests/lang/tr/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
ABBREV_TESTS = [
("Dr. Murat Bey ile görüştüm.", ["Dr.", "Murat", "Bey", "ile", "görüştüm", "."]),
("Dr.la görüştüm.", ["Dr.la", "görüştüm", "."]),
diff --git a/spacy/tests/lang/tt/test_tokenizer.py b/spacy/tests/lang/tt/test_tokenizer.py
index 246d2824d..0bb241f27 100644
--- a/spacy/tests/lang/tt/test_tokenizer.py
+++ b/spacy/tests/lang/tt/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
INFIX_HYPHEN_TESTS = [
("Явым-төшем күләме.", "Явым-төшем күләме .".split()),
("Хатын-кыз киеме.", "Хатын-кыз киеме .".split()),
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
index a65bb25e5..060114cdf 100644
--- a/spacy/tests/lang/uk/test_lemmatizer.py
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -1,6 +1,6 @@
import pytest
-from spacy.tokens import Doc

+from spacy.tokens import Doc
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
diff --git a/spacy/tests/lang/uk/test_tokenizer.py b/spacy/tests/lang/uk/test_tokenizer.py
index 6596f490a..7960a30a2 100644
--- a/spacy/tests/lang/uk/test_tokenizer.py
+++ b/spacy/tests/lang/uk/test_tokenizer.py
@@ -1,6 +1,5 @@
import pytest
-
PUNCT_OPEN = ["(", "[", "{", "*"]
PUNCT_CLOSE = [")", "]", "}", "*"]
PUNCT_PAIRED = [("(", ")"), ("[", "]"), ("{", "}"), ("*", "*")]
diff --git a/spacy/tests/lang/vi/test_serialize.py b/spacy/tests/lang/vi/test_serialize.py
index 55dab799c..20bfd20d5 100644
--- a/spacy/tests/lang/vi/test_serialize.py
+++ b/spacy/tests/lang/vi/test_serialize.py
@@ -1,6 +1,7 @@
import pickle
from spacy.lang.vi import Vietnamese
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/vi/test_tokenizer.py b/spacy/tests/lang/vi/test_tokenizer.py
index 3d0642d1e..ca6dee985 100644
--- a/spacy/tests/lang/vi/test_tokenizer.py
+++ b/spacy/tests/lang/vi/test_tokenizer.py
@@ -1,8 +1,8 @@
import pytest
-from ...tokenizer.test_naughty_strings import NAUGHTY_STRINGS
from spacy.lang.vi import Vietnamese
+from ...tokenizer.test_naughty_strings import NAUGHTY_STRINGS
# fmt: off
TOKENIZER_TESTS = [
diff --git a/spacy/tests/lang/yo/test_text.py b/spacy/tests/lang/yo/test_text.py
index 48b689f3d..a1bbc38da 100644
--- a/spacy/tests/lang/yo/test_text.py
+++ b/spacy/tests/lang/yo/test_text.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lang.yo.lex_attrs import like_num
diff --git a/spacy/tests/lang/zh/test_serialize.py b/spacy/tests/lang/zh/test_serialize.py
index 03cdbbe24..4b014d713 100644
--- a/spacy/tests/lang/zh/test_serialize.py
+++ b/spacy/tests/lang/zh/test_serialize.py
@@ -1,5 +1,7 @@
import pytest
+
from spacy.lang.zh import Chinese
+
from ...util import make_tempdir
diff --git a/spacy/tests/lang/zh/test_tokenizer.py b/spacy/tests/lang/zh/test_tokenizer.py
index 741eb0ace..cdba5e397 100644
--- a/spacy/tests/lang/zh/test_tokenizer.py
+++ b/spacy/tests/lang/zh/test_tokenizer.py
@@ -1,7 +1,7 @@
import pytest
-from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
from thinc.api import ConfigValidationError
+from spacy.lang.zh import Chinese, _get_pkuseg_trie_data
# fmt: off
TEXTS = ("作为语言而言,为世界使用人数最多的语言,目前世界有五分之一人口做为母语。",)
diff --git a/spacy/tests/matcher/test_dependency_matcher.py b/spacy/tests/matcher/test_dependency_matcher.py
index 200384320..44b3bb26b 100644
--- a/spacy/tests/matcher/test_dependency_matcher.py
+++ b/spacy/tests/matcher/test_dependency_matcher.py
@@ -1,8 +1,10 @@
-import pytest
+import copy
import pickle
import re
-import copy
+
+import pytest
from mock import Mock
+
from spacy.matcher import DependencyMatcher
from spacy.tokens import Doc, Token
diff --git a/spacy/tests/matcher/test_levenshtein.py b/spacy/tests/matcher/test_levenshtein.py
index 5afb7e1fc..fd85579ae 100644
--- a/spacy/tests/matcher/test_levenshtein.py
+++ b/spacy/tests/matcher/test_levenshtein.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.matcher import levenshtein
from spacy.matcher.levenshtein import levenshtein_compare
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index 09ab6c7dc..c824ca392 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -1,7 +1,8 @@
import pytest
from mock import Mock
+
from spacy.matcher import Matcher
-from spacy.tokens import Doc, Token, Span
+from spacy.tokens import Doc, Span, Token
from ..doc.test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index e7eced02c..21fa36865 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -1,6 +1,7 @@
import pytest
-from spacy.matcher import Matcher
+
from spacy.errors import MatchPatternError
+from spacy.matcher import Matcher
from spacy.schemas import validate_token_pattern
# (pattern, num errors with validation, num errors identified with minimal
diff --git a/spacy/tests/matcher/test_phrase_matcher.py b/spacy/tests/matcher/test_phrase_matcher.py
index 8a8d9eb84..7335bbdf1 100644
--- a/spacy/tests/matcher/test_phrase_matcher.py
+++ b/spacy/tests/matcher/test_phrase_matcher.py
@@ -1,14 +1,14 @@
-import pytest
import warnings
+
+import pytest
import srsly
from mock import Mock
from spacy.lang.en import English
-from spacy.matcher import PhraseMatcher, Matcher
+from spacy.matcher import Matcher, PhraseMatcher
from spacy.tokens import Doc, Span
from spacy.vocab import Vocab
-
from ..util import make_tempdir
diff --git a/spacy/tests/morphology/test_morph_features.py b/spacy/tests/morphology/test_morph_features.py
index 0693da690..ae20f9ba8 100644
--- a/spacy/tests/morphology/test_morph_features.py
+++ b/spacy/tests/morphology/test_morph_features.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.morphology import Morphology
from spacy.strings import StringStore, get_string_id
diff --git a/spacy/tests/morphology/test_morph_pickle.py b/spacy/tests/morphology/test_morph_pickle.py
index d9b0e3476..5c1a8a31e 100644
--- a/spacy/tests/morphology/test_morph_pickle.py
+++ b/spacy/tests/morphology/test_morph_pickle.py
@@ -1,5 +1,7 @@
-import pytest
import pickle
+
+import pytest
+
from spacy.morphology import Morphology
from spacy.strings import StringStore
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index b403f274f..9e83d5fb1 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -13,6 +13,7 @@ def test_build_dependencies():
"hypothesis",
"pre-commit",
"black",
+ "isort",
"mypy",
"types-dataclasses",
"types-mock",
diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py
index f89e993e9..89626597d 100644
--- a/spacy/tests/parser/test_add_label.py
+++ b/spacy/tests/parser/test_add_label.py
@@ -1,14 +1,15 @@
import pytest
from thinc.api import Adam, fix_random_seed
+
from spacy import registry
-from spacy.language import Language
from spacy.attrs import NORM
-from spacy.vocab import Vocab
-from spacy.training import Example
-from spacy.tokens import Doc
+from spacy.language import Language
from spacy.pipeline import DependencyParser, EntityRecognizer
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py
index bb226f9c5..fafd23268 100644
--- a/spacy/tests/parser/test_arc_eager_oracle.py
+++ b/spacy/tests/parser/test_arc_eager_oracle.py
@@ -1,12 +1,13 @@
import pytest
-from spacy.vocab import Vocab
+
from spacy import registry
-from spacy.training import Example
from spacy.pipeline import DependencyParser
-from spacy.tokens import Doc
-from spacy.pipeline._parser_internals.nonproj import projectivize
from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline._parser_internals.nonproj import projectivize
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
def get_sequence_costs(M, words, heads, deps, transitions):
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 7198859b3..1509c31bb 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -1,21 +1,21 @@
+import logging
import random
import pytest
from numpy.testing import assert_equal
+from spacy import registry, util
from spacy.attrs import ENT_IOB
-from spacy import util, registry
from spacy.lang.en import English
from spacy.lang.it import Italian
from spacy.language import Language
from spacy.lookups import Lookups
from spacy.pipeline import EntityRecognizer
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.pipeline._parser_internals.ner import BiluoPushDown
-from spacy.training import Example, iob_to_biluo, split_bilu_label
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.tokens import Doc, Span
+from spacy.training import Example, iob_to_biluo, split_bilu_label
from spacy.vocab import Vocab
-import logging
from ..util import make_tempdir
diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py
index 1bb5d4aa5..5bef5758f 100644
--- a/spacy/tests/parser/test_neural_parser.py
+++ b/spacy/tests/parser/test_neural_parser.py
@@ -1,14 +1,14 @@
import pytest
+from thinc.api import Model
from spacy import registry
-from spacy.training import Example
-from spacy.vocab import Vocab
from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.pipeline.transition_parser import Parser
from spacy.tokens.doc import Doc
-from thinc.api import Model
-from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
-from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
+from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/parser/test_nn_beam.py b/spacy/tests/parser/test_nn_beam.py
index 4ba020ef0..f852e5cda 100644
--- a/spacy/tests/parser/test_nn_beam.py
+++ b/spacy/tests/parser/test_nn_beam.py
@@ -1,16 +1,17 @@
-import pytest
import hypothesis
import hypothesis.strategies
import numpy
-from spacy.vocab import Vocab
-from spacy.language import Language
-from spacy.pipeline._parser_internals.arc_eager import ArcEager
-from spacy.tokens import Doc
-from spacy.pipeline._parser_internals._beam_utils import BeamBatch
-from spacy.pipeline._parser_internals.stateclass import StateClass
-from spacy.training import Example
+import pytest
from thinc.tests.strategies import ndarrays_of_shape
+from spacy.language import Language
+from spacy.pipeline._parser_internals._beam_utils import BeamBatch
+from spacy.pipeline._parser_internals.arc_eager import ArcEager
+from spacy.pipeline._parser_internals.stateclass import StateClass
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
+
@pytest.fixture(scope="module")
def vocab():
diff --git a/spacy/tests/parser/test_nonproj.py b/spacy/tests/parser/test_nonproj.py
index 051d0ef0c..f4e09fc91 100644
--- a/spacy/tests/parser/test_nonproj.py
+++ b/spacy/tests/parser/test_nonproj.py
@@ -1,7 +1,12 @@
import pytest
-from spacy.pipeline._parser_internals.nonproj import ancestors, contains_cycle
-from spacy.pipeline._parser_internals.nonproj import is_nonproj_tree, is_nonproj_arc
+
from spacy.pipeline._parser_internals import nonproj
+from spacy.pipeline._parser_internals.nonproj import (
+ ancestors,
+ contains_cycle,
+ is_nonproj_arc,
+ is_nonproj_tree,
+)
from spacy.tokens import Doc
diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py
index 4b05c6721..3565c62af 100644
--- a/spacy/tests/parser/test_parse.py
+++ b/spacy/tests/parser/test_parse.py
@@ -5,12 +5,12 @@ from thinc.api import Adam
from spacy import registry, util
from spacy.attrs import DEP, NORM
from spacy.lang.en import English
-from spacy.tokens import Doc
-from spacy.training import Example
-from spacy.vocab import Vocab
from spacy.pipeline import DependencyParser
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
+from spacy.tokens import Doc
+from spacy.training import Example
+from spacy.vocab import Vocab
from ..util import apply_transition_sequence, make_tempdir
diff --git a/spacy/tests/parser/test_parse_navigate.py b/spacy/tests/parser/test_parse_navigate.py
index 50da60594..d2f684fdc 100644
--- a/spacy/tests/parser/test_parse_navigate.py
+++ b/spacy/tests/parser/test_parse_navigate.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py
index d71388900..dcbb9679d 100644
--- a/spacy/tests/parser/test_preset_sbd.py
+++ b/spacy/tests/parser/test_preset_sbd.py
@@ -1,12 +1,13 @@
import pytest
from thinc.api import Adam
-from spacy.attrs import NORM
-from spacy.vocab import Vocab
+
from spacy import registry
-from spacy.training import Example
+from spacy.attrs import NORM
+from spacy.pipeline import DependencyParser
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
from spacy.tokens import Doc
-from spacy.pipeline import DependencyParser
+from spacy.training import Example
+from spacy.vocab import Vocab
@pytest.fixture
diff --git a/spacy/tests/parser/test_space_attachment.py b/spacy/tests/parser/test_space_attachment.py
index 2b80272d6..30e66b37a 100644
--- a/spacy/tests/parser/test_space_attachment.py
+++ b/spacy/tests/parser/test_space_attachment.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc
from ..util import apply_transition_sequence
diff --git a/spacy/tests/parser/test_state.py b/spacy/tests/parser/test_state.py
index ca1755c48..0febc3d09 100644
--- a/spacy/tests/parser/test_state.py
+++ b/spacy/tests/parser/test_state.py
@@ -1,8 +1,8 @@
import pytest
+from spacy.pipeline._parser_internals.stateclass import StateClass
from spacy.tokens.doc import Doc
from spacy.vocab import Vocab
-from spacy.pipeline._parser_internals.stateclass import StateClass
@pytest.fixture
diff --git a/spacy/tests/pipeline/test_analysis.py b/spacy/tests/pipeline/test_analysis.py
index df3d7dff5..503b501ce 100644
--- a/spacy/tests/pipeline/test_analysis.py
+++ b/spacy/tests/pipeline/test_analysis.py
@@ -1,7 +1,8 @@
+import pytest
+from mock import Mock
+
from spacy.language import Language
from spacy.pipe_analysis import get_attr_info, validate_attrs
-from mock import Mock
-import pytest
def test_component_decorator_assigns():
diff --git a/spacy/tests/pipeline/test_annotates_on_update.py b/spacy/tests/pipeline/test_annotates_on_update.py
index 869b8b874..d4feebd30 100644
--- a/spacy/tests/pipeline/test_annotates_on_update.py
+++ b/spacy/tests/pipeline/test_annotates_on_update.py
@@ -1,12 +1,13 @@
from typing import Callable, Iterable, Iterator
-import pytest

+import pytest
from thinc.api import Config
+
+from spacy.lang.en import English
from spacy.language import Language
from spacy.training import Example
from spacy.training.loop import train
-from spacy.lang.en import English
-from spacy.util import registry, load_model_from_config
+from spacy.util import load_model_from_config, registry
@pytest.fixture
diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py
index dab3ebf57..06587b4be 100644
--- a/spacy/tests/pipeline/test_attributeruler.py
+++ b/spacy/tests/pipeline/test_attributeruler.py
@@ -1,10 +1,11 @@
-import pytest
import numpy
-from spacy.training import Example
+import pytest
+
+from spacy import registry, util
from spacy.lang.en import English
from spacy.pipeline import AttributeRuler
-from spacy import util, registry
from spacy.tokens import Doc
+from spacy.training import Example
from ..util import make_tempdir
diff --git a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
index 128d75680..5a8f0aee2 100644
--- a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
+++ b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
@@ -1,16 +1,17 @@
import pickle
+
+import hypothesis.strategies as st
import pytest
from hypothesis import given
-import hypothesis.strategies as st
+
from spacy import util
from spacy.lang.en import English
from spacy.language import Language
from spacy.pipeline._edit_tree_internals.edit_trees import EditTrees
-from spacy.training import Example
from spacy.strings import StringStore
+from spacy.training import Example
from spacy.util import make_tempdir
-
TRAIN_DATA = [
("She likes green eggs", {"lemmas": ["she", "like", "green", "egg"]}),
("Eat blue ham", {"lemmas": ["eat", "blue", "ham"]}),
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index fc960cb01..00771a0f0 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,12 +1,12 @@
-from typing import Callable, Iterable, Dict, Any, Tuple
+from typing import Any, Callable, Dict, Iterable, Tuple
import pytest
from numpy.testing import assert_equal
-from spacy import registry, util, Language
+from spacy import Language, registry, util
from spacy.attrs import ENT_KB_ID
from spacy.compat import pickle
-from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
+from spacy.kb import Candidate, InMemoryLookupKB, KnowledgeBase, get_candidates
from spacy.lang.en import English
from spacy.ml import load_kb
from spacy.ml.models.entity_linker import build_span_maker
@@ -15,7 +15,7 @@ from spacy.pipeline.legacy import EntityLinker_v1
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.scorer import Scorer
from spacy.tests.util import make_tempdir
-from spacy.tokens import Span, Doc
+from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.util import ensure_path
from spacy.vocab import Vocab
diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py
index 417f930cb..d0ab00391 100644
--- a/spacy/tests/pipeline/test_entity_ruler.py
+++ b/spacy/tests/pipeline/test_entity_ruler.py
@@ -1,16 +1,14 @@
import pytest
+from thinc.api import NumpyOps, get_current_ops
from spacy import registry
-from spacy.tokens import Doc, Span
-from spacy.language import Language
-from spacy.lang.en import English
-from spacy.pipeline import EntityRuler, EntityRecognizer, merge_entities
-from spacy.pipeline import SpanRuler
-from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.errors import MatchPatternError
+from spacy.lang.en import English
+from spacy.language import Language
+from spacy.pipeline import EntityRecognizer, EntityRuler, SpanRuler, merge_entities
+from spacy.pipeline.ner import DEFAULT_NER_MODEL
from spacy.tests.util import make_tempdir
-
-from thinc.api import NumpyOps, get_current_ops
+from spacy.tokens import Doc, Span
ENTITY_RULERS = ["entity_ruler", "future_entity_ruler"]
diff --git a/spacy/tests/pipeline/test_functions.py b/spacy/tests/pipeline/test_functions.py
index e4adfe2fe..f4db4ee98 100644
--- a/spacy/tests/pipeline/test_functions.py
+++ b/spacy/tests/pipeline/test_functions.py
@@ -1,7 +1,8 @@
import pytest
-from spacy.pipeline.functions import merge_subtokens
+
from spacy.language import Language
-from spacy.tokens import Span, Doc
+from spacy.pipeline.functions import merge_subtokens
+from spacy.tokens import Doc, Span
from ..doc.test_underscore import clean_underscore # noqa: F401
diff --git a/spacy/tests/pipeline/test_initialize.py b/spacy/tests/pipeline/test_initialize.py
index c9b514770..6dd4114f1 100644
--- a/spacy/tests/pipeline/test_initialize.py
+++ b/spacy/tests/pipeline/test_initialize.py
@@ -1,9 +1,10 @@
import pytest
-from spacy.language import Language
-from spacy.lang.en import English
-from spacy.training import Example
-from thinc.api import ConfigValidationError
from pydantic import StrictBool
+from thinc.api import ConfigValidationError
+
+from spacy.lang.en import English
+from spacy.language import Language
+from spacy.training import Example
def test_initialize_arguments():
diff --git a/spacy/tests/pipeline/test_lemmatizer.py b/spacy/tests/pipeline/test_lemmatizer.py
index 0d2d3d6e5..ccc2e0b15 100644
--- a/spacy/tests/pipeline/test_lemmatizer.py
+++ b/spacy/tests/pipeline/test_lemmatizer.py
@@ -1,6 +1,8 @@
-import pytest
import pickle
-from spacy import util, registry
+
+import pytest
+
+from spacy import registry, util
from spacy.lang.en import English
from spacy.lookups import Lookups
diff --git a/spacy/tests/pipeline/test_models.py b/spacy/tests/pipeline/test_models.py
index e3fd28d0f..fef0017a8 100644
--- a/spacy/tests/pipeline/test_models.py
+++ b/spacy/tests/pipeline/test_models.py
@@ -3,7 +3,6 @@ from typing import List
import numpy
import pytest
from numpy.testing import assert_almost_equal
-from spacy.vocab import Vocab
from thinc.api import Model, data_validation, get_current_ops
from thinc.types import Array2d, Ragged
@@ -11,7 +10,7 @@ from spacy.lang.en import English
from spacy.ml import FeatureExtractor, StaticVectors
from spacy.ml._character_embed import CharacterEmbed
from spacy.tokens import Doc
-
+from spacy.vocab import Vocab
OPS = get_current_ops()
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index 74c571ccf..0d895f236 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -1,16 +1,15 @@
import pytest
-from numpy.testing import assert_equal, assert_almost_equal
-
+from numpy.testing import assert_almost_equal, assert_equal
from thinc.api import get_current_ops
from spacy import util
-from spacy.training import Example
+from spacy.attrs import MORPH
from spacy.lang.en import English
from spacy.language import Language
-from spacy.tests.util import make_tempdir
from spacy.morphology import Morphology
-from spacy.attrs import MORPH
+from spacy.tests.util import make_tempdir
from spacy.tokens import Doc
+from spacy.training import Example
def test_label_types():
diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py
index 232b0512e..0f1454b55 100644
--- a/spacy/tests/pipeline/test_pipe_factories.py
+++ b/spacy/tests/pipeline/test_pipe_factories.py
@@ -1,14 +1,14 @@
import pytest
+from pydantic import StrictInt, StrictStr
+from thinc.api import ConfigValidationError, Linear, Model
import spacy
-from spacy.language import Language
-from spacy.lang.en import English
from spacy.lang.de import German
+from spacy.lang.en import English
+from spacy.language import Language
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.tokens import Doc
-from spacy.util import registry, SimpleFrozenDict, combine_score_weights
-from thinc.api import Model, Linear, ConfigValidationError
-from pydantic import StrictInt, StrictStr
+from spacy.util import SimpleFrozenDict, combine_score_weights, registry
from ..util import make_tempdir
diff --git a/spacy/tests/pipeline/test_sentencizer.py b/spacy/tests/pipeline/test_sentencizer.py
index 5dd0fef43..9b1ddd530 100644
--- a/spacy/tests/pipeline/test_sentencizer.py
+++ b/spacy/tests/pipeline/test_sentencizer.py
@@ -1,8 +1,9 @@
import pytest
+
import spacy
+from spacy.lang.en import English
from spacy.pipeline import Sentencizer
from spacy.tokens import Doc
-from spacy.lang.en import English
def test_sentencizer(en_vocab):
diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py
index 047f59bef..6c7655812 100644
--- a/spacy/tests/pipeline/test_senter.py
+++ b/spacy/tests/pipeline/test_senter.py
@@ -1,12 +1,12 @@
import pytest
from numpy.testing import assert_equal
-from spacy.attrs import SENT_START
from spacy import util
-from spacy.training import Example
+from spacy.attrs import SENT_START
from spacy.lang.en import English
from spacy.language import Language
from spacy.tests.util import make_tempdir
+from spacy.training import Example
def test_label_types():
diff --git a/spacy/tests/pipeline/test_span_finder.py b/spacy/tests/pipeline/test_span_finder.py
index 91b08cabf..1a8789fff 100644
--- a/spacy/tests/pipeline/test_span_finder.py
+++ b/spacy/tests/pipeline/test_span_finder.py
@@ -1,15 +1,13 @@
import pytest
from thinc.api import Config
-from spacy.language import Language
+from spacy import util
from spacy.lang.en import English
+from spacy.language import Language
from spacy.pipeline.span_finder import span_finder_default_config
from spacy.tokens import Doc
from spacy.training import Example
-from spacy import util
-from spacy.util import registry
-from spacy.util import fix_random_seed, make_tempdir
-
+from spacy.util import fix_random_seed, make_tempdir, registry
SPANS_KEY = "pytest"
TRAIN_DATA = [
diff --git a/spacy/tests/pipeline/test_span_ruler.py b/spacy/tests/pipeline/test_span_ruler.py
index 794815359..0a8616f44 100644
--- a/spacy/tests/pipeline/test_span_ruler.py
+++ b/spacy/tests/pipeline/test_span_ruler.py
@@ -1,13 +1,12 @@
import pytest
+from thinc.api import NumpyOps, get_current_ops
import spacy
from spacy import registry
from spacy.errors import MatchPatternError
+from spacy.tests.util import make_tempdir
from spacy.tokens import Span
from spacy.training import Example
-from spacy.tests.util import make_tempdir
-
-from thinc.api import NumpyOps, get_current_ops
@pytest.fixture
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index b7024cf36..9405a78e0 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -1,7 +1,7 @@
-import pytest
import numpy
-from numpy.testing import assert_array_equal, assert_almost_equal
-from thinc.api import get_current_ops, NumpyOps, Ragged
+import pytest
+from numpy.testing import assert_almost_equal, assert_array_equal
+from thinc.api import NumpyOps, Ragged, get_current_ops
from spacy import util
from spacy.lang.en import English
@@ -9,7 +9,7 @@ from spacy.language import Language
from spacy.tokens import SpanGroup
from spacy.tokens._dict_proxies import SpanGroups
from spacy.training import Example
-from spacy.util import fix_random_seed, registry, make_tempdir
+from spacy.util import fix_random_seed, make_tempdir, registry
OPS = get_current_ops()
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 746f32ee3..4b5f1ee99 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,12 +1,12 @@
import pytest
-from numpy.testing import assert_equal, assert_almost_equal
-from spacy.attrs import TAG
+from numpy.testing import assert_almost_equal, assert_equal
+from thinc.api import compounding, get_current_ops
from spacy import util
-from spacy.training import Example
+from spacy.attrs import TAG
from spacy.lang.en import English
from spacy.language import Language
-from thinc.api import compounding, get_current_ops
+from spacy.training import Example
from ..util import make_tempdir
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index d042f3445..9ce5909f1 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -12,12 +12,16 @@ from spacy.cli.evaluate import print_prf_per_type, print_textcats_auc_per_cat
from spacy.lang.en import English
from spacy.language import Language
from spacy.pipeline import TextCategorizer
-from spacy.pipeline.textcat import single_label_bow_config
-from spacy.pipeline.textcat import single_label_cnn_config
-from spacy.pipeline.textcat import single_label_default_config
-from spacy.pipeline.textcat_multilabel import multi_label_bow_config
-from spacy.pipeline.textcat_multilabel import multi_label_cnn_config
-from spacy.pipeline.textcat_multilabel import multi_label_default_config
+from spacy.pipeline.textcat import (
+ single_label_bow_config,
+ single_label_cnn_config,
+ single_label_default_config,
+)
+from spacy.pipeline.textcat_multilabel import (
+ multi_label_bow_config,
+ multi_label_cnn_config,
+ multi_label_default_config,
+)
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
from spacy.scorer import Scorer
from spacy.tokens import Doc, DocBin
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index e423d9a19..76c7d6f62 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -1,17 +1,21 @@
import pytest
-from spacy.ml.models.tok2vec import build_Tok2Vec_model
-from spacy.ml.models.tok2vec import MultiHashEmbed, MaxoutWindowEncoder
-from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
-from spacy.vocab import Vocab
-from spacy.tokens import Doc
-from spacy.training import Example
+from numpy.testing import assert_array_equal
+from thinc.api import Config, get_current_ops
+
from spacy import util
from spacy.lang.en import English
+from spacy.ml.models.tok2vec import (
+ MaxoutWindowEncoder,
+ MultiHashEmbed,
+ build_Tok2Vec_model,
+)
+from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
+from spacy.tokens import Doc
+from spacy.training import Example
from spacy.util import registry
-from thinc.api import Config, get_current_ops
-from numpy.testing import assert_array_equal
+from spacy.vocab import Vocab
-from ..util import get_batch, make_tempdir, add_vecs_to_vocab
+from ..util import add_vecs_to_vocab, get_batch, make_tempdir
def test_empty_doc():
diff --git a/spacy/tests/serialize/test_resource_warning.py b/spacy/tests/serialize/test_resource_warning.py
index befd05635..ab6e6e9ee 100644
--- a/spacy/tests/serialize/test_resource_warning.py
+++ b/spacy/tests/serialize/test_resource_warning.py
@@ -1,12 +1,14 @@
import warnings
from unittest import TestCase
+
import pytest
import srsly
from numpy import zeros
+
from spacy.kb.kb_in_memory import InMemoryLookupKB, Writer
-from spacy.vectors import Vectors
from spacy.language import Language
from spacy.pipeline import TrainablePipe
+from spacy.vectors import Vectors
from spacy.vocab import Vocab
from ..util import make_tempdir
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 85e6f8b2c..3e158ad8b 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -5,13 +5,20 @@ from thinc.api import Config, ConfigValidationError
import spacy
from spacy.lang.de import German
from spacy.lang.en import English
-from spacy.language import DEFAULT_CONFIG, DEFAULT_CONFIG_PRETRAIN_PATH
-from spacy.language import Language
-from spacy.ml.models import MaxoutWindowEncoder, MultiHashEmbed
-from spacy.ml.models import build_tb_parser_model, build_Tok2Vec_model
+from spacy.language import DEFAULT_CONFIG, DEFAULT_CONFIG_PRETRAIN_PATH, Language
+from spacy.ml.models import (
+ MaxoutWindowEncoder,
+ MultiHashEmbed,
+ build_tb_parser_model,
+ build_Tok2Vec_model,
+)
from spacy.schemas import ConfigSchema, ConfigSchemaPretrain
-from spacy.util import load_config, load_config_from_str
-from spacy.util import load_model_from_config, registry
+from spacy.util import (
+ load_config,
+ load_config_from_str,
+ load_model_from_config,
+ registry,
+)
from ..util import make_tempdir
diff --git a/spacy/tests/serialize/test_serialize_extension_attrs.py b/spacy/tests/serialize/test_serialize_extension_attrs.py
index 9cfa1a552..f3b6cb000 100644
--- a/spacy/tests/serialize/test_serialize_extension_attrs.py
+++ b/spacy/tests/serialize/test_serialize_extension_attrs.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.tokens import Doc, Token
from spacy.vocab import Vocab
diff --git a/spacy/tests/serialize/test_serialize_kb.py b/spacy/tests/serialize/test_serialize_kb.py
index f9d2e226b..99eb8cd86 100644
--- a/spacy/tests/serialize/test_serialize_kb.py
+++ b/spacy/tests/serialize/test_serialize_kb.py
@@ -1,16 +1,16 @@
from pathlib import Path
-from typing import Callable, Iterable, Any, Dict
+from typing import Any, Callable, Dict, Iterable
import srsly
-
-from spacy import util, Errors
-from spacy.util import ensure_path, registry, load_model_from_config, SimpleFrozenList
-from spacy.kb.kb_in_memory import InMemoryLookupKB
-from spacy.vocab import Vocab
+from numpy import zeros
from thinc.api import Config
+from spacy import Errors, util
+from spacy.kb.kb_in_memory import InMemoryLookupKB
+from spacy.util import SimpleFrozenList, ensure_path, load_model_from_config, registry
+from spacy.vocab import Vocab
+
from ..util import make_tempdir
-from numpy import zeros
def test_serialize_kb_disk(en_vocab):
diff --git a/spacy/tests/serialize/test_serialize_language.py b/spacy/tests/serialize/test_serialize_language.py
index c03287548..9c36015a9 100644
--- a/spacy/tests/serialize/test_serialize_language.py
+++ b/spacy/tests/serialize/test_serialize_language.py
@@ -1,11 +1,11 @@
-import re
import pickle
+import re
import pytest
-from spacy.language import Language
-from spacy.lang.it import Italian
from spacy.lang.en import English
+from spacy.lang.it import Italian
+from spacy.language import Language
from spacy.tokenizer import Tokenizer
from spacy.training import Example
from spacy.util import load_config_from_str
diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py
index 9fcf18e2d..6bbe743a1 100644
--- a/spacy/tests/serialize/test_serialize_pipeline.py
+++ b/spacy/tests/serialize/test_serialize_pipeline.py
@@ -8,15 +8,21 @@ import spacy
from spacy import Vocab, load, registry
from spacy.lang.en import English
from spacy.language import Language
-from spacy.pipeline import DependencyParser, EntityRecognizer, EntityRuler
-from spacy.pipeline import SentenceRecognizer, Tagger, TextCategorizer
-from spacy.pipeline import TrainablePipe
+from spacy.pipeline import (
+ DependencyParser,
+ EntityRecognizer,
+ EntityRuler,
+ SentenceRecognizer,
+ Tagger,
+ TextCategorizer,
+ TrainablePipe,
+)
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
from spacy.pipeline.senter import DEFAULT_SENTER_MODEL
from spacy.pipeline.tagger import DEFAULT_TAGGER_MODEL
from spacy.pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
-from spacy.util import ensure_path, load_model
from spacy.tokens import Span
+from spacy.util import ensure_path, load_model
from ..util import make_tempdir
diff --git a/spacy/tests/serialize/test_serialize_tokenizer.py b/spacy/tests/serialize/test_serialize_tokenizer.py
index 9b74d7721..e998a78b4 100644
--- a/spacy/tests/serialize/test_serialize_tokenizer.py
+++ b/spacy/tests/serialize/test_serialize_tokenizer.py
@@ -7,8 +7,13 @@ from spacy.attrs import ENT_IOB, ENT_TYPE
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
from spacy.tokens import Doc
-from spacy.util import compile_infix_regex, compile_prefix_regex
-from spacy.util import compile_suffix_regex, get_lang_class, load_model
+from spacy.util import (
+ compile_infix_regex,
+ compile_prefix_regex,
+ compile_suffix_regex,
+ get_lang_class,
+ load_model,
+)
from ..util import assert_packed_msg_equal, make_tempdir
diff --git a/spacy/tests/test_architectures.py b/spacy/tests/test_architectures.py
index 26eabd4e5..3b5804a69 100644
--- a/spacy/tests/test_architectures.py
+++ b/spacy/tests/test_architectures.py
@@ -1,7 +1,8 @@
import pytest
-from spacy import registry
-from thinc.api import Linear
from catalogue import RegistryError
+from thinc.api import Linear
+
+from spacy import registry
def test_get_architecture():
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 351e6bf11..88d3ffa45 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1,43 +1,51 @@
-import os
import math
-from collections import Counter
-from typing import Tuple, List, Dict, Any
+import os
import time
+from collections import Counter
from pathlib import Path
+from typing import Any, Dict, List, Tuple
-import spacy
import numpy
import pytest
import srsly
from click import NoSuchOption
from packaging.specifiers import SpecifierSet
from thinc.api import Config, ConfigValidationError
-from spacy.tokens import DocBin
+import spacy
from spacy import about
from spacy.cli import info
-from spacy.cli._util import is_subpath_of, load_project_config, walk_directory
-from spacy.cli._util import parse_config_overrides, string_to_list
-from spacy.cli._util import substitute_project_variables
-from spacy.cli._util import validate_project_commands
-from spacy.cli._util import upload_file, download_file
-from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
-from spacy.cli.debug_data import _get_labels_from_spancat
-from spacy.cli.debug_data import _get_distribution, _get_kl_divergence
-from spacy.cli.debug_data import _get_span_characteristics
-from spacy.cli.debug_data import _print_span_characteristics
-from spacy.cli.debug_data import _get_spans_length_freq_dist
+from spacy.cli._util import (
+ download_file,
+ is_subpath_of,
+ load_project_config,
+ parse_config_overrides,
+ string_to_list,
+ substitute_project_variables,
+ upload_file,
+ validate_project_commands,
+ walk_directory,
+)
+from spacy.cli.apply import apply
+from spacy.cli.debug_data import (
+ _compile_gold,
+ _get_distribution,
+ _get_kl_divergence,
+ _get_labels_from_model,
+ _get_labels_from_spancat,
+ _get_span_characteristics,
+ _get_spans_length_freq_dist,
+ _print_span_characteristics,
+)
from spacy.cli.download import get_compatibility, get_version
from spacy.cli.evaluate import render_parses
-from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
+from spacy.cli.find_threshold import find_threshold
+from spacy.cli.init_config import RECOMMENDATIONS, fill_config, init_config
from spacy.cli.init_pipeline import _init_labels
-from spacy.cli.package import get_third_party_dependencies
-from spacy.cli.package import _is_permitted_package_name
+from spacy.cli.package import _is_permitted_package_name, get_third_party_dependencies
from spacy.cli.project.remote_storage import RemoteStorage
from spacy.cli.project.run import _check_requirements
from spacy.cli.validate import get_model_pkgs
-from spacy.cli.apply import apply
-from spacy.cli.find_threshold import find_threshold
from spacy.lang.en import English
from spacy.lang.nl import Dutch
from spacy.language import Language
@@ -45,9 +53,8 @@ from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate
from spacy.tokens import Doc, DocBin
from spacy.tokens.span import Span
from spacy.training import Example, docs_to_json, offsets_to_biluo_tags
-from spacy.training.converters import conll_ner_to_docs, conllu_to_docs
-from spacy.training.converters import iob_to_docs
-from spacy.util import ENV_VARS, get_minor_version, load_model_from_config, load_config
+from spacy.training.converters import conll_ner_to_docs, conllu_to_docs, iob_to_docs
+from spacy.util import ENV_VARS, get_minor_version, load_config, load_model_from_config
from .util import make_tempdir
diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 5ff4dfa26..3a426113b 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -1,11 +1,13 @@
import os
from pathlib import Path
+
import pytest
import srsly
from typer.testing import CliRunner
-from spacy.tokens import DocBin, Doc
from spacy.cli._util import app, get_git_version
+from spacy.tokens import Doc, DocBin
+
from .util import make_tempdir, normalize_whitespace
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index 837a92e02..ce103068a 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -5,7 +5,7 @@ from spacy import displacy
from spacy.displacy.render import DependencyRenderer, EntityRenderer
from spacy.lang.en import English
from spacy.lang.fa import Persian
-from spacy.tokens import Span, Doc
+from spacy.tokens import Doc, Span
@pytest.mark.issue(2361)
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 236856dad..51eec3239 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -1,21 +1,22 @@
import itertools
import logging
from unittest import mock
+
import pytest
+from thinc.api import CupyOps, NumpyOps, get_current_ops
+
+import spacy
+from spacy.lang.de import German
+from spacy.lang.en import English
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
-from spacy.vocab import Vocab
from spacy.training import Example
-from spacy.lang.en import English
-from spacy.lang.de import German
-from spacy.util import registry, ignore_error, raise_error, find_matching_language
-import spacy
-from thinc.api import CupyOps, NumpyOps, get_current_ops
+from spacy.util import find_matching_language, ignore_error, raise_error, registry
+from spacy.vocab import Vocab
from .util import add_vecs_to_vocab, assert_docs_equal
-
try:
import torch
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 618f17334..19163d350 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -1,24 +1,39 @@
-import pytest
-import os
import ctypes
+import os
from pathlib import Path
-from spacy.about import __version__ as spacy_version
-from spacy import util
-from spacy import prefer_gpu, require_gpu, require_cpu
-from spacy.ml._precomputable_affine import PrecomputableAffine
-from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
-from spacy.util import dot_to_object, SimpleFrozenList, import_file
-from spacy.util import to_ternary_int, find_available_port
-from thinc.api import Config, Optimizer, ConfigValidationError
-from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps
+
+import pytest
+from pydantic import ValidationError
+from thinc.api import (
+ Config,
+ ConfigValidationError,
+ CupyOps,
+ MPSOps,
+ NumpyOps,
+ Optimizer,
+ get_current_ops,
+ set_current_ops,
+)
from thinc.compat import has_cupy_gpu, has_torch_mps_gpu
-from spacy.training.batchers import minibatch_by_words
+
+from spacy import prefer_gpu, require_cpu, require_gpu, util
+from spacy.about import __version__ as spacy_version
from spacy.lang.en import English
from spacy.lang.nl import Dutch
from spacy.language import DEFAULT_CONFIG_PATH
+from spacy.ml._precomputable_affine import (
+ PrecomputableAffine,
+ _backprop_precomputable_affine_padding,
+)
from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema
-from pydantic import ValidationError
-
+from spacy.training.batchers import minibatch_by_words
+from spacy.util import (
+ SimpleFrozenList,
+ dot_to_object,
+ find_available_port,
+ import_file,
+ to_ternary_int,
+)
from .util import get_random_doc, make_tempdir
@@ -441,7 +456,7 @@ def test_find_available_port():
port = 5000
assert find_available_port(port, host) == port, "Port 5000 isn't free"
- from wsgiref.simple_server import make_server, demo_app
+ from wsgiref.simple_server import demo_app, make_server
with make_server(host, port, demo_app) as httpd:
with pytest.warns(UserWarning, match="already in use"):
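The parenthesized blocks appearing throughout (such as the thinc.api import above) are isort's vertical hanging indent, which the black profile enables (multi_line_output = 3, include_trailing_comma = True, line_length = 88). Any combined from-import that no longer fits within 88 characters is wrapped roughly like this:

# Wrapped only once the single-line form exceeds the 88-character limit:
from thinc.api import (
    Config,
    ConfigValidationError,
    get_current_ops,
)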
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index d91ed1201..e6692ad92 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -1,16 +1,31 @@
from typing import List
-import pytest
-from thinc.api import fix_random_seed, Adam, set_dropout_rate
-from thinc.api import Ragged, reduce_mean, Logistic, chain, Relu
-from numpy.testing import assert_array_equal, assert_array_almost_equal
+
import numpy
-from spacy.ml.models import build_Tok2Vec_model, MultiHashEmbed, MaxoutWindowEncoder
-from spacy.ml.models import build_bow_text_classifier, build_simple_cnn_text_classifier
-from spacy.ml.models import build_spancat_model
-from spacy.ml.staticvectors import StaticVectors
-from spacy.ml.extract_spans import extract_spans, _get_span_indices
+import pytest
+from numpy.testing import assert_array_almost_equal, assert_array_equal
+from thinc.api import (
+ Adam,
+ Logistic,
+ Ragged,
+ Relu,
+ chain,
+ fix_random_seed,
+ reduce_mean,
+ set_dropout_rate,
+)
+
from spacy.lang.en import English
from spacy.lang.en.examples import sentences as EN_SENTENCES
+from spacy.ml.extract_spans import _get_span_indices, extract_spans
+from spacy.ml.models import (
+ MaxoutWindowEncoder,
+ MultiHashEmbed,
+ build_bow_text_classifier,
+ build_simple_cnn_text_classifier,
+ build_spancat_model,
+ build_Tok2Vec_model,
+)
+from spacy.ml.staticvectors import StaticVectors
def get_textcat_bow_kwargs():
diff --git a/spacy/tests/test_pickles.py b/spacy/tests/test_pickles.py
index 0c56ae0d2..e3acd27a3 100644
--- a/spacy/tests/test_pickles.py
+++ b/spacy/tests/test_pickles.py
@@ -1,11 +1,12 @@
-import pytest
import numpy
+import pytest
import srsly
+
+from spacy.attrs import NORM
from spacy.lang.en import English
from spacy.strings import StringStore
from spacy.tokens import Doc
from spacy.vocab import Vocab
-from spacy.attrs import NORM
@pytest.mark.parametrize("text1,text2", [("hello", "bye")])
diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py
index f95c44149..95daf046c 100644
--- a/spacy/tests/test_scorer.py
+++ b/spacy/tests/test_scorer.py
@@ -1,13 +1,12 @@
-from numpy.testing import assert_almost_equal, assert_array_almost_equal
import pytest
+from numpy.testing import assert_almost_equal, assert_array_almost_equal
from pytest import approx
+
+from spacy.lang.en import English
+from spacy.scorer import PRFScore, ROCAUCScore, Scorer, _roc_auc_score, _roc_curve
+from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.iob_utils import offsets_to_biluo_tags
-from spacy.scorer import Scorer, ROCAUCScore, PRFScore
-from spacy.scorer import _roc_auc_score, _roc_curve
-from spacy.lang.en import English
-from spacy.tokens import Doc, Span
-
test_las_apple = [
[
diff --git a/spacy/tests/tokenizer/test_exceptions.py b/spacy/tests/tokenizer/test_exceptions.py
index 85716377a..1f8f52c79 100644
--- a/spacy/tests/tokenizer/test_exceptions.py
+++ b/spacy/tests/tokenizer/test_exceptions.py
@@ -1,4 +1,5 @@
import sys
+
import pytest
diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index 6af58b344..1ea5f78c9 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -3,15 +3,19 @@ import re
import numpy
import pytest
-from spacy.lang.en import English
from spacy.lang.de import German
+from spacy.lang.en import English
+from spacy.symbols import ORTH
from spacy.tokenizer import Tokenizer
from spacy.tokens import Doc
from spacy.training import Example
-from spacy.util import compile_prefix_regex, compile_suffix_regex, ensure_path
-from spacy.util import compile_infix_regex
+from spacy.util import (
+ compile_infix_regex,
+ compile_prefix_regex,
+ compile_suffix_regex,
+ ensure_path,
+)
from spacy.vocab import Vocab
-from spacy.symbols import ORTH
@pytest.mark.issue(743)
diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py
index 57e970f87..ff8812be1 100644
--- a/spacy/tests/tokenizer/test_urls.py
+++ b/spacy/tests/tokenizer/test_urls.py
@@ -2,7 +2,6 @@ import pytest
from spacy.lang.tokenizer_exceptions import BASE_EXCEPTIONS
-
URLS_BASIC = [
"http://www.nytimes.com/2016/04/20/us/politics/new-york-primary-preview.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=a-lede-package-region®ion=top-news&WT.nav=top-news&_r=0",
"www.red-stars.com",
diff --git a/spacy/tests/training/test_augmenters.py b/spacy/tests/training/test_augmenters.py
index 35860a199..49a83010b 100644
--- a/spacy/tests/training/test_augmenters.py
+++ b/spacy/tests/training/test_augmenters.py
@@ -1,13 +1,17 @@
-import pytest
-from spacy.pipeline._parser_internals.nonproj import contains_cycle
-from spacy.training import Corpus, Example
-from spacy.training.augment import create_orth_variants_augmenter
-from spacy.training.augment import create_lower_casing_augmenter
-from spacy.training.augment import make_whitespace_variant
-from spacy.lang.en import English
-from spacy.tokens import DocBin, Doc, Span
-from contextlib import contextmanager
import random
+from contextlib import contextmanager
+
+import pytest
+
+from spacy.lang.en import English
+from spacy.pipeline._parser_internals.nonproj import contains_cycle
+from spacy.tokens import Doc, DocBin, Span
+from spacy.training import Corpus, Example
+from spacy.training.augment import (
+ create_lower_casing_augmenter,
+ create_orth_variants_augmenter,
+ make_whitespace_variant,
+)
from ..util import make_tempdir
diff --git a/spacy/tests/training/test_corpus.py b/spacy/tests/training/test_corpus.py
index b4f9cc13a..e7cae9893 100644
--- a/spacy/tests/training/test_corpus.py
+++ b/spacy/tests/training/test_corpus.py
@@ -1,8 +1,9 @@
-from typing import IO, Generator, Iterable, List, TextIO, Tuple
+import tempfile
from contextlib import contextmanager
from pathlib import Path
+from typing import IO, Generator, Iterable, List, TextIO, Tuple
+
import pytest
-import tempfile
from spacy.lang.en import English
from spacy.training import Example, PlainTextCorpus
diff --git a/spacy/tests/training/test_logger.py b/spacy/tests/training/test_logger.py
index 0dfd0cbf4..48750026b 100644
--- a/spacy/tests/training/test_logger.py
+++ b/spacy/tests/training/test_logger.py
@@ -1,6 +1,6 @@
import pytest
-import spacy
-
+
+import spacy
from spacy.training import loggers
diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py
index 6b15603b3..88f819984 100644
--- a/spacy/tests/training/test_new_example.py
+++ b/spacy/tests/training/test_new_example.py
@@ -1,8 +1,9 @@
import pytest
-from spacy.training.example import Example
+
from spacy.tokens import Doc
-from spacy.vocab import Vocab
+from spacy.training.example import Example
from spacy.util import to_ternary_int
+from spacy.vocab import Vocab
def test_Example_init_requires_doc_objects():
diff --git a/spacy/tests/training/test_pretraining.py b/spacy/tests/training/test_pretraining.py
index 6cfdeed20..5e5f94622 100644
--- a/spacy/tests/training/test_pretraining.py
+++ b/spacy/tests/training/test_pretraining.py
@@ -1,4 +1,5 @@
from pathlib import Path
+
import numpy as np
import pytest
import srsly
@@ -6,14 +7,15 @@ from thinc.api import Config, get_current_ops
from spacy import util
from spacy.lang.en import English
+from spacy.language import DEFAULT_CONFIG_PATH, DEFAULT_CONFIG_PRETRAIN_PATH
+from spacy.ml.models.multi_task import create_pretrain_vectors
+from spacy.tokens import Doc, DocBin
from spacy.training.initialize import init_nlp
from spacy.training.loop import train
from spacy.training.pretrain import pretrain
-from spacy.tokens import Doc, DocBin
-from spacy.language import DEFAULT_CONFIG_PRETRAIN_PATH, DEFAULT_CONFIG_PATH
-from spacy.ml.models.multi_task import create_pretrain_vectors
from spacy.vectors import Vectors
from spacy.vocab import Vocab
+
from ..util import make_tempdir
pretrain_string_listener = """
diff --git a/spacy/tests/training/test_readers.py b/spacy/tests/training/test_readers.py
index 8c5c81625..22cf75272 100644
--- a/spacy/tests/training/test_readers.py
+++ b/spacy/tests/training/test_readers.py
@@ -1,10 +1,12 @@
-from typing import Dict, Iterable, Callable
+from typing import Callable, Dict, Iterable
+
import pytest
from thinc.api import Config, fix_random_seed
+
from spacy import Language
-from spacy.util import load_model_from_config, registry, resolve_dot_names
from spacy.schemas import ConfigSchemaTraining
from spacy.training import Example
+from spacy.util import load_model_from_config, registry, resolve_dot_names
def test_readers():
diff --git a/spacy/tests/training/test_rehearse.py b/spacy/tests/training/test_rehearse.py
index 5ac7fc217..7efe57a36 100644
--- a/spacy/tests/training/test_rehearse.py
+++ b/spacy/tests/training/test_rehearse.py
@@ -1,9 +1,9 @@
-import pytest
-import spacy
-
from typing import List
-from spacy.training import Example

+import pytest
+
+import spacy
+from spacy.training import Example
TRAIN_DATA = [
(
diff --git a/spacy/tests/training/test_training.py b/spacy/tests/training/test_training.py
index 7933ea31f..a492a8be3 100644
--- a/spacy/tests/training/test_training.py
+++ b/spacy/tests/training/test_training.py
@@ -2,20 +2,32 @@ import random
import numpy
import pytest
-import spacy
import srsly
+from thinc.api import Adam, compounding
+
+import spacy
from spacy.lang.en import English
from spacy.tokens import Doc, DocBin
-from spacy.training import Alignment, Corpus, Example, biluo_tags_to_offsets
-from spacy.training import biluo_tags_to_spans, docs_to_json, iob_to_biluo
-from spacy.training import offsets_to_biluo_tags
-from spacy.training.alignment_array import AlignmentArray
+from spacy.training import (
+ Alignment,
+ Corpus,
+ Example,
+ biluo_tags_to_offsets,
+ biluo_tags_to_spans,
+ docs_to_json,
+ iob_to_biluo,
+ offsets_to_biluo_tags,
+)
from spacy.training.align import get_alignments
+from spacy.training.alignment_array import AlignmentArray
from spacy.training.converters import json_to_docs
from spacy.training.loop import train_while_improving
-from spacy.util import get_words_and_spaces, load_model_from_path, minibatch
-from spacy.util import load_config_from_str
-from thinc.api import compounding, Adam
+from spacy.util import (
+ get_words_and_spaces,
+ load_config_from_str,
+ load_model_from_path,
+ minibatch,
+)
from ..util import make_tempdir
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index c2647558d..a5548898c 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -1,14 +1,16 @@
-import numpy
-import tempfile
import contextlib
import re
+import tempfile
+
+import numpy
import srsly
-from spacy.tokens import Doc
-from spacy.vocab import Vocab
-from spacy.util import make_tempdir # noqa: F401
-from spacy.training import split_bilu_label
from thinc.api import get_current_ops
+from spacy.tokens import Doc
+from spacy.training import split_bilu_label
+from spacy.util import make_tempdir # noqa: F401
+from spacy.vocab import Vocab
+
@contextlib.contextmanager
def make_tempfile(mode="r"):
diff --git a/spacy/tests/vocab_vectors/test_lexeme.py b/spacy/tests/vocab_vectors/test_lexeme.py
index d91f41db3..156e3391a 100644
--- a/spacy/tests/vocab_vectors/test_lexeme.py
+++ b/spacy/tests/vocab_vectors/test_lexeme.py
@@ -1,5 +1,6 @@
import numpy
import pytest
+
from spacy.attrs import IS_ALPHA, IS_DIGIT
from spacy.lookups import Lookups
from spacy.tokens import Doc
diff --git a/spacy/tests/vocab_vectors/test_lookups.py b/spacy/tests/vocab_vectors/test_lookups.py
index 94e31a072..addd3fe4f 100644
--- a/spacy/tests/vocab_vectors/test_lookups.py
+++ b/spacy/tests/vocab_vectors/test_lookups.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.lookups import Lookups, Table
from spacy.strings import get_string_id
from spacy.vocab import Vocab
diff --git a/spacy/tests/vocab_vectors/test_similarity.py b/spacy/tests/vocab_vectors/test_similarity.py
index 1efcdd81e..5a28f5414 100644
--- a/spacy/tests/vocab_vectors/test_similarity.py
+++ b/spacy/tests/vocab_vectors/test_similarity.py
@@ -1,9 +1,10 @@
-import pytest
import numpy
+import pytest
+
from spacy.tokens import Doc
from spacy.vocab import Vocab
-from ..util import get_cosine, add_vecs_to_vocab
+from ..util import add_vecs_to_vocab, get_cosine
@pytest.fixture
diff --git a/spacy/tests/vocab_vectors/test_stringstore.py b/spacy/tests/vocab_vectors/test_stringstore.py
index a0f8016af..61039fffd 100644
--- a/spacy/tests/vocab_vectors/test_stringstore.py
+++ b/spacy/tests/vocab_vectors/test_stringstore.py
@@ -1,4 +1,5 @@
import pytest
+
from spacy.strings import StringStore
diff --git a/spacy/tests/vocab_vectors/test_vocab_api.py b/spacy/tests/vocab_vectors/test_vocab_api.py
index b9c386eb8..e373b9d0b 100644
--- a/spacy/tests/vocab_vectors/test_vocab_api.py
+++ b/spacy/tests/vocab_vectors/test_vocab_api.py
@@ -1,6 +1,7 @@
import os
import pytest
+
from spacy.attrs import IS_ALPHA, LEMMA, ORTH
from spacy.lang.en import English
from spacy.parts_of_speech import NOUN, VERB
diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index e6a072053..f7585b45a 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -1,13 +1,13 @@
+from cymem.cymem cimport Pool
from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
-from cymem.cymem cimport Pool
-from .typedefs cimport hash_t
-from .structs cimport LexemeC, SpanC, TokenC
-from .strings cimport StringStore
-from .tokens.doc cimport Doc
-from .vocab cimport Vocab, LexemesOrTokens, _Cached
from .matcher.phrasematcher cimport PhraseMatcher
+from .strings cimport StringStore
+from .structs cimport LexemeC, SpanC, TokenC
+from .tokens.doc cimport Doc
+from .typedefs cimport hash_t
+from .vocab cimport LexemesOrTokens, Vocab, _Cached
cdef class Tokenizer:
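The Cython sources get the same grouping, plus one extra rule visible in every .pxd/.pyx hunk: compile-time cimports form their own block ahead of the runtime imports, reflecting that they are resolved by the Cython compiler rather than the interpreter. However these blocks were produced (isort itself does not understand cimport), the shape they converge on is:

# Layout used across the Cython hunks in this patch:
from cymem.cymem cimport Pool      # cimports first, alphabetized as a group
from libcpp.vector cimport vector

import re                          # ordinary runtime imports follow
import warnings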
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index a4a68ae8e..3861b1cee 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -1,29 +1,27 @@
# cython: embedsignature=True, profile=True, binding=True
+cimport cython
+from cymem.cymem cimport Pool
from cython.operator cimport dereference as deref
from cython.operator cimport preincrement as preinc
from libc.string cimport memcpy, memset
from libcpp.set cimport set as stdset
-from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
-cimport cython
import re
import warnings
-from .tokens.doc cimport Doc
-from .strings cimport hash_string
from .lexeme cimport EMPTY_LEXEME
+from .strings cimport hash_string
+from .tokens.doc cimport Doc
-from .attrs import intify_attrs
-from .symbols import ORTH, NORM
-from .errors import Errors, Warnings
from . import util
-from .util import registry, get_words_and_spaces
from .attrs import intify_attrs
-from .symbols import ORTH
+from .errors import Errors, Warnings
from .scorer import Scorer
-from .training import validate_examples
+from .symbols import NORM, ORTH
from .tokens import Span
+from .training import validate_examples
+from .util import get_words_and_spaces, registry
cdef class Tokenizer:
diff --git a/spacy/tokens/__init__.py b/spacy/tokens/__init__.py
index 64090925d..f4b2bf022 100644
--- a/spacy/tokens/__init__.py
+++ b/spacy/tokens/__init__.py
@@ -1,8 +1,8 @@
+from ._serialize import DocBin
from .doc import Doc
-from .token import Token
+from .morphanalysis import MorphAnalysis
from .span import Span
from .span_group import SpanGroup
-from ._serialize import DocBin
-from .morphanalysis import MorphAnalysis
+from .token import Token
__all__ = ["Doc", "Token", "Span", "SpanGroup", "DocBin", "MorphAnalysis"]
diff --git a/spacy/tokens/_dict_proxies.py b/spacy/tokens/_dict_proxies.py
index 6edcce13d..b2b496307 100644
--- a/spacy/tokens/_dict_proxies.py
+++ b/spacy/tokens/_dict_proxies.py
@@ -1,12 +1,12 @@
-from typing import Dict, Iterable, List, Tuple, Union, Optional, TYPE_CHECKING
import warnings
import weakref
from collections import UserDict
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
+
import srsly
-from .span_group import SpanGroup
from ..errors import Errors, Warnings
-
+from .span_group import SpanGroup
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/tokens/_retokenize.pyi b/spacy/tokens/_retokenize.pyi
index 8834d38c0..097fbd1a9 100644
--- a/spacy/tokens/_retokenize.pyi
+++ b/spacy/tokens/_retokenize.pyi
@@ -1,8 +1,9 @@
-from typing import Dict, Any, Union, List, Tuple
+from typing import Any, Dict, List, Tuple, Union
+
+from .. import Vocab
from .doc import Doc
from .span import Span
from .token import Token
-from .. import Vocab
class Retokenizer:
def __init__(self, doc: Doc) -> None: ...
diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index 43e6d4aa7..8ed707ab9 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -1,24 +1,24 @@
# cython: infer_types=True, bounds_check=False, profile=True
-from libc.string cimport memcpy, memset
-from libc.stdlib cimport malloc, free
from cymem.cymem cimport Pool
+from libc.stdlib cimport free, malloc
+from libc.string cimport memcpy, memset
-from thinc.api import get_array_module
import numpy
+from thinc.api import get_array_module
-from .doc cimport Doc, set_children_from_heads, token_by_start, token_by_end
+from ..attrs cimport MORPH, NORM
+from ..lexeme cimport EMPTY_LEXEME, Lexeme
+from ..structs cimport LexemeC, TokenC
+from ..vocab cimport Vocab
+from .doc cimport Doc, set_children_from_heads, token_by_end, token_by_start
from .span cimport Span
from .token cimport Token
-from ..lexeme cimport Lexeme, EMPTY_LEXEME
-from ..structs cimport LexemeC, TokenC
-from ..attrs cimport MORPH, NORM
-from ..vocab cimport Vocab
-from .underscore import is_writable_attr
from ..attrs import intify_attrs
-from ..util import SimpleFrozenDict
from ..errors import Errors
from ..strings import get_string_id
+from ..util import SimpleFrozenDict
+from .underscore import is_writable_attr
cdef class Retokenizer:
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 73c857d1f..873d85835 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -1,22 +1,20 @@
-from typing import List, Dict, Set, Iterable, Iterator, Union, Optional
-from pathlib import Path
-import numpy
-from numpy import ndarray
import zlib
+from pathlib import Path
+from typing import Dict, Iterable, Iterator, List, Optional, Set, Union
+
+import numpy
import srsly
+from numpy import ndarray
from thinc.api import NumpyOps
-from .doc import Doc
-from ..vocab import Vocab
+from ..attrs import IDS, ORTH, SPACY, intify_attr
from ..compat import copy_reg
-from ..attrs import SPACY, ORTH, intify_attr, IDS
from ..errors import Errors
-from ..util import ensure_path, SimpleFrozenList
+from ..util import SimpleFrozenList, ensure_path
+from ..vocab import Vocab
from ._dict_proxies import SpanGroups
-
-# fmt: off
-ALL_ATTRS = ("ORTH", "NORM", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "ENT_ID", "LEMMA", "MORPH", "POS", "SENT_START")
-# fmt: on
+from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS
+from .doc import Doc
class DocBin:
diff --git a/spacy/tokens/doc.pxd b/spacy/tokens/doc.pxd
index 57d087958..d7f092c94 100644
--- a/spacy/tokens/doc.pxd
+++ b/spacy/tokens/doc.pxd
@@ -1,10 +1,10 @@
-from cymem.cymem cimport Pool
cimport numpy as np
+from cymem.cymem cimport Pool
-from ..vocab cimport Vocab
-from ..structs cimport TokenC, LexemeC, SpanC
-from ..typedefs cimport attr_t
from ..attrs cimport attr_id_t
+from ..structs cimport LexemeC, SpanC, TokenC
+from ..typedefs cimport attr_t
+from ..vocab cimport Vocab
cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index 9d45960ab..00c7a9d07 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -1,16 +1,31 @@
-from typing import Callable, Protocol, Iterable, Iterator, Optional
-from typing import Union, Tuple, List, Dict, Any, overload
+from pathlib import Path
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Protocol,
+ Tuple,
+ Union,
+ overload,
+)
+
+import numpy as np
from cymem.cymem import Pool
from thinc.types import Floats1d, Floats2d, Ints2d
-from .span import Span
-from .token import Token
-from ._dict_proxies import SpanGroups
-from ._retokenize import Retokenizer
+
from ..lexeme import Lexeme
from ..vocab import Vocab
+from ._dict_proxies import SpanGroups
+from ._retokenize import Retokenizer
+from .span import Span
+from .token import Token
from .underscore import Underscore
-from pathlib import Path
-import numpy as np
+
+DOCBIN_ALL_ATTRS: Tuple[str, ...]
class DocMethod(Protocol):
def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ... # type: ignore[misc]
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 6c196ad78..206253949 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -3,45 +3,67 @@ from typing import Set
cimport cython
cimport numpy as np
-from libc.string cimport memcpy
from libc.math cimport sqrt
from libc.stdint cimport int32_t, uint64_t
+from libc.string cimport memcpy
import copy
+import itertools
+import warnings
from collections import Counter, defaultdict
from enum import Enum
-import itertools
+
import numpy
import srsly
from thinc.api import get_array_module, get_current_ops
from thinc.util import copy_array
-import warnings
from .span cimport Span
from .token cimport MISSING_DEP
-from ._dict_proxies import SpanGroups
-from .token cimport Token
-from ..lexeme cimport Lexeme, EMPTY_LEXEME
-from ..typedefs cimport attr_t, flags_t
-from ..attrs cimport attr_id_t
-from ..attrs cimport LENGTH, POS, LEMMA, TAG, MORPH, DEP, HEAD, SPACY, ENT_IOB
-from ..attrs cimport ENT_TYPE, ENT_ID, ENT_KB_ID, SENT_START, IDX, NORM
-from ..attrs import intify_attr, IDS
+from ._dict_proxies import SpanGroups
+
+from ..attrs cimport (
+ DEP,
+ ENT_ID,
+ ENT_IOB,
+ ENT_KB_ID,
+ ENT_TYPE,
+ HEAD,
+ IDX,
+ LEMMA,
+ LENGTH,
+ MORPH,
+ NORM,
+ POS,
+ SENT_START,
+ SPACY,
+ TAG,
+ attr_id_t,
+)
+from ..lexeme cimport EMPTY_LEXEME, Lexeme
+from ..typedefs cimport attr_t, flags_t
+from .token cimport Token
+
+from .. import parts_of_speech, schemas, util
+from ..attrs import IDS, intify_attr
from ..compat import copy_reg, pickle
from ..errors import Errors, Warnings
from ..morphology import Morphology
-from .. import util
-from .. import parts_of_speech
-from .. import schemas
-from .underscore import Underscore, get_ext_args
-from ._retokenize import Retokenizer
-from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS
from ..util import get_words_and_spaces
+from ._retokenize import Retokenizer
+from .underscore import Underscore, get_ext_args
DEF PADDING = 5
+# We store the docbin attrs here rather than in _serialize to avoid
+# import cycles.
+
+# fmt: off
+DOCBIN_ALL_ATTRS = ("ORTH", "NORM", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "ENT_ID", "LEMMA", "MORPH", "POS", "SENT_START")
+# fmt: on
+
cdef int bounds_check(int i, int length, int padding) except -1:
if (i + padding) < 0:
raise IndexError(Errors.E026.format(i=i, length=length))
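The comment added above flags the one change in this patch that is more than reordering: DOCBIN_ALL_ATTRS now lives in doc.pyx, and _serialize.py (see its hunk earlier) imports it back under the old ALL_ATTRS name. Pushing shared data down into the lower-level module is the usual way to turn a two-way import into a one-way one; schematically, with hypothetical module names:

# low.py: owns the shared constant and imports nothing from high.py
SHARED_ATTRS = ("ORTH", "NORM")

# high.py: the only import edge now points downward, so no cycle
from low import SHARED_ATTRS as ALL_ATTRS  # old name kept for existing callers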
diff --git a/spacy/tokens/graph.pxd b/spacy/tokens/graph.pxd
index 6f2f80656..083ef6522 100644
--- a/spacy/tokens/graph.pxd
+++ b/spacy/tokens/graph.pxd
@@ -1,7 +1,8 @@
-from libcpp.vector cimport vector
from cymem.cymem cimport Pool
+from libcpp.vector cimport vector
from preshed.maps cimport PreshMap
-from ..structs cimport GraphC, EdgeC
+
+from ..structs cimport EdgeC, GraphC
cdef class Graph:
diff --git a/spacy/tokens/graph.pyx b/spacy/tokens/graph.pyx
index adc4d23c8..47f0a20d4 100644
--- a/spacy/tokens/graph.pyx
+++ b/spacy/tokens/graph.pyx
@@ -1,19 +1,26 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
-from typing import List, Tuple, Generator
+from typing import Generator, List, Tuple
+
+cimport cython
+from cython.operator cimport dereference
from libc.stdint cimport int32_t, int64_t
from libcpp.pair cimport pair
from libcpp.unordered_map cimport unordered_map
from libcpp.unordered_set cimport unordered_set
-from cython.operator cimport dereference
-cimport cython
+
import weakref
-from preshed.maps cimport map_get_unless_missing
+
from murmurhash.mrmr cimport hash64
+from preshed.maps cimport map_get_unless_missing
from .. import Errors
+
from ..typedefs cimport hash_t
+
from ..strings import get_string_id
+
from ..structs cimport EdgeC, GraphC
+
from .token import Token
diff --git a/spacy/tokens/morphanalysis.pxd b/spacy/tokens/morphanalysis.pxd
index 9510875c9..728f0aaf7 100644
--- a/spacy/tokens/morphanalysis.pxd
+++ b/spacy/tokens/morphanalysis.pxd
@@ -1,6 +1,6 @@
-from ..vocab cimport Vocab
-from ..typedefs cimport hash_t
from ..structs cimport MorphAnalysisC
+from ..typedefs cimport hash_t
+from ..vocab cimport Vocab
cdef class MorphAnalysis:
diff --git a/spacy/tokens/morphanalysis.pyi b/spacy/tokens/morphanalysis.pyi
index a5376e80d..b35ff36aa 100644
--- a/spacy/tokens/morphanalysis.pyi
+++ b/spacy/tokens/morphanalysis.pyi
@@ -1,4 +1,5 @@
from typing import Any, Dict, Iterator, List, Optional, Union
+
from ..vocab import Vocab
class MorphAnalysis:
diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx
index baa3800a1..0992a0b66 100644
--- a/spacy/tokens/morphanalysis.pyx
+++ b/spacy/tokens/morphanalysis.pyx
@@ -1,11 +1,12 @@
-from libc.string cimport memset
cimport numpy as np
+from libc.string cimport memset
from ..errors import Errors
from ..morphology import Morphology
+
+from ..morphology cimport check_feature, get_by_field, list_features
+from ..typedefs cimport attr_t, hash_t
from ..vocab cimport Vocab
-from ..typedefs cimport hash_t, attr_t
-from ..morphology cimport list_features, check_feature, get_by_field
cdef class MorphAnalysis:
diff --git a/spacy/tokens/span.pxd b/spacy/tokens/span.pxd
index 78bee0a8c..d77bbea70 100644
--- a/spacy/tokens/span.pxd
+++ b/spacy/tokens/span.pxd
@@ -1,8 +1,8 @@
cimport numpy as np
-from .doc cimport Doc
-from ..typedefs cimport attr_t
from ..structs cimport SpanC
+from ..typedefs cimport attr_t
+from .doc cimport Doc
cdef class Span:
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 29b8ce703..73192b760 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -1,22 +1,23 @@
cimport numpy as np
from libc.math cimport sqrt
+import copy
+import warnings
+
import numpy
from thinc.api import get_array_module
-import warnings
-import copy
-from .doc cimport token_by_start, token_by_end, get_token_attr, _get_lca_matrix
-from ..structs cimport TokenC, LexemeC
-from ..typedefs cimport flags_t, attr_t, hash_t
-from ..attrs cimport attr_id_t
-from ..parts_of_speech cimport univ_pos_t
from ..attrs cimport *
+from ..attrs cimport attr_id_t
from ..lexeme cimport Lexeme
+from ..parts_of_speech cimport univ_pos_t
+from ..structs cimport LexemeC, TokenC
from ..symbols cimport dep
+from ..typedefs cimport attr_t, flags_t, hash_t
+from .doc cimport _get_lca_matrix, get_token_attr, token_by_end, token_by_start
-from ..util import normalize_slice
from ..errors import Errors, Warnings
+from ..util import normalize_slice
from .underscore import Underscore, get_ext_args
diff --git a/spacy/tokens/span_group.pxd b/spacy/tokens/span_group.pxd
index 5074aa275..7f4145682 100644
--- a/spacy/tokens/span_group.pxd
+++ b/spacy/tokens/span_group.pxd
@@ -1,6 +1,8 @@
from libcpp.vector cimport vector
+
from ..structs cimport SpanC
+
cdef class SpanGroup:
cdef public object _doc_ref
cdef public str name
diff --git a/spacy/tokens/span_group.pyx b/spacy/tokens/span_group.pyx
index c748fa256..48ad4a516 100644
--- a/spacy/tokens/span_group.pyx
+++ b/spacy/tokens/span_group.pyx
@@ -1,10 +1,12 @@
-from typing import Iterable, Tuple, Union, Optional, TYPE_CHECKING
-import weakref
import struct
+import weakref
from copy import deepcopy
+from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Union
+
import srsly
from spacy.errors import Errors
+
from .span cimport Span
diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd
index 58b727764..fc02ff624 100644
--- a/spacy/tokens/token.pxd
+++ b/spacy/tokens/token.pxd
@@ -1,14 +1,16 @@
from numpy cimport ndarray
-from ..vocab cimport Vocab
-from ..structs cimport TokenC
+
from ..attrs cimport *
-from ..typedefs cimport attr_t, flags_t
-from ..parts_of_speech cimport univ_pos_t
-from .doc cimport Doc
from ..lexeme cimport Lexeme
+from ..parts_of_speech cimport univ_pos_t
+from ..structs cimport TokenC
+from ..typedefs cimport attr_t, flags_t
+from ..vocab cimport Vocab
+from .doc cimport Doc
from ..errors import Errors
+
cdef int MISSING_DEP = 0
cdef class Token:
diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi
index bd585d034..e7863fd16 100644
--- a/spacy/tokens/token.pyi
+++ b/spacy/tokens/token.pyi
@@ -1,18 +1,12 @@
-from typing import (
- Callable,
- Protocol,
- Iterator,
- Optional,
- Union,
- Tuple,
- Any,
-)
+from typing import Any, Callable, Iterator, Optional, Protocol, Tuple, Union
+
from thinc.types import Floats1d, FloatsXd
-from .doc import Doc
-from .span import Span
-from .morphanalysis import MorphAnalysis
+
from ..lexeme import Lexeme
from ..vocab import Vocab
+from .doc import Doc
+from .morphanalysis import MorphAnalysis
+from .span import Span
from .underscore import Underscore
class TokenMethod(Protocol):
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 7fff6b162..8c384f417 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -1,26 +1,43 @@
# cython: infer_types=True
# Compiler crashes on memory view coercion without this. Should report bug.
-from cython.view cimport array as cvarray
cimport numpy as np
+from cython.view cimport array as cvarray
+
np.import_array()
+import warnings
+
import numpy
from thinc.api import get_array_module
-import warnings
-from ..typedefs cimport hash_t
+from ..attrs cimport (
+ IS_ALPHA,
+ IS_ASCII,
+ IS_BRACKET,
+ IS_CURRENCY,
+ IS_DIGIT,
+ IS_LEFT_PUNCT,
+ IS_LOWER,
+ IS_PUNCT,
+ IS_QUOTE,
+ IS_RIGHT_PUNCT,
+ IS_SPACE,
+ IS_STOP,
+ IS_TITLE,
+ IS_UPPER,
+ LIKE_EMAIL,
+ LIKE_NUM,
+ LIKE_URL,
+)
from ..lexeme cimport Lexeme
-from ..attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
-from ..attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT
-from ..attrs cimport IS_TITLE, IS_UPPER, IS_CURRENCY, IS_STOP
-from ..attrs cimport LIKE_URL, LIKE_NUM, LIKE_EMAIL
from ..symbols cimport conj
-from .morphanalysis cimport MorphAnalysis
+from ..typedefs cimport hash_t
from .doc cimport set_children_from_heads
+from .morphanalysis cimport MorphAnalysis
from .. import parts_of_speech
-from ..errors import Errors, Warnings
from ..attrs import IOB_STRINGS
+from ..errors import Errors, Warnings
from .underscore import Underscore, get_ext_args
diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py
index e9a4e1862..0aa0c1e6d 100644
--- a/spacy/tokens/underscore.py
+++ b/spacy/tokens/underscore.py
@@ -1,6 +1,7 @@
-from typing import Dict, Any, List, Optional, Tuple, Union, TYPE_CHECKING
-import functools
import copy
+import functools
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+
from ..errors import Errors
if TYPE_CHECKING:
diff --git a/spacy/training/__init__.py b/spacy/training/__init__.py
index a6f873f05..b8c0792f0 100644
--- a/spacy/training/__init__.py
+++ b/spacy/training/__init__.py
@@ -1,12 +1,18 @@
-from .corpus import Corpus, JsonlCorpus, PlainTextCorpus # noqa: F401
-from .example import Example, validate_examples, validate_get_examples # noqa: F401
from .alignment import Alignment # noqa: F401
from .augment import dont_augment, orth_variants_augmenter # noqa: F401
-from .iob_utils import iob_to_biluo, biluo_to_iob # noqa: F401
-from .iob_utils import offsets_to_biluo_tags, biluo_tags_to_offsets # noqa: F401
-from .iob_utils import biluo_tags_to_spans, tags_to_entities # noqa: F401
-from .iob_utils import split_bilu_label, remove_bilu_prefix # noqa: F401
-from .gold_io import docs_to_json, read_json_file # noqa: F401
from .batchers import minibatch_by_padded_size, minibatch_by_words # noqa: F401
-from .loggers import console_logger # noqa: F401
from .callbacks import create_copy_from_base_model # noqa: F401
+from .corpus import Corpus, JsonlCorpus, PlainTextCorpus # noqa: F401
+from .example import Example, validate_examples, validate_get_examples # noqa: F401
+from .gold_io import docs_to_json, read_json_file # noqa: F401
+from .iob_utils import ( # noqa: F401
+ biluo_tags_to_offsets,
+ biluo_tags_to_spans,
+ biluo_to_iob,
+ iob_to_biluo,
+ offsets_to_biluo_tags,
+ remove_bilu_prefix,
+ split_bilu_label,
+ tags_to_entities,
+)
+from .loggers import console_logger # noqa: F401
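One detail of the wrapped re-exports above: isort keeps the "# noqa: F401" on the statement's first physical line, right after the opening parenthesis. That placement matters because flake8 attributes the unused-import report to the line a statement starts on, so a single noqa there covers every name in the block:

from .iob_utils import (  # noqa: F401
    biluo_to_iob,
    iob_to_biluo,
)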
diff --git a/spacy/training/align.pyx b/spacy/training/align.pyx
index 0ef1fd35d..8bd43b048 100644
--- a/spacy/training/align.pyx
+++ b/spacy/training/align.pyx
@@ -1,6 +1,6 @@
-from typing import List, Tuple
-from itertools import chain
import re
+from itertools import chain
+from typing import List, Tuple
from ..errors import Errors
diff --git a/spacy/training/alignment.py b/spacy/training/alignment.py
index 6d24714bf..3f615d10b 100644
--- a/spacy/training/alignment.py
+++ b/spacy/training/alignment.py
@@ -1,5 +1,5 @@
-from typing import List
from dataclasses import dataclass
+from typing import List
from .align import get_alignments
from .alignment_array import AlignmentArray
diff --git a/spacy/training/alignment_array.pxd b/spacy/training/alignment_array.pxd
index 056f5bef3..bb28f3ac6 100644
--- a/spacy/training/alignment_array.pxd
+++ b/spacy/training/alignment_array.pxd
@@ -1,5 +1,6 @@
-from libcpp.vector cimport vector
cimport numpy as np
+from libcpp.vector cimport vector
+
cdef class AlignmentArray:
cdef np.ndarray _data
diff --git a/spacy/training/alignment_array.pyx b/spacy/training/alignment_array.pyx
index 01e9d9bf8..b0be1512b 100644
--- a/spacy/training/alignment_array.pyx
+++ b/spacy/training/alignment_array.pyx
@@ -1,6 +1,9 @@
from typing import List
-from ..errors import Errors
+
import numpy
+
+from ..errors import Errors
+
from libc.stdint cimport int32_t
diff --git a/spacy/training/augment.py b/spacy/training/augment.py
index 2fe8c24fb..1ebd3313c 100644
--- a/spacy/training/augment.py
+++ b/spacy/training/augment.py
@@ -1,12 +1,11 @@
-from typing import Callable, Iterator, Dict, List, Tuple, TYPE_CHECKING
-from typing import Optional
-import random
import itertools
+import random
from functools import partial
+from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Optional, Tuple
from ..util import registry
from .example import Example
-from .iob_utils import split_bilu_label, _doc_to_biluo_tags_with_partial
+from .iob_utils import _doc_to_biluo_tags_with_partial, split_bilu_label
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/batchers.py b/spacy/training/batchers.py
index f0b6c3123..050c3351b 100644
--- a/spacy/training/batchers.py
+++ b/spacy/training/batchers.py
@@ -1,10 +1,18 @@
-from typing import Union, Iterable, Sequence, TypeVar, List, Callable, Iterator
-from typing import Optional, Any
-from functools import partial
import itertools
+from functools import partial
+from typing import (
+ Any,
+ Callable,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Sequence,
+ TypeVar,
+ Union,
+)
-from ..util import registry, minibatch
-
+from ..util import minibatch, registry
Sizing = Union[Sequence[int], int]
ItemT = TypeVar("ItemT")
diff --git a/spacy/training/callbacks.py b/spacy/training/callbacks.py
index 7e2494f5b..21c3d56a1 100644
--- a/spacy/training/callbacks.py
+++ b/spacy/training/callbacks.py
@@ -1,14 +1,17 @@
-from typing import Callable, Optional
+from typing import TYPE_CHECKING, Callable, Optional
+
from ..errors import Errors
-from ..language import Language
-from ..util import load_model, registry, logger
+from ..util import load_model, logger, registry
+
+if TYPE_CHECKING:
+ from ..language import Language
@registry.callbacks("spacy.copy_from_base_model.v1")
def create_copy_from_base_model(
tokenizer: Optional[str] = None,
vocab: Optional[str] = None,
-) -> Callable[[Language], Language]:
+) -> Callable[["Language"], "Language"]:
def copy_from_base_model(nlp):
if tokenizer:
logger.info("Copying tokenizer from: %s", tokenizer)
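callbacks.py also picks up a real fix alongside the reordering: the Language import moves under typing.TYPE_CHECKING and the annotations become strings. This is the standard recipe for a type-only import that would otherwise be circular (spacy.language itself imports from spacy.util at runtime): the guarded block is read only by type checkers, and quoted annotations are stored as strings rather than resolved on import. A minimal sketch using a hypothetical module name:

from typing import TYPE_CHECKING, Callable

if TYPE_CHECKING:
    # Seen by mypy and friends only; never executed at runtime,
    # so it cannot complete an import cycle.
    from mypkg.language import Language  # hypothetical module

def create_callback() -> Callable[["Language"], "Language"]:
    # The quoted "Language" is never evaluated when this module loads.
    def callback(nlp):
        return nlp
    return callback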
diff --git a/spacy/training/converters/__init__.py b/spacy/training/converters/__init__.py
index e91b6aaa6..8173da64c 100644
--- a/spacy/training/converters/__init__.py
+++ b/spacy/training/converters/__init__.py
@@ -1,4 +1,4 @@
-from .iob_to_docs import iob_to_docs # noqa: F401
from .conll_ner_to_docs import conll_ner_to_docs # noqa: F401
-from .json_to_docs import json_to_docs # noqa: F401
from .conllu_to_docs import conllu_to_docs # noqa: F401
+from .iob_to_docs import iob_to_docs # noqa: F401
+from .json_to_docs import json_to_docs # noqa: F401
diff --git a/spacy/training/converters/conll_ner_to_docs.py b/spacy/training/converters/conll_ner_to_docs.py
index 28b21c5f0..b19d1791b 100644
--- a/spacy/training/converters/conll_ner_to_docs.py
+++ b/spacy/training/converters/conll_ner_to_docs.py
@@ -1,10 +1,10 @@
from wasabi import Printer
-from .. import tags_to_entities
-from ...training import iob_to_biluo
-from ...tokens import Doc, Span
from ...errors import Errors
-from ...util import load_model, get_lang_class
+from ...tokens import Doc, Span
+from ...training import iob_to_biluo
+from ...util import get_lang_class, load_model
+from .. import tags_to_entities
def conll_ner_to_docs(
diff --git a/spacy/training/converters/conllu_to_docs.py b/spacy/training/converters/conllu_to_docs.py
index 7052504cc..bda5c88c3 100644
--- a/spacy/training/converters/conllu_to_docs.py
+++ b/spacy/training/converters/conllu_to_docs.py
@@ -1,11 +1,12 @@
import re
-from .conll_ner_to_docs import n_sents_info
-from ...training import iob_to_biluo, biluo_tags_to_spans
-from ...tokens import Doc, Token, Span
-from ...vocab import Vocab
from wasabi import Printer
+from ...tokens import Doc, Span, Token
+from ...training import biluo_tags_to_spans, iob_to_biluo
+from ...vocab import Vocab
+from .conll_ner_to_docs import n_sents_info
+
def conllu_to_docs(
input_data,
diff --git a/spacy/training/converters/iob_to_docs.py b/spacy/training/converters/iob_to_docs.py
index 60fb7df61..45bb65692 100644
--- a/spacy/training/converters/iob_to_docs.py
+++ b/spacy/training/converters/iob_to_docs.py
@@ -1,11 +1,11 @@
from wasabi import Printer
-from .conll_ner_to_docs import n_sents_info
-from ...vocab import Vocab
-from ...training import iob_to_biluo, tags_to_entities
-from ...tokens import Doc, Span
from ...errors import Errors
+from ...tokens import Doc, Span
+from ...training import iob_to_biluo, tags_to_entities
from ...util import minibatch
+from ...vocab import Vocab
+from .conll_ner_to_docs import n_sents_info
def iob_to_docs(input_data, n_sents=10, no_print=False, *args, **kwargs):
diff --git a/spacy/training/converters/json_to_docs.py b/spacy/training/converters/json_to_docs.py
index 4123839f2..b4beedd2f 100644
--- a/spacy/training/converters/json_to_docs.py
+++ b/spacy/training/converters/json_to_docs.py
@@ -1,9 +1,13 @@
import srsly
-from ..gold_io import json_iterate, json_to_annotations
-from ..example import annotations_to_doc
-from ..example import _fix_legacy_dict_data, _parse_example_dict_data
-from ...util import load_model
+
from ...lang.xx import MultiLanguage
+from ...util import load_model
+from ..example import (
+ _fix_legacy_dict_data,
+ _parse_example_dict_data,
+ annotations_to_doc,
+)
+from ..gold_io import json_iterate, json_to_annotations
def json_to_docs(input_data, model=None, **kwargs):
diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py
index 086ad831c..6037c15e3 100644
--- a/spacy/training/corpus.py
+++ b/spacy/training/corpus.py
@@ -1,16 +1,16 @@
-import warnings
-from typing import Union, List, Iterable, Iterator, TYPE_CHECKING, Callable
-from typing import Optional
-from pathlib import Path
import random
+import warnings
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Optional, Union
+
import srsly
from .. import util
+from ..errors import Errors, Warnings
+from ..tokens import Doc, DocBin
+from ..vocab import Vocab
from .augment import dont_augment
from .example import Example
-from ..errors import Warnings, Errors
-from ..tokens import DocBin, Doc
-from ..vocab import Vocab
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
diff --git a/spacy/training/example.pxd b/spacy/training/example.pxd
index 49e239757..a7c71fa88 100644
--- a/spacy/training/example.pxd
+++ b/spacy/training/example.pxd
@@ -1,6 +1,7 @@
-from ..tokens.doc cimport Doc
from libc.stdint cimport uint64_t
+from ..tokens.doc cimport Doc
+
cdef class Example:
cdef readonly Doc x
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 95b0f0de9..abdac23ea 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -1,19 +1,29 @@
-from collections.abc import Iterable as IterableInstance
import warnings
+from collections.abc import Iterable as IterableInstance
+
import numpy
+
from murmurhash.mrmr cimport hash64
from ..tokens.doc cimport Doc
from ..tokens.span cimport Span
-from ..tokens.span import Span
+
from ..attrs import IDS
-from .alignment import Alignment
-from .iob_utils import biluo_to_iob, offsets_to_biluo_tags, doc_to_biluo_tags
-from .iob_utils import biluo_tags_to_spans, remove_bilu_prefix
from ..errors import Errors, Warnings
from ..pipeline._parser_internals import nonproj
+from ..tokens.span import Span
+from .alignment import Alignment
+from .iob_utils import (
+ biluo_tags_to_spans,
+ biluo_to_iob,
+ doc_to_biluo_tags,
+ offsets_to_biluo_tags,
+ remove_bilu_prefix,
+)
+
from ..tokens.token cimport MISSING_DEP
-from ..util import logger, to_ternary_int, all_equal
+
+from ..util import all_equal, logger, to_ternary_int
cpdef Doc annotations_to_doc(vocab, tok_annot, doc_annot):
diff --git a/spacy/training/gold_io.pyx b/spacy/training/gold_io.pyx
index 69654e2c7..1e7b3681d 100644
--- a/spacy/training/gold_io.pyx
+++ b/spacy/training/gold_io.pyx
@@ -1,10 +1,12 @@
+import json
import warnings
+
import srsly
+
from .. import util
from ..errors import Warnings
from ..tokens import Doc
from .iob_utils import offsets_to_biluo_tags, tags_to_entities
-import json
def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 9cf759c55..39dc06b9e 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -1,24 +1,33 @@
-from typing import Union, Dict, Optional, Any, IO, TYPE_CHECKING
-from thinc.api import Config, fix_random_seed, set_gpu_allocator
-from thinc.api import ConfigValidationError
-from pathlib import Path
-import srsly
-import numpy
-import tarfile
import gzip
-import zipfile
-import tqdm
-from itertools import islice
+import tarfile
import warnings
+import zipfile
+from itertools import islice
+from pathlib import Path
+from typing import IO, TYPE_CHECKING, Any, Dict, Optional, Union
+
+import numpy
+import srsly
+import tqdm
+from thinc.api import Config, ConfigValidationError, fix_random_seed, set_gpu_allocator
-from .pretrain import get_tok2vec_ref
-from ..lookups import Lookups
-from ..vectors import Vectors, Mode as VectorsMode
from ..errors import Errors, Warnings
+from ..lookups import Lookups
from ..schemas import ConfigSchemaTraining
-from ..util import registry, load_model_from_config, resolve_dot_names, logger
-from ..util import load_model, ensure_path, get_sourced_components
-from ..util import OOV_RANK, DEFAULT_OOV_PROB
+from ..util import (
+ DEFAULT_OOV_PROB,
+ OOV_RANK,
+ ensure_path,
+ get_sourced_components,
+ load_model,
+ load_model_from_config,
+ logger,
+ registry,
+ resolve_dot_names,
+)
+from ..vectors import Mode as VectorsMode
+from ..vectors import Vectors
+from .pretrain import get_tok2vec_ref
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/iob_utils.py b/spacy/training/iob_utils.py
index 0d4d246b0..64d02a1e2 100644
--- a/spacy/training/iob_utils.py
+++ b/spacy/training/iob_utils.py
@@ -1,8 +1,8 @@
-from typing import List, Dict, Tuple, Iterable, Union, Iterator, cast
import warnings
+from typing import Dict, Iterable, Iterator, List, Tuple, Union, cast
from ..errors import Errors, Warnings
-from ..tokens import Span, Doc
+from ..tokens import Doc, Span
def iob_to_biluo(tags: Iterable[str]) -> List[str]:
diff --git a/spacy/training/loggers.py b/spacy/training/loggers.py
index 7de31822e..1ec0b7b25 100644
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@@ -1,13 +1,14 @@
-from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
-from wasabi import Printer
-from pathlib import Path
-import tqdm
import sys
-import srsly
+from pathlib import Path
+from typing import IO, TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
+
+import srsly
+import tqdm
+from wasabi import Printer
-from ..util import registry
-from ..errors import Errors
from .. import util
+from ..errors import Errors
+from ..util import registry
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/loop.py b/spacy/training/loop.py
index eca40e3d9..56df53957 100644
--- a/spacy/training/loop.py
+++ b/spacy/training/loop.py
@@ -1,17 +1,28 @@
-from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
-from typing import Optional, TYPE_CHECKING
+import random
+import shutil
+import sys
from pathlib import Path
from timeit import default_timer as timer
-from thinc.api import Optimizer, Config, constant, fix_random_seed, set_gpu_allocator
-from wasabi import Printer
-import random
-import sys
-import shutil
+from typing import (
+ IO,
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Tuple,
+ Union,
+)
+
+from thinc.api import Config, Optimizer, constant, fix_random_seed, set_gpu_allocator
+from wasabi import Printer
-from .example import Example
-from ..schemas import ConfigSchemaTraining
from ..errors import Errors
-from ..util import resolve_dot_names, registry, logger
+from ..schemas import ConfigSchemaTraining
+from ..util import logger, registry, resolve_dot_names
+from .example import Example
if TYPE_CHECKING:
from ..language import Language # noqa: F401
diff --git a/spacy/training/pretrain.py b/spacy/training/pretrain.py
index ebbc5d837..14a813a09 100644
--- a/spacy/training/pretrain.py
+++ b/spacy/training/pretrain.py
@@ -1,20 +1,26 @@
-from typing import Optional, Callable, Iterable, Union, List
-from thinc.api import Config, fix_random_seed, set_gpu_allocator, Model, Optimizer
-from thinc.api import set_dropout_rate
-from pathlib import Path
-from collections import Counter
-import srsly
-import time
import re
+import time
+from collections import Counter
+from pathlib import Path
+from typing import Callable, Iterable, List, Optional, Union

+import srsly
+from thinc.api import (
+ Config,
+ Model,
+ Optimizer,
+ fix_random_seed,
+ set_dropout_rate,
+ set_gpu_allocator,
+)
from thinc.config import ConfigValidationError
from wasabi import Printer
-from .example import Example
from ..errors import Errors
-from ..tokens import Doc
from ..schemas import ConfigSchemaPretrain
-from ..util import registry, load_model_from_config, dot_to_object
+from ..tokens import Doc
+from ..util import dot_to_object, load_model_from_config, registry
+from .example import Example
def pretrain(
diff --git a/spacy/ty.py b/spacy/ty.py
index 7e79a3d4d..f389456c0 100644
--- a/spacy/ty.py
+++ b/spacy/ty.py
@@ -1,13 +1,21 @@
-from typing import TYPE_CHECKING
-from typing import Optional, Any, Iterable, Dict, Callable, Sequence, List
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+)
+
+from thinc.api import Model, Optimizer
from .compat import Protocol, runtime_checkable
-from thinc.api import Optimizer, Model
-
if TYPE_CHECKING:
- from .training import Example
from .language import Language
+ from .training import Example
@runtime_checkable
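For context on ty.py itself: Protocol and runtime_checkable come from spacy.compat, which provides them (via typing or typing_extensions, depending on availability), and a runtime_checkable protocol lets isinstance() test for conforming methods without any inheritance. A toy sketch:

from typing import Protocol, runtime_checkable

@runtime_checkable
class SupportsToDisk(Protocol):
    def to_disk(self, path: str) -> None: ...

class Pipe:
    def to_disk(self, path: str) -> None:
        print(f"saved to {path}")

# Structural check: passes because Pipe has a matching to_disk method.
assert isinstance(Pipe(), SupportsToDisk)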
diff --git a/spacy/typedefs.pxd b/spacy/typedefs.pxd
index 8cdc70e42..72d4d99ac 100644
--- a/spacy/typedefs.pxd
+++ b/spacy/typedefs.pxd
@@ -1,6 +1,4 @@
-from libc.stdint cimport uint16_t, uint32_t, uint64_t, uintptr_t, int32_t
-from libc.stdint cimport uint8_t
-
+from libc.stdint cimport int32_t, uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t
ctypedef float weight_t
ctypedef uint64_t hash_t
diff --git a/spacy/util.py b/spacy/util.py
index 8cc89217d..ec6ab47c0 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -1,38 +1,62 @@
-from typing import List, Mapping, NoReturn, Union, Dict, Any, Set, cast
-from typing import Optional, Iterable, Callable, Tuple, Type
-from typing import Iterator, Pattern, Generator, TYPE_CHECKING
-from types import ModuleType
-import os
+import functools
import importlib
import importlib.util
-import re
-from pathlib import Path
-import thinc
-from thinc.api import NumpyOps, get_current_ops, Adam, Config, Optimizer
-from thinc.api import ConfigValidationError, Model
-import functools
-import itertools
-import numpy
-import srsly
-import catalogue
-from catalogue import RegistryError, Registry
-import langcodes
-import sys
-import warnings
-from packaging.specifiers import SpecifierSet, InvalidSpecifier
-from packaging.version import Version, InvalidVersion
-from packaging.requirements import Requirement
-import subprocess
-from contextlib import contextmanager
-from collections import defaultdict
-import tempfile
-import shutil
-import shlex
import inspect
-import pkgutil
+import itertools
import logging
+import os
+import pkgutil
+import re
+import shlex
+import shutil
import socket
import stat
+import subprocess
+import sys
+import tempfile
+import warnings
+from collections import defaultdict
+from contextlib import contextmanager
+from pathlib import Path
+from types import ModuleType
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Generator,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ NoReturn,
+ Optional,
+ Pattern,
+ Set,
+ Tuple,
+ Type,
+ Union,
+ cast,
+)
+
+import catalogue
+import langcodes
+import numpy
+import srsly
+import thinc
+from catalogue import Registry, RegistryError
+from packaging.requirements import Requirement
+from packaging.specifiers import InvalidSpecifier, SpecifierSet
+from packaging.version import InvalidVersion, Version
+from thinc.api import (
+ Adam,
+ Config,
+ ConfigValidationError,
+ Model,
+ NumpyOps,
+ Optimizer,
+ get_current_ops,
+)
try:
import cupy.random
@@ -43,13 +67,12 @@ except ImportError:
# and have since moved to Thinc. We're importing them here so people's code
# doesn't break, but they should always be imported from Thinc from now on,
# not from spacy.util.
-from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
+from thinc.api import compounding, decaying, fix_random_seed # noqa: F401
-
-from .symbols import ORTH
-from .compat import cupy, CudaStream, is_windows, importlib_metadata
-from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
from . import about
+from .compat import CudaStream, cupy, importlib_metadata, is_windows
+from .errors import OLD_MODEL_SHORTCUTS, Errors, Warnings
+from .symbols import ORTH
if TYPE_CHECKING:
# This lets us add type hints for mypy etc. without causing circular imports
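The comment kept above the thinc re-export spells out why an apparently unused import survives with a noqa: fix_random_seed, compounding and decaying used to live in spaCy, and old user code may still import them from spacy.util. Both forms below therefore work, but only the first is the supported one:

# Supported going forward:
from thinc.api import compounding, decaying, fix_random_seed

# Still works, purely because of the backwards-compatibility re-export above:
from spacy.util import compounding, decaying, fix_random_seed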
diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx
index be0f6db09..bc654252a 100644
--- a/spacy/vectors.pyx
+++ b/spacy/vectors.pyx
@@ -1,14 +1,15 @@
cimport numpy as np
-from libc.stdint cimport uint32_t, uint64_t
from cython.operator cimport dereference as deref
+from libc.stdint cimport uint32_t, uint64_t
from libcpp.set cimport set as cppset
from murmurhash.mrmr cimport hash128_x64
import functools
-import numpy
-from typing import cast
import warnings
from enum import Enum
+from typing import cast
+
+import numpy
import srsly
from thinc.api import Ops, get_array_module, get_current_ops
from thinc.backends import get_array_ops
@@ -16,9 +17,9 @@ from thinc.types import Floats2d
from .strings cimport StringStore
-from .strings import get_string_id
-from .errors import Errors, Warnings
from . import util
+from .errors import Errors, Warnings
+from .strings import get_string_id
def unpickle_vectors(bytes_data):
diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd
index 9c951b2b7..3b0173e3e 100644
--- a/spacy/vocab.pxd
+++ b/spacy/vocab.pxd
@@ -1,12 +1,12 @@
-from libcpp.vector cimport vector
-from preshed.maps cimport PreshMap
from cymem.cymem cimport Pool
+from libcpp.vector cimport vector
from murmurhash.mrmr cimport hash64
+from preshed.maps cimport PreshMap
+from .morphology cimport Morphology
+from .strings cimport StringStore
from .structs cimport LexemeC, TokenC
from .typedefs cimport attr_t, hash_t
-from .strings cimport StringStore
-from .morphology cimport Morphology
cdef LexemeC EMPTY_LEXEME
diff --git a/spacy/vocab.pyi b/spacy/vocab.pyi
index 4cc359c47..b7ff20348 100644
--- a/spacy/vocab.pyi
+++ b/spacy/vocab.pyi
@@ -1,14 +1,15 @@
-from typing import Callable, Iterator, Optional, Union, List, Dict
-from typing import Any, Iterable
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union
+
from thinc.types import Floats1d, FloatsXd
+
from . import Language
-from .strings import StringStore
from .lexeme import Lexeme
from .lookups import Lookups
from .morphology import Morphology
+from .strings import StringStore
from .tokens import Doc, Span
from .vectors import Vectors
-from pathlib import Path
def create_vocab(
lang: Optional[str], defaults: Any, vectors_name: Optional[str] = ...
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 27f8e5f98..d47122d08 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -1,26 +1,27 @@
# cython: profile=True
from libc.string cimport memcpy
+import functools
+
import numpy
import srsly
from thinc.api import get_array_module, get_current_ops
-import functools
-from .lexeme cimport EMPTY_LEXEME, OOV_RANK
-from .lexeme cimport Lexeme
-from .typedefs cimport attr_t
-from .tokens.token cimport Token
from .attrs cimport LANG, ORTH
+from .lexeme cimport EMPTY_LEXEME, OOV_RANK, Lexeme
+from .tokens.token cimport Token
+from .typedefs cimport attr_t
+from . import util
+from .attrs import IS_STOP, NORM, intify_attrs
from .compat import copy_reg
from .errors import Errors
-from .attrs import intify_attrs, NORM, IS_STOP
-from .vectors import Vectors, Mode as VectorsMode
-from .util import registry
-from .lookups import Lookups
-from . import util
+from .lang.lex_attrs import LEX_ATTRS, get_lang, is_stop
from .lang.norm_exceptions import BASE_NORMS
-from .lang.lex_attrs import LEX_ATTRS, is_stop, get_lang
+from .lookups import Lookups
+from .util import registry
+from .vectors import Mode as VectorsMode
+from .vectors import Vectors
def create_vocab(lang, defaults, vectors_name=None):