spaCy/spacy/tests/pipeline/test_pipe_methods.py
Matthew Honnibal 5bebbf7550
Python 3.13 support (#13823)
In order to support Python 3.13, we had to migrate to Cython 3.0. This caused some tricky interaction with our Pydantic usage, because Cython 3 uses the from __future__ import annotations semantics, which causes type annotations to be saved as strings.

The end result is that we can't have Language.factory decorated functions in Cython modules anymore, as the Language.factory decorator expects to inspect the signature of the functions and build a Pydantic model. If the function is implemented in Cython, an error is raised because the type is not resolved.

To address this I've moved the factory functions into a new module, spacy.pipeline.factories. I've added __getattr__ importlib hooks to the previous locations, in case anyone was importing these functions directly. The change should have no backwards compatibility implications.

Along the way I've also refactored the registration of functions for the config. Previously these ran as import-time side-effects, using the registry decorator. I've created instead a new module spacy.registrations. When the registry is accessed it calls a function ensure_populated(), which causes the registrations to occur.

I've made a similar change to the Language.factory registrations in the new spacy.pipeline.factories module.

I want to remove these import-time side-effects so that we can speed up the loading time of the library, which can be especially painful on the CLI. I also find that I'm often working to track down the implementations of functions referenced by strings in the config. Having the registrations all happen in one place will make this easier.

With these changes I've fortunately avoided the need to migrate to Pydantic v2 properly --- we're still using the v1 compatibility shim. We might not be able to hold out forever though: Pydantic (reasonably) aren't actively supporting the v1 shims. I put a lot of work into v2 migration when investigating the 3.13 support, and it's definitely challenging. In any case, it's a relief that we don't have to do the v2 migration at the same time as the Cython 3.0/Python 3.13 support.
2025-05-22 13:47:21 +02:00

678 lines
22 KiB
Python

import gc
import numpy
import pytest
from thinc.api import get_current_ops
import spacy
from spacy.lang.en import English
from spacy.lang.en.syntax_iterators import noun_chunks
from spacy.language import Language
from spacy.pipeline import TrainablePipe
from spacy.tokens import Doc
from spacy.training import Example
from spacy.util import SimpleFrozenList, get_arg_names, make_tempdir
from spacy.vocab import Vocab
@pytest.fixture
def nlp():
    """Return a newly constructed ``Language`` object."""
    pipeline = Language()
    return pipeline
@Language.component("new_pipe")
def new_pipe(doc):
    """Component registered as "new_pipe"; returns the doc it receives."""
    return doc
@Language.component("other_pipe")
def other_pipe(doc):
    """Component registered as "other_pipe"; returns the doc it receives."""
    return doc
@pytest.mark.issue(1506)
def test_issue1506():
    """Stream many texts through ``nlp.pipe`` while forcing garbage
    collection at fixed positions, reading every token's lemma."""

    def string_generator():
        # Five consecutive runs of 10001 identical texts, in this order.
        sentences = (
            "It's sentence produced by that bug.",
            "I erase some hbdsaj lemmas.",
            "I erase lemmas.",
            "It's sentence produced by that bug.",
            "It's sentence produced by that bug.",
        )
        for sentence in sentences:
            for _ in range(10001):
                yield sentence

    gc_positions = (10000, 20000, 30000)
    nlp = English()
    for index, doc in enumerate(nlp.pipe(string_generator())):
        # Cleanup has to run more than once to actually release data:
        # the first run only marks strings as "not hit".
        if index in gc_positions:
            gc.collect()
        for token in doc:
            str(token.lemma_)
@pytest.mark.issue(1654)
def test_issue1654():
    """Build the same three-component pipeline once with ``after=`` and once
    with ``before=`` and check the resulting order."""
    forward = Language(Vocab())
    assert not forward.pipeline

    @Language.component("component")
    def component(doc):
        return doc

    # Append each component after the one added just before it.
    forward.add_pipe("component", name="1")
    for anchor, current in (("1", "2"), ("2", "3")):
        forward.add_pipe("component", name=current, after=anchor)
    assert forward.pipe_names == ["1", "2", "3"]

    # Start from the last component and insert each one before its successor.
    backward = Language(Vocab())
    assert not backward.pipeline
    backward.add_pipe("component", name="3")
    for anchor, current in (("3", "2"), ("2", "1")):
        backward.add_pipe("component", name=current, before=anchor)
    assert backward.pipe_names == ["1", "2", "3"]
@pytest.mark.issue(3880)
def test_issue3880():
    """Test that `nlp.pipe()` works when an empty string ends the batch.

    Fixed in v7.0.5 of Thinc.
    """
    texts = ["hello", "world", "", ""]
    nlp = English()
    for factory, label in (("parser", "dep"), ("ner", "PERSON"), ("tagger", "NN")):
        nlp.add_pipe(factory).add_label(label)
    nlp.initialize()
    # Consume the whole stream; the docs themselves are not inspected.
    for _ in nlp.pipe(texts):
        pass
@pytest.mark.issue(5082)
def test_issue5082():
    """Ensure the 'merge_entities' pipeline does something sensible for the
    vectors of the merged tokens."""
    nlp = English()
    vocab = nlp.vocab
    # Word vectors in token order for the text below.
    word_vectors = {
        "I": numpy.asarray([0.1, 0.5, 0.8], dtype=numpy.float32),
        "like": numpy.asarray([-0.2, -0.6, -0.9], dtype=numpy.float32),
        "David": numpy.asarray([0.3, -0.1, 0.7], dtype=numpy.float32),
        "Bowie": numpy.asarray([0.5, 0, 0.3], dtype=numpy.float32),
    }
    # Vector expected for the merged "David Bowie" token.
    merged_vector = numpy.asarray([0.4, -0.05, 0.5], dtype=numpy.float32)
    for word, vector in word_vectors.items():
        vocab.set_vector(word, vector)
    text = "I like David Bowie"
    ruler = nlp.add_pipe("entity_ruler")
    ruler.add_patterns(
        [{"label": "PERSON", "pattern": [{"LOWER": "david"}, {"LOWER": "bowie"}]}]
    )

    # Before merging: four tokens, each carrying its own vector.
    unmerged = [t.vector for t in nlp(text)]
    assert len(unmerged) == 4
    ops = get_current_ops()
    for actual, expected in zip(unmerged, word_vectors.values()):
        numpy.testing.assert_array_equal(ops.to_numpy(actual), expected)

    # After merging: three tokens, the last one with the merged vector.
    nlp.add_pipe("merge_entities")
    merged = [t.vector for t in nlp(text)]
    assert len(merged) == 3
    expected_merged = (word_vectors["I"], word_vectors["like"], merged_vector)
    for actual, expected in zip(merged, expected_merged):
        numpy.testing.assert_array_equal(ops.to_numpy(actual), expected)
@pytest.mark.issue(5458)
def test_issue5458():
    """Test that the noun chunker does not generate overlapping spans."""
    # fmt: off
    words = ["In", "an", "era", "where", "markets", "have", "brought", "prosperity", "and", "empowerment", "."]
    deps = ["ROOT", "det", "pobj", "advmod", "nsubj", "aux", "relcl", "dobj", "cc", "conj", "punct"]
    pos = ["ADP", "DET", "NOUN", "ADV", "NOUN", "AUX", "VERB", "NOUN", "CCONJ", "NOUN", "PUNCT"]
    heads = [0, 2, 0, 9, 6, 6, 2, 6, 7, 7, 0]
    # fmt: on
    vocab = Vocab(strings=words)
    en_doc = Doc(vocab, words=words, pos=pos, heads=heads, deps=deps)
    en_doc.noun_chunks_iterator = noun_chunks

    # Overlapping spans would make the merge below fail with an E102 error
    # "Can't merge non-disjoint spans".
    nlp = English()
    merge_nps = nlp.create_pipe("merge_noun_chunks")
    merge_nps(en_doc)
def test_multiple_predictions():
    """Call a ``TrainablePipe`` subclass whose ``predict`` returns a tuple
    of two lists."""

    class DummyPipe(TrainablePipe):
        def __init__(self):
            self.model = "dummy_model"

        def predict(self, docs):
            first, second = [1, 2, 3], [4, 5, 6]
            return (first, second)

        def set_annotations(self, docs, scores):
            return docs

    pipeline = Language()
    doc = pipeline.make_doc("foo")
    component = DummyPipe()
    component(doc)
def test_add_pipe_no_name(nlp):
    """A component added without ``name`` is listed under its factory name."""
    nlp.add_pipe("new_pipe")
    registered = nlp.pipe_names
    assert "new_pipe" in registered
def test_add_pipe_duplicate_name(nlp):
    """Adding a second component under an already used name raises ValueError."""
    options = {"name": "duplicate_name"}
    nlp.add_pipe("new_pipe", **options)
    with pytest.raises(ValueError):
        nlp.add_pipe("new_pipe", **options)
@pytest.mark.parametrize("name", ["parser"])
def test_add_pipe_first(nlp, name):
    """Check that ``first=True`` inserts a component ahead of existing ones.

    The pipeline is seeded with the module-level "other_pipe" component
    first: on an empty pipeline the new component would end up at index 0
    regardless of the ``first`` argument, so the check would prove nothing.
    """
    nlp.add_pipe("other_pipe")
    nlp.add_pipe("new_pipe", name=name, first=True)
    assert nlp.pipeline[0][0] == name
    # The previously added component must have been pushed back, not replaced.
    assert nlp.pipe_names == [name, "other_pipe"]
@pytest.mark.parametrize("name1,name2", [("parser", "lambda_pipe")])
def test_add_pipe_last(nlp, name1, name2):
    """A component added with ``last=True`` ends up at the end of a
    non-empty pipeline."""
    Language.component("new_pipe2", func=lambda doc: doc)
    nlp.add_pipe("new_pipe2", name=name2)
    nlp.add_pipe("new_pipe", name=name1, last=True)
    head_name = nlp.pipeline[0][0]
    assert head_name != name1
    tail_name = nlp.pipeline[-1][0]
    assert tail_name == name1
def test_cant_add_pipe_first_and_last(nlp):
    """Passing ``first=True`` together with ``last=True`` raises ValueError."""
    conflicting = {"first": True, "last": True}
    with pytest.raises(ValueError):
        nlp.add_pipe("new_pipe", **conflicting)
@pytest.mark.parametrize("name", ["test_get_pipe"])
def test_get_pipe(nlp, name):
    """``get_pipe`` raises KeyError for an unknown name and returns the
    component once it has been added."""
    with pytest.raises(KeyError):
        nlp.get_pipe(name)
    nlp.add_pipe("new_pipe", name=name)
    fetched = nlp.get_pipe(name)
    assert fetched == new_pipe
@pytest.mark.parametrize(
    "name,replacement,invalid_replacement",
    [("test_replace_pipe", "other_pipe", lambda doc: doc)],
)
def test_replace_pipe(nlp, name, replacement, invalid_replacement):
    """Exercise ``replace_pipe`` with two rejected calls and one accepted call."""
    # The pipeline holds no component called `name` at this point.
    with pytest.raises(ValueError):
        nlp.replace_pipe(name, new_pipe)

    nlp.add_pipe("new_pipe", name=name)
    # The parametrized invalid replacement is a lambda, not a factory name.
    with pytest.raises(ValueError):
        nlp.replace_pipe(name, invalid_replacement)

    nlp.replace_pipe(name, replacement)
    current = nlp.get_pipe(name)
    expected = nlp.create_pipe(replacement)
    assert current == expected
def test_replace_last_pipe(nlp):
    """Replacing the final component leaves the pipe names unchanged."""
    expected_names = ["sentencizer", "ner"]
    for factory in expected_names:
        nlp.add_pipe(factory)
    assert nlp.pipe_names == expected_names
    nlp.replace_pipe("ner", "ner")
    assert nlp.pipe_names == expected_names
def test_replace_pipe_config(nlp):
    """``replace_pipe`` applies the ``config`` override to the new component."""
    for factory in ("entity_linker", "sentencizer"):
        nlp.add_pipe(factory)
    original = nlp.get_pipe("entity_linker")
    assert original.incl_prior is True

    override = {"incl_prior": False}
    nlp.replace_pipe("entity_linker", "entity_linker", config=override)
    replaced = nlp.get_pipe("entity_linker")
    assert replaced.incl_prior is False
@pytest.mark.parametrize("old_name,new_name", [("old_pipe", "new_pipe")])
def test_rename_pipe(nlp, old_name, new_name):
    """Renaming raises ValueError before the component exists and takes
    effect once it has been added."""
    with pytest.raises(ValueError):
        nlp.rename_pipe(old_name, new_name)
    nlp.add_pipe("new_pipe", name=old_name)
    nlp.rename_pipe(old_name, new_name)
    first_entry = nlp.pipeline[0]
    assert first_entry[0] == new_name
@pytest.mark.parametrize("name", ["my_component"])
def test_remove_pipe(nlp, name):
    """``remove_pipe`` raises ValueError for an unknown name and otherwise
    returns the removed ``(name, component)`` pair."""
    with pytest.raises(ValueError):
        nlp.remove_pipe(name)

    nlp.add_pipe("new_pipe", name=name)
    assert len(nlp.pipeline) == 1

    removed_name, removed_component = nlp.remove_pipe(name)
    assert len(nlp.pipeline) == 0
    assert removed_name == name
    assert removed_component == new_pipe
@pytest.mark.parametrize("name", ["my_component"])
def test_disable_pipes_method(nlp, name):
    """Check that ``select_pipes(disable=...)`` hides a component from the
    active pipeline and that ``restore()`` on the returned object brings
    it back."""
    nlp.add_pipe("new_pipe", name=name)
    assert nlp.has_pipe(name)
    disabled = nlp.select_pipes(disable=name)
    assert not nlp.has_pipe(name)
    disabled.restore()
    # Without this check a no-op `restore()` would go unnoticed.
    assert nlp.has_pipe(name)
@pytest.mark.parametrize("name", ["my_component"])
def test_enable_pipes_method(nlp, name):
    """Check that ``select_pipes(enable=[])`` hides the component from the
    active pipeline and that ``restore()`` on the returned object brings
    it back."""
    nlp.add_pipe("new_pipe", name=name)
    assert nlp.has_pipe(name)
    disabled = nlp.select_pipes(enable=[])
    assert not nlp.has_pipe(name)
    disabled.restore()
    # Without this check a no-op `restore()` would go unnoticed.
    assert nlp.has_pipe(name)
@pytest.mark.parametrize("name", ["my_component"])
def test_disable_pipes_context(nlp, name):
    """Test that an enabled component is enabled again once the
    ``select_pipes`` context manager has exited."""
    nlp.add_pipe("new_pipe", name=name)
    was_active = nlp.has_pipe(name)
    assert was_active
    with nlp.select_pipes(disable=name):
        active_inside = nlp.has_pipe(name)
        assert not active_inside
    active_after = nlp.has_pipe(name)
    assert active_after
@pytest.mark.parametrize("name", ["my_component"])
def test_disable_pipes_context_restore(nlp, name):
    """Test that a component disabled beforehand is still disabled once the
    ``select_pipes`` context manager has exited."""
    nlp.add_pipe("new_pipe", name=name)
    assert nlp.has_pipe(name)

    # Disable explicitly before entering the context manager.
    nlp.disable_pipe(name)
    assert not nlp.has_pipe(name)

    with nlp.select_pipes(disable=name):
        active_inside = nlp.has_pipe(name)
        assert not active_inside
    active_after = nlp.has_pipe(name)
    assert not active_after
def test_select_pipes_list_arg(nlp):
    """Run ``select_pipes`` with string and list arguments and check which
    of the three components are active inside each context."""
    pipe_ids = ["c1", "c2", "c3"]
    for name in pipe_ids:
        nlp.add_pipe("new_pipe", name=name)
        assert nlp.has_pipe(name)

    # (keyword arguments, expected has_pipe() result for c1, c2, c3)
    scenarios = [
        ({"disable": ["c1", "c2"]}, (False, False, True)),
        ({"enable": "c3"}, (False, False, True)),
        ({"enable": ["c1", "c2"], "disable": "c3"}, (True, True, False)),
        ({"enable": []}, (False, False, False)),
        ({"enable": ["c1", "c2", "c3"], "disable": []}, (True, True, True)),
        ({"disable": ["c1", "c2", "c3"], "enable": []}, (False, False, False)),
    ]
    for kwargs, expected_states in scenarios:
        with nlp.select_pipes(**kwargs):
            for pipe_id, expected in zip(pipe_ids, expected_states):
                if expected:
                    assert nlp.has_pipe(pipe_id)
                else:
                    assert not nlp.has_pipe(pipe_id)
def test_select_pipes_errors(nlp):
    """Check the argument combinations for which ``select_pipes`` raises
    ValueError, and that ``restore()`` raises after the component was removed."""
    for name in ["c1", "c2", "c3"]:
        nlp.add_pipe("new_pipe", name=name)
        assert nlp.has_pipe(name)

    # Each of these keyword combinations is expected to be rejected.
    rejected_calls = [
        {},
        {"enable": ["c1", "c2"], "disable": ["c1"]},
        {"enable": ["c1", "c2"], "disable": []},
        {"enable": [], "disable": ["c3"]},
    ]
    for kwargs in rejected_calls:
        with pytest.raises(ValueError):
            nlp.select_pipes(**kwargs)

    # Restoring a component that has been removed in the meantime must fail.
    disabled = nlp.select_pipes(disable=["c2"])
    nlp.remove_pipe("c2")
    with pytest.raises(ValueError):
        disabled.restore()
@pytest.mark.parametrize("n_pipes", [100])
def test_add_lots_of_pipes(nlp, n_pipes):
    """Add ``n_pipes`` copies of one component under distinct names."""
    Language.component("n_pipes", func=lambda doc: doc)
    unique_names = [f"pipe_{i}" for i in range(n_pipes)]
    for unique_name in unique_names:
        nlp.add_pipe("n_pipes", name=unique_name)
    assert len(nlp.pipe_names) == n_pipes
@pytest.mark.parametrize("component", [lambda doc: doc, {"hello": "world"}])
def test_raise_for_invalid_components(nlp, component):
    """``add_pipe`` raises ValueError when given a lambda or a dict."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        nlp.add_pipe(component)
@pytest.mark.parametrize("component", ["ner", "tagger", "parser", "textcat"])
def test_pipe_base_class_add_label(nlp, component):
    """A label passed to ``add_label`` shows up in the component's ``labels``."""
    label = "TEST"
    pipe = nlp.create_pipe(component)
    pipe.add_label(label)
    if component != "tagger":
        assert pipe.labels == (label,)
        return
    # Tagger always has the default coarse-grained label scheme
    assert label in pipe.labels
def test_pipe_labels(nlp):
    """Labels added per component are reported back by ``nlp.pipe_labels``."""
    input_labels = {
        "ner": ["PERSON", "ORG", "GPE"],
        "textcat": ["POSITIVE", "NEGATIVE"],
    }
    for factory, wanted in input_labels.items():
        nlp.add_pipe(factory)
        component = nlp.get_pipe(factory)
        for label in wanted:
            component.add_label(label)
        assert len(component.labels) == len(wanted)

    assert len(nlp.pipe_labels) == len(input_labels)
    # Compare sorted copies so the order of the labels does not matter.
    for factory, reported in nlp.pipe_labels.items():
        assert sorted(input_labels[factory]) == sorted(reported)
def test_add_pipe_before_after():
    """Test that before/after works with strings and ints."""
    nlp = Language()
    nlp.add_pipe("ner")
    # "parser" has not been added yet, so it is not usable as an anchor.
    with pytest.raises(ValueError):
        nlp.add_pipe("textcat", before="parser")
    nlp.add_pipe("textcat", before="ner")
    assert nlp.pipe_names == ["textcat", "ner"]
    # Only two components exist here; the integer anchor 3 must be rejected.
    with pytest.raises(ValueError):
        nlp.add_pipe("parser", before=3)
    with pytest.raises(ValueError):
        nlp.add_pipe("parser", after=3)
    # `after=0` places the component right behind the one at index 0.
    nlp.add_pipe("parser", after=0)
    assert nlp.pipe_names == ["textcat", "parser", "ner"]
    # `before=2` places the component in front of the one at index 2.
    nlp.add_pipe("tagger", before=2)
    assert nlp.pipe_names == ["textcat", "parser", "tagger", "ner"]
    # The remaining argument combinations and values must all raise.
    with pytest.raises(ValueError):
        nlp.add_pipe("entity_ruler", after=1, first=True)
    with pytest.raises(ValueError):
        nlp.add_pipe("entity_ruler", before="ner", after=2)
    with pytest.raises(ValueError):
        nlp.add_pipe("entity_ruler", before=True)
    with pytest.raises(ValueError):
        nlp.add_pipe("entity_ruler", first=False)
def test_disable_enable_pipes():
    """Walk two recording components through disable, enable, remove and
    rename, checking the pipeline properties and config after each step."""
    name = "test_disable_enable_pipes"
    # Maps component name -> text of the last doc that component processed.
    results = {}

    def make_component(name):
        # NOTE: this parameter shadows the outer `name` inside the factory.
        results[name] = ""

        def component(doc):
            nonlocal results
            results[name] = doc.text
            return doc

        return component

    c1 = Language.component(f"{name}1", func=make_component(f"{name}1"))
    c2 = Language.component(f"{name}2", func=make_component(f"{name}2"))
    nlp = Language()
    nlp.add_pipe(f"{name}1")
    nlp.add_pipe(f"{name}2")
    # Nothing has been processed yet, so both slots are still empty.
    assert results[f"{name}1"] == ""
    assert results[f"{name}2"] == ""
    assert nlp.pipeline == [(f"{name}1", c1), (f"{name}2", c2)]
    assert nlp.pipe_names == [f"{name}1", f"{name}2"]
    # Disabling keeps the component in `component_names` but drops it from
    # `pipe_names`, and is mirrored in the config.
    nlp.disable_pipe(f"{name}1")
    assert nlp.disabled == [f"{name}1"]
    assert nlp.component_names == [f"{name}1", f"{name}2"]
    assert nlp.pipe_names == [f"{name}2"]
    assert nlp.config["nlp"]["disabled"] == [f"{name}1"]
    nlp("hello")
    assert results[f"{name}1"] == ""  # didn't run
    assert results[f"{name}2"] == "hello"  # ran
    # Re-enabling makes both components run again.
    nlp.enable_pipe(f"{name}1")
    assert nlp.disabled == []
    assert nlp.pipe_names == [f"{name}1", f"{name}2"]
    assert nlp.config["nlp"]["disabled"] == []
    nlp("world")
    assert results[f"{name}1"] == "world"
    assert results[f"{name}2"] == "world"
    # Removing a disabled component must also clear it from `disabled`.
    nlp.disable_pipe(f"{name}2")
    nlp.remove_pipe(f"{name}2")
    assert nlp.components == [(f"{name}1", c1)]
    assert nlp.pipeline == [(f"{name}1", c1)]
    assert nlp.component_names == [f"{name}1"]
    assert nlp.pipe_names == [f"{name}1"]
    assert nlp.disabled == []
    assert nlp.config["nlp"]["disabled"] == []
    # After the rename the component still writes to its original results key.
    nlp.rename_pipe(f"{name}1", name)
    assert nlp.components == [(name, c1)]
    assert nlp.component_names == [name]
    nlp("!")
    assert results[f"{name}1"] == "!"
    assert results[f"{name}2"] == "world"
    # The second component was removed above, so disabling it must raise.
    with pytest.raises(ValueError):
        nlp.disable_pipe(f"{name}2")
    nlp.disable_pipe(name)
    assert nlp.component_names == [name]
    assert nlp.pipe_names == []
    assert nlp.config["nlp"]["disabled"] == [name]
    nlp("?")
    # The disabled component did not run, so the previous text is still stored.
    assert results[f"{name}1"] == "!"
def test_pipe_methods_frozen():
    """Test that spaCy raises custom error messages if "frozen" properties are
    accessed. We still want to use a list here to not break backwards
    compatibility, but users should see an error if they're trying to append
    to nlp.pipeline etc."""
    nlp = Language()
    ner = nlp.add_pipe("ner")
    assert nlp.pipe_names == ["ner"]

    frozen_props = (
        nlp.pipeline,
        nlp.pipe_names,
        nlp.components,
        nlp.component_names,
        nlp.disabled,
        nlp.factory_names,
    )
    for prop in frozen_props:
        assert isinstance(prop, list)
        assert isinstance(prop, SimpleFrozenList)

    # Each in-place mutation below must raise NotImplementedError.
    forbidden_mutations = (
        lambda: nlp.pipeline.append(("ner2", ner)),
        lambda: nlp.pipe_names.pop(),
        lambda: nlp.components.sort(),
        lambda: nlp.component_names.clear(),
    )
    for mutate in forbidden_mutations:
        with pytest.raises(NotImplementedError):
            mutate()
@pytest.mark.parametrize(
    "pipe", ["tagger", "parser", "ner", "textcat", "morphologizer"]
)
def test_pipe_label_data_exports_labels(pipe):
    """The component exposes ``label_data`` and its ``initialize`` method
    accepts a ``labels`` argument."""
    nlp = Language()
    component = nlp.add_pipe(pipe)
    # Make sure pipe has pipe labels
    label_data = getattr(component, "label_data", None)
    assert label_data is not None
    # Make sure pipe can be initialized with labels
    initialize = getattr(component, "initialize", None)
    assert initialize is not None
    accepted_args = get_arg_names(initialize)
    assert "labels" in accepted_args
@pytest.mark.parametrize("pipe", ["senter", "entity_linker"])
def test_pipe_label_data_no_labels(pipe):
    """The component has no ``label_data`` and, if it defines ``initialize``,
    that method takes no ``labels`` argument."""
    nlp = Language()
    component = nlp.add_pipe(pipe)
    assert getattr(component, "label_data", None) is None
    initialize = getattr(component, "initialize", None)
    if initialize is None:
        return
    assert "labels" not in get_arg_names(initialize)
def test_pipe_methods_initialize():
    """Test that the [initialize] config reflects the components correctly."""
    nlp = Language()
    # Adding a component does not create an [initialize] entry by itself.
    nlp.add_pipe("tagger")
    assert "tagger" not in nlp.config["initialize"]["components"]
    nlp.config["initialize"]["components"]["tagger"] = {"labels": ["hello"]}
    assert nlp.config["initialize"]["components"]["tagger"] == {"labels": ["hello"]}
    # Removing the component drops its [initialize] entry as well ...
    nlp.remove_pipe("tagger")
    assert "tagger" not in nlp.config["initialize"]["components"]
    # ... and adding it again does not bring the old settings back.
    nlp.add_pipe("tagger")
    assert "tagger" not in nlp.config["initialize"]["components"]
    # Renaming moves the settings over to the new name.
    nlp.config["initialize"]["components"]["tagger"] = {"labels": ["hello"]}
    nlp.rename_pipe("tagger", "my_tagger")
    assert "tagger" not in nlp.config["initialize"]["components"]
    assert nlp.config["initialize"]["components"]["my_tagger"] == {"labels": ["hello"]}
    # Settings stored before the component is added are kept by add_pipe and
    # dropped again by remove_pipe.
    nlp.config["initialize"]["components"]["test"] = {"foo": "bar"}
    nlp.add_pipe("ner", name="test")
    assert "test" in nlp.config["initialize"]["components"]
    nlp.remove_pipe("test")
    assert "test" not in nlp.config["initialize"]["components"]
def test_update_with_annotates():
    """Check that ``nlp.update(..., annotates=...)`` runs exactly the listed
    components over the examples."""
    name = "test_with_annotates"
    # Maps component name -> concatenated text of every doc it has processed.
    results = {}

    def make_component(name):
        results[name] = ""

        def component(doc):
            # Item assignment on the enclosing dict; no rebinding is needed.
            results[name] += doc.text
            return doc

        return component

    first, second = f"{name}1", f"{name}2"
    for component_name in (first, second):
        Language.component(component_name, func=make_component(component_name))
    components = {first, second}

    nlp = English()
    examples = [
        Example(nlp.make_doc(text), nlp.make_doc(text))
        for text in ["a", "bb", "ccc"]
    ]

    annotate_variants = ([], [first], [first, second], [second, first])
    for components_to_annotate in annotate_variants:
        # Reset the recorded text before every variant.
        for key in results:
            results[key] = ""
        nlp = English(vocab=nlp.vocab)
        nlp.add_pipe(first)
        nlp.add_pipe(second)
        nlp.update(examples, annotates=components_to_annotate)
        for component in components_to_annotate:
            assert results[component] == "".join(eg.predicted.text for eg in examples)
        for component in components - set(components_to_annotate):
            assert results[component] == ""
@pytest.mark.issue(11443)
def test_enable_disable_conflict_with_config():
    """Test conflict between enable/disable w.r.t. `nlp.disabled` set in the config."""
    nlp = English()
    for factory in ("tagger", "senter", "sentencizer"):
        nlp.add_pipe(factory)

    with make_tempdir() as tmp_dir:
        nlp.to_disk(tmp_dir)

        # Expected to succeed, as config and arguments do not conflict.
        compatible_config = {"nlp": {"disabled": ["senter"]}}
        loaded = spacy.load(tmp_dir, enable=["tagger"], config=compatible_config)
        assert loaded.disabled == ["senter", "sentencizer"]

        # Expected to succeed without warning due to the lack of a conflicting config option.
        spacy.load(tmp_dir, enable=["tagger"])

        # Expected to fail due to conflict between enable and disabled.
        conflicting_config = {"nlp": {"disabled": ["senter", "tagger"]}}
        with pytest.raises(ValueError):
            spacy.load(tmp_dir, enable=["senter"], config=conflicting_config)
def test_load_disable_enable():
    """Tests spacy.load() with dis-/enabling components."""
    to_disable = ["parser", "tagger"]
    to_enable = ["tagger", "parser"]
    single_str = "tagger"

    def disabled_matches_enable(loaded):
        # True when a component is disabled exactly if it is missing from
        # `to_enable`.
        return all(
            (comp_name in loaded.disabled) is (comp_name not in to_enable)
            for comp_name in loaded.component_names
        )

    base_nlp = English()
    for pipe in ("sentencizer", "tagger", "parser"):
        base_nlp.add_pipe(pipe)

    with make_tempdir() as tmp_dir:
        base_nlp.to_disk(tmp_dir)

        # Setting only `disable`.
        nlp = spacy.load(tmp_dir, disable=to_disable)
        assert all(comp_name in nlp.disabled for comp_name in to_disable)

        # Setting only `enable`.
        nlp = spacy.load(tmp_dir, enable=to_enable)
        assert disabled_matches_enable(nlp)

        # Loading with a string representing one component
        nlp = spacy.load(tmp_dir, exclude=single_str)
        assert single_str not in nlp.component_names

        nlp = spacy.load(tmp_dir, disable=single_str)
        assert single_str in nlp.component_names
        assert single_str not in nlp.pipe_names
        assert nlp._disabled == {single_str}
        assert nlp.disabled == [single_str]

        # Testing consistent enable/disable combination.
        not_enabled = [
            comp_name
            for comp_name in nlp.component_names
            if comp_name not in to_enable
        ]
        nlp = spacy.load(tmp_dir, enable=to_enable, disable=not_enabled)
        assert disabled_matches_enable(nlp)

        # Inconsistent enable/disable combination.
        with pytest.raises(ValueError):
            spacy.load(tmp_dir, enable=to_enable, disable=["parser"])