Remove simple_ner code (#6041)

* remove simple_ner code * remove unused _biluo and _iob files
2025-11-10 21:07:53 +03:00 · 2020-09-09 16:11:27 +02:00 · 2020-09-09 16:11:27 +02:00 · cb66ea7400
commit cb66ea7400
parent 24053d83ec
9 changed files with 5 additions and 687 deletions
--- a/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg
+++ b/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg
@ -31,10 +31,13 @@ lang = "en"
 vectors = null

 [nlp.pipeline.ner]
-factory = "simple_ner"
+factory = "ner"

 [nlp.pipeline.ner.model]
-@architectures = "spacy.BiluoTagger.v1"
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 6
+hidden_width = 64
+maxout_pieces = 2

 [nlp.pipeline.ner.model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
--- a/spacy/ml/_biluo.py
+++ b/spacy/ml/_biluo.py
@ -1,105 +0,0 @@
-"""Thinc layer to do simpler transition-based parsing, NER, etc."""
-from typing import Dict, Optional
-import numpy
-from thinc.api import Model
-from thinc.types import Padded, Floats3d
-
-
-def BILUO() -> Model[Padded, Padded]:
-    return Model(
-        "biluo",
-        forward,
-        init=init,
-        dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions},
-    )
-
-
-def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
-    if X is not None and Y is not None:
-        if X.data.shape != Y.data.shape:
-            # TODO: Fix error
-            raise ValueError("Mismatched shapes (TODO: Fix message)")
-        model.set_dim("nO", X.data.shape[2])
-    elif X is not None:
-        model.set_dim("nO", X.data.shape[2])
-    elif Y is not None:
-        model.set_dim("nO", Y.data.shape[2])
-    elif model.get_dim("nO") is None:
-        raise ValueError("Dimension unset for BILUO: nO")
-
-
-def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
-    n_labels = (model.get_dim("nO") - 1) // 4
-    n_tokens, n_docs, n_actions = Xp.data.shape
-    # At each timestep, we make a validity mask of shape (n_docs, n_actions)
-    # to indicate which actions are valid next for each sequence. To construct
-    # the mask, we have a state of shape (2, n_actions) and a validity table of
-    # shape (2, n_actions+1, n_actions). The first dimension of the state indicates
-    # whether it's the last token, the second dimension indicates the previous
-    # action, plus a special 'null action' for the first entry.
-    valid_transitions = model.ops.asarray(_get_transition_table(n_labels))
-    prev_actions = model.ops.alloc1i(n_docs)
-    # Initialize as though prev action was O
-    prev_actions.fill(n_actions - 1)
-    Y = model.ops.alloc3f(*Xp.data.shape)
-    masks = model.ops.alloc3f(*Y.shape)
-    max_value = Xp.data.max()
-    for t in range(Xp.data.shape[0]):
-        is_last = (Xp.lengths < (t + 2)).astype("i")
-        masks[t] = valid_transitions[is_last, prev_actions]
-        # Don't train the out-of-bounds sequences.
-        masks[t, Xp.size_at_t[t] :] = 0
-        # Valid actions get 0*10e8, invalid get large negative value
-        Y[t] = Xp.data[t] + ((masks[t] - 1) * max_value * 10)
-        prev_actions = Y[t].argmax(axis=-1)
-
-    def backprop_biluo(dY: Padded) -> Padded:
-        dY.data *= masks
-        return dY
-
-    return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo
-
-
-def get_num_actions(n_labels: int) -> int:
-    # One BEGIN action per label
-    # One IN action per label
-    # One LAST action per label
-    # One UNIT action per label
-    # One OUT action
-    return n_labels + n_labels + n_labels + n_labels + 1
-
-
-def _get_transition_table(
-    n_labels: int, *, _cache: Dict[int, Floats3d] = {}
-) -> Floats3d:
-    n_actions = get_num_actions(n_labels)
-    if n_actions in _cache:
-        return _cache[n_actions]
-    table = numpy.zeros((2, n_actions, n_actions), dtype="f")
-    B_start, B_end = (0, n_labels)
-    I_start, I_end = (B_end, B_end + n_labels)
-    L_start, L_end = (I_end, I_end + n_labels)
-    U_start, _ = (L_end, L_end + n_labels)  # noqa: F841
-    # Using ranges allows us to set specific cells, which is necessary to express
-    # that only actions of the same label are valid continuations.
-    B_range = numpy.arange(B_start, B_end)
-    I_range = numpy.arange(I_start, I_end)
-    L_range = numpy.arange(L_start, L_end)
-    # If this is the last token and the previous action was B or I, only L
-    # of that label is valid
-    table[1, B_range, L_range] = 1
-    table[1, I_range, L_range] = 1
-    # If this isn't the last token and the previous action was B or I, only I or
-    # L of that label are valid.
-    table[0, B_range, I_range] = 1
-    table[0, B_range, L_range] = 1
-    table[0, I_range, I_range] = 1
-    table[0, I_range, L_range] = 1
-    # If this isn't the last token and the previous was L, U or O, B is valid
-    table[0, L_start:, :B_end] = 1
-    # Regardless of whether this is the last token, if the previous action was
-    # {L, U, O}, U and O are valid.
-    table[:, L_start:, U_start:] = 1
-    _cache[n_actions] = table
-    return table
--- a/spacy/ml/_iob.py
+++ b/spacy/ml/_iob.py
@ -1,88 +0,0 @@
-"""Thinc layer to do simpler transition-based parsing, NER, etc."""
-from typing import Dict, Optional
-from thinc.api import Ops, Model
-from thinc.types import Padded, Floats3d
-
-
-def IOB() -> Model[Padded, Padded]:
-    return Model(
-        "biluo",
-        forward,
-        init=init,
-        dims={"nO": None},
-        attrs={"get_num_actions": get_num_actions},
-    )
-
-
-def init(model: Model, X: Optional[Padded] = None, Y: Optional[Padded] = None) -> None:
-    if X is not None and Y is not None:
-        if X.data.shape != Y.data.shape:
-            # TODO: Fix error
-            raise ValueError("Mismatched shapes (TODO: Fix message)")
-        model.set_dim("nO", X.data.shape[2])
-    elif X is not None:
-        model.set_dim("nO", X.data.shape[2])
-    elif Y is not None:
-        model.set_dim("nO", Y.data.shape[2])
-    elif model.get_dim("nO") is None:
-        raise ValueError("Dimension unset for BILUO: nO")
-
-
-def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
-    n_labels = (model.get_dim("nO") - 1) // 2
-    n_tokens, n_docs, n_actions = Xp.data.shape
-    # At each timestep, we make a validity mask of shape (n_docs, n_actions)
-    # to indicate which actions are valid next for each sequence. To construct
-    # the mask, we have a state of shape (2, n_actions) and a validity table of
-    # shape (2, n_actions+1, n_actions). The first dimension of the state indicates
-    # whether it's the last token, the second dimension indicates the previous
-    # action, plus a special 'null action' for the first entry.
-    valid_transitions = _get_transition_table(model.ops, n_labels)
-    prev_actions = model.ops.alloc1i(n_docs)
-    # Initialize as though prev action was O
-    prev_actions.fill(n_actions - 1)
-    Y = model.ops.alloc3f(*Xp.data.shape)
-    masks = model.ops.alloc3f(*Y.shape)
-    for t in range(Xp.data.shape[0]):
-        masks[t] = valid_transitions[prev_actions]
-        # Don't train the out-of-bounds sequences.
-        masks[t, Xp.size_at_t[t] :] = 0
-        # Valid actions get 0*10e8, invalid get -1*10e8
-        Y[t] = Xp.data[t] + ((masks[t] - 1) * 10e8)
-        prev_actions = Y[t].argmax(axis=-1)
-
-    def backprop_biluo(dY: Padded) -> Padded:
-        # Masking the gradient seems to do poorly here. But why?
-        # dY.data *= masks
-        return dY
-
-    return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo
-
-
-def get_num_actions(n_labels: int) -> int:
-    # One BEGIN action per label
-    # One IN action per label
-    # One OUT action
-    return n_labels * 2 + 1
-
-
-def _get_transition_table(
-    ops: Ops, n_labels: int, _cache: Dict[int, Floats3d] = {}
-) -> Floats3d:
-    n_actions = get_num_actions(n_labels)
-    if n_actions in _cache:
-        return ops.asarray(_cache[n_actions])
-    table = ops.alloc2f(n_actions, n_actions)
-    B_start, B_end = (0, n_labels)
-    I_start, I_end = (B_end, B_end + n_labels)
-    O_action = I_end
-    B_range = ops.xp.arange(B_start, B_end)
-    I_range = ops.xp.arange(I_start, I_end)
-    # B and O are always valid
-    table[:, B_start:B_end] = 1
-    table[:, O_action] = 1
-    # I can only follow a matching B
-    table[B_range, I_range] = 1
-
-    _cache[n_actions] = table
-    return table
--- a/spacy/ml/models/init.py
+++ b/spacy/ml/models/init.py
@ -1,6 +1,5 @@
 from .entity_linker import *  # noqa
 from .parser import *  # noqa
-from .simple_ner import *  # noqa
 from .tagger import *  # noqa
 from .textcat import *  # noqa
 from .tok2vec import *  # noqa
--- a/spacy/ml/models/simple_ner.py
+++ b/spacy/ml/models/simple_ner.py
@ -1,104 +0,0 @@
-from typing import List
-from thinc.api import Model, Linear, with_array, softmax_activation, padded2list
-from thinc.api import chain, list2padded, configure_normal_init
-from thinc.api import Dropout
-from thinc.types import Floats2d
-
-from ...tokens import Doc
-from .._biluo import BILUO
-from .._iob import IOB
-from ...util import registry
-
-
-@registry.architectures.register("spacy.BILUOTagger.v1")
-def BiluoTagger(
-    tok2vec: Model[List[Doc], List[Floats2d]]
-) -> Model[List[Doc], List[Floats2d]]:
-    """Construct a simple NER tagger, that predicts BILUO tag scores for each
-    token and uses greedy decoding with transition-constraints to return a valid
-    BILUO tag sequence.
-
-    A BILUO tag sequence encodes a sequence of non-overlapping labelled spans
-    into tags assigned to each token. The first token of a span is given the
-    tag B-LABEL, the last token of the span is given the tag L-LABEL, and tokens
-    within the span are given the tag I-LABEL. Single-token spans are given
-    the tag U-LABEL. All other tokens are assigned the tag O.
-
-    The BILUO tag scheme generally results in better linear separation between
-    classes, especially for non-CRF models, because there are more distinct classes
-    for the different situations (Ratinov et al., 2009).
-    """
-    biluo = BILUO()
-    linear = Linear(
-        nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
-    )
-    model = chain(
-        tok2vec,
-        list2padded(),
-        with_array(chain(Dropout(0.1), linear)),
-        biluo,
-        with_array(softmax_activation()),
-        padded2list(),
-    )
-    return Model(
-        "biluo-tagger",
-        forward,
-        init=init,
-        layers=[model, linear],
-        refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
-        dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
-    )
-
-
-@registry.architectures.register("spacy.IOBTagger.v1")
-def IOBTagger(
-    tok2vec: Model[List[Doc], List[Floats2d]]
-) -> Model[List[Doc], List[Floats2d]]:
-    """Construct a simple NER tagger, that predicts IOB tag scores for each
-    token and uses greedy decoding with transition-constraints to return a valid
-    IOB tag sequence.
-
-    An IOB tag sequence encodes a sequence of non-overlapping labelled spans
-    into tags assigned to each token. The first token of a span is given the
-    tag B-LABEL, and subsequent tokens are given the tag I-LABEL.
-    All other tokens are assigned the tag O.
-    """
-    biluo = IOB()
-    linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
-    model = chain(
-        tok2vec,
-        list2padded(),
-        with_array(linear),
-        biluo,
-        with_array(softmax_activation()),
-        padded2list(),
-    )
-    return Model(
-        "iob-tagger",
-        forward,
-        init=init,
-        layers=[model],
-        refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
-        dims={"nO": None},
-        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
-    )
-
-
-def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None:
-    if model.has_dim("nO") is None and Y:
-        model.set_dim("nO", Y[0].shape[1])
-    nO = model.get_dim("nO")
-    biluo = model.get_ref("biluo")
-    linear = model.get_ref("linear")
-    biluo.set_dim("nO", nO)
-    if linear.has_dim("nO") is None:
-        linear.set_dim("nO", nO)
-    model.layers[0].initialize(X=X, Y=Y)
-
-
-def forward(model: Model, X: List[Doc], is_train: bool):
-    return model.layers[0](X, is_train)
-
-
-__all__ = ["BiluoTagger"]
--- a/spacy/pipeline/init.py
+++ b/spacy/pipeline/init.py
@ -8,7 +8,6 @@ from .morphologizer import Morphologizer
 from .pipe import Pipe
 from .senter import SentenceRecognizer
 from .sentencizer import Sentencizer
-from .simple_ner import SimpleNER
 from .tagger import Tagger
 from .textcat import TextCategorizer
 from .tok2vec import Tok2Vec
@ -25,7 +24,6 @@ __all__ = [
    "Pipe",
    "SentenceRecognizer",
    "Sentencizer",
-    "SimpleNER",
    "Tagger",
    "TextCategorizer",
    "Tok2Vec",
--- a/spacy/pipeline/simple_ner.py
+++ b/spacy/pipeline/simple_ner.py
@ -1,223 +0,0 @@
-from typing import List, Iterable, Optional, Dict, Tuple, Callable, Set
-from thinc.types import Floats2d
-from thinc.api import SequenceCategoricalCrossentropy, set_dropout_rate, Model
-from thinc.api import Optimizer, Config
-from thinc.util import to_numpy
-from itertools import islice
-
-from ..errors import Errors
-from ..training import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob
-from ..training import validate_examples
-from ..tokens import Doc
-from ..language import Language
-from ..vocab import Vocab
-from ..scorer import Scorer
-from .pipe import Pipe
-
-
-default_model_config = """
-[model]
-@architectures = "spacy.BILUOTagger.v1"
-
-[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
-pretrained_vectors = null
-width = 128
-depth = 4
-embed_size = 7000
-window_size = 1
-maxout_pieces = 3
-subword_features = true
-"""
-DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"]
-
-
-@Language.factory(
-    "simple_ner",
-    assigns=["doc.ents"],
-    default_config={"labels": [], "model": DEFAULT_SIMPLE_NER_MODEL},
-    scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
-    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
-)
-def make_simple_ner(
-    nlp: Language, name: str, model: Model, labels: Iterable[str]
-) -> "SimpleNER":
-    return SimpleNER(nlp.vocab, model, name, labels=labels)
-
-
-class SimpleNER(Pipe):
-    """Named entity recognition with a tagging model. The model should include
-    validity constraints to ensure that only valid tag sequences are returned."""
-
-    def __init__(
-        self,
-        vocab: Vocab,
-        model: Model,
-        name: str = "simple_ner",
-        *,
-        labels: Iterable[str],
-    ) -> None:
-        self.vocab = vocab
-        self.model = model
-        self.name = name
-        self.cfg = {"labels": []}
-        for label in labels:
-            self.add_label(label)
-        self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(), normalize=True, missing_value=None
-        )
-        assert self.model is not None
-
-    @property
-    def is_biluo(self) -> bool:
-        return self.model.name.startswith("biluo")
-
-    @property
-    def labels(self) -> Tuple[str]:
-        return tuple(self.cfg["labels"])
-
-    def add_label(self, label: str) -> None:
-        """Add a new label to the pipe.
-        label (str): The label to add.
-        DOCS: https://nightly.spacy.io/api/simplener#add_label
-        """
-        if not isinstance(label, str):
-            raise ValueError(Errors.E187)
-        if label not in self.labels:
-            self.cfg["labels"].append(label)
-            self.vocab.strings.add(label)
-
-    def get_tag_names(self) -> List[str]:
-        if self.is_biluo:
-            return (
-                [f"B-{label}" for label in self.labels]
-                + [f"I-{label}" for label in self.labels]
-                + [f"L-{label}" for label in self.labels]
-                + [f"U-{label}" for label in self.labels]
-                + ["O"]
-            )
-        else:
-            return (
-                [f"B-{label}" for label in self.labels]
-                + [f"I-{label}" for label in self.labels]
-                + ["O"]
-            )
-
-    def predict(self, docs: List[Doc]) -> List[Floats2d]:
-        scores = self.model.predict(docs)
-        return scores
-
-    def set_annotations(self, docs: List[Doc], scores: List[Floats2d]) -> None:
-        """Set entities on a batch of documents from a batch of scores."""
-        tag_names = self.get_tag_names()
-        for i, doc in enumerate(docs):
-            actions = to_numpy(scores[i].argmax(axis=1))
-            tags = [tag_names[actions[j]] for j in range(len(doc))]
-            if not self.is_biluo:
-                tags = iob_to_biluo(tags)
-            doc.ents = spans_from_biluo_tags(doc, tags)
-
-    def update(
-        self,
-        examples: List[Example],
-        *,
-        set_annotations: bool = False,
-        drop: float = 0.0,
-        sgd: Optional[Optimizer] = None,
-        losses: Optional[Dict[str, float]] = None,
-    ) -> Dict[str, float]:
-        if losses is None:
-            losses = {}
-        losses.setdefault("ner", 0.0)
-        validate_examples(examples, "SimpleNER.update")
-        if not any(_has_ner(eg) for eg in examples):
-            return losses
-        docs = [eg.predicted for eg in examples]
-        set_dropout_rate(self.model, drop)
-        scores, bp_scores = self.model.begin_update(docs)
-        loss, d_scores = self.get_loss(examples, scores)
-        bp_scores(d_scores)
-        if set_annotations:
-            self.set_annotations(docs, scores)
-        if sgd is not None:
-            self.model.finish_update(sgd)
-        losses["ner"] += loss
-        return losses
-
-    def get_loss(self, examples: List[Example], scores) -> Tuple[List[Floats2d], float]:
-        validate_examples(examples, "SimpleNER.get_loss")
-        truths = []
-        for eg in examples:
-            tags = eg.get_aligned_ner()
-            gold_tags = [(tag if tag != "-" else None) for tag in tags]
-            if not self.is_biluo:
-                gold_tags = biluo_to_iob(gold_tags)
-            truths.append(gold_tags)
-        for i in range(len(scores)):
-            if len(scores[i]) != len(truths[i]):
-                raise ValueError(
-                    f"Mismatched output and gold sizes.\n"
-                    f"Output: {len(scores[i])}, gold: {len(truths[i])}."
-                    f"Input: {len(examples[i].doc)}"
-                )
-        d_scores, loss = self.loss_func(scores, truths)
-        return loss, d_scores
-
-    def begin_training(
-        self,
-        get_examples: Callable[[], Iterable[Example]],
-        pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None,
-        sgd: Optional[Optimizer] = None,
-    ):
-        self._ensure_examples(get_examples)
-        all_labels = set()
-        for example in get_examples():
-            all_labels.update(_get_labels(example))
-        for label in sorted(all_labels):
-            if label != "":
-                self.add_label(label)
-        doc_sample = []
-        label_sample = []
-        self._require_labels()
-        for example in islice(get_examples(), 10):
-            doc_sample.append(example.x)
-            gold_tags = example.get_aligned_ner()
-            if not self.is_biluo:
-                gold_tags = biluo_to_iob(gold_tags)
-            gold_array = [
-                [1.0 if tag == gold_tag else 0.0 for tag in self.get_tag_names()]
-                for gold_tag in gold_tags
-            ]
-            label_sample.append(self.model.ops.asarray(gold_array, dtype="float32"))
-        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
-        assert len(label_sample) > 0, Errors.E923.format(name=self.name)
-        self.model.initialize(X=doc_sample, Y=label_sample)
-        if pipeline is not None:
-            self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
-        self.loss_func = SequenceCategoricalCrossentropy(
-            names=self.get_tag_names(), normalize=True, missing_value=None
-        )
-        return sgd
-
-    def init_multitask_objectives(self, *args, **kwargs):
-        pass
-
-    def score(self, examples, **kwargs):
-        validate_examples(examples, "SimpleNER.score")
-        return Scorer.score_spans(examples, "ents", **kwargs)
-
-
-def _has_ner(example: Example) -> bool:
-    for ner_tag in example.get_aligned_ner():
-        if ner_tag != "-" and ner_tag is not None:
-            return True
-    else:
-        return False
-
-
-def _get_labels(example: Example) -> Set[str]:
-    labels = set()
-    for ner_tag in example.get_aligned("ENT_TYPE", as_string=True):
-        if ner_tag != "O" and ner_tag != "-" and ner_tag != "":
-            labels.add(ner_tag)
-    return labels
--- a/spacy/tests/pipeline/test_simple_ner.py
+++ b/spacy/tests/pipeline/test_simple_ner.py
@ -1,106 +0,0 @@
-import pytest
-from spacy.lang.en import English
-from spacy.training import Example
-from spacy import util
-from ..util import make_tempdir
-
-
-TRAIN_DATA = [
-    ("Who is Shaka S Khan?", {"entities": [(7, 19, "PERSON")]}),
-    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
-]
-
-
-def test_no_label():
-    nlp = English()
-    nlp.add_pipe("simple_ner")
-    with pytest.raises(ValueError):
-        nlp.begin_training()
-
-
-def test_implicit_label():
-    nlp = English()
-    ner = nlp.add_pipe("simple_ner")
-    train_examples = []
-    ner.add_label("ORG")
-    for t in TRAIN_DATA:
-        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
-    nlp.begin_training(get_examples=lambda: train_examples)
-
-
-@pytest.mark.skip(reason="Should be fixed")
-def test_untrained():
-    # This shouldn't crash, but it does when the simple_ner produces an invalid sequence like ['L-PERSON', 'L-ORG']
-    nlp = English()
-    ner = nlp.add_pipe("simple_ner")
-    ner.add_label("PERSON")
-    ner.add_label("LOC")
-    ner.add_label("ORG")
-    nlp.begin_training()
-    nlp("Example sentence")
-
-
-def test_resize():
-    nlp = English()
-    ner = nlp.add_pipe("simple_ner")
-    ner.add_label("PERSON")
-    ner.add_label("LOC")
-    nlp.begin_training()
-    assert len(ner.labels) == 2
-    ner.add_label("ORG")
-    nlp.begin_training()
-    assert len(ner.labels) == 3
-
-
-def test_begin_training_examples():
-    nlp = English()
-    ner = nlp.add_pipe("simple_ner")
-    train_examples = []
-    for text, annotations in TRAIN_DATA:
-        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
-        for ent in annotations.get("entities"):
-            ner.add_label(ent[2])
-    # you shouldn't really call this more than once, but for testing it should be fine
-    nlp.begin_training()
-    nlp.begin_training(get_examples=lambda: train_examples)
-    with pytest.raises(TypeError):
-        nlp.begin_training(get_examples=lambda: None)
-    with pytest.raises(TypeError):
-        nlp.begin_training(get_examples=lambda: train_examples[0])
-    with pytest.raises(ValueError):
-        nlp.begin_training(get_examples=lambda: [])
-    with pytest.raises(ValueError):
-        nlp.begin_training(get_examples=train_examples)
-
-
-def test_overfitting_IO():
-    # Simple test to try and quickly overfit the SimpleNER component - ensuring the ML models work correctly
-    nlp = English()
-    ner = nlp.add_pipe("simple_ner")
-    train_examples = []
-    for text, annotations in TRAIN_DATA:
-        train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
-    optimizer = nlp.begin_training(get_examples=lambda: train_examples)
-
-    for i in range(50):
-        losses = {}
-        nlp.update(train_examples, sgd=optimizer, losses=losses)
-    assert losses["ner"] < 0.0001
-
-    # test the trained model
-    test_text = "I like London."
-    doc = nlp(test_text)
-    ents = doc.ents
-    assert len(ents) == 1
-    assert ents[0].text == "London"
-    assert ents[0].label_ == "LOC"
-
-    # Also test the results are still the same after IO
-    with make_tempdir() as tmp_dir:
-        nlp.to_disk(tmp_dir)
-        nlp2 = util.load_model_from_path(tmp_dir)
-        doc2 = nlp2(test_text)
-        ents2 = doc2.ents
-        assert len(ents2) == 1
-        assert ents2[0].text == "London"
-        assert ents2[0].label_ == "LOC"
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@ -456,62 +456,6 @@ consists of either two or three subnetworks:
 | `nO`                | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~                                                                                                                                                                                                                             |
 | **CREATES**         | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~                                                                                                                                                                                                                                                                                           |

-### spacy.BILUOTagger.v1 {#BILUOTagger source="spacy/ml/models/simple_ner.py"}
-
-> #### Example Config
->
-> ```ini
-> [model]
-> @architectures = "spacy.BILUOTagger.v1"
->
-> [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
-> # etc.
-> ```
-
-Construct a simple NER tagger that predicts
-[BILUO](/usage/linguistic-features#accessing-ner) tag scores for each token and
-uses greedy decoding with transition-constraints to return a valid BILUO tag
-sequence. A BILUO tag sequence encodes a sequence of non-overlapping labelled
-spans into tags assigned to each token. The first token of a span is given the
-tag `B-LABEL`, the last token of the span is given the tag `L-LABEL`, and tokens
-within the span are given the tag `I-LABEL`. Single-token spans are given the
-tag `U-LABEL`. All other tokens are assigned the tag `O`. The BILUO tag scheme
-generally results in better linear separation between classes, especially for
-non-CRF models, because there are more distinct classes for the different
-situations ([Ratinov et al., 2009](https://www.aclweb.org/anthology/W09-1119/)).
-
-| Name        | Description                                                                                |
-| ----------- | ------------------------------------------------------------------------------------------ |
-| `tok2vec`   | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
-| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                     |
-
-### spacy.IOBTagger.v1 {#IOBTagger source="spacy/ml/models/simple_ner.py"}
-
-> #### Example Config
->
-> ```ini
-> [model]
-> @architectures = "spacy.IOBTagger.v1"
->
-> [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v1"
-> # etc.
-> ```
-
-Construct a simple NER tagger that predicts
-[IOB](/usage/linguistic-features#accessing-ner) tag scores for each token and
-uses greedy decoding with transition-constraints to return a valid IOB tag
-sequence. An IOB tag sequence encodes a sequence of non-overlapping labeled
-spans into tags assigned to each token. The first token of a span is given the
-tag B-LABEL, and subsequent tokens are given the tag I-LABEL. All other tokens
-are assigned the tag O.
-
-| Name        | Description                                                                                |
-| ----------- | ------------------------------------------------------------------------------------------ |
-| `tok2vec`   | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
-| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                     |
-
 ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}

 ### spacy.Tagger.v1 {#Tagger}