Remove simple_ner code (#6041)

* remove simple_ner code * remove unused _biluo and _iob files
2025-11-10 21:07:53 +03:00 · 2020-09-09 16:11:27 +02:00 · 2020-09-09 16:11:27 +02:00 · cb66ea7400
commit cb66ea7400
parent 24053d83ec
9 changed files with 5 additions and 687 deletions
--- a/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg
+++ b/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg
@ -31,10 +31,13 @@ lang = "en"
 vectors = null
 [nlp.pipeline.ner]
-factory = "simple_ner"
+factory = "ner"
 [nlp.pipeline.ner.model]
-@architectures = "spacy.BiluoTagger.v1"
+@architectures = "spacy.TransitionBasedParser.v1"
 nr_feature_tokens = 6
 hidden_width = 64
 maxout_pieces = 2
 [nlp.pipeline.ner.model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
--- a/spacy/ml/_biluo.py
+++ b/spacy/ml/_biluo.py
@ -1,105 +0,0 @@
 """Thinc layer to do simpler transition-based parsing, NER, etc."""
 from typing import Dict, Optional
 import numpy
 from thinc.api import Model
 from thinc.types import Padded, Floats3d
 def BILUO() -> Model[Padded, Padded]:
    """Create the BILUO constraint layer.

    RETURNS: A thinc Model named "biluo" that uses this module's `forward` and
        `init`, has an unset "nO" dimension and exposes `get_num_actions`
        through its attrs.
    """
    layer_dims = {"nO": None}
    layer_attrs = {"get_num_actions": get_num_actions}
    return Model("biluo", forward, init=init, dims=layer_dims, attrs=layer_attrs)
 def init(model, X: Optional[Padded] = None, Y: Optional[Padded] = None):
    """Set the layer's "nO" dimension from sample data.

    model: The BILUO layer being initialized.
    X: Optional sample input; the size of its last data axis becomes nO.
    Y: Optional sample output; used for nO when X is not given.
    RAISES: ValueError if X and Y are both given with different data shapes,
        or if neither is given and nO has not been set yet.
    """
    if X is not None and Y is not None and X.data.shape != Y.data.shape:
        raise ValueError(
            f"Mismatched shapes for BILUO: X has {X.data.shape}, "
            f"Y has {Y.data.shape}"
        )
    sample = X if X is not None else Y
    if sample is not None:
        model.set_dim("nO", sample.data.shape[2])
    # has_dim() is falsy when the dimension is unset, whereas get_dim() raises
    # in that case, so has_dim() is the call that can actually reach this
    # error message.
    elif not model.has_dim("nO"):
        raise ValueError("Dimension unset for BILUO: nO")
 def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
    """Constrain padded action scores so greedy decoding follows BILUO rules.

    model: The layer; its "nO" dimension gives the number of actions.
    Xp: Padded scores whose data unpacks as (n_tokens, n_docs, n_actions).
    is_train: Not read by this function.
    RETURNS: A (Padded, callback) pair. The Padded holds the masked scores; the
        callback multiplies the incoming gradient by the per-step masks.
    """
    n_labels = (model.get_dim("nO") - 1) // 4
    n_tokens, n_docs, n_actions = Xp.data.shape
    # At each timestep, we make a validity mask of shape (n_docs, n_actions)
    # to indicate which actions are valid next for each sequence. The mask is
    # read from a validity table of shape (2, n_actions, n_actions): the first
    # index says whether this is the last token, the second is the previous
    # action and the third is the candidate next action.
    valid_transitions = model.ops.asarray(_get_transition_table(n_labels))
    prev_actions = model.ops.alloc1i(n_docs)
    # Initialize as though prev action was O
    prev_actions.fill(n_actions - 1)
    Y = model.ops.alloc3f(*Xp.data.shape)
    masks = model.ops.alloc3f(*Y.shape)
    max_value = Xp.data.max()
    for t in range(Xp.data.shape[0]):
        is_last = (Xp.lengths < (t + 2)).astype("i")
        masks[t] = valid_transitions[is_last, prev_actions]
        # Don't train the out-of-bounds sequences.
        masks[t, Xp.size_at_t[t] :] = 0
        # Valid actions (mask 1) keep their score; invalid ones (mask 0) are
        # shifted by -10 * max_value.
        # NOTE(review): that shift is only a penalty when max_value > 0 --
        # confirm the incoming scores cannot all be <= 0.
        Y[t] = Xp.data[t] + ((masks[t] - 1) * max_value * 10)
        # Greedy decoding: the best-scoring action selects the next mask row.
        prev_actions = Y[t].argmax(axis=-1)
    def backprop_biluo(dY: Padded) -> Padded:
        # Scale the gradient by the masks (0 for invalid or padded positions).
        dY.data *= masks
        return dY
    return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo
 def get_num_actions(n_labels: int) -> int:
    """Return the size of the BILUO action space for `n_labels` labels."""
    # BEGIN, IN, LAST and UNIT each contribute one action per label; OUT is a
    # single action shared by all labels.
    per_label_kinds = 4
    return per_label_kinds * n_labels + 1
 def _get_transition_table(
    n_labels: int, *, _cache: Dict[int, Floats3d] = {}
 ) -> Floats3d:
    """Build (or fetch from cache) the BILUO validity table.

    n_labels: Number of entity labels.
    _cache: Memo of finished tables keyed by action count. The mutable default
        is what makes the cache persist between calls.
    RETURNS: A float array of shape (2, n_actions, n_actions), indexed as
        [is_last_token, previous_action, next_action]; 1 marks a valid move.
    """
    n_actions = get_num_actions(n_labels)
    if n_actions in _cache:
        return _cache[n_actions]
    table = numpy.zeros((2, n_actions, n_actions), dtype="f")
    # Actions are laid out as contiguous groups: all B, then I, L, U, then O.
    B_start, B_end = (0, n_labels)
    I_start, I_end = (B_end, B_end + n_labels)
    L_start, L_end = (I_end, I_end + n_labels)
    U_start, _ = (L_end, L_end + n_labels)  # noqa: F841
    # Using ranges allows us to set specific cells, which is necessary to express
    # that only actions of the same label are valid continuations.
    B_range = numpy.arange(B_start, B_end)
    I_range = numpy.arange(I_start, I_end)
    L_range = numpy.arange(L_start, L_end)
    # If this is the last token and the previous action was B or I, only L
    # of that label is valid
    table[1, B_range, L_range] = 1
    table[1, I_range, L_range] = 1
    # If this isn't the last token and the previous action was B or I, only I or
    # L of that label are valid.
    table[0, B_range, I_range] = 1
    table[0, B_range, L_range] = 1
    table[0, I_range, I_range] = 1
    table[0, I_range, L_range] = 1
    # If this isn't the last token and the previous was L, U or O, B is valid
    table[0, L_start:, :B_end] = 1
    # Regardless of whether this is the last token, if the previous action was
    # {L, U, O}, U and O are valid.
    table[:, L_start:, U_start:] = 1
    _cache[n_actions] = table
    return table
--- a/spacy/ml/_iob.py
+++ b/spacy/ml/_iob.py
@ -1,88 +0,0 @@
 """Thinc layer to do simpler transition-based parsing, NER, etc."""
 from typing import Dict, Optional
 from thinc.api import Ops, Model
 from thinc.types import Padded, Floats3d
 def IOB() -> Model[Padded, Padded]:
    """Create the IOB constraint layer.

    RETURNS: A thinc Model that uses this module's `forward` and `init`, has
        an unset "nO" dimension and exposes `get_num_actions` through its
        attrs.
    """
    # NOTE(review): the model name is "biluo" although this is the IOB layer;
    # it looks copied from the BILUO module. Kept as-is here -- confirm
    # nothing keys on the name before renaming.
    layer_dims = {"nO": None}
    layer_attrs = {"get_num_actions": get_num_actions}
    return Model("biluo", forward, init=init, dims=layer_dims, attrs=layer_attrs)
 def init(model: Model, X: Optional[Padded] = None, Y: Optional[Padded] = None) -> None:
    """Set the layer's "nO" dimension from sample data.

    model: The IOB layer being initialized.
    X: Optional sample input; the size of its last data axis becomes nO.
    Y: Optional sample output; used for nO when X is not given.
    RAISES: ValueError if X and Y are both given with different data shapes,
        or if neither is given and nO has not been set yet.
    """
    if X is not None and Y is not None and X.data.shape != Y.data.shape:
        raise ValueError(
            f"Mismatched shapes for IOB: X has {X.data.shape}, "
            f"Y has {Y.data.shape}"
        )
    sample = X if X is not None else Y
    if sample is not None:
        model.set_dim("nO", sample.data.shape[2])
    # has_dim() is falsy when the dimension is unset, whereas get_dim() raises
    # in that case, so has_dim() is the call that can actually reach this
    # error message.
    elif not model.has_dim("nO"):
        raise ValueError("Dimension unset for IOB: nO")
 def forward(model: Model[Padded, Padded], Xp: Padded, is_train: bool):
    """Constrain padded action scores so greedy decoding follows IOB rules.

    model: The layer; its "nO" dimension gives the number of actions.
    Xp: Padded scores whose data unpacks as (n_tokens, n_docs, n_actions).
    is_train: Not read by this function.
    RETURNS: A (Padded, callback) pair. The Padded holds the masked scores; the
        callback returns the incoming gradient unchanged.
    """
    n_labels = (model.get_dim("nO") - 1) // 2
    n_tokens, n_docs, n_actions = Xp.data.shape
    # At each timestep, we make a validity mask of shape (n_docs, n_actions)
    # to indicate which actions are valid next for each sequence. The mask is
    # read from a validity table indexed by the previous action; its rows
    # mark which next actions are allowed. Unlike the BILUO layer there is
    # no "is this the last token" axis.
    valid_transitions = _get_transition_table(model.ops, n_labels)
    prev_actions = model.ops.alloc1i(n_docs)
    # Initialize as though prev action was O
    prev_actions.fill(n_actions - 1)
    Y = model.ops.alloc3f(*Xp.data.shape)
    masks = model.ops.alloc3f(*Y.shape)
    for t in range(Xp.data.shape[0]):
        masks[t] = valid_transitions[prev_actions]
        # Don't train the out-of-bounds sequences.
        masks[t, Xp.size_at_t[t] :] = 0
        # Valid actions get 0*10e8, invalid get -1*10e8
        Y[t] = Xp.data[t] + ((masks[t] - 1) * 10e8)
        # Greedy decoding: the best-scoring action selects the next mask row.
        prev_actions = Y[t].argmax(axis=-1)
    def backprop_biluo(dY: Padded) -> Padded:
        # Masking the gradient seems to do poorly here. But why?
        # dY.data *= masks
        return dY
    return Padded(Y, Xp.size_at_t, Xp.lengths, Xp.indices), backprop_biluo
 def get_num_actions(n_labels: int) -> int:
    """Return the size of the IOB action space for `n_labels` labels."""
    # BEGIN and IN each contribute one action per label; OUT is a single
    # action shared by all labels.
    per_label_kinds = 2
    return per_label_kinds * n_labels + 1
 def _get_transition_table(
    ops: Ops, n_labels: int, _cache: Dict[int, Floats3d] = {}
 ) -> Floats3d:
    """Build (or fetch from cache) the IOB validity table.

    ops: Backend used to allocate the table and build index ranges.
    n_labels: Number of entity labels.
    _cache: Memo of finished tables keyed by action count. The mutable default
        is what makes the cache persist between calls.
    RETURNS: A 2-D float table of shape (n_actions, n_actions), indexed as
        [previous_action, next_action]; 1 marks a valid move. (The Floats3d
        annotation is inherited from the original signature.)
    """
    n_actions = get_num_actions(n_labels)
    if n_actions in _cache:
        return ops.asarray(_cache[n_actions])
    table = ops.alloc2f(n_actions, n_actions)
    # Actions are laid out as all B actions, then all I actions, then O.
    B_start, B_end = (0, n_labels)
    I_start, I_end = (B_end, B_end + n_labels)
    O_action = I_end
    B_range = ops.xp.arange(B_start, B_end)
    I_range = ops.xp.arange(I_start, I_end)
    # B and O are always valid
    table[:, B_start:B_end] = 1
    table[:, O_action] = 1
    # I can follow a B of the same label ...
    table[B_range, I_range] = 1
    # ... or an I of the same label. Without this, no entity longer than two
    # tokens could ever be a valid sequence.
    table[I_range, I_range] = 1
    _cache[n_actions] = table
    return table
--- a/spacy/ml/models/init.py
+++ b/spacy/ml/models/init.py
@ -1,6 +1,5 @@
 from .entity_linker import *  # noqa
 from .parser import *  # noqa
 from .simple_ner import *  # noqa
 from .tagger import *  # noqa
 from .textcat import *  # noqa
 from .tok2vec import *  # noqa
--- a/spacy/ml/models/simple_ner.py
+++ b/spacy/ml/models/simple_ner.py
@ -1,104 +0,0 @@
 from typing import List
 from thinc.api import Model, Linear, with_array, softmax_activation, padded2list
 from thinc.api import chain, list2padded, configure_normal_init
 from thinc.api import Dropout
 from thinc.types import Floats2d
 from ...tokens import Doc
 from .._biluo import BILUO
 from .._iob import IOB
 from ...util import registry
@registry.architectures.register("spacy.BILUOTagger.v1")
 def BiluoTagger(
    tok2vec: Model[List[Doc], List[Floats2d]]
 ) -> Model[List[Doc], List[Floats2d]]:
    """Construct a simple NER tagger, that predicts BILUO tag scores for each
    token and uses greedy decoding with transition-constraints to return a valid
    BILUO tag sequence.
    A BILUO tag sequence encodes a sequence of non-overlapping labelled spans
    into tags assigned to each token. The first token of a span is given the
    tag B-LABEL, the last token of the span is given the tag L-LABEL, and tokens
    within the span are given the tag I-LABEL. Single-token spans are given
    the tag U-LABEL. All other tokens are assigned the tag O.
    The BILUO tag scheme generally results in better linear separation between
    classes, especially for non-CRF models, because there are more distinct classes
    for the different situations (Ratinov et al., 2009).

    tok2vec: Subnetwork mapping docs to per-token vectors; its "nO" dimension
        is used as the input width of the linear output layer.
    RETURNS: A wrapper Model named "biluo-tagger" with refs "tok2vec",
        "linear" and "biluo", and an unset "nO" dimension.
    """
    biluo = BILUO()
    # The output width (nO) is left unset here; the wrapper's init sets it.
    linear = Linear(
        nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
    )
    # Pipeline: embed -> pad -> dropout + linear scores -> BILUO constraints
    # -> softmax -> back to one array per doc.
    model = chain(
        tok2vec,
        list2padded(),
        with_array(chain(Dropout(0.1), linear)),
        biluo,
        with_array(softmax_activation()),
        padded2list(),
    )
    # NOTE(review): `linear` is listed in `layers` as well as being part of
    # `model`, while IOBTagger lists only `model` -- confirm which is intended.
    return Model(
        "biluo-tagger",
        forward,
        init=init,
        layers=[model, linear],
        refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
        dims={"nO": None},
        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
    )
@registry.architectures.register("spacy.IOBTagger.v1")
 def IOBTagger(
    tok2vec: Model[List[Doc], List[Floats2d]]
 ) -> Model[List[Doc], List[Floats2d]]:
    """Construct a simple NER tagger, that predicts IOB tag scores for each
    token and uses greedy decoding with transition-constraints to return a valid
    IOB tag sequence.
    An IOB tag sequence encodes a sequence of non-overlapping labelled spans
    into tags assigned to each token. The first token of a span is given the
    tag B-LABEL, and subsequent tokens are given the tag I-LABEL.
    All other tokens are assigned the tag O.

    tok2vec: Subnetwork mapping docs to per-token vectors; its "nO" dimension
        is used as the input width of the linear output layer.
    RETURNS: A wrapper Model named "iob-tagger" with refs "tok2vec", "linear"
        and "biluo", and an unset "nO" dimension.
    """
    # The constraint layer is stored under the name/ref "biluo" even though it
    # is the IOB layer; the shared init looks it up by that ref name.
    biluo = IOB()
    # The output width (nO) is left unset here; the wrapper's init sets it.
    linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
    # Pipeline: embed -> pad -> linear scores -> IOB constraints -> softmax
    # -> back to one array per doc.
    model = chain(
        tok2vec,
        list2padded(),
        with_array(linear),
        biluo,
        with_array(softmax_activation()),
        padded2list(),
    )
    return Model(
        "iob-tagger",
        forward,
        init=init,
        layers=[model],
        refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
        dims={"nO": None},
        attrs={"get_num_actions": biluo.attrs["get_num_actions"]},
    )
 def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None:
    """Propagate the output size to the sub-layers and initialize them.

    model: The tagger wrapper holding the "biluo" and "linear" refs.
    X: Optional sample inputs, forwarded to the wrapped network.
    Y: Optional sample outputs; when the wrapper's nO is unset, the second
        axis of the first item supplies it.
    """
    output_dim_unset = model.has_dim("nO") is None
    if output_dim_unset and Y:
        model.set_dim("nO", Y[0].shape[1])
    n_out = model.get_dim("nO")
    constraint_layer = model.get_ref("biluo")
    output_layer = model.get_ref("linear")
    constraint_layer.set_dim("nO", n_out)
    # Only fill in the linear layer's width if it has not been set already.
    if output_layer.has_dim("nO") is None:
        output_layer.set_dim("nO", n_out)
    wrapped_network = model.layers[0]
    wrapped_network.initialize(X=X, Y=Y)
 def forward(model: Model, X: List[Doc], is_train: bool):
    """Delegate the forward pass to the wrapped network in `model.layers[0]`."""
    wrapped_network = model.layers[0]
    return wrapped_network(X, is_train)
 # Public architectures of this module; both registered taggers are exported.
 __all__ = ["BiluoTagger", "IOBTagger"]
--- a/spacy/pipeline/init.py
+++ b/spacy/pipeline/init.py
@ -8,7 +8,6 @@ from .morphologizer import Morphologizer
 from .pipe import Pipe
 from .senter import SentenceRecognizer
 from .sentencizer import Sentencizer
 from .simple_ner import SimpleNER
 from .tagger import Tagger
 from .textcat import TextCategorizer
 from .tok2vec import Tok2Vec
@ -25,7 +24,6 @@ __all__ = [
    "Pipe",
    "SentenceRecognizer",
    "Sentencizer",
    "SimpleNER",
    "Tagger",
    "TextCategorizer",
    "Tok2Vec",
--- a/spacy/pipeline/simple_ner.py
+++ b/spacy/pipeline/simple_ner.py
@ -1,223 +0,0 @@
 from typing import List, Iterable, Optional, Dict, Tuple, Callable, Set
 from thinc.types import Floats2d
 from thinc.api import SequenceCategoricalCrossentropy, set_dropout_rate, Model
 from thinc.api import Optimizer, Config
 from thinc.util import to_numpy
 from itertools import islice
 from ..errors import Errors
 from ..training import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob
 from ..training import validate_examples
 from ..tokens import Doc
 from ..language import Language
 from ..vocab import Vocab
 from ..scorer import Scorer
 from .pipe import Pipe
 # Default model settings for the "simple_ner" factory, written as a config
 # string and parsed below; only its [model] section is used.
 default_model_config = """
 [model]
@architectures = "spacy.BILUOTagger.v1"
 [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
 pretrained_vectors = null
 width = 128
 depth = 4
 embed_size = 7000
 window_size = 1
 maxout_pieces = 3
 subword_features = true
 """
 DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"]
@Language.factory(
    "simple_ner",
    assigns=["doc.ents"],
    default_config={"labels": [], "model": DEFAULT_SIMPLE_NER_MODEL},
    scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0},
 )
 def make_simple_ner(
    nlp: Language, name: str, model: Model, labels: Iterable[str]
 ) -> "SimpleNER":
    """Factory for the "simple_ner" component.

    nlp: The pipeline object; only its vocab is passed on.
    name: The component instance name.
    model: The tagging model to use.
    labels: Labels to add to the component on construction.
    RETURNS: The constructed SimpleNER component.
    """
    return SimpleNER(nlp.vocab, model, name, labels=labels)
 class SimpleNER(Pipe):
    """Named entity recognition with a tagging model. The model should include
    validity constraints to ensure that only valid tag sequences are returned."""
    def __init__(
        self,
        vocab: Vocab,
        model: Model,
        name: str = "simple_ner",
        *,
        labels: Iterable[str],
    ) -> None:
        """Store the vocab, model and name, register `labels` and build the
        loss function from the resulting tag names."""
        self.vocab = vocab
        self.model = model
        self.name = name
        self.cfg = {"labels": []}
        for label in labels:
            self.add_label(label)
        self.loss_func = SequenceCategoricalCrossentropy(
            names=self.get_tag_names(), normalize=True, missing_value=None
        )
        assert self.model is not None
    @property
    def is_biluo(self) -> bool:
        """Whether the model's name starts with "biluo"; this selects between
        the BILUO and IOB tag sets throughout the class."""
        return self.model.name.startswith("biluo")
    @property
    def labels(self) -> Tuple[str, ...]:
        """The labels added so far, as an immutable tuple."""
        return tuple(self.cfg["labels"])
    def add_label(self, label: str) -> None:
        """Add a new label to the pipe.
        label (str): The label to add.
        DOCS: https://nightly.spacy.io/api/simplener#add_label
        """
        if not isinstance(label, str):
            raise ValueError(Errors.E187)
        if label not in self.labels:
            self.cfg["labels"].append(label)
            self.vocab.strings.add(label)
    def get_tag_names(self) -> List[str]:
        """Return the tag names in action order: all B- tags, then I- (then
        L- and U- for BILUO), followed by a single "O"."""
        if self.is_biluo:
            return (
                [f"B-{label}" for label in self.labels]
                + [f"I-{label}" for label in self.labels]
                + [f"L-{label}" for label in self.labels]
                + [f"U-{label}" for label in self.labels]
                + ["O"]
            )
        else:
            return (
                [f"B-{label}" for label in self.labels]
                + [f"I-{label}" for label in self.labels]
                + ["O"]
            )
    def predict(self, docs: List[Doc]) -> List[Floats2d]:
        """Return the model's scores for a batch of docs, without modifying
        the docs."""
        scores = self.model.predict(docs)
        return scores
    def set_annotations(self, docs: List[Doc], scores: List[Floats2d]) -> None:
        """Set entities on a batch of documents from a batch of scores."""
        tag_names = self.get_tag_names()
        for i, doc in enumerate(docs):
            # Highest-scoring action per token, converted to its tag name.
            actions = to_numpy(scores[i].argmax(axis=1))
            tags = [tag_names[actions[j]] for j in range(len(doc))]
            if not self.is_biluo:
                tags = iob_to_biluo(tags)
            doc.ents = spans_from_biluo_tags(doc, tags)
    def update(
        self,
        examples: List[Example],
        *,
        set_annotations: bool = False,
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None,
    ) -> Dict[str, float]:
        """Run one training step on a batch of examples.

        examples: The batch to learn from.
        set_annotations: Whether to write the predictions onto the docs.
        drop: The dropout rate to set on the model.
        sgd: Optimizer; when given, the model's update is finished with it.
        losses: Optional dict to accumulate the loss into.
        RETURNS: The losses dict, with this step's loss added under "ner".
        """
        if losses is None:
            losses = {}
        # NOTE(review): the loss key is the literal "ner", not self.name.
        losses.setdefault("ner", 0.0)
        validate_examples(examples, "SimpleNER.update")
        # Nothing to learn from if no example carries NER annotation.
        if not any(_has_ner(eg) for eg in examples):
            return losses
        docs = [eg.predicted for eg in examples]
        set_dropout_rate(self.model, drop)
        scores, bp_scores = self.model.begin_update(docs)
        loss, d_scores = self.get_loss(examples, scores)
        bp_scores(d_scores)
        if set_annotations:
            self.set_annotations(docs, scores)
        if sgd is not None:
            self.model.finish_update(sgd)
        losses["ner"] += loss
        return losses
    def get_loss(self, examples: List[Example], scores) -> Tuple[float, List[Floats2d]]:
        """Compute the loss and the gradient of the scores for a batch.

        examples: The batch providing the gold tags.
        scores: The model's scores, one item per example.
        RETURNS: A (loss, d_scores) tuple -- loss first, gradient second.
        RAISES: ValueError if a score item and its gold tags differ in length.
        """
        validate_examples(examples, "SimpleNER.get_loss")
        truths = []
        for eg in examples:
            tags = eg.get_aligned_ner()
            # "-" tags are replaced by None before being given to the loss.
            gold_tags = [(tag if tag != "-" else None) for tag in tags]
            if not self.is_biluo:
                gold_tags = biluo_to_iob(gold_tags)
            truths.append(gold_tags)
        for i in range(len(scores)):
            if len(scores[i]) != len(truths[i]):
                # NOTE(review): the last two message parts are concatenated
                # without a separator ("...gold: N.Input: M").
                raise ValueError(
                    f"Mismatched output and gold sizes.\n"
                    f"Output: {len(scores[i])}, gold: {len(truths[i])}."
                    f"Input: {len(examples[i].doc)}"
                )
        # The loss function yields (gradient, loss); this method returns them
        # in the opposite order.
        d_scores, loss = self.loss_func(scores, truths)
        return loss, d_scores
    def begin_training(
        self,
        get_examples: Callable[[], Iterable[Example]],
        pipeline: Optional[List[Tuple[str, Callable[[Doc], Doc]]]] = None,
        sgd: Optional[Optimizer] = None,
    ):
        """Collect labels from the data, initialize the model on a small
        sample and rebuild the loss function.

        get_examples: Callable returning the training examples.
        pipeline: Optional list of pipeline components; when given, it is
            passed to `init_multitask_objectives`.
        sgd: Optional optimizer; returned unchanged.
        RETURNS: The `sgd` argument.
        """
        self._ensure_examples(get_examples)
        all_labels = set()
        for example in get_examples():
            all_labels.update(_get_labels(example))
        # Sorted so labels are added in a deterministic order.
        for label in sorted(all_labels):
            if label != "":
                self.add_label(label)
        doc_sample = []
        label_sample = []
        self._require_labels()
        # At most the first 10 examples are used for shape inference.
        for example in islice(get_examples(), 10):
            doc_sample.append(example.x)
            gold_tags = example.get_aligned_ner()
            if not self.is_biluo:
                gold_tags = biluo_to_iob(gold_tags)
            # One-hot row per token over the tag names.
            gold_array = [
                [1.0 if tag == gold_tag else 0.0 for tag in self.get_tag_names()]
                for gold_tag in gold_tags
            ]
            label_sample.append(self.model.ops.asarray(gold_array, dtype="float32"))
        # NOTE(review): these checks are asserts, so they are skipped when
        # Python runs with -O.
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
        assert len(label_sample) > 0, Errors.E923.format(name=self.name)
        self.model.initialize(X=doc_sample, Y=label_sample)
        if pipeline is not None:
            self.init_multitask_objectives(get_examples, pipeline, sgd=sgd, **self.cfg)
        # Rebuilt because labels may have been added since __init__.
        self.loss_func = SequenceCategoricalCrossentropy(
            names=self.get_tag_names(), normalize=True, missing_value=None
        )
        return sgd
    def init_multitask_objectives(self, *args, **kwargs):
        """No-op: accepts any arguments and does nothing."""
        pass
    def score(self, examples, **kwargs):
        """Validate the examples and score their "ents" spans with
        `Scorer.score_spans`."""
        validate_examples(examples, "SimpleNER.score")
        return Scorer.score_spans(examples, "ents", **kwargs)
 def _has_ner(example: Example) -> bool:
    """Return True if any aligned NER tag is neither "-" nor None."""
    return any(
        tag != "-" and tag is not None for tag in example.get_aligned_ner()
    )
 def _get_labels(example: Example) -> Set[str]:
    """Collect the distinct aligned ENT_TYPE strings of an example, leaving
    out the placeholder values "O", "-" and ""."""
    placeholders = ("O", "-", "")
    aligned_types = example.get_aligned("ENT_TYPE", as_string=True)
    return {ent_type for ent_type in aligned_types if ent_type not in placeholders}
--- a/spacy/tests/pipeline/test_simple_ner.py
+++ b/spacy/tests/pipeline/test_simple_ner.py
@ -1,106 +0,0 @@
 import pytest
 from spacy.lang.en import English
 from spacy.training import Example
 from spacy import util
 from ..util import make_tempdir
 # Training fixtures: (text, annotations) pairs, where each entity is a
 # (start_char, end_char, label) triple.
 TRAIN_DATA = [
    ("Who is Shaka S Khan?", {"entities": [(7, 19, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
 ]
 def test_no_label():
    """Starting training with no labels on the component raises ValueError."""
    pipeline = English()
    pipeline.add_pipe("simple_ner")
    with pytest.raises(ValueError):
        pipeline.begin_training()
 def test_implicit_label():
    """Training starts when the data contains labels beyond the one that was
    added by hand."""
    pipeline = English()
    component = pipeline.add_pipe("simple_ner")
    component.add_label("ORG")
    examples = [
        Example.from_dict(pipeline.make_doc(item[0]), item[1])
        for item in TRAIN_DATA
    ]
    pipeline.begin_training(get_examples=lambda: examples)
@pytest.mark.skip(reason="Should be fixed")
 def test_untrained():
    # This shouldn't crash, but it does when the simple_ner produces an invalid sequence like ['L-PERSON', 'L-ORG']
    nlp = English()
    ner = nlp.add_pipe("simple_ner")
    ner.add_label("PERSON")
    ner.add_label("LOC")
    ner.add_label("ORG")
    nlp.begin_training()
    nlp("Example sentence")
 def test_resize():
    """A label added after training has started is picked up when training is
    started again."""
    pipeline = English()
    component = pipeline.add_pipe("simple_ner")
    for label in ("PERSON", "LOC"):
        component.add_label(label)
    pipeline.begin_training()
    assert len(component.labels) == 2
    component.add_label("ORG")
    pipeline.begin_training()
    assert len(component.labels) == 3
 def test_begin_training_examples():
    """begin_training accepts a callable returning examples and rejects other
    kinds of `get_examples` arguments."""
    pipeline = English()
    component = pipeline.add_pipe("simple_ner")
    examples = []
    for text, annotations in TRAIN_DATA:
        examples.append(Example.from_dict(pipeline.make_doc(text), annotations))
        for entity in annotations.get("entities"):
            component.add_label(entity[2])
    # Calling begin_training more than once is not recommended in general, but
    # is fine for the purpose of this test.
    pipeline.begin_training()
    pipeline.begin_training(get_examples=lambda: examples)
    # Callables that return something other than an iterable of examples.
    with pytest.raises(TypeError):
        pipeline.begin_training(get_examples=lambda: None)
    with pytest.raises(TypeError):
        pipeline.begin_training(get_examples=lambda: examples[0])
    # An empty batch, and a plain list passed instead of a callable.
    with pytest.raises(ValueError):
        pipeline.begin_training(get_examples=lambda: [])
    with pytest.raises(ValueError):
        pipeline.begin_training(get_examples=examples)
 def test_overfitting_IO():
    """Overfit the SimpleNER component on the tiny training set, then check
    that predictions survive a save/load round trip."""
    pipeline = English()
    pipeline.add_pipe("simple_ner")
    examples = [
        Example.from_dict(pipeline.make_doc(text), annotations)
        for text, annotations in TRAIN_DATA
    ]
    optimizer = pipeline.begin_training(get_examples=lambda: examples)
    for _ in range(50):
        losses = {}
        pipeline.update(examples, sgd=optimizer, losses=losses)
    # Only the loss of the final update is checked.
    assert losses["ner"] < 0.0001
    # Check the trained model's prediction.
    test_text = "I like London."
    predicted_ents = pipeline(test_text).ents
    assert len(predicted_ents) == 1
    assert predicted_ents[0].text == "London"
    assert predicted_ents[0].label_ == "LOC"
    # The prediction must be unchanged after saving and reloading.
    with make_tempdir() as tmp_dir:
        pipeline.to_disk(tmp_dir)
        reloaded = util.load_model_from_path(tmp_dir)
        reloaded_ents = reloaded(test_text).ents
        assert len(reloaded_ents) == 1
        assert reloaded_ents[0].text == "London"
        assert reloaded_ents[0].label_ == "LOC"
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@ -456,62 +456,6 @@ consists of either two or three subnetworks:
 | `nO`                | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~                                                                                                                                                                                                                             |
 | **CREATES**         | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~                                                                                                                                                                                                                                                                                           |
 ### spacy.BILUOTagger.v1 {#BILUOTagger source="spacy/ml/models/simple_ner.py"}
 > #### Example Config
 >
 > ```ini
 > [model]
 > @architectures = "spacy.BILUOTagger.v1"
 >
 > [model.tok2vec]
 > @architectures = "spacy.HashEmbedCNN.v1"
 > # etc.
 > ```
 Construct a simple NER tagger that predicts
 [BILUO](/usage/linguistic-features#accessing-ner) tag scores for each token and
 uses greedy decoding with transition-constraints to return a valid BILUO tag
 sequence. A BILUO tag sequence encodes a sequence of non-overlapping labelled
 spans into tags assigned to each token. The first token of a span is given the
 tag `B-LABEL`, the last token of the span is given the tag `L-LABEL`, and tokens
 within the span are given the tag `I-LABEL`. Single-token spans are given the
 tag `U-LABEL`. All other tokens are assigned the tag `O`. The BILUO tag scheme
 generally results in better linear separation between classes, especially for
 non-CRF models, because there are more distinct classes for the different
 situations ([Ratinov et al., 2009](https://www.aclweb.org/anthology/W09-1119/)).
 | Name        | Description                                                                                |
 | ----------- | ------------------------------------------------------------------------------------------ |
 | `tok2vec`   | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                     |
 ### spacy.IOBTagger.v1 {#IOBTagger source="spacy/ml/models/simple_ner.py"}
 > #### Example Config
 >
 > ```ini
 > [model]
 > @architectures = "spacy.IOBTagger.v1"
 >
 > [model.tok2vec]
 > @architectures = "spacy.HashEmbedCNN.v1"
 > # etc.
 > ```
 Construct a simple NER tagger, that predicts
 [IOB](/usage/linguistic-features#accessing-ner) tag scores for each token and
 uses greedy decoding with transition-constraints to return a valid IOB tag
 sequence. An IOB tag sequence encodes a sequence of non-overlapping labeled
 spans into tags assigned to each token. The first token of a span is given the
 tag B-LABEL, and subsequent tokens are given the tag I-LABEL. All other tokens
 are assigned the tag O.
 | Name        | Description                                                                                |
 | ----------- | ------------------------------------------------------------------------------------------ |
 | `tok2vec`   | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
 | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                     |
 ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
 ### spacy.Tagger.v1 {#Tagger}