Add docstrings for simple_ner

This commit is contained in:
Matthew Honnibal 2020-08-07 15:09:49 +02:00
parent 7ef8a64df9
commit da6e59519e

View File

@ -14,6 +14,20 @@ from ...util import registry
def BiluoTagger(
tok2vec: Model[List[Doc], List[Floats2d]]
) -> Model[List[Doc], List[Floats2d]]:
"""Construct a simple NER tagger that predicts BILUO tag scores for each
token and uses greedy decoding with transition-constraints to return a valid
BILUO tag sequence.
A BILUO tag sequence encodes a sequence of non-overlapping labelled spans
into tags assigned to each token. The first token of a span is given the
tag B-LABEL, the last token of the span is given the tag L-LABEL, and tokens
within the span are given the tag I-LABEL. Single-token spans are given
the tag U-LABEL. All other tokens are assigned the tag O.
The BILUO tag scheme generally results in better linear separation between
classes, especially for non-CRF models, because there are more distinct classes
for the different situations (Ratinov et al., 2009).
"""
biluo = BILUO()
linear = Linear(
nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
@ -41,6 +55,15 @@ def BiluoTagger(
def IOBTagger(
tok2vec: Model[List[Doc], List[Floats2d]]
) -> Model[List[Doc], List[Floats2d]]:
"""Construct a simple NER tagger that predicts IOB tag scores for each
token and uses greedy decoding with transition-constraints to return a valid
IOB tag sequence.
An IOB tag sequence encodes a sequence of non-overlapping labelled spans
into tags assigned to each token. The first token of a span is given the
tag B-LABEL, and subsequent tokens are given the tag I-LABEL.
All other tokens are assigned the tag O.
"""
biluo = IOB()
linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
model = chain(