mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-05 12:23:06 +03:00
Add docstrings for simple_ner
This commit is contained in:
parent
7ef8a64df9
commit
da6e59519e
|
@ -14,6 +14,20 @@ from ...util import registry
|
||||||
def BiluoTagger(
|
def BiluoTagger(
|
||||||
tok2vec: Model[List[Doc], List[Floats2d]]
|
tok2vec: Model[List[Doc], List[Floats2d]]
|
||||||
) -> Model[List[Doc], List[Floats2d]]:
|
) -> Model[List[Doc], List[Floats2d]]:
|
||||||
|
"""Construct a simple NER tagger, that predicts BILUO tag scores for each
|
||||||
|
token and uses greedy decoding with transition-constraints to return a valid
|
||||||
|
BILUO tag sequence.
|
||||||
|
|
||||||
|
A BILUO tag sequence encodes a sequence of non-overlapping labelled spans
|
||||||
|
into tags assigned to each token. The first token of a span is given the
|
||||||
|
tag B-LABEL, the last token of the span is given the tag L-LABEL, and tokens
|
||||||
|
within the span are given the tag I-LABEL. Single-token spans are given
|
||||||
|
the tag U-LABEL. All other tokens are assigned the tag O.
|
||||||
|
|
||||||
|
The BILUO tag scheme generally results in better linear separation between
|
||||||
|
classes, especially for non-CRF models, because there are more distinct classes
|
||||||
|
for the different situations (Ratinov et al., 2009).
|
||||||
|
"""
|
||||||
biluo = BILUO()
|
biluo = BILUO()
|
||||||
linear = Linear(
|
linear = Linear(
|
||||||
nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
|
nO=None, nI=tok2vec.get_dim("nO"), init_W=configure_normal_init(mean=0.02)
|
||||||
|
@ -41,6 +55,15 @@ def BiluoTagger(
|
||||||
def IOBTagger(
|
def IOBTagger(
|
||||||
tok2vec: Model[List[Doc], List[Floats2d]]
|
tok2vec: Model[List[Doc], List[Floats2d]]
|
||||||
) -> Model[List[Doc], List[Floats2d]]:
|
) -> Model[List[Doc], List[Floats2d]]:
|
||||||
|
"""Construct a simple NER tagger, that predicts IOB tag scores for each
|
||||||
|
token and uses greedy decoding with transition-constraints to return a valid
|
||||||
|
IOB tag sequence.
|
||||||
|
|
||||||
|
An IOB tag sequence encodes a sequence of non-overlapping labelled spans
|
||||||
|
into tags assigned to each token. The first token of a span is given the
|
||||||
|
tag B-LABEL, and subsequent tokens are given the tag I-LABEL.
|
||||||
|
All other tokens are assigned the tag O.
|
||||||
|
"""
|
||||||
biluo = IOB()
|
biluo = IOB()
|
||||||
linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
|
linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
|
||||||
model = chain(
|
model = chain(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user