diff --git a/spacy/training/iob_utils.py b/spacy/training/iob_utils.py index 0e8e7eed0..42dae8fc4 100644 --- a/spacy/training/iob_utils.py +++ b/spacy/training/iob_utils.py @@ -71,6 +71,8 @@ def offsets_to_biluo_tags( entities (iterable): A sequence of `(start, end, label)` triples. `start` and `end` should be character-offset integers denoting the slice into the original string. + missing (str): The label used for missing values, e.g. if tokenization + doesn’t align with the entity offsets. Defaults to "O". RETURNS (list): A list of unicode strings, describing the tags. Each tag string will be of the form either "", "O" or "{action}-{label}", where action is one of "B", "I", "L", "U". The missing label is used where the @@ -150,7 +152,7 @@ def biluo_tags_to_spans(doc: Doc, tags: Iterable[str]) -> List[Span]: to overwrite the doc.ents. doc (Doc): The document that the BILUO tags refer to. - entities (iterable): A sequence of BILUO tags with each tag describing one + tags (iterable): A sequence of BILUO tags with each tag describing one token. Each tag string will be of the form of either "", "O" or "{action}-{label}", where action is one of "B", "I", "L", "U". RETURNS (list): A sequence of Span objects. Each token with a missing IOB @@ -170,7 +172,7 @@ def biluo_tags_to_offsets( """Encode per-token tags following the BILUO scheme into entity offsets. doc (Doc): The document that the BILUO tags refer to. - entities (iterable): A sequence of BILUO tags with each tag describing one + tags (iterable): A sequence of BILUO tags with each tag describing one token. Each tags string will be of the form of either "", "O" or "{action}-{label}", where action is one of "B", "I", "L", "U". RETURNS (list): A sequence of `(start, end, label)` triples. `start` and diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index cfaa75bff..8190d9f78 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -879,7 +879,7 @@ This method was previously available as `spacy.gold.offsets_from_biluo_tags`. | Name | Description | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `doc` | The document that the BILUO tags refer to. ~~Doc~~ | -| `entities` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ | +| `tags` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ | | **RETURNS** | A sequence of `(start, end, label)` triples. `start` and `end` will be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, str]]~~ | ### training.biluo_tags_to_spans {#biluo_tags_to_spans tag="function" new="2.1"} @@ -908,7 +908,7 @@ This method was previously available as `spacy.gold.spans_from_biluo_tags`. | Name | Description | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `doc` | The document that the BILUO tags refer to. ~~Doc~~ | -| `entities` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ | +| `tags` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ | | **RETURNS** | A sequence of `Span` objects with added entity labels. ~~List[Span]~~ | ## Utility functions {#util source="spacy/util.py"}