mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 00:50:33 +03:00
Improve handling of missing values in NER
This commit is contained in:
parent
b8c85e593b
commit
6bda23ad26
|
@@ -91,6 +91,10 @@ def biluo_tags_from_offsets(doc, entities, missing="O"):
|
|||
biluo = ["-" for _ in doc]
|
||||
# Handle entity cases
|
||||
for start_char, end_char, label in entities:
|
||||
if not label:
|
||||
if start_char in starts:
|
||||
biluo[starts[start_char]] = "O"
|
||||
else:
|
||||
for token_index in range(start_char, end_char):
|
||||
if token_index in tokens_in_ents.keys():
|
||||
raise ValueError(
|
||||
|
@@ -127,7 +131,7 @@ def biluo_tags_from_offsets(doc, entities, missing="O"):
|
|||
break
|
||||
else:
|
||||
biluo[token.i] = missing
|
||||
if "-" in biluo:
|
||||
if "-" in biluo and missing != "-":
|
||||
ent_str = str(entities)
|
||||
warnings.warn(
|
||||
Warnings.W030.format(
|
||||
|
|
Loading…
Reference in New Issue
Block a user