fix renames and simple_ner labels

This commit is contained in:
svlandeg 2020-06-18 15:33:28 +02:00
parent 64fc840a5d
commit 1c71f2310c
2 changed files with 8 additions and 11 deletions

View File

@ -231,8 +231,8 @@ def train(
# check whether the setting 'exclusive_classes' corresponds to the provided training data
if textcat_multilabel:
multilabel_found = False
for ex in corpus.train_examples:
cats = ex.doc_annotation.cats
for eg in corpus.train_annotations:
cats = eg.reference.cats
textcat_labels.update(cats.keys())
if list(cats.values()).count(1.0) != 1:
multilabel_found = True
@ -244,8 +244,8 @@ def train(
"mutually exclusive classes more accurately."
)
else:
for ex in corpus.train_examples:
cats = ex.doc_annotation.cats
for eg in corpus.train_annotations:
cats = eg.reference.cats
textcat_labels.update(cats.keys())
if list(cats.values()).count(1.0) != 1:
msg.fail(
@ -346,10 +346,8 @@ def train(
progress = tqdm.tqdm(total=training["eval_frequency"], leave=False)
# Clean up the objects to facilitate garbage collection.
for eg in batch:
eg.doc = None
eg.goldparse = None
eg.doc_annotation = None
eg.token_annotation = None
eg.reference = None
eg.predicted = None
except Exception as e:
msg.warn(
f"Aborting and saving the final best model. "

View File

@ -143,8 +143,7 @@ def _has_ner(eg):
def _get_labels(examples):
labels = set()
for eg in examples:
for ner_tag in eg.token_annotation.entities:
for ner_tag in eg.get_aligned("ENT_TYPE", as_string=True):
if ner_tag != 'O' and ner_tag != '-':
_, label = ner_tag.split('-', 1)
labels.add(label)
labels.add(ner_tag)
return list(sorted(labels))