mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-11 09:00:36 +03:00
fix renames and simple_ner labels
This commit is contained in:
parent
64fc840a5d
commit
1c71f2310c
|
@ -231,8 +231,8 @@ def train(
|
|||
# check whether the setting 'exclusive_classes' corresponds to the provided training data
|
||||
if textcat_multilabel:
|
||||
multilabel_found = False
|
||||
for ex in corpus.train_examples:
|
||||
cats = ex.doc_annotation.cats
|
||||
for eg in corpus.train_annotations:
|
||||
cats = eg.reference.cats
|
||||
textcat_labels.update(cats.keys())
|
||||
if list(cats.values()).count(1.0) != 1:
|
||||
multilabel_found = True
|
||||
|
@ -244,8 +244,8 @@ def train(
|
|||
"mutually exclusive classes more accurately."
|
||||
)
|
||||
else:
|
||||
for ex in corpus.train_examples:
|
||||
cats = ex.doc_annotation.cats
|
||||
for eg in corpus.train_annotations:
|
||||
cats = eg.reference.cats
|
||||
textcat_labels.update(cats.keys())
|
||||
if list(cats.values()).count(1.0) != 1:
|
||||
msg.fail(
|
||||
|
@ -346,10 +346,8 @@ def train(
|
|||
progress = tqdm.tqdm(total=training["eval_frequency"], leave=False)
|
||||
# Clean up the objects to facilitate garbage collection.
|
||||
for eg in batch:
|
||||
eg.doc = None
|
||||
eg.goldparse = None
|
||||
eg.doc_annotation = None
|
||||
eg.token_annotation = None
|
||||
eg.reference = None
|
||||
eg.predicted = None
|
||||
except Exception as e:
|
||||
msg.warn(
|
||||
f"Aborting and saving the final best model. "
|
||||
|
|
|
@ -143,8 +143,7 @@ def _has_ner(eg):
|
|||
def _get_labels(examples):
|
||||
labels = set()
|
||||
for eg in examples:
|
||||
for ner_tag in eg.token_annotation.entities:
|
||||
for ner_tag in eg.get_aligned("ENT_TYPE", as_string=True):
|
||||
if ner_tag != 'O' and ner_tag != '-':
|
||||
_, label = ner_tag.split('-', 1)
|
||||
labels.add(label)
|
||||
labels.add(ner_tag)
|
||||
return list(sorted(labels))
|
||||
|
|
Loading…
Reference in New Issue
Block a user