fix renames and simple_ner labels

2025-12-22 01:24:15 +03:00 · 2020-06-18 15:33:28 +02:00 · 2020-06-18 15:33:28 +02:00 · 1c71f2310c
commit 1c71f2310c
parent 64fc840a5d
2 changed files with 8 additions and 11 deletions
--- a/spacy/cli/train_from_config.py
+++ b/spacy/cli/train_from_config.py
@@ -231,8 +231,8 @@ def train(
        # check whether the setting 'exclusive_classes' corresponds to the provided training data
        if textcat_multilabel:
            multilabel_found = False
-            for ex in corpus.train_examples:
+            for eg in corpus.train_annotations:
-                cats = ex.doc_annotation.cats
+                cats = eg.reference.cats
                textcat_labels.update(cats.keys())
                if list(cats.values()).count(1.0) != 1:
                    multilabel_found = True
@@ -244,8 +244,8 @@ def train(
                    "mutually exclusive classes more accurately."
                )
        else:
-            for ex in corpus.train_examples:
+            for eg in corpus.train_annotations:
-                cats = ex.doc_annotation.cats
+                cats = eg.reference.cats
                textcat_labels.update(cats.keys())
                if list(cats.values()).count(1.0) != 1:
                    msg.fail(
@@ -346,10 +346,8 @@ def train(
                progress = tqdm.tqdm(total=training["eval_frequency"], leave=False)
            # Clean up the objects to faciliate garbage collection.
            for eg in batch:
-                eg.doc = None
+                eg.reference = None
-                eg.goldparse = None
+                eg.predicted = None
-                eg.doc_annotation = None
-                eg.token_annotation = None
    except Exception as e:
        msg.warn(
            f"Aborting and saving the final best model. "
--- a/spacy/pipeline/simple_ner.py
+++ b/spacy/pipeline/simple_ner.py
@@ -143,8 +143,7 @@ def _has_ner(eg):
 def _get_labels(examples):
    labels = set()
    for eg in examples:
-        for ner_tag in eg.token_annotation.entities:
+        for ner_tag in eg.get_aligned("ENT_TYPE", as_string=True):
            if ner_tag != 'O' and ner_tag != '-':
-                _, label = ner_tag.split('-', 1)
+                labels.add(ner_tag)
-                labels.add(label)
    return list(sorted(labels))