mirror of https://github.com/explosion/spaCy.git synced 2025-04-15 14:42:00 +03:00

correct silly typo

svlandeg 2020-06-17 14:48:14 +02:00
parent f6c451b650
commit 1a151b10d6
11 changed files with 32 additions and 28 deletions


@@ -148,7 +148,7 @@ def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None):
sentence = {}
tokens = []
token_annotation = example_dict["token_annotation"]
for i, id_ in enumerate(["ids"]):
for i, id_ in enumerate(token_annotation["ids"]):
token = {}
token["id"] = id_
token["orth"] = token_annotation["words"][i]

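The "silly typo" fixed above is easy to miss: enumerate(["ids"]) iterates over the literal one-element list ["ids"], so the loop body runs exactly once with id_ == "ids" rather than once per token id. A standalone illustration in plain Python (data invented, not from the repo):

token_annotation = {"ids": [0, 1, 2], "words": ["This", "is", "fine"]}

# Buggy form: enumerates the literal list ["ids"], giving a single iteration.
assert list(enumerate(["ids"])) == [(0, "ids")]

# Fixed form: enumerates the actual token ids from the annotation dict.
assert list(enumerate(token_annotation["ids"])) == [(0, 0), (1, 1), (2, 2)]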

@@ -485,12 +485,12 @@ def _compile_gold(examples, pipeline, nlp):
"texts": set(),
}
for example in examples:
gold = example.gold
doc = example.doc
valid_words = [x for x in gold.words if x is not None]
gold = example.reference
doc = example.predicted
valid_words = [x for x in gold if x is not None]
data["words"].update(valid_words)
data["n_words"] += len(valid_words)
data["n_misaligned_words"] += len(gold.words) - len(valid_words)
data["n_misaligned_words"] += len(gold) - len(valid_words)
data["texts"].add(doc.text)
if len(nlp.vocab.vectors):
for word in valid_words:
@@ -545,10 +545,10 @@ def _format_labels(labels, counts=False):
def _get_examples_without_label(data, label):
count = 0
for ex in data:
for eg in data:
labels = [
label.split("-")[1]
for label in ex.gold.ner
for label in eg.gold.ner
if label not in ("O", "-", None)
]
if label not in labels:

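For orientation: in this branch an Example pairs two Doc objects, predicted and reference, instead of a Doc plus a GoldParse, which is why the stats code above now reads example.reference and example.predicted. A minimal sketch of that pairing, using the two-argument constructor visible further down in this diff; the import path follows the test files below, and the words are invented:

from spacy.gold import Example
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
predicted = Doc(vocab, words=["New", "York", "is", "big"])
reference = Doc(vocab, words=["New", "York", "is", "big"])

# An Example holds the pipeline-facing Doc and the gold-standard reference Doc.
example = Example(predicted, reference)

# The gold side is just another Doc, so iterating it yields Token objects.
assert [token.text for token in example.reference] == ["New", "York", "is", "big"]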

@@ -56,8 +56,10 @@ cdef class Example:
if "ORTH" not in tok_dict:
tok_dict["ORTH"] = [tok.text for tok in predicted]
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
print("added ORTH and SPACY to the tok_dict")
if "SPACY" not in tok_dict:
tok_dict["SPACY"] = None
print("added SPACY to the tok_dict")
return Example(
predicted,
annotations2doc(predicted.vocab, tok_dict, doc_dict)
@@ -75,13 +77,15 @@ cdef class Example:
def get_aligned(self, field, as_string=False):
"""Return an aligned array for a token attribute."""
# TODO: This is probably wrong. I just bashed this out and there's probably
# all sorts of edge-cases.
alignment = self.alignment
i2j_multi = alignment.i2j_multi
j2i_multi = alignment.j2i_multi
gold_to_cand = alignment.gold_to_cand
cand_to_gold = alignment.cand_to_gold
print("i2j_multi", i2j_multi)
print("j2i_multi", j2i_multi)
print("gold_to_cand", gold_to_cand)
print("cand_to_gold", cand_to_gold)
vocab = self.reference.vocab
gold_values = self.reference.to_array([field])
@@ -97,6 +101,7 @@ cdef class Example:
else:
output[i] = gold_values[gold_i]
print("output before:" , output)
if field in ["ENT_IOB"]:
# Fix many-to-one IOB codes
prev_j = -1
@@ -111,17 +116,23 @@ cdef class Example:
prev_j = -1
prev_value = value
print("output in between:" , output)
if field in ["ENT_IOB", "ENT_TYPE"]:
# Assign one-to-many NER tags
for j, cand_j in enumerate(gold_to_cand):
print()
print("j", j)
print("cand_j", cand_j)
if cand_j is None:
if j in j2i_multi:
i = j2i_multi[j]
if output[i] is None:
output[i] = gold_values[j]
print("output final:" , output)
if as_string:
output = [vocab.strings[o] if o is not None else o for o in output]
print("output as string:" , output)
return output
def to_dict(self):

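The get_aligned changes above are about projecting gold-standard values onto the predicted tokenization when the two tokenizations differ. A rough, self-contained illustration of the one-to-many case handled through j2i_multi and gold_to_cand; the variable names follow the diff, but the data is invented and the logic is simplified (in the real method this second pass is only applied to the NER fields):

# Candidate token 0 ("New York") spans gold tokens 0 and 1.
cand_words = ["New York", "is", "big"]
gold_words = ["New", "York", "is", "big"]
gold_values = ["PROPN", "PROPN", "AUX", "ADJ"]  # e.g. gold POS tags

cand_to_gold = [None, 2, 3]        # no single gold token matches candidate 0
gold_to_cand = [None, None, 1, 2]  # gold 0 and 1 have no one-to-one candidate
j2i_multi = {0: 0, 1: 0}           # ...but both fall inside candidate 0

# One-to-one alignments first, as at the top of get_aligned().
output = [None] * len(cand_words)
for i, gold_i in enumerate(cand_to_gold):
    if gold_i is not None:
        output[i] = gold_values[gold_i]

# Then fill unaligned gold tokens through the multi-token map, keeping only the
# first value that lands on a still-empty candidate slot.
for j, cand_j in enumerate(gold_to_cand):
    if cand_j is None and j in j2i_multi and output[j2i_multi[j]] is None:
        output[j2i_multi[j]] = gold_values[j]

assert output == ["PROPN", "AUX", "ADJ"]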

@@ -1,7 +1,6 @@
import pytest
from thinc.api import Adam, NumpyOps
from spacy.attrs import NORM
from spacy.gold import GoldParse
from spacy.vocab import Vocab
from spacy.pipeline.defaults import default_parser, default_ner


@@ -4,7 +4,6 @@ from spacy.vocab import Vocab
from spacy.pipeline.defaults import default_parser
from spacy.pipeline import DependencyParser
from spacy.tokens import Doc
from spacy.gold import GoldParse
from spacy.syntax.nonproj import projectivize
from spacy.syntax.stateclass import StateClass
from spacy.syntax.arc_eager import ArcEager


@@ -5,7 +5,6 @@ from spacy.pipeline.defaults import default_ner
from spacy.pipeline import EntityRecognizer, EntityRuler
from spacy.vocab import Vocab
from spacy.syntax.ner import BiluoPushDown
from spacy.gold import GoldParse
from spacy.tokens import Doc
from ..util import make_tempdir


@@ -4,7 +4,6 @@ from spacy.vocab import Vocab
from spacy.syntax.arc_eager import ArcEager
from spacy.syntax.nn_parser import Parser
from spacy.tokens.doc import Doc
from spacy.gold import GoldParse
from thinc.api import Model


@@ -6,9 +6,7 @@ from spacy.pipeline.defaults import default_parser
from spacy.pipeline import DependencyParser
from spacy.syntax.arc_eager import ArcEager
from spacy.tokens import Doc
from spacy.syntax._beam_utils import ParserBeam
from spacy.syntax.stateclass import StateClass
from spacy.gold import GoldParse
@pytest.fixture


@@ -7,7 +7,6 @@ from spacy.lang.en import English
from spacy.language import Language
from spacy.pipeline import TextCategorizer
from spacy.tokens import Doc
from spacy.gold import GoldParse
from spacy.util import fix_random_seed
from ..util import make_tempdir


@@ -3,7 +3,7 @@ import gc
import numpy
import copy
from spacy.gold import Example, TokenAnnotation
from spacy.gold import Example
from spacy.lang.en import English
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.lex_attrs import is_stop
@@ -268,20 +268,21 @@ def test_issue1963(en_tokenizer):
assert doc.tensor.shape == (3, 128)
# TODO: fix
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
def test_issue1967(label):
config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
ner = EntityRecognizer(Vocab(), default_ner(), **config)
example = Example(
doc=Doc(ner.vocab, words=["word"]),
token_annotation=TokenAnnotation(
ids=[0],
words=["word"],
tags=["tag"],
heads=[0],
deps=["dep"],
entities=[label]
)
example = Example.from_dict(
Doc(ner.vocab, words=["word"]),
{
"ids": [0],
"words": ["word"],
"tags": ["tag"],
"heads": [0],
"deps": ["dep"],
"entities": [label]
}
)
ner.moves.get_actions(gold_parses=[example])

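The rewritten test above also shows the new construction path: gold annotations are passed as a plain dict to Example.from_dict instead of being wrapped in a TokenAnnotation object. A minimal sketch of what that produces, assuming partial annotation dicts are accepted here the way they are in released spaCy v3; the word and tag are invented:

from spacy.gold import Example
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
predicted = Doc(vocab, words=["word"])

# The plain dict replaces the old TokenAnnotation object.
example = Example.from_dict(predicted, {"words": ["word"], "tags": ["NN"]})

# The annotations end up on the reference Doc rather than on a GoldParse.
assert example.reference[0].tag_ == "NN"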

@@ -1,5 +1,4 @@
import pytest
from spacy.gold import GoldParse
@pytest.mark.parametrize(