From 1a151b10d64b25e7acc03b065234c38b316e2462 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Wed, 17 Jun 2020 14:48:14 +0200 Subject: [PATCH] correct silly typo --- spacy/cli/converters/conllu2json.py | 2 +- spacy/cli/debug_data.py | 12 +++++----- spacy/gold/example.pyx | 15 ++++++++++-- spacy/tests/parser/test_add_label.py | 1 - spacy/tests/parser/test_arc_eager_oracle.py | 1 - spacy/tests/parser/test_ner.py | 1 - spacy/tests/parser/test_neural_parser.py | 1 - spacy/tests/parser/test_nn_beam.py | 2 -- spacy/tests/pipeline/test_textcat.py | 1 - spacy/tests/regression/test_issue1501-2000.py | 23 ++++++++++--------- spacy/tests/regression/test_issue4529.py | 1 - 11 files changed, 32 insertions(+), 28 deletions(-) diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py index 088cedafc..25ca1d4eb 100644 --- a/spacy/cli/converters/conllu2json.py +++ b/spacy/cli/converters/conllu2json.py @@ -148,7 +148,7 @@ def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None): sentence = {} tokens = [] token_annotation = example_dict["token_annotation"] - for i, id_ in enumerate(["ids"]): + for i, id_ in enumerate(token_annotation["ids"]): token = {} token["id"] = id_ token["orth"] = token_annotation["words"][i] diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 21f49956d..c86408170 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -485,12 +485,12 @@ def _compile_gold(examples, pipeline, nlp): "texts": set(), } for example in examples: - gold = example.gold - doc = example.doc - valid_words = [x for x in gold.words if x is not None] + gold = example.reference + doc = example.predicted + valid_words = [x for x in gold if x is not None] data["words"].update(valid_words) data["n_words"] += len(valid_words) - data["n_misaligned_words"] += len(gold.words) - len(valid_words) + data["n_misaligned_words"] += len(gold) - len(valid_words) data["texts"].add(doc.text) if len(nlp.vocab.vectors): for word in valid_words: @@ -545,10 +545,10 @@ def _format_labels(labels, counts=False): def _get_examples_without_label(data, label): count = 0 - for ex in data: + for eg in data: labels = [ label.split("-")[1] - for label in ex.gold.ner + for label in eg.gold.ner if label not in ("O", "-", None) ] if label not in labels: diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index 4c84c066b..92b9beb0f 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -56,8 +56,10 @@ cdef class Example: if "ORTH" not in tok_dict: tok_dict["ORTH"] = [tok.text for tok in predicted] tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted] + print("added ORTH and SPACY to the tok_dict") if "SPACY" not in tok_dict: tok_dict["SPACY"] = None + print("added SPACY to the tok_dict") return Example( predicted, annotations2doc(predicted.vocab, tok_dict, doc_dict) @@ -75,13 +77,15 @@ cdef class Example: def get_aligned(self, field, as_string=False): """Return an aligned array for a token attribute.""" - # TODO: This is probably wrong. I just bashed this out and there's probably - # all sorts of edge-cases. alignment = self.alignment i2j_multi = alignment.i2j_multi j2i_multi = alignment.j2i_multi gold_to_cand = alignment.gold_to_cand cand_to_gold = alignment.cand_to_gold + print("i2j_multi", i2j_multi) + print("j2i_multi", j2i_multi) + print("gold_to_cand", gold_to_cand) + print("cand_to_gold", cand_to_gold) vocab = self.reference.vocab gold_values = self.reference.to_array([field]) @@ -97,6 +101,7 @@ cdef class Example: else: output[i] = gold_values[gold_i] + print("output before:" , output) if field in ["ENT_IOB"]: # Fix many-to-one IOB codes prev_j = -1 @@ -111,17 +116,23 @@ cdef class Example: prev_j = -1 prev_value = value + print("output in between:" , output) if field in ["ENT_IOB", "ENT_TYPE"]: # Assign one-to-many NER tags for j, cand_j in enumerate(gold_to_cand): + print() + print("j", j) + print("cand_j", cand_j) if cand_j is None: if j in j2i_multi: i = j2i_multi[j] if output[i] is None: output[i] = gold_values[j] + print("output final:" , output) if as_string: output = [vocab.strings[o] if o is not None else o for o in output] + print("output as string:" , output) return output def to_dict(self): diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py index 54a57bf98..1a9e4a1a1 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -1,7 +1,6 @@ import pytest from thinc.api import Adam, NumpyOps from spacy.attrs import NORM -from spacy.gold import GoldParse from spacy.vocab import Vocab from spacy.pipeline.defaults import default_parser, default_ner diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py index 5d265261f..4e4c15761 100644 --- a/spacy/tests/parser/test_arc_eager_oracle.py +++ b/spacy/tests/parser/test_arc_eager_oracle.py @@ -4,7 +4,6 @@ from spacy.vocab import Vocab from spacy.pipeline.defaults import default_parser from spacy.pipeline import DependencyParser from spacy.tokens import Doc -from spacy.gold import GoldParse from spacy.syntax.nonproj import projectivize from spacy.syntax.stateclass import StateClass from spacy.syntax.arc_eager import ArcEager diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index b0a8109dc..1d89495ea 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -5,7 +5,6 @@ from spacy.pipeline.defaults import default_ner from spacy.pipeline import EntityRecognizer, EntityRuler from spacy.vocab import Vocab from spacy.syntax.ner import BiluoPushDown -from spacy.gold import GoldParse from spacy.tokens import Doc from ..util import make_tempdir diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py index ecf0dc13d..8e9cb84a4 100644 --- a/spacy/tests/parser/test_neural_parser.py +++ b/spacy/tests/parser/test_neural_parser.py @@ -4,7 +4,6 @@ from spacy.vocab import Vocab from spacy.syntax.arc_eager import ArcEager from spacy.syntax.nn_parser import Parser from spacy.tokens.doc import Doc -from spacy.gold import GoldParse from thinc.api import Model diff --git a/spacy/tests/parser/test_nn_beam.py b/spacy/tests/parser/test_nn_beam.py index fa5d59f9e..ef5854198 100644 --- a/spacy/tests/parser/test_nn_beam.py +++ b/spacy/tests/parser/test_nn_beam.py @@ -6,9 +6,7 @@ from spacy.pipeline.defaults import default_parser from spacy.pipeline import DependencyParser from spacy.syntax.arc_eager import ArcEager from spacy.tokens import Doc -from spacy.syntax._beam_utils import ParserBeam from spacy.syntax.stateclass import StateClass -from spacy.gold import GoldParse @pytest.fixture diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 179659597..fc54cc5b5 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -7,7 +7,6 @@ from spacy.lang.en import English from spacy.language import Language from spacy.pipeline import TextCategorizer from spacy.tokens import Doc -from spacy.gold import GoldParse from spacy.util import fix_random_seed from ..util import make_tempdir diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index 09a343b66..bb89be711 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -3,7 +3,7 @@ import gc import numpy import copy -from spacy.gold import Example, TokenAnnotation +from spacy.gold import Example from spacy.lang.en import English from spacy.lang.en.stop_words import STOP_WORDS from spacy.lang.lex_attrs import is_stop @@ -268,20 +268,21 @@ def test_issue1963(en_tokenizer): assert doc.tensor.shape == (3, 128) +# TODO: fix @pytest.mark.parametrize("label", ["U-JOB-NAME"]) def test_issue1967(label): config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0} ner = EntityRecognizer(Vocab(), default_ner(), **config) - example = Example( - doc=Doc(ner.vocab, words=["word"]), - token_annotation=TokenAnnotation( - ids=[0], - words=["word"], - tags=["tag"], - heads=[0], - deps=["dep"], - entities=[label] - ) + example = Example.from_dict( + Doc(ner.vocab, words=["word"]), + { + "ids": [0], + "words": ["word"], + "tags": ["tag"], + "heads": [0], + "deps": ["dep"], + "entities": [label] + } ) ner.moves.get_actions(gold_parses=[example]) diff --git a/spacy/tests/regression/test_issue4529.py b/spacy/tests/regression/test_issue4529.py index fa962c053..89b16a484 100644 --- a/spacy/tests/regression/test_issue4529.py +++ b/spacy/tests/regression/test_issue4529.py @@ -1,5 +1,4 @@ import pytest -from spacy.gold import GoldParse @pytest.mark.parametrize(