From a427ca9355ff4862d547ad0c2f3be2569ad857ae Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 19 Jun 2020 11:31:01 +0200 Subject: [PATCH] clean up --- spacy/language.py | 14 -------------- spacy/syntax/arc_eager.pyx | 6 +++--- spacy/syntax/nonproj.pyx | 4 ++-- spacy/tests/test_gold.py | 2 ++ 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index c168afeea..b9a84e1bb 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -646,20 +646,6 @@ class Language(object): sgd(W, dW, key=key) return losses - def preprocess_gold(self, examples): - """Can be called before training to pre-process gold data. By default, - it handles nonprojectivity and adds missing tags to the tag map. - - examples (iterable): `Example` objects. - YIELDS (tuple): `Example` objects. - """ - # TODO: This is deprecated right? - for name, proc in self.pipeline: - if hasattr(proc, "preprocess_gold"): - examples = proc.preprocess_gold(examples) - for eg in examples: - yield eg - def begin_training(self, get_examples=None, sgd=None, component_cfg=None, **cfg): """Allocate models, pre-process training data and acquire a trainer and optimizer. Used as a contextmanager. diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index ea49f50d5..0dfcbf885 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -459,9 +459,9 @@ cdef class ArcEager(TransitionSystem): actions[RIGHT][label] = 1 actions[REDUCE][label] = 1 for example in kwargs.get('gold_parses', []): - heads, labels = nonproj.projectivize(example.token_annotation.heads, - example.token_annotation.deps) - for child, head, label in zip(example.token_annotation.ids, heads, labels): + heads, labels = nonproj.projectivize(example.get_aligned("HEAD"), + example.get_aligned("DEP")) + for child, head, label in zip(example.get_aligned("ID"), heads, labels): if label.upper() == 'ROOT' : label = 'ROOT' if head == child: diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx index 5b1f57d2b..eded53fac 100644 --- a/spacy/syntax/nonproj.pyx +++ b/spacy/syntax/nonproj.pyx @@ -78,8 +78,8 @@ def is_decorated(label): def count_decorated_labels(gold_data): freqs = {} for example in gold_data: - proj_heads, deco_deps = projectivize(example.token_annotation.heads, - example.token_annotation.deps) + proj_heads, deco_deps = projectivize(example.get_aligned("HEAD"), + example.get_aligned("DEP")) # set the label to ROOT for each root dependent deco_deps = ['ROOT' if head == i else deco_deps[i] for i, head in enumerate(proj_heads)] diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py index 9e63f8a98..f76b0c1e1 100644 --- a/spacy/tests/test_gold.py +++ b/spacy/tests/test_gold.py @@ -497,6 +497,8 @@ def test_split_sents(merged_dict): Doc(nlp.vocab, words=merged_dict["words"], spaces=merged_dict["spaces"]), merged_dict ) + assert example.text == "Hi there everyone It is just me" + assert len(get_parses_from_example( example, merge=False,