From b45b4aa392feea15cd2f8366a21244c2573094ac Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 22 May 2017 05:17:44 -0500 Subject: [PATCH 1/2] PseudoProjectivity --> nonproj --- spacy/pipeline.pyx | 4 ++-- spacy/syntax/parser.pyx | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index 6f949a5b9..7eb75953a 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -31,7 +31,7 @@ from .syntax.stateclass cimport StateClass from .gold cimport GoldParse from .morphology cimport Morphology from .vocab cimport Vocab -from .syntax.nonproj import PseudoProjectivity +from .syntax import nonproj from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats @@ -265,7 +265,7 @@ class NeuralLabeller(NeuralTagger): pass def begin_training(self, gold_tuples, pipeline=None): - gold_tuples = PseudoProjectivity.preprocess_training_data(gold_tuples) + gold_tuples = nonproj.preprocess_training_data(gold_tuples) for raw_text, annots_brackets in gold_tuples: for annots, brackets in annots_brackets: ids, words, tags, heads, deps, ents = annots diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index b9de1e114..78698db12 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -33,7 +33,6 @@ from ._parse_features cimport CONTEXT_SIZE from ._parse_features cimport fill_context from .stateclass cimport StateClass from ._state cimport StateC -from .nonproj import PseudoProjectivity from .transition_system import OracleError from .transition_system cimport TransitionSystem, Transition from ..structs cimport TokenC From 2f78413a029b0899ecc2877092bd1635147ea1fe Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 22 May 2017 05:39:03 -0500 Subject: [PATCH 2/2] PseudoProjectivity->nonproj --- spacy/tests/parser/test_nonproj.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/spacy/tests/parser/test_nonproj.py b/spacy/tests/parser/test_nonproj.py index 8161d6fc3..237f0debd 100644 --- a/spacy/tests/parser/test_nonproj.py +++ b/spacy/tests/parser/test_nonproj.py @@ -2,7 +2,8 @@ from __future__ import unicode_literals from ...syntax.nonproj import ancestors, contains_cycle, is_nonproj_arc -from ...syntax.nonproj import is_nonproj_tree, PseudoProjectivity +from ...syntax.nonproj import is_nonproj_tree +from ...syntax import nonproj from ...attrs import DEP, HEAD from ..util import get_doc @@ -75,7 +76,7 @@ def test_parser_pseudoprojectivity(en_tokenizer): tokens = en_tokenizer('whatever ' * len(proj_heads)) rel_proj_heads = [head-i for i, head in enumerate(proj_heads)] doc = get_doc(tokens.vocab, [t.text for t in tokens], deps=deco_labels, heads=rel_proj_heads) - PseudoProjectivity.deprojectivize(doc) + nonproj.deprojectivize(doc) return [t.head.i for t in doc], [token.dep_ for token in doc] tree = [1, 2, 2] @@ -85,18 +86,18 @@ def test_parser_pseudoprojectivity(en_tokenizer): labels = ['det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct'] labels2 = ['advmod', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct'] - assert(PseudoProjectivity.decompose('X||Y') == ('X','Y')) - assert(PseudoProjectivity.decompose('X') == ('X','')) - assert(PseudoProjectivity.is_decorated('X||Y') == True) - assert(PseudoProjectivity.is_decorated('X') == False) + assert(nonproj.decompose('X||Y') == ('X','Y')) + assert(nonproj.decompose('X') == ('X','')) + assert(nonproj.is_decorated('X||Y') == True) + assert(nonproj.is_decorated('X') == False) - PseudoProjectivity._lift(0, tree) + nonproj._lift(0, tree) assert(tree == [2, 2, 2]) - assert(PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) == 7) - assert(PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) == 10) + assert(nonproj._get_smallest_nonproj_arc(nonproj_tree) == 7) + assert(nonproj._get_smallest_nonproj_arc(nonproj_tree2) == 10) - proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree, labels) + proj_heads, deco_labels = nonproj.projectivize(nonproj_tree, labels) assert(proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2]) assert(deco_labels == ['det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||dobj', 'punct']) @@ -105,7 +106,7 @@ def test_parser_pseudoprojectivity(en_tokenizer): assert(deproj_heads == nonproj_tree) assert(undeco_labels == labels) - proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree2, labels2) + proj_heads, deco_labels = nonproj.projectivize(nonproj_tree2, labels2) assert(proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]) assert(deco_labels == ['advmod||aux', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod||dobj',