Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-05-22 12:39:34 +02:00
commit 7e2cdc0c81
3 changed files with 14 additions and 14 deletions

View File

@@ -31,7 +31,7 @@ from .syntax.stateclass cimport StateClass
 from .gold cimport GoldParse
 from .morphology cimport Morphology
 from .vocab cimport Vocab
-from .syntax.nonproj import PseudoProjectivity
+from .syntax import nonproj
 from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
 from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
@@ -265,7 +265,7 @@ class NeuralLabeller(NeuralTagger):
 pass
 def begin_training(self, gold_tuples, pipeline=None):
-gold_tuples = PseudoProjectivity.preprocess_training_data(gold_tuples)
+gold_tuples = nonproj.preprocess_training_data(gold_tuples)
 for raw_text, annots_brackets in gold_tuples:
 for annots, brackets in annots_brackets:
 ids, words, tags, heads, deps, ents = annots

View File

@@ -33,7 +33,6 @@ from ._parse_features cimport CONTEXT_SIZE
 from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
 from ._state cimport StateC
-from .nonproj import PseudoProjectivity
 from .transition_system import OracleError
 from .transition_system cimport TransitionSystem, Transition
 from ..structs cimport TokenC

View File

@@ -2,7 +2,8 @@
 from __future__ import unicode_literals
 from ...syntax.nonproj import ancestors, contains_cycle, is_nonproj_arc
-from ...syntax.nonproj import is_nonproj_tree, PseudoProjectivity
+from ...syntax.nonproj import is_nonproj_tree
+from ...syntax import nonproj
 from ...attrs import DEP, HEAD
 from ..util import get_doc
@@ -75,7 +76,7 @@ def test_parser_pseudoprojectivity(en_tokenizer):
 tokens = en_tokenizer('whatever ' * len(proj_heads))
 rel_proj_heads = [head-i for i, head in enumerate(proj_heads)]
 doc = get_doc(tokens.vocab, [t.text for t in tokens], deps=deco_labels, heads=rel_proj_heads)
-PseudoProjectivity.deprojectivize(doc)
+nonproj.deprojectivize(doc)
 return [t.head.i for t in doc], [token.dep_ for token in doc]
 tree = [1, 2, 2]
@@ -85,18 +86,18 @@ def test_parser_pseudoprojectivity(en_tokenizer):
 labels = ['det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct']
 labels2 = ['advmod', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct']
-assert(PseudoProjectivity.decompose('X||Y') == ('X','Y'))
-assert(PseudoProjectivity.decompose('X') == ('X',''))
-assert(PseudoProjectivity.is_decorated('X||Y') == True)
-assert(PseudoProjectivity.is_decorated('X') == False)
+assert(nonproj.decompose('X||Y') == ('X','Y'))
+assert(nonproj.decompose('X') == ('X',''))
+assert(nonproj.is_decorated('X||Y') == True)
+assert(nonproj.is_decorated('X') == False)
-PseudoProjectivity._lift(0, tree)
+nonproj._lift(0, tree)
 assert(tree == [2, 2, 2])
-assert(PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) == 7)
-assert(PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) == 10)
+assert(nonproj._get_smallest_nonproj_arc(nonproj_tree) == 7)
+assert(nonproj._get_smallest_nonproj_arc(nonproj_tree2) == 10)
-proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree, labels)
+proj_heads, deco_labels = nonproj.projectivize(nonproj_tree, labels)
 assert(proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2])
 assert(deco_labels == ['det', 'nsubj', 'root', 'det', 'dobj', 'aux',
 'nsubj', 'acl||dobj', 'punct'])
@@ -105,7 +106,7 @@ def test_parser_pseudoprojectivity(en_tokenizer):
 assert(deproj_heads == nonproj_tree)
 assert(undeco_labels == labels)
-proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree2, labels2)
+proj_heads, deco_labels = nonproj.projectivize(nonproj_tree2, labels2)
 assert(proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1])
 assert(deco_labels == ['advmod||aux', 'root', 'det', 'nsubj', 'advmod',
 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod||dobj',