Mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 01:48:04 +03:00)

commit 7e2cdc0c81
Merge branch 'develop' of https://github.com/explosion/spaCy into develop

The merged changes replace the PseudoProjectivity class with module-level
functions: callers now import spacy.syntax.nonproj and call its functions
directly, and the tests are updated to match.
@@ -31,7 +31,7 @@ from .syntax.stateclass cimport StateClass
 from .gold cimport GoldParse
 from .morphology cimport Morphology
 from .vocab cimport Vocab
-from .syntax.nonproj import PseudoProjectivity
+from .syntax import nonproj

 from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
 from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
@@ -265,7 +265,7 @@ class NeuralLabeller(NeuralTagger):
         pass

     def begin_training(self, gold_tuples, pipeline=None):
-        gold_tuples = PseudoProjectivity.preprocess_training_data(gold_tuples)
+        gold_tuples = nonproj.preprocess_training_data(gold_tuples)
         for raw_text, annots_brackets in gold_tuples:
             for annots, brackets in annots_brackets:
                 ids, words, tags, heads, deps, ents = annots
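Both hunks above make the same mechanical rename: what was a method on the
PseudoProjectivity class becomes a plain function in the spacy.syntax.nonproj
module. A minimal before/after sketch (gold_tuples stands in for real training
data; the import paths and function name are taken from the diff):

    # Before: class-based API
    from spacy.syntax.nonproj import PseudoProjectivity
    gold_tuples = PseudoProjectivity.preprocess_training_data(gold_tuples)

    # After: module-level API
    from spacy.syntax import nonproj
    gold_tuples = nonproj.preprocess_training_data(gold_tuples)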
@@ -33,7 +33,6 @@ from ._parse_features cimport CONTEXT_SIZE
 from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
 from ._state cimport StateC
-from .nonproj import PseudoProjectivity
 from .transition_system import OracleError
 from .transition_system cimport TransitionSystem, Transition
 from ..structs cimport TokenC
@@ -2,7 +2,8 @@
 from __future__ import unicode_literals

 from ...syntax.nonproj import ancestors, contains_cycle, is_nonproj_arc
-from ...syntax.nonproj import is_nonproj_tree, PseudoProjectivity
+from ...syntax.nonproj import is_nonproj_tree
+from ...syntax import nonproj
 from ...attrs import DEP, HEAD
 from ..util import get_doc

@@ -75,7 +76,7 @@ def test_parser_pseudoprojectivity(en_tokenizer):
         tokens = en_tokenizer('whatever ' * len(proj_heads))
         rel_proj_heads = [head-i for i, head in enumerate(proj_heads)]
         doc = get_doc(tokens.vocab, [t.text for t in tokens], deps=deco_labels, heads=rel_proj_heads)
-        PseudoProjectivity.deprojectivize(doc)
+        nonproj.deprojectivize(doc)
         return [t.head.i for t in doc], [token.dep_ for token in doc]

     tree = [1, 2, 2]
@@ -85,18 +86,18 @@ def test_parser_pseudoprojectivity(en_tokenizer):
     labels = ['det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct']
     labels2 = ['advmod', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det', 'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct']

-    assert(PseudoProjectivity.decompose('X||Y') == ('X','Y'))
-    assert(PseudoProjectivity.decompose('X') == ('X',''))
-    assert(PseudoProjectivity.is_decorated('X||Y') == True)
-    assert(PseudoProjectivity.is_decorated('X') == False)
+    assert(nonproj.decompose('X||Y') == ('X','Y'))
+    assert(nonproj.decompose('X') == ('X',''))
+    assert(nonproj.is_decorated('X||Y') == True)
+    assert(nonproj.is_decorated('X') == False)

-    PseudoProjectivity._lift(0, tree)
+    nonproj._lift(0, tree)
     assert(tree == [2, 2, 2])

-    assert(PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) == 7)
-    assert(PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) == 10)
+    assert(nonproj._get_smallest_nonproj_arc(nonproj_tree) == 7)
+    assert(nonproj._get_smallest_nonproj_arc(nonproj_tree2) == 10)

-    proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree, labels)
+    proj_heads, deco_labels = nonproj.projectivize(nonproj_tree, labels)
     assert(proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2])
     assert(deco_labels == ['det', 'nsubj', 'root', 'det', 'dobj', 'aux',
                            'nsubj', 'acl||dobj', 'punct'])
@@ -105,7 +106,7 @@ def test_parser_pseudoprojectivity(en_tokenizer):
     assert(deproj_heads == nonproj_tree)
     assert(undeco_labels == labels)

-    proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree2, labels2)
+    proj_heads, deco_labels = nonproj.projectivize(nonproj_tree2, labels2)
     assert(proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1])
     assert(deco_labels == ['advmod||aux', 'root', 'det', 'nsubj', 'advmod',
                            'det', 'dobj', 'det', 'nmod', 'aux', 'nmod||dobj',
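The updated tests exercise the whole module-level surface. A minimal usage
sketch, assuming spaCy at this commit: the heads fixture below is assumed for
illustration (the diff references nonproj_tree without showing its value),
while the labels and the 'acl||dobj' decoration come straight from the diff:

    from spacy.syntax import nonproj

    # Decorated labels carry the original head's label after '||'.
    assert nonproj.decompose('acl||dobj') == ('acl', 'dobj')
    assert nonproj.is_decorated('acl||dobj')
    assert not nonproj.is_decorated('acl')

    # Hypothetical non-projective tree: one head index per token.
    heads = [1, 2, 2, 4, 5, 2, 7, 4, 2]    # assumed; not shown in the diff
    labels = ['det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct']

    # projectivize() lifts crossing arcs and decorates the moved labels;
    # deprojectivize(doc) later restores heads and labels on a parsed Doc.
    proj_heads, deco_labels = nonproj.projectivize(heads, labels)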