# Mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 09:57:26 +03:00) -- 26 lines, 972 B, Python
from __future__ import unicode_literals
 | 
						|
from ..util import get_doc
 | 
						|
from ...vocab import Vocab
 | 
						|
from ...en import English
 | 
						|
 | 
						|
 | 
						|
def test_span_noun_chunks():
    """Check noun chunk extraction on a Doc, a Span slice and each sentence.

    Builds an eight-token English doc from hand-written heads, dependency
    labels and tags, then asserts the noun chunk texts obtained from the
    whole doc, from the slice ``doc[0:4]`` and from every sentence yielded
    by ``doc.sents``.
    """
    vocab = Vocab(lang='en', tag_map=English.Defaults.tag_map)
    words = "Employees are recruiting talented staffers from overseas .".split()
    # One annotation per token, including the final '.'.
    # NOTE(review): heads are assumed to be offsets relative to each token
    # (0 marks the root) -- confirm against get_doc. Under that reading
    # 'overseas' attaches to 'from' (-1) and '.' to 'recruiting' (-5).
    heads = [1, 1, 0, 1, -2, -1, -1, -5]
    deps = ['nsubj', 'aux', 'ROOT', 'nmod', 'dobj', 'adv', 'pobj', 'punct']
    tags = ['NNS', 'VBP', 'VBG', 'JJ', 'NNS', 'IN', 'NN', '.']
    # Fail loudly if the annotation lists drift out of step with the tokens.
    assert len(words) == len(heads) == len(deps) == len(tags)

    doc = get_doc(vocab, words=words, heads=heads, deps=deps, tags=tags)
    doc.is_parsed = True

    expected_chunks = ['Employees', 'talented staffers', 'overseas']
    assert [chunk.text for chunk in doc.noun_chunks] == expected_chunks

    # Only the first four tokens: 'staffers' and 'overseas' fall outside.
    span = doc[0:4]
    assert [chunk.text for chunk in span.noun_chunks] == ['Employees']

    for sent in doc.sents:
        assert [chunk.text for chunk in sent.noun_chunks] == expected_chunks