mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 18:07:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			33 lines
		
	
	
		
			960 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			33 lines
		
	
	
		
			960 B
		
	
	
	
		
			Python
		
	
	
	
	
	
import numpy
 | 
						|
from spacy.tokens import Doc
 | 
						|
from spacy.attrs import DEP, POS, TAG
 | 
						|
 | 
						|
from ..util import get_doc
 | 
						|
 | 
						|
 | 
						|
def test_issue5048(en_vocab):
    """Compare a Doc filled through ``Doc.from_array`` with one from ``get_doc``.

    The same POS, dependency and tag labels are applied to the same words in
    two ways: as a ``uint64`` array of string-store ids passed to
    ``Doc.from_array``, and as plain string labels passed to ``get_doc``.
    The test asserts that each token's ``text``, ``pos_`` and ``tag_`` agree
    between the two documents.
    """

    def describe(document):
        # Per-token (text, coarse POS, fine-grained tag) triples.
        return [(tok.text, tok.pos_, tok.tag_) for tok in document]

    token_texts = ["This", "is", "a", "sentence"]
    pos_labels = ["DET", "VERB", "DET", "NOUN"]
    trailing_ws = [" ", " ", " ", ""]
    dep_labels = ["dep", "adj", "nn", "atm"]
    tag_labels = ["DT", "VBZ", "DT", "NN"]

    store = en_vocab.strings

    # Register the words first, then the labels; each ``add`` result is kept
    # as the value written into the attribute array.
    for text in token_texts:
        store.add(text)
    dep_ids = [store.add(label) for label in dep_labels]
    pos_ids = [store.add(label) for label in pos_labels]
    tag_ids = [store.add(label) for label in tag_labels]

    # One row per token; column order must line up with [POS, DEP, TAG].
    rows = list(zip(pos_ids, dep_ids, tag_ids))
    values = numpy.array(rows, dtype="uint64")

    from_array_doc = Doc(en_vocab, words=token_texts, spaces=trailing_ws)
    from_array_doc.from_array([POS, DEP, TAG], values)
    expected = describe(from_array_doc)

    helper_doc = get_doc(
        en_vocab,
        words=token_texts,
        pos=pos_labels,
        deps=dep_labels,
        tags=tag_labels,
    )
    actual = describe(helper_doc)
    assert expected == actual
 |