mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Add assert_docs_equal util to compare two docs
This commit is contained in:
		
							parent
							
								
									eac3f700fb
								
							
						
					
					
						commit
						442237787c
					
				|  | @ -43,3 +43,23 @@ def apply_transition_sequence(parser, doc, sequence): | |||
def get_cosine(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Computed as the dot product of ``vec1`` and ``vec2`` divided by the
    product of their norms (``numpy.linalg.norm``). No guard is applied for
    a zero-norm input, in which case the denominator is zero.
    """
    dot_product = numpy.dot(vec1, vec2)
    norm_product = numpy.linalg.norm(vec1) * numpy.linalg.norm(vec2)
    return dot_product / norm_product
| 
 | ||||
| 
 | ||||
def assert_docs_equal(doc1, doc2):
    """Assert that two docs carry the same tokens and annotations.

    Checks, in this order: token ``orth``; ``pos`` and ``tag``; head indices
    (``head.i``) and ``dep``; sentence boundaries (only when both docs report
    ``is_parsed``); per-token ``ent_type`` and ``ent_iob``; and the entity
    spans in ``ents``.

    Sentence and entity spans are compared by their ``start``/``end`` token
    offsets (plus ``label`` for entities) instead of with ``==`` on the span
    objects, so the result does not depend on how span equality treats spans
    that belong to two different docs.

    Raises:
        AssertionError: at the first attribute that differs; the message
            names the attribute and shows both sequences.
    """

    def check_tokens(name, get):
        # Compare one per-token attribute across both docs. A length
        # mismatch between the docs surfaces here as unequal lists.
        left = [get(t) for t in doc1]
        right = [get(t) for t in doc2]
        assert left == right, "token %s differs: %r != %r" % (name, left, right)

    # tokens
    check_tokens("orth", lambda t: t.orth)

    # tags
    check_tokens("pos", lambda t: t.pos)
    check_tokens("tag", lambda t: t.tag)

    # parse
    check_tokens("head", lambda t: t.head.i)
    check_tokens("dep", lambda t: t.dep)
    if doc1.is_parsed and doc2.is_parsed:
        sents1 = [(s.start, s.end) for s in doc1.sents]
        sents2 = [(s.start, s.end) for s in doc2.sents]
        assert sents1 == sents2, (
            "sentence boundaries differ: %r != %r" % (sents1, sents2))

    # entities
    check_tokens("ent_type", lambda t: t.ent_type)
    check_tokens("ent_iob", lambda t: t.ent_iob)
    ents1 = [(e.start, e.end, e.label) for e in doc1.ents]
    ents2 = [(e.start, e.end, e.label) for e in doc2.ents]
    assert ents1 == ents2, "entity spans differ: %r != %r" % (ents1, ents2)
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user