Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 16:07:41 +03:00)
	add links to to_dict
commit c705a28438
parent 03db143cd0
@@ -126,7 +126,7 @@ cdef class Example:
             "doc_annotation": {
                 "cats": dict(self.reference.cats),
                 "entities": biluo_tags_from_doc(self.reference),
-                "links": [], # TODO
+                "links": self._links_to_dict()
             },
             "token_annotation": {
                 "ids": [t.i+1 for t in self.reference],
@@ -141,6 +141,14 @@ cdef class Example:
             }
         }
 
+    def _links_to_dict(self):
+        links = {}
+        for ent in self.reference.ents:
+            if ent.kb_id_:
+                links[(ent.start_char, ent.end_char)] = {ent.kb_id_: 1.0}
+        return links
+
+
     def split_sents(self):
         """ Split the token annotations into multiple Examples based on
         sent_starts and return a list of the new Examples"""
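For context, a minimal standalone sketch of how the new links entry surfaces through Example.to_dict(): keys are (start_char, end_char) offsets and values map a KB ID to a probability, as rebuilt by _links_to_dict() from the reference doc's entity spans. The import path (spacy.gold) and the blank-pipeline setup are assumptions based on this development branch; the sketch mirrors the test change below rather than reproducing it.

import spacy
from spacy.gold import Example  # path assumed for this branch; released spaCy v3 exposes spacy.training.Example

nlp = spacy.blank("en")
doc = nlp("I flew to San Francisco Valley.")
offset_start = len("I flew to ")
offset_end = len("I flew to San Francisco Valley")
annots = {
    "entities": [(offset_start, offset_end, "LOC")],
    "links": {(offset_start, offset_end): {"Q816843": 1.0}},
}
example = Example.from_dict(doc, annots)
# _links_to_dict() reads the reference doc's ents and their kb_id_ values,
# so the annotations round-trip back out of to_dict():
print(example.to_dict()["doc_annotation"]["links"])
# expected: {(10, 30): {"Q816843": 1.0}}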
@@ -200,13 +200,16 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     words = ["I flew", "to", "San Francisco", "Valley", "."]
     spaces = [True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
-    entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
-    links = {(len("I flew to "), len("I flew to San Francisco Valley")): {"Q816843": 1.0}}
+    offset_start = len("I flew to ")
+    offset_end = len("I flew to San Francisco Valley")
+    entities = [(offset_start, offset_end, "LOC")]
+    links = {(offset_start, offset_end): {"Q816843": 1.0}}
     gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
     example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "links": links})
     assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
     assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
     assert example.get_aligned("ENT_KB_ID", as_string=True) == ["", "", "Q816843", "Q816843", ""]
+    assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {"Q816843": 1.0}
 
     # additional whitespace tokens in GoldParse words
     words, spaces = get_words_and_spaces(