	add links to to_dict
This commit is contained in:
parent 03db143cd0
commit c705a28438
@@ -126,7 +126,7 @@ cdef class Example:
             "doc_annotation": {
                 "cats": dict(self.reference.cats),
                 "entities": biluo_tags_from_doc(self.reference),
-                "links": [], # TODO
+                "links": self._links_to_dict()
             },
             "token_annotation": {
                 "ids": [t.i+1 for t in self.reference],
@@ -141,6 +141,14 @@ cdef class Example:
             }
         }
 
+    def _links_to_dict(self):
+        links = {}
+        for ent in self.reference.ents:
+            if ent.kb_id_:
+                links[(ent.start_char, ent.end_char)] = {ent.kb_id_: 1.0}
+        return links
+
+
     def split_sents(self):
         """ Split the token annotations into multiple Examples based on
         sent_starts and return a list of the new Examples"""
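For reference, a minimal sketch of the "links" entry that to_dict() now produces via _links_to_dict(): entities on the reference doc that carry a KB ID are keyed by their character offsets, mapping the KB ID to a probability of 1.0. The import path below assumes the released spaCy v3 layout (spacy.training); at this commit the class lives in spacy/gold/example.pyx, and the words and KB ID are illustrative.

from spacy.vocab import Vocab
from spacy.tokens import Doc, Span
from spacy.training import Example  # spacy.gold at the time of this commit

vocab = Vocab()
words = ["I", "flew", "to", "San", "Francisco", "Valley", "."]
predicted = Doc(vocab, words=words)

# The reference doc carries the gold entity and its KB ID,
# which is what _links_to_dict() reads off self.reference.ents.
reference = Doc(vocab, words=words)
reference.ents = [Span(reference, 3, 6, label="LOC", kb_id="Q816843")]

example = Example(predicted, reference)
print(example.to_dict()["doc_annotation"]["links"])
# {(10, 30): {'Q816843': 1.0}}  -- character offsets of "San Francisco Valley"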
@@ -200,13 +200,16 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     words = ["I flew", "to", "San Francisco", "Valley", "."]
     spaces = [True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
-    entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
-    links = {(len("I flew to "), len("I flew to San Francisco Valley")): {"Q816843": 1.0}}
+    offset_start = len("I flew to ")
+    offset_end = len("I flew to San Francisco Valley")
+    entities = [(offset_start, offset_end, "LOC")]
+    links = {(offset_start, offset_end): {"Q816843": 1.0}}
     gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
     example = Example.from_dict(doc, {"words": gold_words, "entities": entities, "links": links})
     assert example.get_aligned("ENT_IOB") == [2, 2, 3, 1, 2]
     assert example.get_aligned("ENT_TYPE", as_string=True) == ["", "", "LOC", "LOC", ""]
     assert example.get_aligned("ENT_KB_ID", as_string=True) == ["", "", "Q816843", "Q816843", ""]
+    assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {"Q816843": 1.0}
 
 
     # additional whitespace tokens in GoldParse words
     words, spaces = get_words_and_spaces(
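Design note: the (start_char, end_char) keys mapping a KB ID to a probability mirror the "links" format the test already passes to Example.from_dict above, so to_dict() emits annotations in the same shape the training side accepts; since _links_to_dict() reads gold KB IDs from the reference doc, every emitted link has probability 1.0.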