mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	adding spans to doc_annotation in Example.to_dict (#11261)
* adding spans to doc_annotation in Example.to_dict * to_dict compatible with from_dict: tuples instead of spans * use strings for label and kb_id * Simplify test * Update data formats docs Co-authored-by: Stefanie Wolf <stefanie.wolf@vitecsoftware.com> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
		
							parent
							
								
									b07708d5d0
								
							
						
					
					
						commit
						23749cfc91
					
				|  | @ -431,3 +431,41 @@ def test_Example_aligned_whitespace(en_vocab): | |||
| 
 | ||||
|     example = Example(predicted, reference) | ||||
|     assert example.get_aligned("TAG", as_string=True) == tags | ||||
| 
 | ||||
| 
 | ||||
@pytest.mark.issue("11260")
def test_issue11260():
    """Regression test for issue #11260: spans must survive ``Example.to_dict``.

    Builds an ``Example`` from annotations whose spans are given as
    ``(start_char, end_char, label, kb_id)`` tuples, exports it with
    ``to_dict``, and rebuilds a second ``Example`` from that export. Both the
    exported dict and the rebuilt example are checked.
    """
    annots = {
        "words": ["I", "like", "New", "York", "."],
        "spans": {
            "cities": [(7, 15, "LOC", "")],
            "people": [(0, 1, "PERSON", "")],
        },
    }
    vocab = Vocab()
    predicted = Doc(vocab, words=annots["words"])
    example = Example.from_dict(predicted, annots)
    assert len(example.reference.spans["cities"]) == 1
    assert len(example.reference.spans["people"]) == 1

    # The exported dict must carry the spans in the same tuple format that
    # from_dict accepts.
    output_dict = example.to_dict()
    assert "spans" in output_dict["doc_annotation"]
    assert output_dict["doc_annotation"]["spans"]["cities"] == annots["spans"]["cities"]
    assert output_dict["doc_annotation"]["spans"]["people"] == annots["spans"]["people"]

    output_example = Example.from_dict(predicted, output_dict)

    assert len(output_example.reference.spans["cities"]) == len(
        example.reference.spans["cities"]
    )
    assert len(output_example.reference.spans["people"]) == len(
        example.reference.spans["people"]
    )
    # Verify span contents on the original AND the round-tripped example;
    # checking only the original would leave the round trip itself untested
    # beyond span counts.
    for checked in (example, output_example):
        for span in checked.reference.spans["cities"]:
            assert span.label_ == "LOC"
            assert span.text == "New York"
            assert span.start_char == 7
        for span in checked.reference.spans["people"]:
            assert span.label_ == "PERSON"
            assert span.text == "I"
            assert span.start_char == 0
|  |  | |||
|  | @ -361,6 +361,7 @@ cdef class Example: | |||
|             "doc_annotation": { | ||||
|                 "cats": dict(self.reference.cats), | ||||
|                 "entities": doc_to_biluo_tags(self.reference), | ||||
|                 "spans": self._spans_to_dict(), | ||||
|                 "links": self._links_to_dict() | ||||
|             }, | ||||
|             "token_annotation": { | ||||
|  | @ -376,6 +377,18 @@ cdef class Example: | |||
|             } | ||||
|         } | ||||
| 
 | ||||
|     def _spans_to_dict(self): | ||||
|         span_dict = {} | ||||
|         for key in self.reference.spans: | ||||
|             span_tuples = [] | ||||
|             for span in self.reference.spans[key]:  | ||||
|                 span_tuple = (span.start_char, span.end_char, span.label_, span.kb_id_) | ||||
|                 span_tuples.append(span_tuple) | ||||
|             span_dict[key] = span_tuples | ||||
| 
 | ||||
|         return span_dict | ||||
| 
 | ||||
| 
 | ||||
|     def _links_to_dict(self): | ||||
|         links = {} | ||||
|         for ent in self.reference.ents: | ||||
|  |  | |||
|  | @ -395,12 +395,13 @@ file to keep track of your settings and hyperparameters and your own | |||
| >    "pos": List[str], | ||||
| >    "morphs": List[str], | ||||
| >    "sent_starts": List[Optional[bool]], | ||||
| >    "deps": List[string], | ||||
| >    "deps": List[str], | ||||
| >    "heads": List[int], | ||||
| >    "entities": List[str], | ||||
| >    "entities": List[(int, int, str)], | ||||
| >    "cats": Dict[str, float], | ||||
| >    "links": Dict[(int, int), dict], | ||||
| >    "spans": Dict[str, List[Tuple]], | ||||
| > } | ||||
| > ``` | ||||
| 
 | ||||
|  | @ -417,9 +418,10 @@ file to keep track of your settings and hyperparameters and your own | |||
| | `deps`        | List of string values indicating the [dependency relation](/usage/linguistic-features#dependency-parse) of a token to its head. ~~List[str]~~                                                                                  | | ||||
| | `heads`       | List of integer values indicating the dependency head of each token, referring to the absolute index of each token in the text. ~~List[int]~~                                                                                  | | ||||
| | `entities`    | **Option 1:** List of [BILUO tags](/usage/linguistic-features#accessing-ner) per token of the format `"{action}-{label}"`, or `None` for unannotated tokens. ~~List[str]~~                                                     | | ||||
| | `entities`    | **Option 2:** List of `"(start, end, label)"` tuples defining all entities in the text. ~~List[Tuple[int, int, str]]~~                                                                                                         | | ||||
| | `entities`    | **Option 2:** List of `(start_char, end_char, label)` tuples defining all entities in the text. ~~List[Tuple[int, int, str]]~~                                                                                                 | | ||||
| | `cats`        | Dictionary of `label`/`value` pairs indicating how relevant a certain [text category](/api/textcategorizer) is for the text. ~~Dict[str, float]~~                                                                              | | ||||
| | `links`       | Dictionary of `offset`/`dict` pairs defining [named entity links](/usage/linguistic-features#entity-linking). The character offsets are linked to a dictionary of relevant knowledge base IDs. ~~Dict[Tuple[int, int], Dict]~~ | | ||||
| | `spans`       | Dictionary of `spans_key`/`List[Tuple]` pairs defining the spans for each spans key as `(start_char, end_char, label, kb_id)` tuples. ~~Dict[str, List[Tuple[int, int, str, str]]]~~                                           | | ||||
| 
 | ||||
| <Infobox title="Notes and caveats"> | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user