mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Add span_id to Span.char_span, update Doc/Span.char_span docs (#12196)
* Add span_id to Span.char_span, update Doc/Span.char_span docs. `Span.char_span(id=)` should be removed in the future. * Also use Union[int, str] in Doc docstring
This commit is contained in:
		
							parent
							
								
									774c10fa39
								
							
						
					
					
						commit
						5f8a398bb9
					
				|  | @ -163,6 +163,18 @@ def test_char_span(doc, i_sent, i, j, text): | ||||||
|         assert span.text == text |         assert span.text == text | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def test_char_span_attributes(doc): | ||||||
|  |     label = "LABEL" | ||||||
|  |     kb_id = "KB_ID" | ||||||
|  |     span_id = "SPAN_ID" | ||||||
|  |     span1 = doc.char_span(20, 45, label=label, kb_id=kb_id, span_id=span_id) | ||||||
|  |     span2 = doc[1:].char_span(15, 40, label=label, kb_id=kb_id, span_id=span_id) | ||||||
|  |     assert span1.text == span2.text | ||||||
|  |     assert span1.label_ == span2.label_ == label | ||||||
|  |     assert span1.kb_id_ == span2.kb_id_ == kb_id | ||||||
|  |     assert span1.id_ == span2.id_ == span_id | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def test_spans_sent_spans(doc): | def test_spans_sent_spans(doc): | ||||||
|     sents = list(doc.sents) |     sents = list(doc.sents) | ||||||
|     assert sents[0].start == 0 |     assert sents[0].start == 0 | ||||||
|  |  | ||||||
|  | @ -108,6 +108,7 @@ class Doc: | ||||||
|         kb_id: Union[int, str] = ..., |         kb_id: Union[int, str] = ..., | ||||||
|         vector: Optional[Floats1d] = ..., |         vector: Optional[Floats1d] = ..., | ||||||
|         alignment_mode: str = ..., |         alignment_mode: str = ..., | ||||||
|  |         span_id: Union[int, str] = ..., | ||||||
|     ) -> Span: ... |     ) -> Span: ... | ||||||
|     def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... |     def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... | ||||||
|     @property |     @property | ||||||
|  |  | ||||||
|  | @ -528,9 +528,9 @@ cdef class Doc: | ||||||
|         doc (Doc): The parent document. |         doc (Doc): The parent document. | ||||||
|         start_idx (int): The index of the first character of the span. |         start_idx (int): The index of the first character of the span. | ||||||
|         end_idx (int): The index of the first character after the span. |         end_idx (int): The index of the first character after the span. | ||||||
|         label (uint64 or string): A label to attach to the Span, e.g. for |         label (Union[int, str]): A label to attach to the Span, e.g. for | ||||||
|             named entities. |             named entities. | ||||||
|         kb_id (uint64 or string):  An ID from a KB to capture the meaning of a |         kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a | ||||||
|             named entity. |             named entity. | ||||||
|         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of |         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of | ||||||
|             the span. |             the span. | ||||||
|  | @ -539,6 +539,7 @@ cdef class Doc: | ||||||
|             with token boundaries), "contract" (span of all tokens completely |             with token boundaries), "contract" (span of all tokens completely | ||||||
|             within the character span), "expand" (span of all tokens at least |             within the character span), "expand" (span of all tokens at least | ||||||
|             partially covered by the character span). Defaults to "strict". |             partially covered by the character span). Defaults to "strict". | ||||||
|  |         span_id (Union[int, str]): An identifier to associate with the span. | ||||||
|         RETURNS (Span): The newly constructed object. |         RETURNS (Span): The newly constructed object. | ||||||
| 
 | 
 | ||||||
|         DOCS: https://spacy.io/api/doc#char_span |         DOCS: https://spacy.io/api/doc#char_span | ||||||
|  |  | ||||||
|  | @ -98,7 +98,9 @@ class Span: | ||||||
|         label: Union[int, str] = ..., |         label: Union[int, str] = ..., | ||||||
|         kb_id: Union[int, str] = ..., |         kb_id: Union[int, str] = ..., | ||||||
|         vector: Optional[Floats1d] = ..., |         vector: Optional[Floats1d] = ..., | ||||||
|  |         id: Union[int, str] = ..., | ||||||
|         alignment_mode: str = ..., |         alignment_mode: str = ..., | ||||||
|  |         span_id: Union[int, str] = ..., | ||||||
|     ) -> Span: ... |     ) -> Span: ... | ||||||
|     @property |     @property | ||||||
|     def conjuncts(self) -> Tuple[Token]: ... |     def conjuncts(self) -> Tuple[Token]: ... | ||||||
|  |  | ||||||
|  | @ -639,26 +639,28 @@ cdef class Span: | ||||||
|         else: |         else: | ||||||
|             return self.doc[root] |             return self.doc[root] | ||||||
| 
 | 
 | ||||||
|     def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict"): |     def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict", span_id=0): | ||||||
|         """Create a `Span` object from the slice `span.text[start : end]`. |         """Create a `Span` object from the slice `span.text[start : end]`. | ||||||
| 
 | 
 | ||||||
|         start (int): The index of the first character of the span. |         start (int): The index of the first character of the span. | ||||||
|         end (int): The index of the first character after the span. |         end (int): The index of the first character after the span. | ||||||
|         label (uint64 or string): A label to attach to the Span, e.g. for |         label (Union[int, str]): A label to attach to the Span, e.g. for | ||||||
|             named entities. |             named entities. | ||||||
|         kb_id (uint64 or string):  An ID from a KB to capture the meaning of a named entity. |         kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a named entity. | ||||||
|         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of |         vector (ndarray[ndim=1, dtype='float32']): A meaning representation of | ||||||
|             the span. |             the span. | ||||||
|  |         id (Union[int, str]): Unused. | ||||||
|         alignment_mode (str): How character indices are aligned to token |         alignment_mode (str): How character indices are aligned to token | ||||||
|             boundaries. Options: "strict" (character indices must be aligned |             boundaries. Options: "strict" (character indices must be aligned | ||||||
|             with token boundaries), "contract" (span of all tokens completely |             with token boundaries), "contract" (span of all tokens completely | ||||||
|             within the character span), "expand" (span of all tokens at least |             within the character span), "expand" (span of all tokens at least | ||||||
|             partially covered by the character span). Defaults to "strict". |             partially covered by the character span). Defaults to "strict". | ||||||
|  |         span_id (Union[int, str]): An identifier to associate with the span. | ||||||
|         RETURNS (Span): The newly constructed object. |         RETURNS (Span): The newly constructed object. | ||||||
|         """ |         """ | ||||||
|         start_idx += self.c.start_char |         start_idx += self.c.start_char | ||||||
|         end_idx += self.c.start_char |         end_idx += self.c.start_char | ||||||
|         return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode) |         return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode, span_id=span_id) | ||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
|     def conjuncts(self): |     def conjuncts(self): | ||||||
|  |  | ||||||
|  | @ -209,15 +209,16 @@ alignment mode `"strict". | ||||||
| > assert span.text == "New York" | > assert span.text == "New York" | ||||||
| > ``` | > ``` | ||||||
| 
 | 
 | ||||||
| | Name             | Description                                                                                                                                                                                                                                                                  | | | Name                                     | Description                                                                                                                                                                                                                                                                  | | ||||||
| | ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | | ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ||||||
| | `start`          | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        | | | `start`                                  | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        | | ||||||
| | `end`            | The index of the first character after the span. ~~int~~                                                                                                                                                                                                                     | | | `end`                                    | The index of the first character after the span. ~~int~~                                                                                                                                                                                                                     | | ||||||
| | `label`          | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  | | | `label`                                  | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  | | ||||||
| | `kb_id`          | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    | | | `kb_id`                                  | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    | | ||||||
| | `vector`         | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               | | | `vector`                                 | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               | | ||||||
| | `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ | | | `alignment_mode`                         | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ | | ||||||
| | **RETURNS**      | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   | | | `span_id` <Tag variant="new">3.3.1</Tag> | An identifier to associate with the span. ~~Union[int, str]~~                                                                                                                                                                                                                | | ||||||
|  | | **RETURNS**                              | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   | | ||||||
| 
 | 
 | ||||||
| ## Doc.set_ents {id="set_ents",tag="method",version="3"} | ## Doc.set_ents {id="set_ents",tag="method",version="3"} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -193,7 +193,9 @@ the character indices don't map to a valid span. | ||||||
| | `label`                                         | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  | | | `label`                                         | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  | | ||||||
| | `kb_id`                                         | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    | | | `kb_id`                                         | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    | | ||||||
| | `vector`                                        | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               | | | `vector`                                        | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               | | ||||||
|  | | `id`                                            | Unused. ~~Union[int, str]~~                                                                                                                                                                                                                                                  | | ||||||
| | `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ | | | `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ | | ||||||
|  | | `span_id` <Tag variant="new">3.5.1</Tag>        | An identifier to associate with the span. ~~Union[int, str]~~                                                                                                                                                                                                                | | ||||||
| | **RETURNS**                                     | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   | | | **RETURNS**                                     | The newly constructed object or `None`. ~~Optional[Span]~~                                                                                                                                                                                                                   | | ||||||
| 
 | 
 | ||||||
| ## Span.similarity {id="similarity",tag="method",model="vectors"} | ## Span.similarity {id="similarity",tag="method",model="vectors"} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user