mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	Support more internal methods for SpanGroup (#10476)
* Added new convenience cython functions to SpanGroup to avoid unnecessary allocation/deallocation of objects * Replaced sorting in has_overlap with C++ for efficiency. Also, added a test for has_overlap * Added a method to efficiently merge SpanGroups * Added __delitem__, __add__ and __iadd__. Also, allowed to pass span lists to merge function. Replaced extend() body with call to merge * Renamed merge to concat and added missing things to documentation * Added operator+ and operator += in the documentation * Added a test for Doc deallocation * Update spacy/tokens/span_group.pyx * Updated SpanGroup tests to use new span list comparison function rather than assert_span_list_equal, eliminating the need to have a separate assert_not_equal fnction * Fixed typos in SpanGroup documentation Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Minor changes requested by Sofie: rearranged import statements. Added new=3.2.1 tag to SpanGroup.__setitem__ documentation * SpanGroup: moved repetitive list index check/adjustment in a separate function * Turn off formatting that hurts readability spacy/tests/doc/test_span_group.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Remove formatting that hurts readability spacy/tests/doc/test_span_group.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Turn off formatting that hurts readability in spacy/tests/doc/test_span_group.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Support more internal methods for SpanGroup Add support for: * `__setitem__` * `__delitem__` * `__iadd__`: for `SpanGroup` or `Iterable[Span]` * `__add__`: for `SpanGroup` only Adapted from #9698 with the scope limited to the magic methods. 
* Use v3.3 as new version in docs * Add new tag to SpanGroup.copy in API docs * Remove duplicate import * Apply suggestions from code review Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Remaining suggestions and formatting Co-authored-by: nrodnova <nrodnova@hotmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Natalia Rodnova <4512370+nrodnova@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									c90dd6f265
								
							
						
					
					
						commit
						ca54de27bb
					
				|  | @ -524,6 +524,9 @@ class Errors(metaclass=ErrorsWithCodes): | |||
|     E202 = ("Unsupported {name} mode '{mode}'. Supported modes: {modes}.") | ||||
| 
 | ||||
|     # New errors added in v3.x | ||||
|     E855 = ("Invalid {obj}: {obj} is not from the same doc.") | ||||
|     E856 = ("Error accessing span at position {i}: out of bounds in span group " | ||||
|             "of length {length}.") | ||||
|     E857 = ("Entry '{name}' not found in edit tree lemmatizer labels.") | ||||
|     E858 = ("The {mode} vector table does not support this operation. " | ||||
|             "{alternative}") | ||||
|  |  | |||
							
								
								
									
										242
									
								
								spacy/tests/doc/test_span_group.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								spacy/tests/doc/test_span_group.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,242 @@ | |||
| import pytest | ||||
| from random import Random | ||||
| from spacy.matcher import Matcher | ||||
| from spacy.tokens import Span, SpanGroup | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def doc(en_tokenizer): | ||||
|     doc = en_tokenizer("0 1 2 3 4 5 6") | ||||
|     matcher = Matcher(en_tokenizer.vocab, validate=True) | ||||
| 
 | ||||
|     # fmt: off | ||||
|     matcher.add("4", [[{}, {}, {}, {}]]) | ||||
|     matcher.add("2", [[{}, {}, ]]) | ||||
|     matcher.add("1", [[{}, ]]) | ||||
|     # fmt: on | ||||
|     matches = matcher(doc) | ||||
|     spans = [] | ||||
|     for match in matches: | ||||
|         spans.append( | ||||
|             Span(doc, match[1], match[2], en_tokenizer.vocab.strings[match[0]]) | ||||
|         ) | ||||
|     Random(42).shuffle(spans) | ||||
|     doc.spans["SPANS"] = SpanGroup( | ||||
|         doc, name="SPANS", attrs={"key": "value"}, spans=spans | ||||
|     ) | ||||
|     return doc | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def other_doc(en_tokenizer): | ||||
|     doc = en_tokenizer("0 1 2 3 4 5 6") | ||||
|     matcher = Matcher(en_tokenizer.vocab, validate=True) | ||||
| 
 | ||||
|     # fmt: off | ||||
|     matcher.add("4", [[{}, {}, {}, {}]]) | ||||
|     matcher.add("2", [[{}, {}, ]]) | ||||
|     matcher.add("1", [[{}, ]]) | ||||
|     # fmt: on | ||||
| 
 | ||||
|     matches = matcher(doc) | ||||
|     spans = [] | ||||
|     for match in matches: | ||||
|         spans.append( | ||||
|             Span(doc, match[1], match[2], en_tokenizer.vocab.strings[match[0]]) | ||||
|         ) | ||||
|     Random(42).shuffle(spans) | ||||
|     doc.spans["SPANS"] = SpanGroup( | ||||
|         doc, name="SPANS", attrs={"key": "value"}, spans=spans | ||||
|     ) | ||||
|     return doc | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def span_group(en_tokenizer): | ||||
|     doc = en_tokenizer("0 1 2 3 4 5 6") | ||||
|     matcher = Matcher(en_tokenizer.vocab, validate=True) | ||||
| 
 | ||||
|     # fmt: off | ||||
|     matcher.add("4", [[{}, {}, {}, {}]]) | ||||
|     matcher.add("2", [[{}, {}, ]]) | ||||
|     matcher.add("1", [[{}, ]]) | ||||
|     # fmt: on | ||||
| 
 | ||||
|     matches = matcher(doc) | ||||
|     spans = [] | ||||
|     for match in matches: | ||||
|         spans.append( | ||||
|             Span(doc, match[1], match[2], en_tokenizer.vocab.strings[match[0]]) | ||||
|         ) | ||||
|     Random(42).shuffle(spans) | ||||
|     doc.spans["SPANS"] = SpanGroup( | ||||
|         doc, name="SPANS", attrs={"key": "value"}, spans=spans | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_copy(doc): | ||||
|     span_group = doc.spans["SPANS"] | ||||
|     clone = span_group.copy() | ||||
|     assert clone != span_group | ||||
|     assert clone.name == span_group.name | ||||
|     assert clone.attrs == span_group.attrs | ||||
|     assert len(clone) == len(span_group) | ||||
|     assert list(span_group) == list(clone) | ||||
|     clone.name = "new_name" | ||||
|     clone.attrs["key"] = "new_value" | ||||
|     clone.append(Span(doc, 0, 6, "LABEL")) | ||||
|     assert clone.name != span_group.name | ||||
|     assert clone.attrs != span_group.attrs | ||||
|     assert span_group.attrs["key"] == "value" | ||||
|     assert list(span_group) != list(clone) | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_set_item(doc, other_doc): | ||||
|     span_group = doc.spans["SPANS"] | ||||
| 
 | ||||
|     index = 5 | ||||
|     span = span_group[index] | ||||
|     span.label_ = "NEW LABEL" | ||||
|     span.kb_id = doc.vocab.strings["KB_ID"] | ||||
| 
 | ||||
|     assert span_group[index].label != span.label | ||||
|     assert span_group[index].kb_id != span.kb_id | ||||
| 
 | ||||
|     span_group[index] = span | ||||
|     assert span_group[index].start == span.start | ||||
|     assert span_group[index].end == span.end | ||||
|     assert span_group[index].label == span.label | ||||
|     assert span_group[index].kb_id == span.kb_id | ||||
|     assert span_group[index] == span | ||||
| 
 | ||||
|     with pytest.raises(IndexError): | ||||
|         span_group[-100] = span | ||||
|     with pytest.raises(IndexError): | ||||
|         span_group[100] = span | ||||
| 
 | ||||
|     span = Span(other_doc, 0, 2) | ||||
|     with pytest.raises(ValueError): | ||||
|         span_group[index] = span | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_has_overlap(doc): | ||||
|     span_group = doc.spans["SPANS"] | ||||
|     assert span_group.has_overlap | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_concat(doc, other_doc): | ||||
|     span_group_1 = doc.spans["SPANS"] | ||||
|     spans = [doc[0:5], doc[0:6]] | ||||
|     span_group_2 = SpanGroup( | ||||
|         doc, | ||||
|         name="MORE_SPANS", | ||||
|         attrs={"key": "new_value", "new_key": "new_value"}, | ||||
|         spans=spans, | ||||
|     ) | ||||
|     span_group_3 = span_group_1._concat(span_group_2) | ||||
|     assert span_group_3.name == span_group_1.name | ||||
|     assert span_group_3.attrs == {"key": "value", "new_key": "new_value"} | ||||
|     span_list_expected = list(span_group_1) + list(span_group_2) | ||||
|     assert list(span_group_3) == list(span_list_expected) | ||||
| 
 | ||||
|     # Inplace | ||||
|     span_list_expected = list(span_group_1) + list(span_group_2) | ||||
|     span_group_3 = span_group_1._concat(span_group_2, inplace=True) | ||||
|     assert span_group_3 == span_group_1 | ||||
|     assert span_group_3.name == span_group_1.name | ||||
|     assert span_group_3.attrs == {"key": "value", "new_key": "new_value"} | ||||
|     assert list(span_group_3) == list(span_list_expected) | ||||
| 
 | ||||
|     span_group_2 = other_doc.spans["SPANS"] | ||||
|     with pytest.raises(ValueError): | ||||
|         span_group_1._concat(span_group_2) | ||||
| 
 | ||||
| 
 | ||||
| def test_span_doc_delitem(doc): | ||||
|     span_group = doc.spans["SPANS"] | ||||
|     length = len(span_group) | ||||
|     index = 5 | ||||
|     span = span_group[index] | ||||
|     next_span = span_group[index + 1] | ||||
|     del span_group[index] | ||||
|     assert len(span_group) == length - 1 | ||||
|     assert span_group[index] != span | ||||
|     assert span_group[index] == next_span | ||||
| 
 | ||||
|     with pytest.raises(IndexError): | ||||
|         del span_group[-100] | ||||
|     with pytest.raises(IndexError): | ||||
|         del span_group[100] | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_add(doc): | ||||
|     span_group_1 = doc.spans["SPANS"] | ||||
|     spans = [doc[0:5], doc[0:6]] | ||||
|     span_group_2 = SpanGroup( | ||||
|         doc, | ||||
|         name="MORE_SPANS", | ||||
|         attrs={"key": "new_value", "new_key": "new_value"}, | ||||
|         spans=spans, | ||||
|     ) | ||||
| 
 | ||||
|     span_group_3_expected = span_group_1._concat(span_group_2) | ||||
| 
 | ||||
|     span_group_3 = span_group_1 + span_group_2 | ||||
|     assert len(span_group_3) == len(span_group_3_expected) | ||||
|     assert span_group_3.attrs == {"key": "value", "new_key": "new_value"} | ||||
|     assert list(span_group_3) == list(span_group_3_expected) | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_iadd(doc): | ||||
|     span_group_1 = doc.spans["SPANS"].copy() | ||||
|     spans = [doc[0:5], doc[0:6]] | ||||
|     span_group_2 = SpanGroup( | ||||
|         doc, | ||||
|         name="MORE_SPANS", | ||||
|         attrs={"key": "new_value", "new_key": "new_value"}, | ||||
|         spans=spans, | ||||
|     ) | ||||
| 
 | ||||
|     span_group_1_expected = span_group_1._concat(span_group_2) | ||||
| 
 | ||||
|     span_group_1 += span_group_2 | ||||
|     assert len(span_group_1) == len(span_group_1_expected) | ||||
|     assert span_group_1.attrs == {"key": "value", "new_key": "new_value"} | ||||
|     assert list(span_group_1) == list(span_group_1_expected) | ||||
| 
 | ||||
|     span_group_1 = doc.spans["SPANS"].copy() | ||||
|     span_group_1 += spans | ||||
|     assert len(span_group_1) == len(span_group_1_expected) | ||||
|     assert span_group_1.attrs == { | ||||
|         "key": "value", | ||||
|     } | ||||
|     assert list(span_group_1) == list(span_group_1_expected) | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_extend(doc): | ||||
|     span_group_1 = doc.spans["SPANS"].copy() | ||||
|     spans = [doc[0:5], doc[0:6]] | ||||
|     span_group_2 = SpanGroup( | ||||
|         doc, | ||||
|         name="MORE_SPANS", | ||||
|         attrs={"key": "new_value", "new_key": "new_value"}, | ||||
|         spans=spans, | ||||
|     ) | ||||
| 
 | ||||
|     span_group_1_expected = span_group_1._concat(span_group_2) | ||||
| 
 | ||||
|     span_group_1.extend(span_group_2) | ||||
|     assert len(span_group_1) == len(span_group_1_expected) | ||||
|     assert span_group_1.attrs == {"key": "value", "new_key": "new_value"} | ||||
|     assert list(span_group_1) == list(span_group_1_expected) | ||||
| 
 | ||||
|     span_group_1 = doc.spans["SPANS"] | ||||
|     span_group_1.extend(spans) | ||||
|     assert len(span_group_1) == len(span_group_1_expected) | ||||
|     assert span_group_1.attrs == {"key": "value"} | ||||
|     assert list(span_group_1) == list(span_group_1_expected) | ||||
| 
 | ||||
| 
 | ||||
| def test_span_group_dealloc(span_group): | ||||
|     """Accessing `.doc` on the `span_group` fixture value must raise.""" | ||||
|     # NOTE(review): the `span_group` fixture has no return statement, so the | ||||
|     # value received here is None and the AttributeError comes from | ||||
|     # `None.doc`, not from a SpanGroup whose Doc was deallocated. | ||||
|     with pytest.raises(AttributeError): | ||||
|         print(span_group.doc) | ||||
|  | @ -1,10 +1,11 @@ | |||
| from typing import Iterable, Tuple, Union, Optional, TYPE_CHECKING | ||||
| import weakref | ||||
| import struct | ||||
| from copy import deepcopy | ||||
| import srsly | ||||
| 
 | ||||
| from spacy.errors import Errors | ||||
| from .span cimport Span | ||||
| from libc.stdint cimport uint64_t, uint32_t, int32_t | ||||
| 
 | ||||
| 
 | ||||
| cdef class SpanGroup: | ||||
|  | @ -48,6 +49,8 @@ cdef class SpanGroup: | |||
|         self.name = name | ||||
|         self.attrs = dict(attrs) if attrs is not None else {} | ||||
|         cdef Span span | ||||
|         if len(spans) : | ||||
|             self.c.reserve(len(spans)) | ||||
|         for span in spans: | ||||
|             self.push_back(span.c) | ||||
| 
 | ||||
|  | @ -89,6 +92,72 @@ cdef class SpanGroup: | |||
|         """ | ||||
|         return self.c.size() | ||||
| 
 | ||||
|     def __getitem__(self, int i) -> Span: | ||||
|         """Return the span stored at index i. | ||||
| 
 | ||||
|         The returned Span is a copy: modifying it does not change the member | ||||
|         of the span group it was read from. | ||||
| 
 | ||||
|         i (int): The item index. | ||||
|         RETURNS (Span): The span at the given index. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#getitem | ||||
|         """ | ||||
|         cdef int position = self._normalize_index(i) | ||||
|         return Span.cinit(self.doc, self.c[position]) | ||||
| 
 | ||||
|     def __delitem__(self, int i): | ||||
|         """Delete the span at index i from the span group. | ||||
| 
 | ||||
|         i (int): The item index. Negative values count from the end of the | ||||
|             group; an IndexError is raised if the index is out of bounds. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#delitem | ||||
|         """ | ||||
|         i = self._normalize_index(i) | ||||
|         # `begin() + i` is the iterator to element i. Subtracting 1 here | ||||
|         # would erase the preceding element instead and step before the | ||||
|         # start of the container for i == 0. | ||||
|         self.c.erase(self.c.begin() + i) | ||||
| 
 | ||||
|     def __setitem__(self, int i, Span span): | ||||
|         """Replace the span stored at index i. | ||||
| 
 | ||||
|         i (int): The item index. | ||||
|         span (Span): The span to store. It must belong to the same Doc as | ||||
|             the span group, otherwise a ValueError is raised. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#setitem | ||||
|         """ | ||||
|         cdef int position | ||||
| 
 | ||||
|         # The Doc check comes first, so a foreign span is reported even if | ||||
|         # the index is out of bounds as well. | ||||
|         if span.doc is not self.doc: | ||||
|             message = Errors.E855.format(obj="span") | ||||
|             raise ValueError(message) | ||||
| 
 | ||||
|         position = self._normalize_index(i) | ||||
|         self.c[position] = span.c | ||||
| 
 | ||||
|     def __iadd__(self, other: Union[SpanGroup, Iterable["Span"]]) -> SpanGroup: | ||||
|         """Operator +=. Extend this group in place with another span group | ||||
|         or with spans. | ||||
| 
 | ||||
|         other (Union[SpanGroup, Iterable["Span"]]): The SpanGroup or spans to | ||||
|             add. | ||||
| 
 | ||||
|         RETURNS (SpanGroup): This span group, after the spans were appended. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#iadd | ||||
|         """ | ||||
|         extended = self._concat(other, inplace=True) | ||||
|         return extended | ||||
| 
 | ||||
|     def __add__(self, other: SpanGroup) -> SpanGroup: | ||||
|         """Operator +. Build a new span group from this group followed by | ||||
|         another span group. | ||||
| 
 | ||||
|         other (SpanGroup): The SpanGroup to add. | ||||
| 
 | ||||
|         RETURNS (SpanGroup): The concatenated SpanGroup. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#add | ||||
|         """ | ||||
|         # With Cython 0.x, `self` in __add__ is not guaranteed to be this | ||||
|         # object or even of this type, so both operands are checked. | ||||
|         both_groups = isinstance(self, SpanGroup) and isinstance(other, SpanGroup) | ||||
|         if not both_groups: | ||||
|             return NotImplemented | ||||
|         return self._concat(other) | ||||
| 
 | ||||
|     def append(self, Span span): | ||||
|         """Add a span to the group. The span must refer to the same Doc | ||||
|         object as the span group. | ||||
|  | @ -98,35 +167,18 @@ cdef class SpanGroup: | |||
|         DOCS: https://spacy.io/api/spangroup#append | ||||
|         """ | ||||
|         if span.doc is not self.doc: | ||||
|             raise ValueError("Cannot add span to group: refers to different Doc.") | ||||
|             raise ValueError(Errors.E855.format(obj="span")) | ||||
|         self.push_back(span.c) | ||||
| 
 | ||||
|     def extend(self, spans): | ||||
|         """Add multiple spans to the group. All spans must refer to the same | ||||
|         Doc object as the span group. | ||||
|     def extend(self, spans_or_span_group: Union[SpanGroup, Iterable["Span"]]): | ||||
|         """Add multiple spans or contents of another SpanGroup to the group. | ||||
|         All spans must refer to the same Doc object as the span group. | ||||
| 
 | ||||
|         spans (Iterable[Span]): The spans to add. | ||||
|         spans (Union[SpanGroup, Iterable["Span"]]): The spans to add. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#extend | ||||
|         """ | ||||
|         cdef Span span | ||||
|         for span in spans: | ||||
|             self.append(span) | ||||
| 
 | ||||
|     def __getitem__(self, int i): | ||||
|         """Get a span from the group. | ||||
| 
 | ||||
|         i (int): The item index. | ||||
|         RETURNS (Span): The span at the given index. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#getitem | ||||
|         """ | ||||
|         cdef int size = self.c.size() | ||||
|         if i < -size or i >= size: | ||||
|             raise IndexError(f"list index {i} out of range") | ||||
|         if i < 0: | ||||
|             i += size | ||||
|         return Span.cinit(self.doc, self.c[i]) | ||||
|         self._concat(spans_or_span_group, inplace=True) | ||||
| 
 | ||||
|     def to_bytes(self): | ||||
|         """Serialize the SpanGroup's contents to a byte string. | ||||
|  | @ -136,6 +188,7 @@ cdef class SpanGroup: | |||
|         DOCS: https://spacy.io/api/spangroup#to_bytes | ||||
|         """ | ||||
|         output = {"name": self.name, "attrs": self.attrs, "spans": []} | ||||
|         cdef int i | ||||
|         for i in range(self.c.size()): | ||||
|             span = self.c[i] | ||||
|             # The struct.pack here is probably overkill, but it might help if | ||||
|  | @ -187,3 +240,74 @@ cdef class SpanGroup: | |||
| 
 | ||||
|     cdef void push_back(self, SpanC span) nogil: | ||||
|         # Append the SpanC struct to the group's underlying `self.c` | ||||
|         # container. No check against the group's Doc happens here. | ||||
|         self.c.push_back(span) | ||||
| 
 | ||||
|     def copy(self)  -> SpanGroup: | ||||
|         """Create a new span group with the same doc, name and spans. | ||||
| 
 | ||||
|         The attrs are deep-copied, so changing them on the copy does not | ||||
|         affect this group. | ||||
| 
 | ||||
|         RETURNS (SpanGroup): A copy of the span group. | ||||
| 
 | ||||
|         DOCS: https://spacy.io/api/spangroup#copy | ||||
|         """ | ||||
|         attrs_copy = deepcopy(self.attrs) | ||||
|         members = list(self) | ||||
|         return SpanGroup(self.doc, name=self.name, attrs=attrs_copy, spans=members) | ||||
| 
 | ||||
|     def _concat( | ||||
|         self, | ||||
|         other: Union[SpanGroup, Iterable["Span"]], | ||||
|         *, | ||||
|         inplace: bool = False, | ||||
|     ) -> SpanGroup: | ||||
|         """Concatenates the current span group with the provided span group or | ||||
|         spans, either in place or creating a copy. Preserves the name of self, | ||||
|         updates attrs only with values that are not in self. | ||||
| 
 | ||||
|         All input is validated before anything is modified, so a failed call | ||||
|         leaves the current span group unchanged. | ||||
| 
 | ||||
|         other (Union[SpanGroup, Iterable[Span]]): The spans to append. Any | ||||
|             iterable of spans is accepted, including generators. | ||||
|         inplace (bool): Indicates whether the operation should be performed in | ||||
|             place on the current span group. | ||||
| 
 | ||||
|         RETURNS (SpanGroup): Either a new SpanGroup or the current SpanGroup | ||||
|         depending on the value of inplace. A ValueError is raised if the | ||||
|         other span group or one of the spans belongs to a different Doc. | ||||
|         """ | ||||
|         cdef SpanGroup span_group | ||||
|         cdef SpanGroup other_group | ||||
|         cdef Span span | ||||
| 
 | ||||
|         if isinstance(other, SpanGroup): | ||||
|             other_group = other | ||||
|             if other_group.doc is not self.doc: | ||||
|                 raise ValueError(Errors.E855.format(obj="span group")) | ||||
| 
 | ||||
|             span_group = self if inplace else self.copy() | ||||
|             # Existing keys win; only keys missing in the target are added. | ||||
|             other_attrs = deepcopy(other_group.attrs) | ||||
|             span_group.attrs.update({ | ||||
|                 key: value for key, value in other_attrs.items() | ||||
|                 if key not in span_group.attrs | ||||
|             }) | ||||
|             if len(other_group): | ||||
|                 span_group.c.reserve(span_group.c.size() + other_group.c.size()) | ||||
|                 span_group.c.insert(span_group.c.end(), other_group.c.begin(), other_group.c.end()) | ||||
|         else: | ||||
|             # Materialize once: `other` may be a generator or another | ||||
|             # iterable without len() that can only be consumed a single time. | ||||
|             spans = list(other) | ||||
|             for span in spans: | ||||
|                 if span.doc is not self.doc: | ||||
|                     raise ValueError(Errors.E855.format(obj="span")) | ||||
| 
 | ||||
|             span_group = self if inplace else self.copy() | ||||
|             if spans: | ||||
|                 span_group.c.reserve(span_group.c.size() + len(spans)) | ||||
|             for span in spans: | ||||
|                 span_group.c.push_back(span.c) | ||||
| 
 | ||||
|         return span_group | ||||
| 
 | ||||
|     def _normalize_index(self, int i) -> int: | ||||
|         """Validate a list-style index and map negative values to their | ||||
|         non-negative equivalent. | ||||
| 
 | ||||
|         i (int): The index. | ||||
|         RETURNS (int): The adjusted index. | ||||
|         """ | ||||
|         cdef int length = self.c.size() | ||||
|         if -length <= i < length: | ||||
|             # In bounds: negative indices count back from the end. | ||||
|             return i + length if i < 0 else i | ||||
|         raise IndexError(Errors.E856.format(i=i, length=length)) | ||||
|  |  | |||
|  | @ -104,7 +104,10 @@ Get the number of spans in the group. | |||
| 
 | ||||
| ## SpanGroup.\_\_getitem\_\_ {#getitem tag="method"} | ||||
| 
 | ||||
| Get a span from the group. | ||||
| Get a span from the group. Note that a copy of the span is returned, so if any | ||||
| changes are made to this span, they are not reflected in the corresponding | ||||
| member of the span group. The item or group will need to be reassigned for | ||||
| changes to be reflected in the span group. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -113,6 +116,8 @@ Get a span from the group. | |||
| > doc.spans["errors"] = [doc[0:1], doc[2:4]] | ||||
| > span = doc.spans["errors"][1] | ||||
| > assert span.text == "goi ng" | ||||
| > span.label_ = 'LABEL' | ||||
| > assert doc.spans["errors"][1].label_ != 'LABEL' # The span within the group was not updated | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                           | | ||||
|  | @ -120,6 +125,83 @@ Get a span from the group. | |||
| | `i`         | The item index. ~~int~~               | | ||||
| | **RETURNS** | The span at the given index. ~~Span~~ | | ||||
| 
 | ||||
| ## SpanGroup.\_\_setitem\_\_ {#setitem tag="method", new="3.3"} | ||||
| 
 | ||||
| Set a span in the span group. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > doc = nlp("Their goi ng home") | ||||
| > doc.spans["errors"] = [doc[0:1], doc[2:4]] | ||||
| > span = doc[0:2] | ||||
| > doc.spans["errors"][0] = span | ||||
| > assert doc.spans["errors"][0].text == "Their goi" | ||||
| > ``` | ||||
| 
 | ||||
| | Name   | Description             | | ||||
| | ------ | ----------------------- | | ||||
| | `i`    | The item index. ~~int~~ | | ||||
| | `span` | The new value. ~~Span~~ | | ||||
| 
 | ||||
| ## SpanGroup.\_\_delitem\_\_ {#delitem tag="method", new="3.3"} | ||||
| 
 | ||||
| Delete a span from the span group. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > doc = nlp("Their goi ng home") | ||||
| > doc.spans["errors"] = [doc[0:1], doc[2:4]] | ||||
| > del doc.spans["errors"][0] | ||||
| > assert len(doc.spans["errors"]) == 1 | ||||
| > ``` | ||||
| 
 | ||||
| | Name | Description             | | ||||
| | ---- | ----------------------- | | ||||
| | `i`  | The item index. ~~int~~ | | ||||
| 
 | ||||
| ## SpanGroup.\_\_add\_\_ {#add tag="method", new="3.3"} | ||||
| 
 | ||||
| Concatenate the current span group with another span group and return the result | ||||
| in a new span group. Any `attrs` from the first span group will have precedence | ||||
| over `attrs` in the second. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > doc = nlp("Their goi ng home") | ||||
| > doc.spans["errors"] = [doc[0:1], doc[2:4]] | ||||
| > doc.spans["other"] = [doc[0:2], doc[1:3]] | ||||
| > span_group = doc.spans["errors"] + doc.spans["other"] | ||||
| > assert len(span_group) == 4 | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                  | | ||||
| | ----------- | -------------------------------------------- | | ||||
| | `other`     | The span group to concatenate. ~~SpanGroup~~ | | ||||
| | **RETURNS** | The new span group. ~~SpanGroup~~            | | ||||
| 
 | ||||
| ## SpanGroup.\_\_iadd\_\_ {#iadd tag="method", new="3.3"} | ||||
| 
 | ||||
| Append an iterable of spans or the content of a span group to the current span | ||||
| group. Any `attrs` in the other span group will be added for keys that are not | ||||
| already present in the current span group. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > doc = nlp("Their goi ng home") | ||||
| > doc.spans["errors"] = [doc[0:1], doc[2:4]] | ||||
| > doc.spans["errors"] += [doc[3:4], doc[2:3]] | ||||
| > assert len(doc.spans["errors"]) == 4 | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                                             | | ||||
| | ----------- | ----------------------------------------------------------------------- | | ||||
| | `other`     | The span group or spans to append. ~~Union[SpanGroup, Iterable[Span]]~~ | | ||||
| | **RETURNS** | The span group. ~~SpanGroup~~                                           | | ||||
| 
 | ||||
| ## SpanGroup.append {#append tag="method"} | ||||
| 
 | ||||
| Add a [`Span`](/api/span) object to the group. The span must refer to the same | ||||
|  | @ -140,8 +222,9 @@ Add a [`Span`](/api/span) object to the group. The span must refer to the same | |||
| 
 | ||||
| ## SpanGroup.extend {#extend tag="method"} | ||||
| 
 | ||||
| Add multiple [`Span`](/api/span) objects to the group. All spans must refer to | ||||
| the same [`Doc`](/api/doc) object as the span group. | ||||
| Add multiple [`Span`](/api/span) objects or contents of another `SpanGroup` to | ||||
| the group. All spans must refer to the same [`Doc`](/api/doc) object as the span | ||||
| group. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
|  | @ -150,11 +233,31 @@ the same [`Doc`](/api/doc) object as the span group. | |||
| > doc.spans["errors"] = [] | ||||
| > doc.spans["errors"].extend([doc[2:4], doc[0:1]]) | ||||
| > assert len(doc.spans["errors"]) == 2 | ||||
| > span_group = SpanGroup(doc, spans=[doc[1:4], doc[0:3]]) | ||||
| > doc.spans["errors"].extend(span_group) | ||||
| > ``` | ||||
| 
 | ||||
| | Name    | Description                          | | ||||
| | ------- | ------------------------------------ | | ||||
| | `spans` | The spans to add. ~~Iterable[Span]~~ | | ||||
| | Name    | Description                                              | | ||||
| | ------- | -------------------------------------------------------- | | ||||
| | `spans` | The spans to add. ~~Union[SpanGroup, Iterable["Span"]]~~ | | ||||
| 
 | ||||
| ## SpanGroup.copy {#copy tag="method", new="3.3"} | ||||
| 
 | ||||
| Return a copy of the span group. | ||||
| 
 | ||||
| > #### Example | ||||
| > | ||||
| > ```python | ||||
| > from spacy.tokens import SpanGroup | ||||
| > | ||||
| > doc = nlp("Their goi ng home") | ||||
| > doc.spans["errors"] = [doc[2:4], doc[0:3]] | ||||
| > new_group = doc.spans["errors"].copy() | ||||
| > ``` | ||||
| 
 | ||||
| | Name        | Description                                     | | ||||
| | ----------- | ----------------------------------------------- | | ||||
| | **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~ | | ||||
| 
 | ||||
| ## SpanGroup.to_bytes {#to_bytes tag="method"} | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user