mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
SpanGroup(s)-related optimizations (#11380)
* `SpanGroup`: Add support for binding copies to a new reference document
* `SpanGroups`: Replace superfluous serialize-deserialize roundtrip in `copy`. Instead, directly copy the in-memory representations of the constituent `SpanGroup`s.
* Update `SpanGroup.copy()` signature
* Rename `new_doc` param to `doc`
* Fix kwarg
* Update `.pyi` file and docstrings
* `mypy` fix
* Update spacy/tokens/span_group.pyx
* Update docs

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
aafee5e1b7
commit
604a7c3c26
|
@ -42,7 +42,8 @@ class SpanGroups(UserDict):
|
||||||
def copy(self, doc: Optional["Doc"] = None) -> "SpanGroups":
|
def copy(self, doc: Optional["Doc"] = None) -> "SpanGroups":
|
||||||
if doc is None:
|
if doc is None:
|
||||||
doc = self._ensure_doc()
|
doc = self._ensure_doc()
|
||||||
return SpanGroups(doc).from_bytes(self.to_bytes())
|
data_copy = ((k, v.copy(doc=doc)) for k, v in self.items())
|
||||||
|
return SpanGroups(doc, items=data_copy)
|
||||||
|
|
||||||
def setdefault(self, key, default=None):
|
def setdefault(self, key, default=None):
|
||||||
if not isinstance(default, SpanGroup):
|
if not isinstance(default, SpanGroup):
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any, Dict, Iterable
|
from typing import Any, Dict, Iterable, Optional
|
||||||
from .doc import Doc
|
from .doc import Doc
|
||||||
from .span import Span
|
from .span import Span
|
||||||
|
|
||||||
|
@ -24,4 +24,4 @@ class SpanGroup:
|
||||||
def __getitem__(self, i: int) -> Span: ...
|
def __getitem__(self, i: int) -> Span: ...
|
||||||
def to_bytes(self) -> bytes: ...
|
def to_bytes(self) -> bytes: ...
|
||||||
def from_bytes(self, bytes_data: bytes) -> SpanGroup: ...
|
def from_bytes(self, bytes_data: bytes) -> SpanGroup: ...
|
||||||
def copy(self) -> SpanGroup: ...
|
def copy(self, doc: Optional[Doc] = ...) -> SpanGroup: ...
|
||||||
|
|
|
@ -241,15 +241,18 @@ cdef class SpanGroup:
|
||||||
cdef void push_back(self, SpanC span) nogil:
|
cdef void push_back(self, SpanC span) nogil:
|
||||||
self.c.push_back(span)
|
self.c.push_back(span)
|
||||||
|
|
||||||
def copy(self) -> SpanGroup:
|
def copy(self, doc: Optional["Doc"] = None) -> SpanGroup:
|
||||||
"""Clones the span group.
|
"""Clones the span group.
|
||||||
|
|
||||||
|
doc (Doc): New reference document to which the copy is bound.
|
||||||
RETURNS (SpanGroup): A copy of the span group.
|
RETURNS (SpanGroup): A copy of the span group.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/spangroup#copy
|
DOCS: https://spacy.io/api/spangroup#copy
|
||||||
"""
|
"""
|
||||||
|
if doc is None:
|
||||||
|
doc = self.doc
|
||||||
return SpanGroup(
|
return SpanGroup(
|
||||||
self.doc,
|
doc,
|
||||||
name=self.name,
|
name=self.name,
|
||||||
attrs=deepcopy(self.attrs),
|
attrs=deepcopy(self.attrs),
|
||||||
spans=list(self),
|
spans=list(self),
|
||||||
|
|
|
@ -255,9 +255,10 @@ Return a copy of the span group.
|
||||||
> new_group = doc.spans["errors"].copy()
|
> new_group = doc.spans["errors"].copy()
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | ----------------------------------------------- |
|
| ----------- | -------------------------------------------------------------------------------------------------- |
|
||||||
| **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~ |
|
| `doc` | The document to which the copy is bound. Defaults to `None` for the current doc. ~~Optional[Doc]~~ |
|
||||||
|
| **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~ |
|
||||||
|
|
||||||
## SpanGroup.to_bytes {#to_bytes tag="method"}
|
## SpanGroup.to_bytes {#to_bytes tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user