mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
6c6b8da7cc
* fix: De/Serialize `SpanGroups` including the SpanGroup keys This prevents the loss of `SpanGroup`s that have the same .name as other `SpanGroup`s within the same `SpanGroups` object (upon de/serialization of the `SpanGroups`). Fixes #10685 * Maintain backwards compatibility for serialized `SpanGroups` (serialized as: a list of `SpanGroup`s, or b'') * Add tests for `SpanGroups` deserialization backwards-compatibility * Move a `SpanGroups` de/serialization test (test_issue10685) to tests/serialize/test_serialize_spangroups.py * Output a warning if deserializing a `SpanGroups` with duplicate .name-d `SpanGroup`s * Minor refactor * `SpanGroups.from_bytes` handles only `list` and `dict` types with `dict` as the expected default * For lists, keep first rather than last value encountered * Update error message * Rename and update tests * Update to preserve list serialization of SpanGroups To avoid breaking compatibility of serialized `Doc` and `DocBin` with earlier versions of spacy v3, revert back to a list-only serialization, but update the names just for serialization so that the SpanGroups keys override the SpanGroup names. * Preserve object identity and current key overwrite * Preserve SpanGroup object identity * Preserve last rather than first span group from SpanGroup list format without SpanGroups keys * Update inline comments * Fix types * Add type info for SpanGroup.copy * Deserialize `SpanGroup`s as copies when a single SpanGroup is the value for more than 1 `SpanGroups` key. This is because we serialize `SpanGroups` as dicts (to maintain backward- and forward-compatibility) and we can't assume `SpanGroup`s with the same bytes/serialization were the same (identical) object, pre-serialization. 
* Update spacy/tokens/_dict_proxies.py * Add more SpanGroups serialization tests Test that serialized SpanGroups maintain their Span order * small clarification on older spaCy version * Update spacy/tests/serialize/test_serialize_span_groups.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
28 lines
771 B
Python
28 lines
771 B
Python
from typing import Any, Dict, Iterable
|
|
from .doc import Doc
|
|
from .span import Span
|
|
|
|
# Type-stub declaration of the SpanGroup interface: signatures only, every
# body is `...`; the implementation is not part of this file.
class SpanGroup:
    # Name of the group (declared as a plain str attribute).
    name: str
    # String-keyed mapping of arbitrary values attached to the group.
    attrs: Dict[str, Any]

    # `doc` is the only positional argument; `name`, `attrs` and `spans` are
    # keyword-only, and each has a default (`= ...` in the stub).
    def __init__(
        self,
        doc: Doc,
        *,
        name: str = ...,
        attrs: Dict[str, Any] = ...,
        spans: Iterable[Span] = ...
    ) -> None: ...

    def __repr__(self) -> str: ...

    # Property typed as Doc; no setter is declared in this stub.
    # NOTE(review): presumably the Doc passed to __init__ -- confirm against
    # the implementation.
    @property
    def doc(self) -> Doc: ...

    # Boolean property; no setter is declared in this stub.
    # NOTE(review): presumably reports whether any spans in the group
    # overlap -- confirm against the implementation.
    @property
    def has_overlap(self) -> bool: ...

    def __len__(self) -> int: ...

    # Takes a single Span and returns None.
    def append(self, span: Span) -> None: ...

    # Takes an iterable of Span objects and returns None.
    def extend(self, spans: Iterable[Span]) -> None: ...

    # Only integer indexing is declared here (no slice overload).
    def __getitem__(self, i: int) -> Span: ...

    # Produces a bytes value for the group.
    def to_bytes(self) -> bytes: ...

    # Consumes bytes and is typed as returning a SpanGroup.
    # NOTE(review): whether the result is `self` or a new object is not
    # visible from the signature -- confirm against the implementation.
    def from_bytes(self, bytes_data: bytes) -> SpanGroup: ...

    # Typed as returning a SpanGroup.
    def copy(self) -> SpanGroup: ...