mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* fix: De/Serialize `SpanGroups` including the SpanGroup keys This prevents the loss of `SpanGroup`s that have the same .name as other `SpanGroup`s within the same `SpanGroups` object (upon de/serialization of the `SpanGroups`). Fixes #10685 * Maintain backwards compatibility for serialized `SpanGroups` (serialized as: a list of `SpanGroup`s, or b'') * Add tests for `SpanGroups` deserialization backwards-compatibility * Move a `SpanGroups` de/serialization test (test_issue10685) to tests/serialize/test_serialize_spangroups.py * Output a warning if deserializing a `SpanGroups` with duplicate .name-d `SpanGroup`s * Minor refactor * `SpanGroups.from_bytes` handles only `list` and `dict` types with `dict` as the expected default * For lists, keep first rather than last value encountered * Update error message * Rename and update tests * Update to preserve list serialization of SpanGroups To avoid breaking compatibility of serialized `Doc` and `DocBin` with earlier versions of spacy v3, revert back to a list-only serialization, but update the names just for serialization so that the SpanGroups keys override the SpanGroup names. * Preserve object identity and current key overwrite * Preserve SpanGroup object identity * Preserve last rather than first span group from SpanGroup list format without SpanGroups keys * Update inline comments * Fix types * Add type info for SpanGroup.copy * Deserialize `SpanGroup`s as copies when a single SpanGroup is the value for more than 1 `SpanGroups` key. This is because we serialize `SpanGroups` as dicts (to maintain backward- and forward-compatibility) and we can't assume `SpanGroup`s with the same bytes/serialization were the same (identical) object, pre-serialization. 
* Update spacy/tokens/_dict_proxies.py * Add more SpanGroups serialization tests Test that serialized SpanGroups maintain their Span order * small clarification on older spaCy version * Update spacy/tests/serialize/test_serialize_span_groups.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
		
			
				
	
	
		
			28 lines
		
	
	
		
			771 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			28 lines
		
	
	
		
			771 B
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Any, Dict, Iterable
 | |
| from .doc import Doc
 | |
| from .span import Span
 | |
| 
 | |
# Declaration-only typing interface for SpanGroup: every member body below is
# `...`, so this block carries signatures and annotations but no behavior.
# NOTE(review): this looks like a `.pyi`-style stub whose implementation lives
# elsewhere -- confirm against the real filename.
| class SpanGroup:
 | |
    # Declared instance attributes: `name` is a str, `attrs` maps str keys to
    # arbitrary values.
|     name: str
 | |
|     attrs: Dict[str, Any]
 | |
    # Constructor: `doc` is the only positional parameter; `name`, `attrs` and
    # `spans` are keyword-only (they follow the bare `*`). The `= ...` markers
    # say that each has a default without stating what that default is.
|     def __init__(
 | |
|         self,
 | |
|         doc: Doc,
 | |
|         *,
 | |
|         name: str = ...,
 | |
|         attrs: Dict[str, Any] = ...,
 | |
|         spans: Iterable[Span] = ...
 | |
|     ) -> None: ...
 | |
|     def __repr__(self) -> str: ...
 | |
    # `doc` is exposed as a property typed as Doc; no setter is declared in
    # this block.
|     @property
 | |
|     def doc(self) -> Doc: ...
 | |
    # `has_overlap` is a bool-typed property; no setter is declared in this
    # block.
|     @property
 | |
|     def has_overlap(self) -> bool: ...
 | |
    # Container-style interface: length, appending one Span, extending from an
    # iterable of Spans, and integer indexing that yields a Span. Only `int`
    # indices are typed here (no slice overload is declared).
|     def __len__(self) -> int: ...
 | |
|     def append(self, span: Span) -> None: ...
 | |
|     def extend(self, spans: Iterable[Span]) -> None: ...
 | |
|     def __getitem__(self, i: int) -> Span: ...
 | |
    # Byte-level (de)serialization: `to_bytes` returns bytes, `from_bytes`
    # takes bytes and is typed as returning a SpanGroup.
    # NOTE(review): presumably `from_bytes` returns `self` -- confirm in the
    # implementation.
|     def to_bytes(self) -> bytes: ...
 | |
|     def from_bytes(self, bytes_data: bytes) -> SpanGroup: ...
 | |
    # `copy` takes no arguments besides `self` and is typed as returning a
    # SpanGroup.
|     def copy(self) -> SpanGroup: ...
 |