diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py
index 30d66115f..5788e9b86 100644
--- a/spacy/tests/doc/test_add_entities.py
+++ b/spacy/tests/doc/test_add_entities.py
@@ -81,3 +81,24 @@ def test_add_overlapping_entities(en_vocab):
new_entity = Span(doc, 0, 1, label=392)
with pytest.raises(ValueError):
doc.ents = list(doc.ents) + [new_entity]
+
+
+def test_ents_spangroup(en_vocab):
+    text = [
+        "Louisiana",
+        "Office",
+        "of",
+        "Conservation",
+        "in",
+        "the",
+        "United",
+        "States",
+    ]
+    doc = Doc(en_vocab, words=text)
+    doc.ents = [Span(doc, 0, 4, label=391), Span(doc, 6, 8, label=391)]
+    # Doc.ents_spangroup must expose both entities as a SpanGroup named "ents".
+    assert doc.ents_spangroup.doc == doc
+    assert len(doc.ents_spangroup) == 2
+    assert doc.ents_spangroup.name == "ents"
+    assert str(doc.ents_spangroup[0]) == " ".join(text[:4])
+    assert str(doc.ents_spangroup[1]) == " ".join(text[6:])
diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi
index 48bc21c27..feb29f16e 100644
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@@ -4,6 +4,7 @@ from cymem.cymem import Pool
from thinc.types import ArrayXd, Floats1d, Floats2d, Ints2d, Ragged
from .span import Span
from .token import Token
+from .span_group import SpanGroup
from .span_groups import SpanGroups
from .retokenizer import Retokenizer
from ..lexeme import Lexeme
@@ -131,6 +132,8 @@ class Doc:
default: str = ...,
) -> None: ...
@property
+ def ents_spangroup(self) -> SpanGroup: ...
+ @property
def noun_chunks(self) -> Tuple[Span]: ...
@property
def sents(self) -> Tuple[Span]: ...
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 0ea2c39ab..8869d20e2 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -18,6 +18,7 @@ from thinc.util import copy_array
import warnings
from .span cimport Span
+from .span_group import SpanGroup
from .token cimport MISSING_DEP
from .span_groups import SpanGroups
from .token cimport Token
@@ -702,6 +703,14 @@ cdef class Doc:
"""
return self.text
+    @property
+    def ents_spangroup(self) -> SpanGroup:
+        """The entities in `.ents` as a `SpanGroup` named "ents", bound to this
+        `Doc`. A new `SpanGroup` is constructed from `.ents` on every access.
+        RETURNS (SpanGroup): All entities (in `.ents`) as `SpanGroup`.
+        """
+        return SpanGroup(self, spans=self.ents, name="ents")
+
property ents:
"""The named entities in the document. Returns a list of named entity
`Span` objects, if the entity recognizer has been applied.
diff --git a/website/docs/api/doc.mdx b/website/docs/api/doc.mdx
index fca056ed0..ba54953a4 100644
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@@ -752,22 +752,23 @@ The L2 norm of the document's vector representation.
## Attributes {id="attributes"}
-| Name | Description |
-| ------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| `text` | A string representation of the document text. ~~str~~ |
-| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ |
-| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ |
-| `vocab` | The store of lexical types. ~~Vocab~~ |
-| `tensor` | Container for dense vector representations. ~~numpy.ndarray~~ |
-| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
-| `lang` | Language of the document's vocabulary. ~~int~~ |
-| `lang_` | Language of the document's vocabulary. ~~str~~ |
-| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
-| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
-| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
-| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ |
-| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
-| `activations` 4.0 | A dictionary of activations per trainable pipe (available when the `save_activations` option of a pipe is enabled). ~~Dict[str, Option[Any]]~~ |
+| Name | Description |
+| --------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
+| `text` | A string representation of the document text. ~~str~~ |
+| `text_with_ws` | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. ~~str~~ |
+| `mem` | The document's local memory heap, for all C data it owns. ~~cymem.Pool~~ |
+| `vocab` | The store of lexical types. ~~Vocab~~ |
+| `tensor` | Container for dense vector representations. ~~numpy.ndarray~~ |
+| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
+| `lang` | Language of the document's vocabulary. ~~int~~ |
+| `lang_` | Language of the document's vocabulary. ~~str~~ |
+| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
+| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
+| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
+| `has_unknown_spaces` | Whether the document was constructed without known spacing between tokens (typically when created from gold tokenization). ~~bool~~ |
+| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
+| `activations` 4.0 | A dictionary of activations per trainable pipe (available when the `save_activations` option of a pipe is enabled). ~~Dict[str, Option[Any]]~~ |
+| `ents_spangroup` 4.0 | All entity `Span` instances (as stored in `.ents`) as a [`SpanGroup`](/api/spangroup). ~~SpanGroup~~ |
## Serialization fields {id="serialization-fields"}