Return Tuple[Span] for all Doc/Span attrs that provide spans (#12288)

* Return Tuple[Span] for all Doc/Span attrs that provide spans
* Update Span types
2025-10-21 11:14:32 +03:00 · 2023-03-01 16:00:02 +01:00 · 2023-03-01 16:00:02 +01:00 · da75896ef5
commit da75896ef5
parent df4c069a13
6 changed files with 58 additions and 55 deletions
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@ -131,9 +131,9 @@ class Doc:
        default: str = ...,
    ) -> None: ...
    @property
-    def noun_chunks(self) -> Iterator[Span]: ...
+    def noun_chunks(self) -> Tuple[Span]: ...
    @property
-    def sents(self) -> Iterator[Span]: ...
+    def sents(self) -> Tuple[Span]: ...
    @property
    def lang(self) -> int: ...
    @property
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -703,10 +703,10 @@ cdef class Doc:
        return self.text
    property ents:
-        """The named entities in the document. Returns a tuple of named entity
+        """The named entities in the document. Returns a tuple of named entity
        `Span` objects, if the entity recognizer has been applied.
-        RETURNS (tuple): Entities in the document, one `Span` per entity.
+        RETURNS (Tuple[Span]): Entities in the document, one `Span` per entity.
        DOCS: https://spacy.io/api/doc#ents
        """
@ -864,7 +864,7 @@ cdef class Doc:
        NP-level coordination, no prepositional phrases, and no relative
        clauses.
-        YIELDS (Span): Noun chunks in the document.
+        RETURNS (Tuple[Span]): Noun chunks in the document.
        DOCS: https://spacy.io/api/doc#noun_chunks
        """
@ -873,36 +873,35 @@ cdef class Doc:
        # Accumulate the result before beginning to iterate over it. This
        # prevents the tokenization from being changed out from under us
-        # during the iteration. The tricky thing here is that Span accepts
+        # during the iteration.
-        # its tokenization changing, so it's okay once we have the Span
-        # objects. See Issue #375.
        spans = []
        for start, end, label in self.noun_chunks_iterator(self):
            spans.append(Span(self, start, end, label=label))
-        for span in spans:
+        return tuple(spans)
-            yield span
    @property
    def sents(self):
        """Iterate over the sentences in the document. Yields sentence `Span`
        objects. Sentence spans have no label.
-        YIELDS (Span): Sentences in the document.
+        RETURNS (Tuple[Span]): Sentences in the document.
        DOCS: https://spacy.io/api/doc#sents
        """
        if not self.has_annotation("SENT_START"):
            raise ValueError(Errors.E030)
        if "sents" in self.user_hooks:
-            yield from self.user_hooks["sents"](self)
+            return tuple(self.user_hooks["sents"](self))
        else:
            start = 0
+            spans = []
            for i in range(1, self.length):
                if self.c[i].sent_start == 1:
-                    yield Span(self, start, i)
+                    spans.append(Span(self, start, i))
                    start = i
            if start != self.length:
-                yield Span(self, start, self.length)
+                spans.append(Span(self, start, self.length))
+            return tuple(spans)
    @property
    def lang(self):
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@ -74,6 +74,8 @@ class Span:
    @property
    def ents(self) -> Tuple[Span]: ...
    @property
    def sents(self) -> Tuple[Span]: ...
    @property
    def has_vector(self) -> bool: ...
    @property
    def vector(self) -> Floats1d: ...
@ -86,7 +88,7 @@ class Span:
    @property
    def text_with_ws(self) -> str: ...
    @property
-    def noun_chunks(self) -> Iterator[Span]: ...
+    def noun_chunks(self) -> Tuple[Span]: ...
    @property
    def root(self) -> Token: ...
    def char_span(
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@ -461,20 +461,21 @@ cdef class Span:
        """Obtain the sentences that contain this span. If the given span
        crosses sentence boundaries, return all sentences it is a part of.
-        RETURNS (Iterable[Span]): All sentences that the span is a part of.
+        RETURNS (Tuple[Span]): All sentences that the span is a part of.
-         DOCS: https://spacy.io/api/span#sents
+        DOCS: https://spacy.io/api/span#sents
        """
        cdef int start
        cdef int i
        if "sents" in self.doc.user_span_hooks:
-            yield from self.doc.user_span_hooks["sents"](self)
+            return tuple(self.doc.user_span_hooks["sents"](self))
-        elif "sents" in self.doc.user_hooks:
+        spans = []
+        if "sents" in self.doc.user_hooks:
            for sentence in self.doc.user_hooks["sents"](self.doc):
                if sentence.end > self.start:
                    if sentence.start < self.end or sentence.start == self.start == self.end:
-                        yield sentence
+                        spans.append(sentence)
                    else:
                        break
        else:
@ -489,12 +490,13 @@ cdef class Span:
            # Now, find all the sentences in the span
            for i in range(start + 1, self.doc.length):
                if self.doc.c[i].sent_start == 1:
-                    yield Span(self.doc, start, i)
+                    spans.append(Span(self.doc, start, i))
                    start = i
                    if start >= self.end:
                        break
            if start < self.end:
-                yield Span(self.doc, start, self.end)
+                spans.append(Span(self.doc, start, self.end))
+        return tuple(spans)
    @property
@ -502,7 +504,7 @@ cdef class Span:
        """The named entities that fall completely within the span. Returns
        a tuple of `Span` objects.
-        RETURNS (tuple): Entities in the span, one `Span` per entity.
+        RETURNS (Tuple[Span]): Entities in the span, one `Span` per entity.
        DOCS: https://spacy.io/api/span#ents
        """
@ -517,7 +519,7 @@ cdef class Span:
                    ents.append(ent)
                else:
                    break
-        return ents
+        return tuple(ents)
    @property
    def has_vector(self):
@ -613,13 +615,15 @@ cdef class Span:
        NP-level coordination, no prepositional phrases, and no relative
        clauses.
-        YIELDS (Span): Noun chunks in the span.
+        RETURNS (Tuple[Span]): Noun chunks in the span.
        DOCS: https://spacy.io/api/span#noun_chunks
        """
+        spans = []
        for span in self.doc.noun_chunks:
            if span.start >= self.start and span.end <= self.end:
-                yield span
+                spans.append(span)
+        return tuple(spans)
    @property
    def root(self):
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@ -654,11 +654,10 @@ the [`TextCategorizer`](/api/textcategorizer).
 ## Doc.noun_chunks {id="noun_chunks",tag="property",model="parser"}
-Iterate over the base noun phrases in the document. Yields base noun-phrase
+Returns a tuple of the base noun phrases in the doc, if the document has been
-`Span` objects, if the document has been syntactically parsed. A base noun
+syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that
-phrase, or "NP chunk", is a noun phrase that does not permit other NPs to be
+does not permit other NPs to be nested within it – so no NP-level coordination,
-nested within it – so no NP-level coordination, no prepositional phrases, and no
+no prepositional phrases, and no relative clauses.
-relative clauses.
 To customize the noun chunk iterator in a loaded pipeline, modify
 [`nlp.vocab.get_noun_chunks`](/api/vocab#attributes). If the `noun_chunk`
@ -675,13 +674,13 @@ implemented for the given language, a `NotImplementedError` is raised.
 > assert chunks[1].text == "another phrase"
 > ```
-| Name       | Description                           |
+| Name        | Description                                  |
-| ---------- | ------------------------------------- |
+| ----------- | -------------------------------------------- |
-| **YIELDS** | Noun chunks in the document. ~~Span~~ |
+| **RETURNS** | Noun chunks in the document. ~~Tuple[Span]~~ |
 ## Doc.sents {id="sents",tag="property",model="sentences"}
-Iterate over the sentences in the document. Sentence spans have no label.
+Returns a tuple of the sentences in the document. Sentence spans have no label.
 This property is only available when
 [sentence boundaries](/usage/linguistic-features#sbd) have been set on the
@ -697,9 +696,9 @@ will raise an error otherwise.
 > assert [s.root.text for s in sents] == ["is", "'s"]
 > ```
-| Name       | Description                         |
+| Name        | Description                                |
-| ---------- | ----------------------------------- |
+| ----------- | ------------------------------------------ |
-| **YIELDS** | Sentences in the document. ~~Span~~ |
+| **RETURNS** | Sentences in the document. ~~Tuple[Span]~~ |
 ## Doc.has_vector {id="has_vector",tag="property",model="vectors"}
--- a/website/docs/api/span.mdx
+++ b/website/docs/api/span.mdx
@ -275,17 +275,16 @@ The named entities that fall completely within the span. Returns a tuple of
 > assert ents[0].text == "Mr. Best"
 > ```
-| Name        | Description                                                       |
+| Name        | Description                                                  |
-| ----------- | ----------------------------------------------------------------- |
+| ----------- | ------------------------------------------------------------ |
-| **RETURNS** | Entities in the span, one `Span` per entity. ~~Tuple[Span, ...]~~ |
+| **RETURNS** | Entities in the span, one `Span` per entity. ~~Tuple[Span]~~ |
 ## Span.noun_chunks {id="noun_chunks",tag="property",model="parser"}
-Iterate over the base noun phrases in the span. Yields base noun-phrase `Span`
+Returns a tuple of the base noun phrases in the span if the document has been
-objects, if the document has been syntactically parsed. A base noun phrase, or
+syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that
-"NP chunk", is a noun phrase that does not permit other NPs to be nested within
+does not permit other NPs to be nested within it – so no NP-level coordination,
-it – so no NP-level coordination, no prepositional phrases, and no relative
+no prepositional phrases, and no relative clauses.
-clauses.
 If the `noun_chunk` [syntax iterator](/usage/linguistic-features#language-data)
 has not been implemented for the given language, a `NotImplementedError` is
@ -301,9 +300,9 @@ raised.
 > assert chunks[0].text == "another phrase"
 > ```
-| Name       | Description                       |
+| Name        | Description                              |
-| ---------- | --------------------------------- |
+| ----------- | ---------------------------------------- |
-| **YIELDS** | Noun chunks in the span. ~~Span~~ |
+| **RETURNS** | Noun chunks in the span. ~~Tuple[Span]~~ |
 ## Span.as_doc {id="as_doc",tag="method"}
@ -525,9 +524,9 @@ sent = doc[sent.start : max(sent.end, span.end)]
 ## Span.sents {id="sents",tag="property",model="sentences",version="3.2.1"}
-Returns a generator over the sentences the span belongs to. This property is
+Returns a tuple of the sentences the span belongs to. This property is only
-only available when [sentence boundaries](/usage/linguistic-features#sbd) have
+available when [sentence boundaries](/usage/linguistic-features#sbd) have been
-been set on the document by the `parser`, `senter`, `sentencizer` or some custom
+set on the document by the `parser`, `senter`, `sentencizer` or some custom
 function. It will raise an error otherwise.
 If the span happens to cross sentence boundaries, all sentences the span
@ -541,9 +540,9 @@ overlaps with will be returned.
 > assert len(span.sents) == 2
 > ```
-| Name        | Description                                                                |
+| Name        | Description                                                   |
-| ----------- | -------------------------------------------------------------------------- |
+| ----------- | ------------------------------------------------------------- |
-| **RETURNS** | A generator yielding sentences this `Span` is a part of ~~Iterable[Span]~~ |
+| **RETURNS** | A tuple of sentences this `Span` is a part of ~~Tuple[Span]~~ |
 ## Attributes {id="attributes"}