mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Return Tuple[Span] for all Doc/Span attrs that provide spans (#12288)
* Return Tuple[Span] for all Doc/Span attrs that provide spans * Update Span types
This commit is contained in:
parent
df4c069a13
commit
da75896ef5
|
@ -131,9 +131,9 @@ class Doc:
|
|||
default: str = ...,
|
||||
) -> None: ...
|
||||
@property
|
||||
def noun_chunks(self) -> Iterator[Span]: ...
|
||||
def noun_chunks(self) -> Tuple[Span]: ...
|
||||
@property
|
||||
def sents(self) -> Iterator[Span]: ...
|
||||
def sents(self) -> Tuple[Span]: ...
|
||||
@property
|
||||
def lang(self) -> int: ...
|
||||
@property
|
||||
|
|
|
@ -703,10 +703,10 @@ cdef class Doc:
|
|||
return self.text
|
||||
|
||||
property ents:
|
||||
"""The named entities in the document. Returns a tuple of named entity
|
||||
"""The named entities in the document. Returns a tuple of named entity
|
||||
`Span` objects, if the entity recognizer has been applied.
|
||||
|
||||
RETURNS (tuple): Entities in the document, one `Span` per entity.
|
||||
RETURNS (Tuple[Span]): Entities in the document, one `Span` per entity.
|
||||
|
||||
DOCS: https://spacy.io/api/doc#ents
|
||||
"""
|
||||
|
@ -864,7 +864,7 @@ cdef class Doc:
|
|||
NP-level coordination, no prepositional phrases, and no relative
|
||||
clauses.
|
||||
|
||||
YIELDS (Span): Noun chunks in the document.
|
||||
RETURNS (Tuple[Span]): Noun chunks in the document.
|
||||
|
||||
DOCS: https://spacy.io/api/doc#noun_chunks
|
||||
"""
|
||||
|
@ -873,36 +873,35 @@ cdef class Doc:
|
|||
|
||||
# Accumulate the result before beginning to iterate over it. This
|
||||
# prevents the tokenization from being changed out from under us
|
||||
# during the iteration. The tricky thing here is that Span accepts
|
||||
# its tokenization changing, so it's okay once we have the Span
|
||||
# objects. See Issue #375.
|
||||
# during the iteration.
|
||||
spans = []
|
||||
for start, end, label in self.noun_chunks_iterator(self):
|
||||
spans.append(Span(self, start, end, label=label))
|
||||
for span in spans:
|
||||
yield span
|
||||
return tuple(spans)
|
||||
|
||||
@property
|
||||
def sents(self):
|
||||
"""The sentences in the document. Returns a tuple of sentence `Span`
|
||||
objects. Sentence spans have no label.
|
||||
|
||||
YIELDS (Span): Sentences in the document.
|
||||
RETURNS (Tuple[Span]): Sentences in the document.
|
||||
|
||||
DOCS: https://spacy.io/api/doc#sents
|
||||
"""
|
||||
if not self.has_annotation("SENT_START"):
|
||||
raise ValueError(Errors.E030)
|
||||
if "sents" in self.user_hooks:
|
||||
yield from self.user_hooks["sents"](self)
|
||||
return tuple(self.user_hooks["sents"](self))
|
||||
else:
|
||||
start = 0
|
||||
spans = []
|
||||
for i in range(1, self.length):
|
||||
if self.c[i].sent_start == 1:
|
||||
yield Span(self, start, i)
|
||||
spans.append(Span(self, start, i))
|
||||
start = i
|
||||
if start != self.length:
|
||||
yield Span(self, start, self.length)
|
||||
spans.append(Span(self, start, self.length))
|
||||
return tuple(spans)
|
||||
|
||||
@property
|
||||
def lang(self):
|
||||
|
|
|
@ -74,6 +74,8 @@ class Span:
|
|||
@property
|
||||
def ents(self) -> Tuple[Span]: ...
|
||||
@property
|
||||
def sents(self) -> Tuple[Span]: ...
|
||||
@property
|
||||
def has_vector(self) -> bool: ...
|
||||
@property
|
||||
def vector(self) -> Floats1d: ...
|
||||
|
@ -86,7 +88,7 @@ class Span:
|
|||
@property
|
||||
def text_with_ws(self) -> str: ...
|
||||
@property
|
||||
def noun_chunks(self) -> Iterator[Span]: ...
|
||||
def noun_chunks(self) -> Tuple[Span]: ...
|
||||
@property
|
||||
def root(self) -> Token: ...
|
||||
def char_span(
|
||||
|
|
|
@ -461,20 +461,21 @@ cdef class Span:
|
|||
"""Obtain the sentences that contain this span. If the given span
|
||||
crosses sentence boundaries, return all sentences it is a part of.
|
||||
|
||||
RETURNS (Iterable[Span]): All sentences that the span is a part of.
|
||||
RETURNS (Tuple[Span]): All sentences that the span is a part of.
|
||||
|
||||
DOCS: https://spacy.io/api/span#sents
|
||||
DOCS: https://spacy.io/api/span#sents
|
||||
"""
|
||||
cdef int start
|
||||
cdef int i
|
||||
|
||||
if "sents" in self.doc.user_span_hooks:
|
||||
yield from self.doc.user_span_hooks["sents"](self)
|
||||
elif "sents" in self.doc.user_hooks:
|
||||
return tuple(self.doc.user_span_hooks["sents"](self))
|
||||
spans = []
|
||||
if "sents" in self.doc.user_hooks:
|
||||
for sentence in self.doc.user_hooks["sents"](self.doc):
|
||||
if sentence.end > self.start:
|
||||
if sentence.start < self.end or sentence.start == self.start == self.end:
|
||||
yield sentence
|
||||
spans.append(sentence)
|
||||
else:
|
||||
break
|
||||
else:
|
||||
|
@ -489,12 +490,13 @@ cdef class Span:
|
|||
# Now, find all the sentences in the span
|
||||
for i in range(start + 1, self.doc.length):
|
||||
if self.doc.c[i].sent_start == 1:
|
||||
yield Span(self.doc, start, i)
|
||||
spans.append(Span(self.doc, start, i))
|
||||
start = i
|
||||
if start >= self.end:
|
||||
break
|
||||
if start < self.end:
|
||||
yield Span(self.doc, start, self.end)
|
||||
spans.append(Span(self.doc, start, self.end))
|
||||
return tuple(spans)
|
||||
|
||||
|
||||
@property
|
||||
|
@ -502,7 +504,7 @@ cdef class Span:
|
|||
"""The named entities that fall completely within the span. Returns
|
||||
a tuple of `Span` objects.
|
||||
|
||||
RETURNS (tuple): Entities in the span, one `Span` per entity.
|
||||
RETURNS (Tuple[Span]): Entities in the span, one `Span` per entity.
|
||||
|
||||
DOCS: https://spacy.io/api/span#ents
|
||||
"""
|
||||
|
@ -517,7 +519,7 @@ cdef class Span:
|
|||
ents.append(ent)
|
||||
else:
|
||||
break
|
||||
return ents
|
||||
return tuple(ents)
|
||||
|
||||
@property
|
||||
def has_vector(self):
|
||||
|
@ -613,13 +615,15 @@ cdef class Span:
|
|||
NP-level coordination, no prepositional phrases, and no relative
|
||||
clauses.
|
||||
|
||||
YIELDS (Span): Noun chunks in the span.
|
||||
RETURNS (Tuple[Span]): Noun chunks in the span.
|
||||
|
||||
DOCS: https://spacy.io/api/span#noun_chunks
|
||||
"""
|
||||
spans = []
|
||||
for span in self.doc.noun_chunks:
|
||||
if span.start >= self.start and span.end <= self.end:
|
||||
yield span
|
||||
spans.append(span)
|
||||
return tuple(spans)
|
||||
|
||||
@property
|
||||
def root(self):
|
||||
|
|
|
@ -654,11 +654,10 @@ the [`TextCategorizer`](/api/textcategorizer).
|
|||
|
||||
## Doc.noun_chunks {id="noun_chunks",tag="property",model="parser"}
|
||||
|
||||
Iterate over the base noun phrases in the document. Yields base noun-phrase
|
||||
`Span` objects, if the document has been syntactically parsed. A base noun
|
||||
phrase, or "NP chunk", is a noun phrase that does not permit other NPs to be
|
||||
nested within it – so no NP-level coordination, no prepositional phrases, and no
|
||||
relative clauses.
|
||||
Returns a tuple of the base noun phrases in the doc, if the document has been
|
||||
syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that
|
||||
does not permit other NPs to be nested within it – so no NP-level coordination,
|
||||
no prepositional phrases, and no relative clauses.
|
||||
|
||||
To customize the noun chunk iterator in a loaded pipeline, modify
|
||||
[`nlp.vocab.get_noun_chunks`](/api/vocab#attributes). If the `noun_chunk`
|
||||
|
@ -675,13 +674,13 @@ implemented for the given language, a `NotImplementedError` is raised.
|
|||
> assert chunks[1].text == "another phrase"
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ---------- | ------------------------------------- |
|
||||
| **YIELDS** | Noun chunks in the document. ~~Span~~ |
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------- |
|
||||
| **RETURNS** | Noun chunks in the document. ~~Tuple[Span]~~ |
|
||||
|
||||
## Doc.sents {id="sents",tag="property",model="sentences"}
|
||||
|
||||
Iterate over the sentences in the document. Sentence spans have no label.
|
||||
Returns a tuple of the sentences in the document. Sentence spans have no label.
|
||||
|
||||
This property is only available when
|
||||
[sentence boundaries](/usage/linguistic-features#sbd) have been set on the
|
||||
|
@ -697,9 +696,9 @@ will raise an error otherwise.
|
|||
> assert [s.root.text for s in sents] == ["is", "'s"]
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ---------- | ----------------------------------- |
|
||||
| **YIELDS** | Sentences in the document. ~~Span~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------ |
|
||||
| **RETURNS** | Sentences in the document. ~~Tuple[Span]~~ |
|
||||
|
||||
## Doc.has_vector {id="has_vector",tag="property",model="vectors"}
|
||||
|
||||
|
|
|
@ -275,17 +275,16 @@ The named entities that fall completely within the span. Returns a tuple of
|
|||
> assert ents[0].text == "Mr. Best"
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ----------------------------------------------------------------- |
|
||||
| **RETURNS** | Entities in the span, one `Span` per entity. ~~Tuple[Span, ...]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------ |
|
||||
| **RETURNS** | Entities in the span, one `Span` per entity. ~~Tuple[Span]~~ |
|
||||
|
||||
## Span.noun_chunks {id="noun_chunks",tag="property",model="parser"}
|
||||
|
||||
Iterate over the base noun phrases in the span. Yields base noun-phrase `Span`
|
||||
objects, if the document has been syntactically parsed. A base noun phrase, or
|
||||
"NP chunk", is a noun phrase that does not permit other NPs to be nested within
|
||||
it – so no NP-level coordination, no prepositional phrases, and no relative
|
||||
clauses.
|
||||
Returns a tuple of the base noun phrases in the span if the document has been
|
||||
syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that
|
||||
does not permit other NPs to be nested within it – so no NP-level coordination,
|
||||
no prepositional phrases, and no relative clauses.
|
||||
|
||||
If the `noun_chunk` [syntax iterator](/usage/linguistic-features#language-data)
|
||||
has not been implemented for the given language, a `NotImplementedError` is
|
||||
|
@ -301,9 +300,9 @@ raised.
|
|||
> assert chunks[0].text == "another phrase"
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ---------- | --------------------------------- |
|
||||
| **YIELDS** | Noun chunks in the span. ~~Span~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ---------------------------------------- |
|
||||
| **RETURNS** | Noun chunks in the span. ~~Tuple[Span]~~ |
|
||||
|
||||
## Span.as_doc {id="as_doc",tag="method"}
|
||||
|
||||
|
@ -525,9 +524,9 @@ sent = doc[sent.start : max(sent.end, span.end)]
|
|||
|
||||
## Span.sents {id="sents",tag="property",model="sentences",version="3.2.1"}
|
||||
|
||||
Returns a generator over the sentences the span belongs to. This property is
|
||||
only available when [sentence boundaries](/usage/linguistic-features#sbd) have
|
||||
been set on the document by the `parser`, `senter`, `sentencizer` or some custom
|
||||
Returns a tuple of the sentences the span belongs to. This property is only
|
||||
available when [sentence boundaries](/usage/linguistic-features#sbd) have been
|
||||
set on the document by the `parser`, `senter`, `sentencizer` or some custom
|
||||
function. It will raise an error otherwise.
|
||||
|
||||
If the span happens to cross sentence boundaries, all sentences the span
|
||||
|
@ -541,9 +540,9 @@ overlaps with will be returned.
|
|||
> assert len(span.sents) == 2
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------------------------------------- |
|
||||
| **RETURNS** | A generator yielding sentences this `Span` is a part of ~~Iterable[Span]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------- |
|
||||
| **RETURNS** | A tuple of sentences this `Span` is a part of ~~Tuple[Span]~~ |
|
||||
|
||||
## Attributes {id="attributes"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user