mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 18:36:36 +03:00
Return Tuple[Span] for all Doc/Span attrs that provide spans (#12288)
* Return Tuple[Span] for all Doc/Span attrs that provide spans * Update Span types
This commit is contained in:
parent
df4c069a13
commit
da75896ef5
|
@ -131,9 +131,9 @@ class Doc:
|
||||||
default: str = ...,
|
default: str = ...,
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
@property
|
@property
|
||||||
def noun_chunks(self) -> Iterator[Span]: ...
|
def noun_chunks(self) -> Tuple[Span]: ...
|
||||||
@property
|
@property
|
||||||
def sents(self) -> Iterator[Span]: ...
|
def sents(self) -> Tuple[Span]: ...
|
||||||
@property
|
@property
|
||||||
def lang(self) -> int: ...
|
def lang(self) -> int: ...
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -703,10 +703,10 @@ cdef class Doc:
|
||||||
return self.text
|
return self.text
|
||||||
|
|
||||||
property ents:
|
property ents:
|
||||||
"""The named entities in the document. Returns a tuple of named entity
|
"""The named entities in the document. Returns a tuple of named entity
|
||||||
`Span` objects, if the entity recognizer has been applied.
|
`Span` objects, if the entity recognizer has been applied.
|
||||||
|
|
||||||
RETURNS (tuple): Entities in the document, one `Span` per entity.
|
RETURNS (Tuple[Span]): Entities in the document, one `Span` per entity.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#ents
|
DOCS: https://spacy.io/api/doc#ents
|
||||||
"""
|
"""
|
||||||
|
@ -864,7 +864,7 @@ cdef class Doc:
|
||||||
NP-level coordination, no prepositional phrases, and no relative
|
NP-level coordination, no prepositional phrases, and no relative
|
||||||
clauses.
|
clauses.
|
||||||
|
|
||||||
YIELDS (Span): Noun chunks in the document.
|
RETURNS (Tuple[Span]): Noun chunks in the document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#noun_chunks
|
DOCS: https://spacy.io/api/doc#noun_chunks
|
||||||
"""
|
"""
|
||||||
|
@ -873,36 +873,35 @@ cdef class Doc:
|
||||||
|
|
||||||
# Accumulate the result before beginning to iterate over it. This
|
# Accumulate the result before beginning to iterate over it. This
|
||||||
# prevents the tokenization from being changed out from under us
|
# prevents the tokenization from being changed out from under us
|
||||||
# during the iteration. The tricky thing here is that Span accepts
|
# during the iteration.
|
||||||
# its tokenization changing, so it's okay once we have the Span
|
|
||||||
# objects. See Issue #375.
|
|
||||||
spans = []
|
spans = []
|
||||||
for start, end, label in self.noun_chunks_iterator(self):
|
for start, end, label in self.noun_chunks_iterator(self):
|
||||||
spans.append(Span(self, start, end, label=label))
|
spans.append(Span(self, start, end, label=label))
|
||||||
for span in spans:
|
return tuple(spans)
|
||||||
yield span
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def sents(self):
|
def sents(self):
|
||||||
"""Iterate over the sentences in the document. Yields sentence `Span`
|
"""Return the sentences in the document as a tuple of sentence `Span`
|
||||||
objects. Sentence spans have no label.
|
objects. Sentence spans have no label.
|
||||||
|
|
||||||
YIELDS (Span): Sentences in the document.
|
RETURNS (Tuple[Span]): Sentences in the document.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#sents
|
DOCS: https://spacy.io/api/doc#sents
|
||||||
"""
|
"""
|
||||||
if not self.has_annotation("SENT_START"):
|
if not self.has_annotation("SENT_START"):
|
||||||
raise ValueError(Errors.E030)
|
raise ValueError(Errors.E030)
|
||||||
if "sents" in self.user_hooks:
|
if "sents" in self.user_hooks:
|
||||||
yield from self.user_hooks["sents"](self)
|
return tuple(self.user_hooks["sents"](self))
|
||||||
else:
|
else:
|
||||||
start = 0
|
start = 0
|
||||||
|
spans = []
|
||||||
for i in range(1, self.length):
|
for i in range(1, self.length):
|
||||||
if self.c[i].sent_start == 1:
|
if self.c[i].sent_start == 1:
|
||||||
yield Span(self, start, i)
|
spans.append(Span(self, start, i))
|
||||||
start = i
|
start = i
|
||||||
if start != self.length:
|
if start != self.length:
|
||||||
yield Span(self, start, self.length)
|
spans.append(Span(self, start, self.length))
|
||||||
|
return tuple(spans)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def lang(self):
|
def lang(self):
|
||||||
|
|
|
@ -74,6 +74,8 @@ class Span:
|
||||||
@property
|
@property
|
||||||
def ents(self) -> Tuple[Span]: ...
|
def ents(self) -> Tuple[Span]: ...
|
||||||
@property
|
@property
|
||||||
|
def sents(self) -> Tuple[Span]: ...
|
||||||
|
@property
|
||||||
def has_vector(self) -> bool: ...
|
def has_vector(self) -> bool: ...
|
||||||
@property
|
@property
|
||||||
def vector(self) -> Floats1d: ...
|
def vector(self) -> Floats1d: ...
|
||||||
|
@ -86,7 +88,7 @@ class Span:
|
||||||
@property
|
@property
|
||||||
def text_with_ws(self) -> str: ...
|
def text_with_ws(self) -> str: ...
|
||||||
@property
|
@property
|
||||||
def noun_chunks(self) -> Iterator[Span]: ...
|
def noun_chunks(self) -> Tuple[Span]: ...
|
||||||
@property
|
@property
|
||||||
def root(self) -> Token: ...
|
def root(self) -> Token: ...
|
||||||
def char_span(
|
def char_span(
|
||||||
|
|
|
@ -461,20 +461,21 @@ cdef class Span:
|
||||||
"""Obtain the sentences that contain this span. If the given span
|
"""Obtain the sentences that contain this span. If the given span
|
||||||
crosses sentence boundaries, return all sentences it is a part of.
|
crosses sentence boundaries, return all sentences it is a part of.
|
||||||
|
|
||||||
RETURNS (Iterable[Span]): All sentences that the span is a part of.
|
RETURNS (Tuple[Span]): All sentences that the span is a part of.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#sents
|
DOCS: https://spacy.io/api/span#sents
|
||||||
"""
|
"""
|
||||||
cdef int start
|
cdef int start
|
||||||
cdef int i
|
cdef int i
|
||||||
|
|
||||||
if "sents" in self.doc.user_span_hooks:
|
if "sents" in self.doc.user_span_hooks:
|
||||||
yield from self.doc.user_span_hooks["sents"](self)
|
return tuple(self.doc.user_span_hooks["sents"](self))
|
||||||
elif "sents" in self.doc.user_hooks:
|
spans = []
|
||||||
|
if "sents" in self.doc.user_hooks:
|
||||||
for sentence in self.doc.user_hooks["sents"](self.doc):
|
for sentence in self.doc.user_hooks["sents"](self.doc):
|
||||||
if sentence.end > self.start:
|
if sentence.end > self.start:
|
||||||
if sentence.start < self.end or sentence.start == self.start == self.end:
|
if sentence.start < self.end or sentence.start == self.start == self.end:
|
||||||
yield sentence
|
spans.append(sentence)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
@ -489,12 +490,13 @@ cdef class Span:
|
||||||
# Now, find all the sentences in the span
|
# Now, find all the sentences in the span
|
||||||
for i in range(start + 1, self.doc.length):
|
for i in range(start + 1, self.doc.length):
|
||||||
if self.doc.c[i].sent_start == 1:
|
if self.doc.c[i].sent_start == 1:
|
||||||
yield Span(self.doc, start, i)
|
spans.append(Span(self.doc, start, i))
|
||||||
start = i
|
start = i
|
||||||
if start >= self.end:
|
if start >= self.end:
|
||||||
break
|
break
|
||||||
if start < self.end:
|
if start < self.end:
|
||||||
yield Span(self.doc, start, self.end)
|
spans.append(Span(self.doc, start, self.end))
|
||||||
|
return tuple(spans)
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -502,7 +504,7 @@ cdef class Span:
|
||||||
"""The named entities that fall completely within the span. Returns
|
"""The named entities that fall completely within the span. Returns
|
||||||
a tuple of `Span` objects.
|
a tuple of `Span` objects.
|
||||||
|
|
||||||
RETURNS (tuple): Entities in the span, one `Span` per entity.
|
RETURNS (Tuple[Span]): Entities in the span, one `Span` per entity.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#ents
|
DOCS: https://spacy.io/api/span#ents
|
||||||
"""
|
"""
|
||||||
|
@ -517,7 +519,7 @@ cdef class Span:
|
||||||
ents.append(ent)
|
ents.append(ent)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
return ents
|
return tuple(ents)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_vector(self):
|
def has_vector(self):
|
||||||
|
@ -613,13 +615,15 @@ cdef class Span:
|
||||||
NP-level coordination, no prepositional phrases, and no relative
|
NP-level coordination, no prepositional phrases, and no relative
|
||||||
clauses.
|
clauses.
|
||||||
|
|
||||||
YIELDS (Span): Noun chunks in the span.
|
RETURNS (Tuple[Span]): Noun chunks in the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#noun_chunks
|
DOCS: https://spacy.io/api/span#noun_chunks
|
||||||
"""
|
"""
|
||||||
|
spans = []
|
||||||
for span in self.doc.noun_chunks:
|
for span in self.doc.noun_chunks:
|
||||||
if span.start >= self.start and span.end <= self.end:
|
if span.start >= self.start and span.end <= self.end:
|
||||||
yield span
|
spans.append(span)
|
||||||
|
return tuple(spans)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def root(self):
|
def root(self):
|
||||||
|
|
|
@ -654,11 +654,10 @@ the [`TextCategorizer`](/api/textcategorizer).
|
||||||
|
|
||||||
## Doc.noun_chunks {id="noun_chunks",tag="property",model="parser"}
|
## Doc.noun_chunks {id="noun_chunks",tag="property",model="parser"}
|
||||||
|
|
||||||
Iterate over the base noun phrases in the document. Yields base noun-phrase
|
Returns a tuple of the base noun phrases in the doc, if the document has been
|
||||||
`Span` objects, if the document has been syntactically parsed. A base noun
|
syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that
|
||||||
phrase, or "NP chunk", is a noun phrase that does not permit other NPs to be
|
does not permit other NPs to be nested within it – so no NP-level coordination,
|
||||||
nested within it – so no NP-level coordination, no prepositional phrases, and no
|
no prepositional phrases, and no relative clauses.
|
||||||
relative clauses.
|
|
||||||
|
|
||||||
To customize the noun chunk iterator in a loaded pipeline, modify
|
To customize the noun chunk iterator in a loaded pipeline, modify
|
||||||
[`nlp.vocab.get_noun_chunks`](/api/vocab#attributes). If the `noun_chunk`
|
[`nlp.vocab.get_noun_chunks`](/api/vocab#attributes). If the `noun_chunk`
|
||||||
|
@ -675,13 +674,13 @@ implemented for the given language, a `NotImplementedError` is raised.
|
||||||
> assert chunks[1].text == "another phrase"
|
> assert chunks[1].text == "another phrase"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------- | ------------------------------------- |
|
| ----------- | -------------------------------------------- |
|
||||||
| **YIELDS** | Noun chunks in the document. ~~Span~~ |
|
| **RETURNS** | Noun chunks in the document. ~~Tuple[Span]~~ |
|
||||||
|
|
||||||
## Doc.sents {id="sents",tag="property",model="sentences"}
|
## Doc.sents {id="sents",tag="property",model="sentences"}
|
||||||
|
|
||||||
Iterate over the sentences in the document. Sentence spans have no label.
|
Returns a tuple of the sentences in the document. Sentence spans have no label.
|
||||||
|
|
||||||
This property is only available when
|
This property is only available when
|
||||||
[sentence boundaries](/usage/linguistic-features#sbd) have been set on the
|
[sentence boundaries](/usage/linguistic-features#sbd) have been set on the
|
||||||
|
@ -697,9 +696,9 @@ will raise an error otherwise.
|
||||||
> assert [s.root.text for s in sents] == ["is", "'s"]
|
> assert [s.root.text for s in sents] == ["is", "'s"]
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------- | ----------------------------------- |
|
| ----------- | ------------------------------------------ |
|
||||||
| **YIELDS** | Sentences in the document. ~~Span~~ |
|
| **RETURNS** | Sentences in the document. ~~Tuple[Span]~~ |
|
||||||
|
|
||||||
## Doc.has_vector {id="has_vector",tag="property",model="vectors"}
|
## Doc.has_vector {id="has_vector",tag="property",model="vectors"}
|
||||||
|
|
||||||
|
|
|
@ -275,17 +275,16 @@ The named entities that fall completely within the span. Returns a tuple of
|
||||||
> assert ents[0].text == "Mr. Best"
|
> assert ents[0].text == "Mr. Best"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | ----------------------------------------------------------------- |
|
| ----------- | ------------------------------------------------------------ |
|
||||||
| **RETURNS** | Entities in the span, one `Span` per entity. ~~Tuple[Span, ...]~~ |
|
| **RETURNS** | Entities in the span, one `Span` per entity. ~~Tuple[Span]~~ |
|
||||||
|
|
||||||
## Span.noun_chunks {id="noun_chunks",tag="property",model="parser"}
|
## Span.noun_chunks {id="noun_chunks",tag="property",model="parser"}
|
||||||
|
|
||||||
Iterate over the base noun phrases in the span. Yields base noun-phrase `Span`
|
Returns a tuple of the base noun phrases in the span if the document has been
|
||||||
objects, if the document has been syntactically parsed. A base noun phrase, or
|
syntactically parsed. A base noun phrase, or "NP chunk", is a noun phrase that
|
||||||
"NP chunk", is a noun phrase that does not permit other NPs to be nested within
|
does not permit other NPs to be nested within it – so no NP-level coordination,
|
||||||
it – so no NP-level coordination, no prepositional phrases, and no relative
|
no prepositional phrases, and no relative clauses.
|
||||||
clauses.
|
|
||||||
|
|
||||||
If the `noun_chunk` [syntax iterator](/usage/linguistic-features#language-data)
|
If the `noun_chunk` [syntax iterator](/usage/linguistic-features#language-data)
|
||||||
has not been implemented for the given language, a `NotImplementedError` is
|
has not been implemented for the given language, a `NotImplementedError` is
|
||||||
|
@ -301,9 +300,9 @@ raised.
|
||||||
> assert chunks[0].text == "another phrase"
|
> assert chunks[0].text == "another phrase"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------- | --------------------------------- |
|
| ----------- | ---------------------------------------- |
|
||||||
| **YIELDS** | Noun chunks in the span. ~~Span~~ |
|
| **RETURNS** | Noun chunks in the span. ~~Tuple[Span]~~ |
|
||||||
|
|
||||||
## Span.as_doc {id="as_doc",tag="method"}
|
## Span.as_doc {id="as_doc",tag="method"}
|
||||||
|
|
||||||
|
@ -525,9 +524,9 @@ sent = doc[sent.start : max(sent.end, span.end)]
|
||||||
|
|
||||||
## Span.sents {id="sents",tag="property",model="sentences",version="3.2.1"}
|
## Span.sents {id="sents",tag="property",model="sentences",version="3.2.1"}
|
||||||
|
|
||||||
Returns a generator over the sentences the span belongs to. This property is
|
Returns a tuple of the sentences the span belongs to. This property is only
|
||||||
only available when [sentence boundaries](/usage/linguistic-features#sbd) have
|
available when [sentence boundaries](/usage/linguistic-features#sbd) have been
|
||||||
been set on the document by the `parser`, `senter`, `sentencizer` or some custom
|
set on the document by the `parser`, `senter`, `sentencizer` or some custom
|
||||||
function. It will raise an error otherwise.
|
function. It will raise an error otherwise.
|
||||||
|
|
||||||
If the span happens to cross sentence boundaries, all sentences the span
|
If the span happens to cross sentence boundaries, all sentences the span
|
||||||
|
@ -541,9 +540,9 @@ overlaps with will be returned.
|
||||||
> assert len(span.sents) == 2
|
> assert len(span.sents) == 2
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | -------------------------------------------------------------------------- |
|
| ----------- | ------------------------------------------------------------- |
|
||||||
| **RETURNS** | A generator yielding sentences this `Span` is a part of ~~Iterable[Span]~~ |
|
| **RETURNS** | A tuple of sentences this `Span` is a part of ~~Tuple[Span]~~ |
|
||||||
|
|
||||||
## Attributes {id="attributes"}
|
## Attributes {id="attributes"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user