mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Update docstrings for Span class
This commit is contained in:
parent
b87066ff10
commit
593361ee3c
|
@ -20,22 +20,17 @@ from .. import about
|
||||||
|
|
||||||
|
|
||||||
cdef class Span:
|
cdef class Span:
|
||||||
"""
|
"""A slice from a Doc object."""
|
||||||
A slice from a Doc object.
|
|
||||||
"""
|
|
||||||
def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
|
def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
|
||||||
vector_norm=None):
|
vector_norm=None):
|
||||||
"""
|
"""Create a `Span` object from the slice `doc[start : end]`.
|
||||||
Create a Span object from the slice doc[start : end]
|
|
||||||
|
|
||||||
Arguments:
|
doc (Doc): The parent document.
|
||||||
doc (Doc): The parent document.
|
start (int): The index of the first token of the span.
|
||||||
start (int): The index of the first token of the span.
|
end (int): The index of the first token after the span.
|
||||||
end (int): The index of the first token after the span.
|
label (int): A label to attach to the Span, e.g. for named entities.
|
||||||
label (int): A label to attach to the Span, e.g. for named entities.
|
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
|
||||||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
|
RETURNS (Span): The newly constructed object.
|
||||||
Returns:
|
|
||||||
Span The newly constructed object.
|
|
||||||
"""
|
"""
|
||||||
if not (0 <= start <= end <= len(doc)):
|
if not (0 <= start <= end <= len(doc)):
|
||||||
raise IndexError
|
raise IndexError
|
||||||
|
@ -70,7 +65,6 @@ cdef class Span:
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return hash((self.doc, self.label, self.start_char, self.end_char))
|
return hash((self.doc, self.label, self.start_char, self.end_char))
|
||||||
|
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
self._recalculate_indices()
|
self._recalculate_indices()
|
||||||
if self.end < self.start:
|
if self.end < self.start:
|
||||||
|
@ -99,30 +93,21 @@ cdef class Span:
|
||||||
yield self.doc[i]
|
yield self.doc[i]
|
||||||
|
|
||||||
def merge(self, *args, **attributes):
|
def merge(self, *args, **attributes):
|
||||||
"""
|
"""Retokenize the document, such that the span is merged into a single token.
|
||||||
Retokenize the document, such that the span is merged into a single token.
|
|
||||||
|
|
||||||
Arguments:
|
**attributes: Attributes to assign to the merged token. By default,
|
||||||
**attributes:
|
attributes are inherited from the syntactic root token of the span.
|
||||||
Attributes to assign to the merged token. By default, attributes
|
RETURNS (Token): The newly merged token.
|
||||||
are inherited from the syntactic root token of the span.
|
|
||||||
Returns:
|
|
||||||
token (Token):
|
|
||||||
The newly merged token.
|
|
||||||
"""
|
"""
|
||||||
return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
|
return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
|
||||||
|
|
||||||
def similarity(self, other):
|
def similarity(self, other):
|
||||||
"""
|
""" Make a semantic similarity estimate. The default estimate is cosine
|
||||||
Make a semantic similarity estimate. The default estimate is cosine
|
|
||||||
similarity using an average of word vectors.
|
similarity using an average of word vectors.
|
||||||
|
|
||||||
Arguments:
|
other (object): The object to compare with. By default, accepts `Doc`,
|
||||||
other (object): The object to compare with. By default, accepts Doc,
|
`Span`, `Token` and `Lexeme` objects.
|
||||||
Span, Token and Lexeme objects.
|
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||||
|
|
||||||
Return:
|
|
||||||
score (float): A scalar similarity score. Higher is more similar.
|
|
||||||
"""
|
"""
|
||||||
if 'similarity' in self.doc.user_span_hooks:
|
if 'similarity' in self.doc.user_span_hooks:
|
||||||
self.doc.user_span_hooks['similarity'](self, other)
|
self.doc.user_span_hooks['similarity'](self, other)
|
||||||
|
@ -145,11 +130,9 @@ cdef class Span:
|
||||||
self.end = end + 1
|
self.end = end + 1
|
||||||
|
|
||||||
property sent:
|
property sent:
|
||||||
"""
|
"""The sentence span that this span is a part of.
|
||||||
The sentence span that this span is a part of.
|
|
||||||
|
|
||||||
Returns:
|
RETURNS (Span): The sentence span that the span is a part of.
|
||||||
Span The sentence this is part of.
|
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'sent' in self.doc.user_span_hooks:
|
if 'sent' in self.doc.user_span_hooks:
|
||||||
|
@ -166,12 +149,14 @@ cdef class Span:
|
||||||
return self.doc[root.l_edge : root.r_edge + 1]
|
return self.doc[root.l_edge : root.r_edge + 1]
|
||||||
|
|
||||||
property has_vector:
|
property has_vector:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'has_vector' in self.doc.user_span_hooks:
|
if 'has_vector' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['has_vector'](self)
|
return self.doc.user_span_hooks['has_vector'](self)
|
||||||
return any(token.has_vector for token in self)
|
return any(token.has_vector for token in self)
|
||||||
|
|
||||||
property vector:
|
property vector:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector' in self.doc.user_span_hooks:
|
if 'vector' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['vector'](self)
|
return self.doc.user_span_hooks['vector'](self)
|
||||||
|
@ -180,6 +165,7 @@ cdef class Span:
|
||||||
return self._vector
|
return self._vector
|
||||||
|
|
||||||
property vector_norm:
|
property vector_norm:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector_norm' in self.doc.user_span_hooks:
|
if 'vector_norm' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['vector'](self)
|
return self.doc.user_span_hooks['vector'](self)
|
||||||
|
@ -193,6 +179,7 @@ cdef class Span:
|
||||||
return self._vector_norm
|
return self._vector_norm
|
||||||
|
|
||||||
property sentiment:
|
property sentiment:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'sentiment' in self.doc.user_span_hooks:
|
if 'sentiment' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['sentiment'](self)
|
return self.doc.user_span_hooks['sentiment'](self)
|
||||||
|
@ -200,6 +187,7 @@ cdef class Span:
|
||||||
return sum([token.sentiment for token in self]) / len(self)
|
return sum([token.sentiment for token in self]) / len(self)
|
||||||
|
|
||||||
property text:
|
property text:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
text = self.text_with_ws
|
text = self.text_with_ws
|
||||||
if self[-1].whitespace_:
|
if self[-1].whitespace_:
|
||||||
|
@ -207,16 +195,17 @@ cdef class Span:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
property text_with_ws:
|
property text_with_ws:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return u''.join([t.text_with_ws for t in self])
|
return u''.join([t.text_with_ws for t in self])
|
||||||
|
|
||||||
property noun_chunks:
|
property noun_chunks:
|
||||||
"""
|
"""Yields base noun-phrase `Span` objects, if the document has been
|
||||||
Yields base noun-phrase #[code Span] objects, if the document
|
syntactically parsed. A base noun phrase, or "NP chunk", is a noun
|
||||||
has been syntactically parsed. A base noun phrase, or
|
phrase that does not permit other NPs to be nested within it – so no
|
||||||
'NP chunk', is a noun phrase that does not permit other NPs to
|
NP-level coordination, no prepositional phrases, and no relative clauses.
|
||||||
be nested within it – so no NP-level coordination, no prepositional
|
|
||||||
phrases, and no relative clauses. For example:
|
YIELDS (Span): Base noun-phrase `Span` objects
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if not self.doc.is_parsed:
|
if not self.doc.is_parsed:
|
||||||
|
@ -235,49 +224,47 @@ cdef class Span:
|
||||||
yield span
|
yield span
|
||||||
|
|
||||||
property root:
|
property root:
|
||||||
"""
|
"""The token within the span that's highest in the parse tree.
|
||||||
The token within the span that's highest in the parse tree. If there's a
|
If there's a tie, the earliest is preferred.
|
||||||
tie, the earlist is prefered.
|
|
||||||
|
|
||||||
Returns:
|
RETURNS (Token): The root token.
|
||||||
Token: The root token.
|
|
||||||
|
|
||||||
i.e. has the shortest path to the root of the sentence (or is the root
|
EXAMPLE: The root token has the shortest path to the root of the sentence
|
||||||
itself). If multiple words are equally high in the tree, the first word
|
(or is the root itself). If multiple words are equally high in the
|
||||||
is taken. For example:
|
tree, the first word is taken. For example:
|
||||||
|
|
||||||
>>> toks = nlp(u'I like New York in Autumn.')
|
>>> toks = nlp(u'I like New York in Autumn.')
|
||||||
|
|
||||||
Let's name the indices --- easier than writing "toks[4]" etc.
|
Let's name the indices – easier than writing `toks[4]` etc.
|
||||||
|
|
||||||
>>> i, like, new, york, in_, autumn, dot = range(len(toks))
|
>>> i, like, new, york, in_, autumn, dot = range(len(toks))
|
||||||
|
|
||||||
The head of 'new' is 'York', and the head of 'York' is 'like'
|
The head of "new" is "York", and the head of "York" is "like"
|
||||||
|
|
||||||
>>> toks[new].head.orth_
|
>>> toks[new].head.orth_
|
||||||
'York'
|
'York'
|
||||||
>>> toks[york].head.orth_
|
>>> toks[york].head.orth_
|
||||||
'like'
|
'like'
|
||||||
|
|
||||||
Create a span for "New York". Its root is "York".
|
Create a span for "New York". Its root is "York".
|
||||||
|
|
||||||
>>> new_york = toks[new:york+1]
|
>>> new_york = toks[new:york+1]
|
||||||
>>> new_york.root.orth_
|
>>> new_york.root.orth_
|
||||||
'York'
|
'York'
|
||||||
|
|
||||||
Here's a more complicated case, raise by Issue #214
|
Here's a more complicated case, raised by issue #214:
|
||||||
|
|
||||||
>>> toks = nlp(u'to, north and south carolina')
|
>>> toks = nlp(u'to, north and south carolina')
|
||||||
>>> to, north, and_, south, carolina = toks
|
>>> to, north, and_, south, carolina = toks
|
||||||
>>> south.head.text, carolina.head.text
|
>>> south.head.text, carolina.head.text
|
||||||
('north', 'to')
|
('north', 'to')
|
||||||
|
|
||||||
Here 'south' is a child of 'north', which is a child of 'carolina'.
|
Here "south" is a child of "north", which is a child of "carolina".
|
||||||
Carolina is the root of the span:
|
Carolina is the root of the span:
|
||||||
|
|
||||||
>>> south_carolina = toks[-2:]
|
>>> south_carolina = toks[-2:]
|
||||||
>>> south_carolina.root.text
|
>>> south_carolina.root.text
|
||||||
'carolina'
|
'carolina'
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
self._recalculate_indices()
|
self._recalculate_indices()
|
||||||
|
@ -314,10 +301,10 @@ cdef class Span:
|
||||||
return self.doc[root]
|
return self.doc[root]
|
||||||
|
|
||||||
property lefts:
|
property lefts:
|
||||||
"""
|
""" Tokens that are to the left of the span, whose head is within the
|
||||||
Tokens that are to the left of the span, whose head is within the Span.
|
`Span`.
|
||||||
|
|
||||||
Yields: Token A left-child of a token of the span.
|
YIELDS (Token): A left-child of a token of the span.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
for token in reversed(self): # Reverse, so we get the tokens in order
|
for token in reversed(self): # Reverse, so we get the tokens in order
|
||||||
|
@ -326,10 +313,10 @@ cdef class Span:
|
||||||
yield left
|
yield left
|
||||||
|
|
||||||
property rights:
|
property rights:
|
||||||
"""
|
"""Tokens that are to the right of the Span, whose head is within the
|
||||||
Tokens that are to the right of the Span, whose head is within the Span.
|
`Span`.
|
||||||
|
|
||||||
Yields: Token A right-child of a token of the span.
|
YIELDS (Token): A right-child of a token of the span.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
for token in self:
|
for token in self:
|
||||||
|
@ -338,10 +325,9 @@ cdef class Span:
|
||||||
yield right
|
yield right
|
||||||
|
|
||||||
property subtree:
|
property subtree:
|
||||||
"""
|
"""Tokens that descend from tokens in the span, but fall outside it.
|
||||||
Tokens that descend from tokens in the span, but fall outside it.
|
|
||||||
|
|
||||||
Yields: Token A descendant of a token within the span.
|
YIELDS (Token): A descendant of a token within the span.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
for word in self.lefts:
|
for word in self.lefts:
|
||||||
|
@ -351,8 +337,9 @@ cdef class Span:
|
||||||
yield from word.subtree
|
yield from word.subtree
|
||||||
|
|
||||||
property ent_id:
|
property ent_id:
|
||||||
"""
|
"""An (integer) entity ID. Usually assigned by patterns in the `Matcher`.
|
||||||
An (integer) entity ID. Usually assigned by patterns in the Matcher.
|
|
||||||
|
RETURNS (int): The entity ID.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.root.ent_id
|
return self.root.ent_id
|
||||||
|
@ -362,9 +349,11 @@ cdef class Span:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Can't yet set ent_id from Span. Vote for this feature on the issue "
|
"Can't yet set ent_id from Span. Vote for this feature on the issue "
|
||||||
"tracker: http://github.com/explosion/spaCy/issues")
|
"tracker: http://github.com/explosion/spaCy/issues")
|
||||||
|
|
||||||
property ent_id_:
|
property ent_id_:
|
||||||
"""
|
"""A (string) entity ID. Usually assigned by patterns in the `Matcher`.
|
||||||
A (string) entity ID. Usually assigned by patterns in the Matcher.
|
|
||||||
|
RETURNS (unicode): The entity ID.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.root.ent_id_
|
return self.root.ent_id_
|
||||||
|
@ -376,26 +365,32 @@ cdef class Span:
|
||||||
"tracker: http://github.com/explosion/spaCy/issues")
|
"tracker: http://github.com/explosion/spaCy/issues")
|
||||||
|
|
||||||
property orth_:
|
property orth_:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ''.join([t.string for t in self]).strip()
|
return ''.join([t.string for t in self]).strip()
|
||||||
|
|
||||||
property lemma_:
|
property lemma_:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ' '.join([t.lemma_ for t in self]).strip()
|
return ' '.join([t.lemma_ for t in self]).strip()
|
||||||
|
|
||||||
property upper_:
|
property upper_:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ''.join([t.string.upper() for t in self]).strip()
|
return ''.join([t.string.upper() for t in self]).strip()
|
||||||
|
|
||||||
property lower_:
|
property lower_:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ''.join([t.string.lower() for t in self]).strip()
|
return ''.join([t.string.lower() for t in self]).strip()
|
||||||
|
|
||||||
property string:
|
property string:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ''.join([t.string for t in self])
|
return ''.join([t.string for t in self])
|
||||||
|
|
||||||
property label_:
|
property label_:
|
||||||
|
# TODO: docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.doc.vocab.strings[self.label]
|
return self.doc.vocab.strings[self.label]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user