mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Update docstrings and API docs for Span
This commit is contained in:
parent
23f9a3ccc8
commit
62ceec4fc6
|
@ -121,7 +121,7 @@ cdef class Span:
|
||||||
return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
|
return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
|
||||||
|
|
||||||
def similarity(self, other):
|
def similarity(self, other):
|
||||||
""" Make a semantic similarity estimate. The default estimate is cosine
|
"""Make a semantic similarity estimate. The default estimate is cosine
|
||||||
similarity using an average of word vectors.
|
similarity using an average of word vectors.
|
||||||
|
|
||||||
other (object): The object to compare with. By default, accepts `Doc`,
|
other (object): The object to compare with. By default, accepts `Doc`,
|
||||||
|
@ -168,14 +168,23 @@ cdef class Span:
|
||||||
return self.doc[root.l_edge : root.r_edge + 1]
|
return self.doc[root.l_edge : root.r_edge + 1]
|
||||||
|
|
||||||
property has_vector:
|
property has_vector:
|
||||||
# TODO: docstring
|
"""A boolean value indicating whether a word vector is associated with
|
||||||
|
the object.
|
||||||
|
|
||||||
|
RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'has_vector' in self.doc.user_span_hooks:
|
if 'has_vector' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['has_vector'](self)
|
return self.doc.user_span_hooks['has_vector'](self)
|
||||||
return any(token.has_vector for token in self)
|
return any(token.has_vector for token in self)
|
||||||
|
|
||||||
property vector:
|
property vector:
|
||||||
# TODO: docstring
|
"""A real-valued meaning representation. Defaults to an average of the
|
||||||
|
token vectors.
|
||||||
|
|
||||||
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
|
representing the span's semantics.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector' in self.doc.user_span_hooks:
|
if 'vector' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['vector'](self)
|
return self.doc.user_span_hooks['vector'](self)
|
||||||
|
@ -184,7 +193,10 @@ cdef class Span:
|
||||||
return self._vector
|
return self._vector
|
||||||
|
|
||||||
property vector_norm:
|
property vector_norm:
|
||||||
# TODO: docstring
|
"""The L2 norm of the span's vector representation.
|
||||||
|
|
||||||
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector_norm' in self.doc.user_span_hooks:
|
if 'vector_norm' in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks['vector'](self)
|
return self.doc.user_span_hooks['vector'](self)
|
||||||
|
@ -206,7 +218,10 @@ cdef class Span:
|
||||||
return sum([token.sentiment for token in self]) / len(self)
|
return sum([token.sentiment for token in self]) / len(self)
|
||||||
|
|
||||||
property text:
|
property text:
|
||||||
# TODO: docstring
|
"""A unicode representation of the span text.
|
||||||
|
|
||||||
|
RETURNS (unicode): The original verbatim text of the span.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
text = self.text_with_ws
|
text = self.text_with_ws
|
||||||
if self[-1].whitespace_:
|
if self[-1].whitespace_:
|
||||||
|
@ -214,7 +229,11 @@ cdef class Span:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
property text_with_ws:
|
property text_with_ws:
|
||||||
# TODO: docstring
|
"""The text content of the span with a trailing whitespace character if
|
||||||
|
the last token has one.
|
||||||
|
|
||||||
|
RETURNS (unicode): The text content of the span (with trailing whitespace).
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return u''.join([t.text_with_ws for t in self])
|
return u''.join([t.text_with_ws for t in self])
|
||||||
|
|
||||||
|
|
|
@ -127,9 +127,7 @@ p
|
||||||
| similarity using an average of word vectors.
|
| similarity using an average of word vectors.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
doc = nlp(u'apples and oranges')
|
apples, _, oranges = nlp(u'apples and oranges')
|
||||||
apples = doc[0]
|
|
||||||
oranges = doc[1]
|
|
||||||
apples_oranges = apples.similarity(oranges)
|
apples_oranges = apples.similarity(oranges)
|
||||||
oranges_apples = oranges.similarity(apples)
|
oranges_apples = oranges.similarity(apples)
|
||||||
assert apples_oranges == oranges_apples
|
assert apples_oranges == oranges_apples
|
||||||
|
@ -165,49 +163,6 @@ p Retokenize the document, such that the span is merged into a single token.
|
||||||
+cell #[code Token]
|
+cell #[code Token]
|
||||||
+cell The newly merged token.
|
+cell The newly merged token.
|
||||||
|
|
||||||
+h(2, "text") Span.text
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
+aside-code("Example").
|
|
||||||
doc = nlp('Give it back! He pleaded.')
|
|
||||||
assert doc[1:4].text == 'it back!'
|
|
||||||
|
|
||||||
p A unicode representation of the span text.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell unicode
|
|
||||||
+cell The original verbatim text of the span.
|
|
||||||
|
|
||||||
+h(2, "text_with_ws") Span.text_with_ws
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
+aside-code("Example").
|
|
||||||
doc = nlp('Give it back! He pleaded.')
|
|
||||||
assert doc[1:4].text_with_ws == 'it back! '
|
|
||||||
|
|
||||||
p
|
|
||||||
| The text content of the span with a trailing whitespace character if the
|
|
||||||
| last token has one.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell unicode
|
|
||||||
+cell The text content of the span (with trailing whitespace).
|
|
||||||
|
|
||||||
+h(2, "sent") Span.sent
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p The sentence span that this span is a part of.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Span]
|
|
||||||
+cell The sentence this is part of.
|
|
||||||
|
|
||||||
+h(2, "root") Span.root
|
+h(2, "root") Span.root
|
||||||
+tag property
|
+tag property
|
||||||
|
|
||||||
|
@ -262,6 +217,56 @@ p Tokens that descend from tokens in the span, but fall outside it.
|
||||||
+cell #[code Token]
|
+cell #[code Token]
|
||||||
+cell A descendant of a token within the span.
|
+cell A descendant of a token within the span.
|
||||||
|
|
||||||
|
+h(2, "has_vector") Span.has_vector
|
||||||
|
+tag property
|
||||||
|
+tag requires model
|
||||||
|
|
||||||
|
p
|
||||||
|
| A boolean value indicating whether a word vector is associated with the
|
||||||
|
| object.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp(u'apple')[0:1]
|
||||||
|
assert apple.has_vector
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether the span has vector data attached.
|
||||||
|
|
||||||
|
+h(2, "vector") Span.vector
|
||||||
|
+tag property
|
||||||
|
+tag requires model
|
||||||
|
|
||||||
|
p
|
||||||
|
| A real-valued meaning representation. Defaults to an average of the
|
||||||
|
| token vectors.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp(u'apple')[0:1]
|
||||||
|
(apple.vector.dtype, apple.vector.shape)
|
||||||
|
# (dtype('float32'), (300,))
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||||
|
+cell A 1D numpy array representing the span's semantics.
|
||||||
|
|
||||||
|
+h(2, "vector_norm") Span.vector_norm
|
||||||
|
+tag property
|
||||||
|
+tag requires model
|
||||||
|
|
||||||
|
p
|
||||||
|
| The L2 norm of the span's vector representation.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell float
|
||||||
|
+cell The L2 norm of the vector representation.
|
||||||
|
|
||||||
+h(2, "attributes") Attributes
|
+h(2, "attributes") Attributes
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
@ -270,6 +275,11 @@ p Tokens that descend from tokens in the span, but fall outside it.
|
||||||
+cell #[code Doc]
|
+cell #[code Doc]
|
||||||
+cell The parent document.
|
+cell The parent document.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code sent]
|
||||||
|
+cell #[code Span]
|
||||||
|
+cell The sentence span that this span is a part of.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code start]
|
+cell #[code start]
|
||||||
+cell int
|
+cell int
|
||||||
|
@ -290,6 +300,18 @@ p Tokens that descend from tokens in the span, but fall outside it.
|
||||||
+cell int
|
+cell int
|
||||||
+cell The character offset for the end of the span.
|
+cell The character offset for the end of the span.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code text]
|
||||||
|
+cell unicode
|
||||||
|
+cell A unicode representation of the span text.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code text_with_ws]
|
||||||
|
+cell unicode
|
||||||
|
+cell
|
||||||
|
| The text content of the span with a trailing whitespace character
|
||||||
|
| if the last token has one.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code label]
|
+cell #[code label]
|
||||||
+cell int
|
+cell int
|
||||||
|
|
Loading…
Reference in New Issue
Block a user