mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Update docstrings and API docs for Token
This commit is contained in:
parent
62ceec4fc6
commit
e9e62b01b0
|
@ -23,10 +23,14 @@ from .. import about
|
||||||
|
|
||||||
|
|
||||||
cdef class Token:
|
cdef class Token:
|
||||||
"""
|
"""An individual token – i.e. a word, punctuation symbol, whitespace, etc."""
|
||||||
An individual token --- i.e. a word, punctuation symbol, whitespace, etc.
|
|
||||||
"""
|
|
||||||
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
def __cinit__(self, Vocab vocab, Doc doc, int offset):
|
||||||
|
"""Construct a `Token` object.
|
||||||
|
|
||||||
|
vocab (Vocab): A storage container for lexical types.
|
||||||
|
doc (Doc): The parent document.
|
||||||
|
offset (int): The index of the token within the document.
|
||||||
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self.doc = doc
|
self.doc = doc
|
||||||
self.c = &self.doc.c[offset]
|
self.c = &self.doc.c[offset]
|
||||||
|
@ -36,8 +40,9 @@ cdef class Token:
|
||||||
return hash((self.doc, self.i))
|
return hash((self.doc, self.i))
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""
|
"""The number of unicode characters in the token, i.e. `token.text`.
|
||||||
Number of unicode characters in token.text.
|
|
||||||
|
RETURNS (int): The number of unicode characters in the token.
|
||||||
"""
|
"""
|
||||||
return self.c.lex.length
|
return self.c.lex.length
|
||||||
|
|
||||||
|
@ -75,37 +80,35 @@ cdef class Token:
|
||||||
raise ValueError(op)
|
raise ValueError(op)
|
||||||
|
|
||||||
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
|
||||||
"""
|
"""Check the value of a boolean flag.
|
||||||
Check the value of a boolean flag.
|
|
||||||
|
|
||||||
Arguments:
|
flag_id (int): The ID of the flag attribute.
|
||||||
flag_id (int): The ID of the flag attribute.
|
RETURNS (bool): Whether the flag is set.
|
||||||
Returns:
|
|
||||||
is_set (bool): Whether the flag is set.
|
EXAMPLE:
|
||||||
|
>>> from spacy.attrs import IS_TITLE
|
||||||
|
>>> doc = nlp(u'Give it back! He pleaded.')
|
||||||
|
>>> token = doc[0]
|
||||||
|
>>> token.check_flag(IS_TITLE)
|
||||||
|
True
|
||||||
"""
|
"""
|
||||||
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
return Lexeme.c_check_flag(self.c.lex, flag_id)
|
||||||
|
|
||||||
def nbor(self, int i=1):
|
def nbor(self, int i=1):
|
||||||
"""
|
"""Get a neighboring token.
|
||||||
Get a neighboring token.
|
|
||||||
|
|
||||||
Arguments:
|
i (int): The relative position of the token to get. Defaults to 1.
|
||||||
i (int): The relative position of the token to get. Defaults to 1.
|
RETURNS (Token): The token at position `self.doc[self.i+i]`.
|
||||||
Returns:
|
|
||||||
neighbor (Token): The token at position self.doc[self.i+i]
|
|
||||||
"""
|
"""
|
||||||
return self.doc[self.i+i]
|
return self.doc[self.i+i]
|
||||||
|
|
||||||
def similarity(self, other):
|
def similarity(self, other):
|
||||||
"""
|
"""Make a semantic similarity estimate. The default estimate is cosine
|
||||||
Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
similarity using an average of word vectors.
|
||||||
|
|
||||||
Arguments:
|
other (object): The object to compare with. By default, accepts `Doc`,
|
||||||
other:
|
`Span`, `Token` and `Lexeme` objects.
|
||||||
The object to compare with. By default, accepts Doc, Span,
|
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||||
Token and Lexeme objects.
|
|
||||||
Returns:
|
|
||||||
score (float): A scalar similarity score. Higher is more similar.
|
|
||||||
"""
|
"""
|
||||||
if 'similarity' in self.doc.user_token_hooks:
|
if 'similarity' in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks['similarity'](self)
|
return self.doc.user_token_hooks['similarity'](self)
|
||||||
|
@ -114,10 +117,14 @@ cdef class Token:
|
||||||
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||||
|
|
||||||
property lex_id:
|
property lex_id:
|
||||||
|
"""ID of the token's lexical type.
|
||||||
|
|
||||||
|
RETURNS (int): ID of the token's lexical type."""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.lex.id
|
return self.c.lex.id
|
||||||
|
|
||||||
property rank:
|
property rank:
|
||||||
|
# TODO: add docstring
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.lex.id
|
return self.c.lex.id
|
||||||
|
|
||||||
|
@ -126,10 +133,19 @@ cdef class Token:
|
||||||
return self.text_with_ws
|
return self.text_with_ws
|
||||||
|
|
||||||
property text:
|
property text:
|
||||||
|
"""A unicode representation of the token text.
|
||||||
|
|
||||||
|
RETURNS (unicode): The original verbatim text of the token.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.orth_
|
return self.orth_
|
||||||
|
|
||||||
property text_with_ws:
|
property text_with_ws:
|
||||||
|
"""The text content of the token with a trailing whitespace character if
|
||||||
|
it has one.
|
||||||
|
|
||||||
|
RETURNS (unicode): The text content of the token (with trailing whitespace).
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef unicode orth = self.vocab.strings[self.c.lex.orth]
|
cdef unicode orth = self.vocab.strings[self.c.lex.orth]
|
||||||
if self.c.spacy:
|
if self.c.spacy:
|
||||||
|
@ -184,6 +200,10 @@ cdef class Token:
|
||||||
return self.c.lex.suffix
|
return self.c.lex.suffix
|
||||||
|
|
||||||
property lemma:
|
property lemma:
|
||||||
|
"""Base form of the word, with no inflectional suffixes.
|
||||||
|
|
||||||
|
RETURNS (int): Token lemma.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.lemma
|
return self.c.lemma
|
||||||
def __set__(self, int lemma):
|
def __set__(self, int lemma):
|
||||||
|
@ -206,8 +226,10 @@ cdef class Token:
|
||||||
self.c.dep = label
|
self.c.dep = label
|
||||||
|
|
||||||
property has_vector:
|
property has_vector:
|
||||||
"""
|
"""A boolean value indicating whether a word vector is associated with
|
||||||
A boolean value indicating whether a word vector is associated with the object.
|
the object.
|
||||||
|
|
||||||
|
RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'has_vector' in self.doc.user_token_hooks:
|
if 'has_vector' in self.doc.user_token_hooks:
|
||||||
|
@ -220,10 +242,10 @@ cdef class Token:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
property vector:
|
property vector:
|
||||||
"""
|
"""A real-valued meaning representation.
|
||||||
A real-valued meaning representation.
|
|
||||||
|
|
||||||
Type: numpy.ndarray[ndim=1, dtype='float32']
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
|
representing the token's semantics.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector' in self.doc.user_token_hooks:
|
if 'vector' in self.doc.user_token_hooks:
|
||||||
|
@ -239,15 +261,11 @@ cdef class Token:
|
||||||
vector_view = <float[:length,]>self.c.lex.vector
|
vector_view = <float[:length,]>self.c.lex.vector
|
||||||
return numpy.asarray(vector_view)
|
return numpy.asarray(vector_view)
|
||||||
|
|
||||||
property repvec:
|
|
||||||
def __get__(self):
|
|
||||||
raise AttributeError("repvec was renamed to vector in v0.100")
|
|
||||||
|
|
||||||
property has_repvec:
|
|
||||||
def __get__(self):
|
|
||||||
raise AttributeError("has_repvec was renamed to has_vector in v0.100")
|
|
||||||
|
|
||||||
property vector_norm:
|
property vector_norm:
|
||||||
|
"""The L2 norm of the token's vector representation.
|
||||||
|
|
||||||
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
if 'vector_norm' in self.doc.user_token_hooks:
|
if 'vector_norm' in self.doc.user_token_hooks:
|
||||||
return self.doc.user_token_hooks['vector_norm'](self)
|
return self.doc.user_token_hooks['vector_norm'](self)
|
||||||
|
@ -324,28 +342,26 @@ cdef class Token:
|
||||||
yield from word.subtree
|
yield from word.subtree
|
||||||
|
|
||||||
property left_edge:
|
property left_edge:
|
||||||
"""
|
"""The leftmost token of this token's syntactic descendents.
|
||||||
The leftmost token of this token's syntactic descendents.
|
|
||||||
|
|
||||||
Returns: Token The first token such that self.is_ancestor(token)
|
RETURNS (Token): The first token such that `self.is_ancestor(token)`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.doc[self.c.l_edge]
|
return self.doc[self.c.l_edge]
|
||||||
|
|
||||||
property right_edge:
|
property right_edge:
|
||||||
"""
|
"""The rightmost token of this token's syntactic descendents.
|
||||||
The rightmost token of this token's syntactic descendents.
|
|
||||||
|
|
||||||
Returns: Token The last token such that self.is_ancestor(token)
|
RETURNS (Token): The last token such that `self.is_ancestor(token)`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.doc[self.c.r_edge]
|
return self.doc[self.c.r_edge]
|
||||||
|
|
||||||
property ancestors:
|
property ancestors:
|
||||||
"""
|
"""A sequence of this token's syntactic ancestors.
|
||||||
A sequence of this token's syntactic ancestors.
|
|
||||||
|
|
||||||
Yields: Token A sequence of ancestor tokens such that ancestor.is_ancestor(self)
|
YIELDS (Token): A sequence of ancestor tokens such that
|
||||||
|
`ancestor.is_ancestor(self)`.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef const TokenC* head_ptr = self.c
|
cdef const TokenC* head_ptr = self.c
|
||||||
|
@ -357,33 +373,25 @@ cdef class Token:
|
||||||
yield self.doc[head_ptr - (self.c - self.i)]
|
yield self.doc[head_ptr - (self.c - self.i)]
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
def is_ancestor_of(self, descendant):
|
|
||||||
# TODO: Remove after backward compatibility check.
|
|
||||||
return self.is_ancestor(descendant)
|
|
||||||
|
|
||||||
def is_ancestor(self, descendant):
|
def is_ancestor(self, descendant):
|
||||||
"""
|
"""Check whether this token is a parent, grandparent, etc. of another
|
||||||
Check whether this token is a parent, grandparent, etc. of another
|
|
||||||
in the dependency tree.
|
in the dependency tree.
|
||||||
|
|
||||||
Arguments:
|
descendant (Token): Another token.
|
||||||
descendant (Token): Another token.
|
RETURNS (bool): Whether this token is the ancestor of the descendant.
|
||||||
Returns:
|
|
||||||
is_ancestor (bool): Whether this token is the ancestor of the descendant.
|
|
||||||
"""
|
"""
|
||||||
if self.doc is not descendant.doc:
|
if self.doc is not descendant.doc:
|
||||||
return False
|
return False
|
||||||
return any( ancestor.i == self.i for ancestor in descendant.ancestors )
|
return any( ancestor.i == self.i for ancestor in descendant.ancestors )
|
||||||
|
|
||||||
property head:
|
property head:
|
||||||
"""
|
"""The syntactic parent, or "governor", of this token.
|
||||||
The syntactic parent, or "governor", of this token.
|
|
||||||
|
|
||||||
Returns: Token
|
RETURNS (Token): The token head.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
"""
|
"""The token predicted by the parser to be the head of the current
|
||||||
The token predicted by the parser to be the head of the current token.
|
token.
|
||||||
"""
|
"""
|
||||||
return self.doc[self.i + self.c.head]
|
return self.doc[self.i + self.c.head]
|
||||||
def __set__(self, Token new_head):
|
def __set__(self, Token new_head):
|
||||||
|
@ -477,10 +485,9 @@ cdef class Token:
|
||||||
self.c.head = rel_newhead_i
|
self.c.head = rel_newhead_i
|
||||||
|
|
||||||
property conjuncts:
|
property conjuncts:
|
||||||
"""
|
"""A sequence of coordinated tokens, including the token itself.
|
||||||
A sequence of coordinated tokens, including the token itself.
|
|
||||||
|
|
||||||
Yields: Token A coordinated token
|
YIELDS (Token): A coordinated token.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
"""Get a list of conjoined words."""
|
"""Get a list of conjoined words."""
|
||||||
|
@ -495,25 +502,46 @@ cdef class Token:
|
||||||
yield from word.conjuncts
|
yield from word.conjuncts
|
||||||
|
|
||||||
property ent_type:
|
property ent_type:
|
||||||
|
"""Named entity type.
|
||||||
|
|
||||||
|
RETURNS (int): Named entity type.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.ent_type
|
return self.c.ent_type
|
||||||
|
|
||||||
property ent_iob:
|
property ent_iob:
|
||||||
|
"""IOB code of named entity tag. `1="I", 2="O", 3="B"`. 0 means no tag
|
||||||
|
is assigned.
|
||||||
|
|
||||||
|
RETURNS (int): IOB code of named entity tag.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.ent_iob
|
return self.c.ent_iob
|
||||||
|
|
||||||
property ent_type_:
|
property ent_type_:
|
||||||
|
"""Named entity type.
|
||||||
|
|
||||||
|
RETURNS (unicode): Named entity type.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.vocab.strings[self.c.ent_type]
|
return self.vocab.strings[self.c.ent_type]
|
||||||
|
|
||||||
property ent_iob_:
|
property ent_iob_:
|
||||||
|
"""IOB code of named entity tag. "B" means the token begins an entity,
|
||||||
|
"I" means it is inside an entity, "O" means it is outside an entity, and
|
||||||
|
"" means no entity tag is set.
|
||||||
|
|
||||||
|
RETURNS (unicode): IOB code of named entity tag.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
iob_strings = ('', 'I', 'O', 'B')
|
iob_strings = ('', 'I', 'O', 'B')
|
||||||
return iob_strings[self.c.ent_iob]
|
return iob_strings[self.c.ent_iob]
|
||||||
|
|
||||||
property ent_id:
|
property ent_id:
|
||||||
"""
|
"""ID of the entity the token is an instance of, if any. Usually
|
||||||
An (integer) entity ID. Usually assigned by patterns in the Matcher.
|
assigned by patterns in the Matcher.
|
||||||
|
|
||||||
|
RETURNS (int): ID of the entity.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.ent_id
|
return self.c.ent_id
|
||||||
|
@ -522,8 +550,10 @@ cdef class Token:
|
||||||
self.c.ent_id = key
|
self.c.ent_id = key
|
||||||
|
|
||||||
property ent_id_:
|
property ent_id_:
|
||||||
"""
|
"""ID of the entity the token is an instance of, if any. Usually
|
||||||
A (string) entity ID. Usually assigned by patterns in the Matcher.
|
assigned by patterns in the Matcher.
|
||||||
|
|
||||||
|
RETURNS (unicode): ID of the entity.
|
||||||
"""
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.vocab.strings[self.c.ent_id]
|
return self.vocab.strings[self.c.ent_id]
|
||||||
|
@ -564,6 +594,10 @@ cdef class Token:
|
||||||
return self.vocab.strings[self.c.lex.lang]
|
return self.vocab.strings[self.c.lex.lang]
|
||||||
|
|
||||||
property lemma_:
|
property lemma_:
|
||||||
|
"""Base form of the word, with no inflectional suffixes.
|
||||||
|
|
||||||
|
RETURNS (unicode): Token lemma.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.vocab.strings[self.c.lemma]
|
return self.vocab.strings[self.c.lemma]
|
||||||
def __set__(self, unicode lemma_):
|
def __set__(self, unicode lemma_):
|
||||||
|
|
|
@ -4,9 +4,255 @@ include ../../_includes/_mixins
|
||||||
|
|
||||||
p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
||||||
|
|
||||||
|
+h(2, "init") Token.__init__
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Construct a #[code Token] object.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
doc = nlp(u'Give it back! He pleaded.')
|
||||||
|
token = doc[0]
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code vocab]
|
||||||
|
+cell #[code Vocab]
|
||||||
|
+cell A storage container for lexical types.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code doc]
|
||||||
|
+cell #[code Doc]
|
||||||
|
+cell The parent document.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code offset]
|
||||||
|
+cell int
|
||||||
|
+cell The index of the token within the document.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell The newly constructed object.
|
||||||
|
|
||||||
|
+h(2, "len") Token.__len__
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p The number of unicode characters in the token, i.e. #[code token.text].
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
doc = nlp(u'Give it back! He pleaded.')
|
||||||
|
token = doc[0]
|
||||||
|
assert len(token) == 4
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell int
|
||||||
|
+cell The number of unicode characters in the token.
|
||||||
|
|
||||||
|
+h(2, "check_flag") Token.check_flag
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Check the value of a boolean flag.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.attrs import IS_TITLE
|
||||||
|
doc = nlp(u'Give it back! He pleaded.')
|
||||||
|
token = doc[0]
|
||||||
|
token.check_flag(IS_TITLE)
|
||||||
|
# True
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code flag_id]
|
||||||
|
+cell int
|
||||||
|
+cell The attribute ID of the flag to check.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether the flag is set.
|
||||||
|
|
||||||
|
+h(2, "nbor") Token.nbor
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Get a neighboring token.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
doc = nlp(u'Give it back! He pleaded.')
|
||||||
|
token = doc[0]
|
||||||
|
token.nbor()
|
||||||
|
# it
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code i]
|
||||||
|
+cell int
|
||||||
|
+cell The relative position of the token to get. Defaults to #[code 1].
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell The token at position #[code self.doc[self.i+i]].
|
||||||
|
|
||||||
|
+h(2, "similarity") Token.similarity
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apples, _, oranges = nlp(u'apples and oranges')
|
||||||
|
apples_oranges = apples.similarity(oranges)
|
||||||
|
oranges_apples = oranges.similarity(apples)
|
||||||
|
assert apples_oranges == oranges_apples
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell other
|
||||||
|
+cell -
|
||||||
|
+cell
|
||||||
|
| The object to compare with. By default, accepts #[code Doc],
|
||||||
|
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell float
|
||||||
|
+cell A scalar similarity score. Higher is more similar.
|
||||||
|
|
||||||
|
+h(2, "is_ancestor") Token.is_ancestor
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p
|
||||||
|
| Check whether this token is a parent, grandparent, etc. of another
|
||||||
|
| in the dependency tree.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell descendant
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell Another token.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether this token is the ancestor of the descendant.
|
||||||
|
|
||||||
|
+h(2, "has_vector") Token.has_vector
|
||||||
|
+tag property
|
||||||
|
+tag requires model
|
||||||
|
|
||||||
|
p
|
||||||
|
| A boolean value indicating whether a word vector is associated with the
|
||||||
|
| token.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp(u'apple')[0]
|
||||||
|
assert apple.has_vector
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether the token has vector data attached.
|
||||||
|
|
||||||
|
+h(2, "vector") Token.vector
|
||||||
|
+tag property
|
||||||
|
+tag requires model
|
||||||
|
|
||||||
|
p
|
||||||
|
| A real-valued meaning representation.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp(u'apple')[0]
|
||||||
|
(apple.vector.dtype, apple.vector.shape)
|
||||||
|
# (dtype('float32'), (300,))
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||||
|
+cell A 1D numpy array representing the token's semantics.
|
||||||
|
|
||||||
|
+h(2, "vector_norm") Token.vector_norm
|
||||||
|
+tag property
|
||||||
|
+tag requires model
|
||||||
|
|
||||||
|
p
|
||||||
|
| The L2 norm of the token's vector representation.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell float
|
||||||
|
+cell The L2 norm of the vector representation.
|
||||||
|
|
||||||
|
+h(2, "conjuncts") Token.conjuncts
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p A sequence of coordinated tokens, including the token itself.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell yields
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell A coordinated token.
|
||||||
|
|
||||||
|
+h(2, "children") Token.children
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p A sequence of the token's immediate syntactic children.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell yields
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell A child token such that #[code child.head==self].
|
||||||
|
|
||||||
|
+h(2, "subtree") Token.subtree
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p A sequence of all the token's syntactic descendants.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell yields
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell A descendant token such that #[code self.is_ancestor(descendant)].
|
||||||
|
|
||||||
|
+h(2, "ancestors") Token.ancestors
|
||||||
|
+tag property
|
||||||
|
|
||||||
|
p A sequence of this token's syntactic ancestors.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell yields
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell
|
||||||
|
| A sequence of ancestor tokens such that
|
||||||
|
| #[code ancestor.is_ancestor(self)].
|
||||||
|
|
||||||
+h(2, "attributes") Attributes
|
+h(2, "attributes") Attributes
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code text]
|
||||||
|
+cell unicode
|
||||||
|
+cell Verbatim text content.
|
||||||
|
+row
|
||||||
|
+cell #[code text_with_ws]
|
||||||
|
+cell unicode
|
||||||
|
+cell Text content, with trailing space character if present.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code whitespace]
|
||||||
|
+cell int
|
||||||
|
+cell Trailing space character if present.
|
||||||
|
+row
|
||||||
|
+cell #[code whitespace_]
|
||||||
|
+cell unicode
|
||||||
|
+cell Trailing space character if present.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code vocab]
|
+cell #[code vocab]
|
||||||
+cell #[code Vocab]
|
+cell #[code Vocab]
|
||||||
|
@ -17,14 +263,31 @@ p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
||||||
+cell #[code Doc]
|
+cell #[code Doc]
|
||||||
+cell The parent document.
|
+cell The parent document.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code head]
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell The syntactic parent, or "governor", of this token.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code left_edge]
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell The leftmost token of this token's syntactic descendants.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code right_edge]
|
||||||
|
+cell #[code Token]
|
||||||
|
+cell The rightmost token of this token's syntactic descendants.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code i]
|
+cell #[code i]
|
||||||
+cell int
|
+cell int
|
||||||
+cell The index of the token within the parent document.
|
+cell The index of the token within the parent document.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code ent_type]
|
+cell #[code ent_type]
|
||||||
+cell int
|
+cell int
|
||||||
+cell Named entity type.
|
+cell Named entity type.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code ent_type_]
|
+cell #[code ent_type_]
|
||||||
+cell unicode
|
+cell unicode
|
||||||
|
@ -42,19 +305,23 @@ p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
||||||
+cell unicode
|
+cell unicode
|
||||||
+cell
|
+cell
|
||||||
| IOB code of named entity tag. #[code "B"]
|
| IOB code of named entity tag. #[code "B"]
|
||||||
| means the token begins an entity, #[code "I"] means it inside an
|
| means the token begins an entity, #[code "I"] means it is inside
|
||||||
| entity, #[code "O"] means it is outside an entity, and
|
| an entity, #[code "O"] means it is outside an entity, and
|
||||||
| #[code ""] means no entity tag is set.
|
| #[code ""] means no entity tag is set.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code ent_id]
|
+cell #[code ent_id]
|
||||||
+cell int
|
+cell int
|
||||||
+cell ID of the entity the token is an instance of, if any.
|
+cell
|
||||||
|
| ID of the entity the token is an instance of, if any. Usually
|
||||||
|
| assigned by patterns in the Matcher.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code ent_id_]
|
+cell #[code ent_id_]
|
||||||
+cell unicode
|
+cell unicode
|
||||||
+cell ID of the entity the token is an instance of, if any.
|
+cell
|
||||||
|
| ID of the entity the token is an instance of, if any. Usually
|
||||||
|
| assigned by patterns in the Matcher.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code lemma]
|
+cell #[code lemma]
|
||||||
|
@ -229,232 +496,3 @@ p An individual token — i.e. a word, punctuation symbol, whitespace, etc.
|
||||||
+cell #[code lex_id]
|
+cell #[code lex_id]
|
||||||
+cell int
|
+cell int
|
||||||
+cell ID of the token's lexical type.
|
+cell ID of the token's lexical type.
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code text]
|
|
||||||
+cell unicode
|
|
||||||
+cell Verbatim text content.
|
|
||||||
+row
|
|
||||||
+cell #[code text_with_ws]
|
|
||||||
+cell unicode
|
|
||||||
+cell Text content, with trailing space character if present.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code whitespace]
|
|
||||||
+cell int
|
|
||||||
+cell Trailing space character if present.
|
|
||||||
+row
|
|
||||||
+cell #[code whitespace_]
|
|
||||||
+cell unicode
|
|
||||||
+cell Trailing space character if present.
|
|
||||||
|
|
||||||
|
|
||||||
+h(2, "init") Token.__init__
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Construct a #[code Token] object.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code vocab]
|
|
||||||
+cell #[code Vocab]
|
|
||||||
+cell A storage container for lexical types.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code doc]
|
|
||||||
+cell #[code Doc]
|
|
||||||
+cell The parent document.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code offset]
|
|
||||||
+cell int
|
|
||||||
+cell The index of the token within the document.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell The newly constructed object.
|
|
||||||
|
|
||||||
+h(2, "len") Token.__len__
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Get the number of unicode characters in the token.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell int
|
|
||||||
+cell The number of unicode characters in the token.
|
|
||||||
|
|
||||||
|
|
||||||
+h(2, "check_flag") Token.check_flag
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Check the value of a boolean flag.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code flag_id]
|
|
||||||
+cell int
|
|
||||||
+cell The attribute ID of the flag to check.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell bool
|
|
||||||
+cell Whether the flag is set.
|
|
||||||
|
|
||||||
+h(2, "nbor") Token.nbor
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Get a neighboring token.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code i]
|
|
||||||
+cell int
|
|
||||||
+cell The relative position of the token to get. Defaults to #[code 1].
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell The token at position #[code self.doc[self.i+i]]
|
|
||||||
|
|
||||||
+h(2, "similarity") Token.similarity
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell other
|
|
||||||
+cell -
|
|
||||||
+cell
|
|
||||||
| The object to compare with. By default, accepts #[code Doc],
|
|
||||||
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell float
|
|
||||||
+cell A scalar similarity score. Higher is more similar.
|
|
||||||
|
|
||||||
+h(2, "is_ancestor") Token.is_ancestor
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p
|
|
||||||
| Check whether this token is a parent, grandparent, etc. of another
|
|
||||||
| in the dependency tree.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell descendant
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell Another token.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell bool
|
|
||||||
+cell Whether this token is the ancestor of the descendant.
|
|
||||||
|
|
||||||
|
|
||||||
+h(2, "vector") Token.vector
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p A real-valued meaning representation.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
|
||||||
+cell A 1D numpy array representing the token's semantics.
|
|
||||||
|
|
||||||
+h(2, "has_vector") Token.has_vector
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p
|
|
||||||
| A boolean value indicating whether a word vector is associated with the
|
|
||||||
| object.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell bool
|
|
||||||
+cell Whether the token has a vector data attached.
|
|
||||||
|
|
||||||
+h(2, "head") Token.head
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p The syntactic parent, or "governor", of this token.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell The head.
|
|
||||||
|
|
||||||
+h(2, "conjuncts") Token.conjuncts
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p A sequence of coordinated tokens, including the token itself.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell yields
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell A coordinated token.
|
|
||||||
|
|
||||||
+h(2, "children") Token.children
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p A sequence of the token's immediate syntactic children.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell yields
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell A child token such that #[code child.head==self].
|
|
||||||
|
|
||||||
+h(2, "subtree") Token.subtree
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p A sequence of all the token's syntactic descendents.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell yields
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell A descendant token such that #[code self.is_ancestor(descendant)].
|
|
||||||
|
|
||||||
+h(2, "left_edge") Token.left_edge
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p The leftmost token of this token's syntactic descendants.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell The first token such that #[code self.is_ancestor(token)].
|
|
||||||
|
|
||||||
+h(2, "right_edge") Token.right_edge
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p The rightmost token of this token's syntactic descendents.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell The last token such that #[code self.is_ancestor(token)].
|
|
||||||
|
|
||||||
+h(2, "ancestors") Token.ancestors
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p The rightmost token of this token's syntactic descendants.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell yields
|
|
||||||
+cell #[code Token]
|
|
||||||
+cell
|
|
||||||
| A sequence of ancestor tokens such that
|
|
||||||
| #[code ancestor.is_ancestor(self)].
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user