Tidy up Doc, Token and Span and add missing docs
This commit is contained in:
parent a6135336f5
commit 544a407b93
@@ -326,7 +326,8 @@ cdef class Doc:
             if self._vector is not None:
                 return self._vector
             elif not len(self):
-                self._vector = numpy.zeros((self.vocab.vectors_length,), dtype='f')
+                self._vector = numpy.zeros((self.vocab.vectors_length,),
+                                           dtype='f')
                 return self._vector
             elif self.has_vector:
                 vector = numpy.zeros((self.vocab.vectors_length,), dtype='f')
@@ -338,7 +339,8 @@ cdef class Doc:
                 self._vector = self.tensor.mean(axis=0)
                 return self._vector
             else:
-                return numpy.zeros((self.vocab.vectors_length,), dtype='float32')
+                return numpy.zeros((self.vocab.vectors_length,),
+                                   dtype='float32')

         def __set__(self, value):
             self._vector = value
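A minimal sketch of the fallback behaviour the two hunks above implement, assuming spaCy 2.0 with a vectors package such as `en_core_web_md` installed (the model name is an assumption, not part of the commit; later sketches reuse this `nlp` object):

    import numpy
    import spacy

    nlp = spacy.load('en_core_web_md')   # assumed model with word vectors
    empty = nlp(u'')
    # an empty Doc gets a zero vector of the vocab's vector length
    assert empty.vector.shape == (nlp.vocab.vectors_length,)
    assert not empty.vector.any()
    doc = nlp(u'apples and oranges')
    # with word vectors available, Doc.vector averages the token vectors
    assert numpy.allclose(doc.vector,
                          numpy.mean([t.vector for t in doc], axis=0))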
@@ -424,7 +426,8 @@ cdef class Doc:
         def __set__(self, ents):
             # TODO:
             # 1. Allow negative matches
-            # 2. Ensure pre-set NERs are not over-written during statistical prediction
+            # 2. Ensure pre-set NERs are not over-written during statistical
+            #    prediction
             # 3. Test basic data-driven ORTH gazetteer
             # 4. Test more nuanced date and currency regex
             cdef int i
@@ -433,7 +436,7 @@ cdef class Doc:
                 # At this point we don't know whether the NER has run over the
                 # Doc. If the ent_iob is missing, leave it missing.
                 if self.c[i].ent_iob != 0:
-                    self.c[i].ent_iob = 2 # Means O. Non-O are set from ents.
+                    self.c[i].ent_iob = 2  # Means O. Non-O are set from ents.
             cdef attr_t ent_type
             cdef int start, end
             for ent_info in ents:
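Usage sketch for the setter above: entities are assigned as `Span` objects, and tokens outside them have their IOB reset to "O" only if it wasn't missing to begin with:

    from spacy.tokens import Span

    doc = nlp(u'I live in New York')
    gpe = doc.vocab.strings.add(u'GPE')
    doc.ents = [Span(doc, 3, 5, label=gpe)]
    assert [e.text for e in doc.ents] == [u'New York']
    # tokens outside the new entities are marked 'O' if NER had run before;
    # a missing ent_iob (0) is left missing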
@@ -574,18 +577,19 @@ cdef class Doc:
         # Allow strings, e.g. 'lemma' or 'LEMMA'
         py_attr_ids = [(IDS[id_.upper()] if hasattr(id_, 'upper') else id_)
                        for id_ in py_attr_ids]
-        # Make an array from the attributes --- otherwise our inner loop is Python
-        # dict iteration.
+        # Make an array from the attributes --- otherwise our inner loop is
+        # Python dict iteration.
         attr_ids = numpy.asarray(py_attr_ids, dtype=numpy.uint64)
-        output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.uint64)
+        output = numpy.ndarray(shape=(self.length, len(attr_ids)),
+                               dtype=numpy.uint64)
         for i in range(self.length):
             for j, feature in enumerate(attr_ids):
                 output[i, j] = get_token_attr(&self.c[i], feature)
         # Handle 1d case
         return output if len(attr_ids) >= 2 else output.reshape((self.length,))

-    def count_by(self, attr_id_t attr_id, exclude=None, PreshCounter counts=None):
+    def count_by(self, attr_id_t attr_id, exclude=None,
+                 PreshCounter counts=None):
         """Count the frequencies of a given attribute. Produces a dict of
         `{attribute (int): count (ints)}` frequencies, keyed by the values of
         the given attribute ID.
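A sketch of how the two methods above are typically called, with attribute IDs from `spacy.attrs`:

    from spacy.attrs import LOWER, ORTH, POS

    doc = nlp(u'apple apple orange')
    arr = doc.to_array([LOWER, POS])   # one row per token, one column per attr
    assert arr.shape == (len(doc), 2)
    arr1d = doc.to_array([LOWER])      # the 1d case: shape (len(doc),)
    counts = doc.count_by(ORTH)        # maps attribute value ID -> frequency
    assert counts[doc.vocab.strings.add(u'apple')] == 2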
@@ -708,7 +712,8 @@ cdef class Doc:
                 elif (token_j.head == token_j) and (token_k.head == token_k):
                     lca_index = -1
                 else:
-                    lca_index = __pairwise_lca(token_j.head, token_k.head, lca_matrix)
+                    lca_index = __pairwise_lca(token_j.head, token_k.head,
+                                               lca_matrix)
                 lca_matrix[token_j.i][token_k.i] = lca_index
                 lca_matrix[token_k.i][token_j.i] = lca_index
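`__pairwise_lca` backs `Doc.get_lca_matrix()`; a sketch of the matrix it fills, relying only on what the hunk shows (both mirror cells get the same index, -1 means no common ancestor):

    doc = nlp(u'I like New York')
    lca = doc.get_lca_matrix()
    # lca[i][j] is the index of the lowest common ancestor of tokens i and j
    assert lca[1][2] == lca[2][1]   # the matrix is symmetric by construction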
@@ -728,7 +733,7 @@ cdef class Doc:
         """Save the current state to a directory.

         path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist. Paths may be either strings or `Path`-like objects.
+            it doesn't exist. Paths may be either strings or Path-like objects.
         """
         with path.open('wb') as file_:
             file_.write(self.to_bytes(**exclude))
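A hedged round-trip sketch: `Doc.from_disk` as the counterpart is an assumption based on the surrounding API, and since the code above opens the path directly, a `Path` object is the safe choice:

    from pathlib import Path
    from spacy.tokens import Doc

    doc = nlp(u'Give it back!')
    doc.to_disk(Path('/tmp/doc.bin'))               # opened with path.open('wb')
    doc2 = Doc(nlp.vocab).from_disk(Path('/tmp/doc.bin'))  # assumed counterpart
    assert doc2.text == doc.text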
@@ -751,7 +756,7 @@ cdef class Doc:
         RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
             all annotations.
         """
-        array_head = [LENGTH,SPACY,TAG,LEMMA,HEAD,DEP,ENT_IOB,ENT_TYPE]
+        array_head = [LENGTH, SPACY, TAG, LEMMA, HEAD, DEP, ENT_IOB, ENT_TYPE]
         # Msgpack doesn't distinguish between lists and tuples, which is
         # vexing for user data. As a best guess, we *know* that within
         # keys, we must have tuples. In values we just have to hope
@@ -794,7 +799,8 @@ cdef class Doc:
         # keys, we must have tuples. In values we just have to hope
         # users don't mind getting a list instead of a tuple.
         if 'user_data' not in exclude and 'user_data_keys' in msg:
-            user_data_keys = msgpack.loads(msg['user_data_keys'], use_list=False)
+            user_data_keys = msgpack.loads(msg['user_data_keys'],
+                                           use_list=False)
             user_data_values = msgpack.loads(msg['user_data_values'])
             for key, value in zip(user_data_keys, user_data_values):
                 self.user_data[key] = value
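The tuple/list caveat in the comments above can be seen on a serialization round trip; a sketch:

    from spacy.tokens import Doc

    doc = nlp(u'Hello world')
    doc.user_data[(u'my_ext', 0)] = [u'some', u'value']
    doc2 = Doc(nlp.vocab).from_bytes(doc.to_bytes())
    # keys are decoded with use_list=False, so tuple keys survive the trip;
    # tuple *values*, however, may come back as lists
    assert (u'my_ext', 0) in doc2.user_data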
@@ -853,7 +859,8 @@ cdef class Doc:
                 "Doc.merge received %d non-keyword arguments. Expected either "
                 "3 arguments (deprecated), or 0 (use keyword arguments). "
                 "Arguments supplied:\n%s\n"
-                "Keyword arguments: %s\n" % (len(args), repr(args), repr(attributes)))
+                "Keyword arguments: %s\n" % (len(args), repr(args),
+                                             repr(attributes)))

         # More deprecated attribute handling =/
         if 'label' in attributes:
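The keyword-argument calling convention the error message above steers users towards, sketched:

    doc = nlp(u'I like New York')
    start = doc[2].idx                  # character offsets, not token indices
    end = doc[3].idx + len(doc[3])
    # zero positional arguments; annotations passed as keyword arguments
    doc.merge(start, end, lemma=u'New York', ent_type=u'GPE')
    assert len(doc) == 3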
@@ -128,14 +128,17 @@ cdef class Span:
     @property
     def _(self):
         """User space for adding custom attribute extensions."""
         return Underscore(Underscore.span_extensions, self,
                           start=self.start_char, end=self.end_char)

     def as_doc(self):
-        '''Create a Doc object view of the Span's data.
-
-        This is mostly useful for C-typed interfaces.
-        '''
+        # TODO: fix
+        """Create a `Doc` object view of the Span's data. This is mostly
+        useful for C-typed interfaces.
+
+        RETURNS (Doc): The `Doc` view of the span.
+        """
         cdef Doc doc = Doc(self.doc.vocab)
         doc.length = self.end-self.start
         doc.c = &self.doc.c[self.start]
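The `_` property above is the hook for custom attribute extensions, and `as_doc()` exposes the span as a `Doc`. A sketch assuming the spaCy 2.0 extension API:

    from spacy.tokens import Span

    Span.set_extension('is_city', default=False)
    doc = nlp(u'I like New York')
    span = doc[2:4]
    span._.is_city = True                 # stored via the Underscore object
    assert span.as_doc().text == span.text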
@@ -259,10 +262,7 @@ cdef class Span:
             self.end = end + 1

     property sent:
-        """The sentence span that this span is a part of.
-
-        RETURNS (Span): The sentence span that the span is a part of.
-        """
+        """RETURNS (Span): The sentence span that the span is a part of."""
         def __get__(self):
             if 'sent' in self.doc.user_span_hooks:
                 return self.doc.user_span_hooks['sent'](self)
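Sketch of the tidied property (a registered `'sent'` user hook would take precedence, as the code above shows):

    doc = nlp(u'Give it back! He pleaded.')
    span = doc[1:3]                       # "it back"
    assert span.sent.text == u'Give it back!'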
@@ -275,13 +275,10 @@ cdef class Span:
                 n += 1
                 if n >= self.doc.length:
                     raise RuntimeError
-        return self.doc[root.l_edge : root.r_edge + 1]
+        return self.doc[root.l_edge:root.r_edge + 1]

     property has_vector:
-        """A boolean value indicating whether a word vector is associated with
-        the object.
-
-        RETURNS (bool): Whether a word vector is associated with the object.
+        """RETURNS (bool): Whether a word vector is associated with the object.
         """
         def __get__(self):
             if 'has_vector' in self.doc.user_span_hooks:
@@ -303,10 +300,7 @@ cdef class Span:
             return self._vector

     property vector_norm:
-        """The L2 norm of the document's vector representation.
-
-        RETURNS (float): The L2 norm of the vector representation.
-        """
+        """RETURNS (float): The L2 norm of the vector representation."""
         def __get__(self):
             if 'vector_norm' in self.doc.user_span_hooks:
                 return self.doc.user_span_hooks['vector'](self)
@@ -320,7 +314,9 @@ cdef class Span:
             return self._vector_norm

     property sentiment:
-        # TODO: docstring
+        """RETURNS (float): A scalar value indicating the positivity or
+        negativity of the span.
+        """
         def __get__(self):
             if 'sentiment' in self.doc.user_span_hooks:
                 return self.doc.user_span_hooks['sentiment'](self)
@@ -328,10 +324,7 @@ cdef class Span:
         return sum([token.sentiment for token in self]) / len(self)

     property text:
-        """A unicode representation of the span text.
-
-        RETURNS (unicode): The original verbatim text of the span.
-        """
+        """RETURNS (unicode): The original verbatim text of the span."""
         def __get__(self):
             text = self.text_with_ws
             if self[-1].whitespace_:
@@ -364,10 +357,11 @@ cdef class Span:
                 "requires a statistical model to be installed and loaded. "
                 "For more info, see the "
                 "documentation: \n%s\n" % about.__docs_models__)
-        # Accumulate the result before beginning to iterate over it. This prevents
-        # the tokenisation from being changed out from under us during the iteration.
-        # The tricky thing here is that Span accepts its tokenisation changing,
-        # so it's okay once we have the Span objects. See Issue #375
+        # Accumulate the result before beginning to iterate over it. This
+        # prevents the tokenisation from being changed out from under us
+        # during the iteration. The tricky thing here is that Span accepts
+        # its tokenisation changing, so it's okay once we have the Span
+        # objects. See Issue #375
         spans = []
         cdef attr_t label
         for start, end, label in self.doc.noun_chunks_iterator(self):
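The accumulate-first comment matters because yielding lazily would let the tokenisation change mid-iteration (e.g. if the caller merges each chunk). A sketch, with the exact chunk texts depending on the parse:

    doc = nlp(u'A phrase with another phrase occurs.')
    # the chunks are collected into a list before being yielded
    chunks = [c.text for c in doc[0:5].noun_chunks]
    # with the usual parse: [u'A phrase', u'another phrase']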
@@ -459,7 +453,7 @@ cdef class Span:
         YIELDS (Token):A left-child of a token of the span.
         """
         def __get__(self):
-            for token in reversed(self): # Reverse, so we get tokens in order
+            for token in reversed(self):  # Reverse, so we get tokens in order
                 for left in token.lefts:
                     if left.i < self.start:
                         yield left
@@ -476,6 +470,20 @@ cdef class Span:
             if right.i >= self.end:
                 yield right

+    property n_lefts:
+        """RETURNS (int): The number of leftward immediate children of the
+        span, in the syntactic dependency parse.
+        """
+        # TODO: implement
+        raise NotImplementedError()
+
+    property n_rights:
+        """RETURNS (int): The number of rightward immediate children of the
+        span, in the syntactic dependency parse.
+        """
+        # TODO: implement
+        raise NotImplementedError()
+
     property subtree:
         """Tokens that descend from tokens in the span, but fall outside it.

@@ -489,29 +497,21 @@ cdef class Span:
             yield from word.subtree

     property ent_id:
-        """An (integer) entity ID.
-
-        RETURNS (uint64): The entity ID.
-        """
+        """RETURNS (uint64): The entity ID."""
         def __get__(self):
             return self.root.ent_id

         def __set__(self, hash_t key):
-            # TODO
             raise NotImplementedError(
                 "Can't yet set ent_id from Span. Vote for this feature on "
                 "the issue tracker: http://github.com/explosion/spaCy/issues")

     property ent_id_:
-        """A (string) entity ID. Usually assigned by patterns in the `Matcher`.
-
-        RETURNS (unicode): The entity ID.
-        """
+        """RETURNS (unicode): The (string) entity ID."""
         def __get__(self):
             return self.root.ent_id_

         def __set__(self, hash_t key):
-            # TODO
             raise NotImplementedError(
                 "Can't yet set ent_id_ from Span. Vote for this feature on the "
                 "issue tracker: http://github.com/explosion/spaCy/issues")
@@ -525,10 +525,7 @@ cdef class Span:
         return ''.join([t.orth_ for t in self]).strip()

     property lemma_:
-        """The span's lemma.
-
-        RETURNS (unicode): The span's lemma.
-        """
+        """RETURNS (unicode): The span's lemma."""
         def __get__(self):
             return ' '.join([t.lemma_ for t in self]).strip()
|
@ -543,15 +540,12 @@ cdef class Span:
|
|||
return ''.join([t.text_with_ws.lower() for t in self]).strip()
|
||||
|
||||
property string:
|
||||
"""Deprecated: Use Span.text instead."""
|
||||
"""Deprecated: Use Span.text_with_ws instead."""
|
||||
def __get__(self):
|
||||
return ''.join([t.text_with_ws for t in self])
|
||||
|
||||
property label_:
|
||||
"""The span's label.
|
||||
|
||||
RETURNS (unicode): The span's label.
|
||||
"""
|
||||
"""RETURNS (unicode): The span's label."""
|
||||
def __get__(self):
|
||||
return self.doc.vocab.strings[self.label]
|
||||
|
||||
|
|
|
@@ -145,37 +145,32 @@ cdef class Token:
             return self.doc.user_token_hooks['similarity'](self)
         if self.vector_norm == 0 or other.vector_norm == 0:
             return 0.0
-        return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
+        return (numpy.dot(self.vector, other.vector) /
+                (self.vector_norm * other.vector_norm))

     property lex_id:
-        """ID of the token's lexical type.
-
-        RETURNS (int): ID of the token's lexical type."""
+        """RETURNS (int): Sequential ID of the token's lexical type."""
         def __get__(self):
             return self.c.lex.id

     property rank:
-        # TODO: add docstring
+        """RETURNS (int): Sequential ID of the token's lexical type, used to
+        index into tables, e.g. for word vectors."""
         def __get__(self):
             return self.c.lex.id

     property string:
         """Deprecated: Use Token.text_with_ws instead."""
         def __get__(self):
             return self.text_with_ws

     property text:
-        """A unicode representation of the token text.
-
-        RETURNS (unicode): The original verbatim text of the token.
-        """
+        """RETURNS (unicode): The original verbatim text of the token."""
         def __get__(self):
             return self.orth_

     property text_with_ws:
-        """The text content of the token with a trailing whitespace character
-        if it has one.
-
-        RETURNS (unicode): The text content of the span (with trailing
+        """RETURNS (unicode): The text content of the span (with trailing
         whitespace).
         """
         def __get__(self):
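The rewrapped return above is plain cosine similarity; an equivalent check:

    import numpy

    doc = nlp(u'apple orange')
    apple, orange = doc
    if apple.vector_norm and orange.vector_norm:
        cos = numpy.dot(apple.vector, orange.vector) / (
            apple.vector_norm * orange.vector_norm)
        assert abs(apple.similarity(orange) - cos) < 1e-5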
@@ -186,74 +181,104 @@ cdef class Token:
             return orth

     property prob:
+        """RETURNS (float): Smoothed log probability estimate of token type."""
         def __get__(self):
             return self.c.lex.prob

     property sentiment:
+        """RETURNS (float): A scalar value indicating the positivity or
+        negativity of the token."""
         def __get__(self):
             if 'sentiment' in self.doc.user_token_hooks:
                 return self.doc.user_token_hooks['sentiment'](self)
             return self.c.lex.sentiment

     property lang:
+        """RETURNS (uint64): ID of the language of the parent document's
+        vocabulary.
+        """
         def __get__(self):
             return self.c.lex.lang

     property idx:
+        """RETURNS (int): The character offset of the token within the parent
+        document.
+        """
         def __get__(self):
             return self.c.idx

     property cluster:
+        """RETURNS (int): Brown cluster ID."""
         def __get__(self):
             return self.c.lex.cluster

     property orth:
+        """RETURNS (uint64): ID of the verbatim text content."""
         def __get__(self):
             return self.c.lex.orth

     property lower:
+        """RETURNS (uint64): ID of the lowercase token text."""
         def __get__(self):
             return self.c.lex.lower

     property norm:
+        """RETURNS (uint64): ID of the token's norm, i.e. a normalised form of
+        the token text. Usually set in the language's tokenizer exceptions
+        or norm exceptions.
+        """
         def __get__(self):
             return self.c.lex.norm

     property shape:
+        """RETURNS (uint64): ID of the token's shape, a transform of the
+        tokens's string, to show orthographic features (e.g. "Xxxx", "dd").
+        """
         def __get__(self):
             return self.c.lex.shape

     property prefix:
+        """RETURNS (uint64): ID of a length-N substring from the start of the
+        token. Defaults to `N=1`.
+        """
         def __get__(self):
             return self.c.lex.prefix

     property suffix:
+        """RETURNS (uint64): ID of a length-N substring from the end of the
+        token. Defaults to `N=3`.
+        """
         def __get__(self):
             return self.c.lex.suffix

     property lemma:
-        """Base form of the word, with no inflectional suffixes.
-
-        RETURNS (uint64): Token lemma.
+        """RETURNS (uint64): ID of the base form of the word, with no
+        inflectional suffixes.
         """
         def __get__(self):
             return self.c.lemma

         def __set__(self, attr_t lemma):
             self.c.lemma = lemma

     property pos:
+        """RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
         def __get__(self):
             return self.c.pos

     property tag:
+        """RETURNS (uint64): ID of fine-grained part-of-speech tag."""
         def __get__(self):
             return self.c.tag

         def __set__(self, attr_t tag):
             self.vocab.morphology.assign_tag(self.c, tag)

     property dep:
+        """RETURNS (uint64): ID of syntactic dependency label."""
         def __get__(self):
             return self.c.dep

         def __set__(self, attr_t label):
             self.c.dep = label
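Each hash-valued property above has a string-valued counterpart with a trailing underscore (tidied further down in this commit). A sketch:

    doc = nlp(u'Apples')
    token = doc[0]
    assert token.lower == doc.vocab.strings.add(u'apples')   # hash ID
    assert token.prefix_ == u'A'      # length-1 prefix by default
    assert token.suffix_ == u'les'    # length-3 suffix by default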
@@ -294,14 +319,21 @@ cdef class Token:
             return numpy.sqrt((vector ** 2).sum())

     property n_lefts:
+        """RETURNS (int): The number of leftward immediate children of the
+        word, in the syntactic dependency parse.
+        """
         def __get__(self):
             return self.c.l_kids

     property n_rights:
+        """RETURNS (int): The number of rightward immediate children of the
+        word, in the syntactic dependency parse.
+        """
         def __get__(self):
             return self.c.r_kids

     property sent_start:
+        # TODO: fix and document
         def __get__(self):
             return self.c.sent_start
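Unlike the Span versions added earlier in this commit, the Token counters are implemented, backed by `c.l_kids` and `c.r_kids`. The doc examples added below exercise them:

    doc = nlp(u'I like New York in Autumn.')
    assert doc[3].n_lefts == 1    # "New" is York's left child
    assert doc[3].n_rights == 1   # per the doc examples added below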
@@ -321,10 +353,12 @@ cdef class Token:
                 "one of: None, True, False")

     property lefts:
+        """The leftward immediate children of the word, in the syntactic
+        dependency parse.
+
+        YIELDS (Token): A left-child of the token.
+        """
         def __get__(self):
-            """The leftward immediate children of the word, in the syntactic
-            dependency parse.
-            """
             cdef int nr_iter = 0
             cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
             while ptr < self.c:
@@ -338,10 +372,12 @@ cdef class Token:
                 "while looking for token.lefts")

     property rights:
+        """The rightward immediate children of the word, in the syntactic
+        dependency parse.
+
+        YIELDS (Token): A right-child of the token.
+        """
         def __get__(self):
-            """The rightward immediate children of the word, in the syntactic
-            dependency parse.
-            """
             cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
             tokens = []
             cdef int nr_iter = 0
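The website examples added later in this commit exercise both generators:

    doc = nlp(u'I like New York in Autumn.')
    assert [t.text for t in doc[3].lefts] == [u'New']
    assert [t.text for t in doc[3].rights] == [u'in']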
@@ -420,18 +456,17 @@ cdef class Token:
         """
         if self.doc is not descendant.doc:
             return False
-        return any( ancestor.i == self.i for ancestor in descendant.ancestors )
+        return any(ancestor.i == self.i for ancestor in descendant.ancestors)

     property head:
         """The syntactic parent, or "governor", of this token.

-        RETURNS (Token): The token head.
+        RETURNS (Token): The token predicted by the parser to be the head of
+        the current token.
         """
         def __get__(self):
-            """The token predicted by the parser to be the head of the current
-            token.
-            """
             return self.doc[self.i + self.c.head]

         def __set__(self, Token new_head):
             # this function sets the head of self to new_head
             # and updates the counters for left/right dependents
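A sketch of `head` and `is_ancestor()`, assuming the usual parse of the sentence:

    doc = nlp(u'I like New York')
    new, york = doc[2], doc[3]
    assert new.head.i == york.i           # "New" attaches to "York"
    assert york.is_ancestor(new)
    assert not new.is_ancestor(york)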
@@ -451,7 +486,7 @@ cdef class Token:
         cdef Token anc, child

         # update number of deps of old head
-        if self.c.head > 0: # left dependent
+        if self.c.head > 0:  # left dependent
             old_head.c.l_kids -= 1
             if self.c.l_edge == old_head.c.l_edge:
                 # the token dominates the left edge so the left edge of
@@ -543,12 +578,10 @@ cdef class Token:
             yield from word.conjuncts

     property ent_type:
-        """Named entity type.
-
-        RETURNS (uint64): Named entity type.
-        """
+        """RETURNS (uint64): Named entity type."""
         def __get__(self):
             return self.c.ent_type

         def __set__(self, ent_type):
             self.c.ent_type = ent_type
@@ -562,12 +595,10 @@ cdef class Token:
         return self.c.ent_iob

     property ent_type_:
-        """Named entity type.
-
-        RETURNS (unicode): Named entity type.
-        """
+        """RETURNS (unicode): Named entity type."""
         def __get__(self):
             return self.vocab.strings[self.c.ent_type]

         def __set__(self, ent_type):
             self.c.ent_type = self.vocab.strings.add(ent_type)
@@ -583,9 +614,8 @@ cdef class Token:
         return iob_strings[self.c.ent_iob]

     property ent_id:
-        """ID of the entity the token is an instance of, if any.
-
-        RETURNS (uint64): ID of the entity.
+        """RETURNS (uint64): ID of the entity the token is an instance of,
+        if any.
         """
         def __get__(self):
             return self.c.ent_id
@@ -594,9 +624,8 @@ cdef class Token:
         self.c.ent_id = key

     property ent_id_:
-        """ID of the entity the token is an instance of, if any.
-
-        RETURNS (unicode): ID of the entity.
+        """RETURNS (unicode): ID of the entity the token is an instance of,
+        if any.
         """
         def __get__(self):
             return self.vocab.strings[self.c.ent_id]
@@ -605,230 +634,192 @@ cdef class Token:
             self.c.ent_id = self.vocab.strings.add(name)

     property whitespace_:
-        """Trailing space character if present.
-
-        RETURNS (unicode): The whitespace character.
+        """RETURNS (unicode): The trailing whitespace character, if present.
         """
         def __get__(self):
             return ' ' if self.c.spacy else ''

     property orth_:
-        """Verbatim text content (identical to `Token.text`). Existst mostly
-        for consistency with the other attributes.
-
-        RETURNS (unicode): The token text.
+        """RETURNS (unicode): Verbatim text content (identical to
+        `Token.text`). Existst mostly for consistency with the other
+        attributes.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.orth]

     property lower_:
-        """Lowercase form of the token text. Equivalent to
-        `Token.text.lower()`.
-
-        RETURNS (unicode): The lowercase token text.
+        """RETURNS (unicode): The lowercase token text. Equivalent to
+        `Token.text.lower()`.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.lower]

     property norm_:
-        """The token's norm, i.e. a normalised form of the token text.
-        Usually set in the language's tokenizer exceptions or norm exceptions.
-
-        RETURNS (unicode): The norm.
+        """RETURNS (unicode): The token's norm, i.e. a normalised form of the
+        token text. Usually set in the language's tokenizer exceptions or
+        norm exceptions.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.norm]

     property shape_:
-        """Transform of the tokens's string, to show orthographic features.
-        For example, "Xxxx" or "dd".
-
-        RETURNS (unicode): The token shape.
+        """RETURNS (unicode): Transform of the tokens's string, to show
+        orthographic features. For example, "Xxxx" or "dd".
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.shape]

     property prefix_:
-        """A length-N substring from the start of the token. Defaults to `N=1`.
-
-        RETURNS (unicode): The token's prefix.
+        """RETURNS (unicode): A length-N substring from the start of the token.
+        Defaults to `N=1`.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.prefix]

     property suffix_:
-        """A length-N substring from the end of the token. Defaults to `N=3`.
-
-        RETURNS (unicode): The token's suffix.
+        """RETURNS (unicode): A length-N substring from the end of the token.
+        Defaults to `N=3`.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.suffix]

     property lang_:
-        """Language of the parent document's vocabulary, e.g. 'en'.
-
-        RETURNS (unicode): The language code.
+        """RETURNS (unicode): Language of the parent document's vocabulary,
+        e.g. 'en'.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lex.lang]

     property lemma_:
-        """Base form of the word, with no inflectional suffixes.
-
-        RETURNS (unicode): Token lemma.
+        """RETURNS (unicode): The token lemma, i.e. the base form of the word,
+        with no inflectional suffixes.
         """
         def __get__(self):
             return self.vocab.strings[self.c.lemma]

         def __set__(self, unicode lemma_):
             self.c.lemma = self.vocab.strings.add(lemma_)

     property pos_:
-        """Coarse-grained part-of-speech.
-
-        RETURNS (unicode): The part-of-speech tag.
-        """
+        """RETURNS (unicode): Coarse-grained part-of-speech tag."""
         def __get__(self):
             return parts_of_speech.NAMES[self.c.pos]

     property tag_:
-        """Fine-grained part-of-speech.
-
-        RETURNS (unicode): The part-of-speech tag.
-        """
+        """RETURNS (unicode): Fine-grained part-of-speech tag."""
         def __get__(self):
             return self.vocab.strings[self.c.tag]

         def __set__(self, tag):
             self.tag = self.vocab.strings.add(tag)

     property dep_:
-        """Syntactic dependency relation.
-
-        RETURNS (unicode): The dependency label.
-        """
+        """RETURNS (unicode): The syntactic dependency label."""
         def __get__(self):
             return self.vocab.strings[self.c.dep]

         def __set__(self, unicode label):
             self.c.dep = self.vocab.strings.add(label)

     property is_oov:
-        """Is the token out-of-vocabulary?
-
-        RETURNS (bool): Whether the token is out-of-vocabulary.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_OOV)
+        """RETURNS (bool): Whether the token is out-of-vocabulary."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_OOV)

     property is_stop:
-        """Is the token part of a "stop list"? (defined by the language data)
-
-        RETURNS (bool): Whether the token is a stop word.
+        """RETURNS (bool): Whether the token is a stop word, i.e. part of a
+        "stop list" defined by the language data.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_STOP)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_STOP)

     property is_alpha:
-        """Does the token consist of alphabetic characters? Equivalent to
-        `token.text.isalpha()`.
-
-        RETURNS (bool): Whether the token consists of alpha characters.
+        """RETURNS (bool): Whether the token consists of alpha characters.
+        Equivalent to `token.text.isalpha()`.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)

     property is_ascii:
-        """Does the token consist of ASCII characters? Equivalent to
-        `[any(ord(c) >= 128 for c in token.text)]`.
-
-        RETURNS (bool): Whether the token consists of ASCII characters.
+        """RETURNS (bool): Whether the token consists of ASCII characters.
+        Equivalent to `[any(ord(c) >= 128 for c in token.text)]`.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ASCII)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_ASCII)

     property is_digit:
-        """Does the token consist of digits? Equivalent to
-        `token.text.isdigit()`.
-
-        RETURNS (bool): Whether the token consists of digits.
+        """RETURNS (bool): Whether the token consists of digits. Equivalent to
+        `token.text.isdigit()`.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)

     property is_lower:
-        """Is the token in lowercase? Equivalent to `token.text.islower()`.
-
-        RETURNS (bool): Whether the token is in lowercase.
+        """RETURNS (bool): Whether the token is in lowercase. Equivalent to
+        `token.text.islower()`.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LOWER)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_LOWER)

     property is_upper:
-        """Is the token in uppercase? Equivalent to `token.text.isupper()`.
-
-        RETURNS (bool): Whether the token is in uppercase.
+        """RETURNS (bool): Whether the token is in uppercase. Equivalent to
+        `token.text.isupper()`
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_UPPER)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_UPPER)

     property is_title:
-        """Is the token in titlecase? Equivalent to `token.text.istitle()`.
-
-        RETURNS (bool): Whether the token is in titlecase.
+        """RETURNS (bool): Whether the token is in titlecase. Equivalent to
+        `token.text.istitle()`.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_TITLE)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_TITLE)

     property is_punct:
-        """Is the token punctuation?
-
-        RETURNS (bool): Whether the token is punctuation.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)
+        """RETURNS (bool): Whether the token is punctuation."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)

     property is_space:
-        """Does the token consist of whitespace characters? Equivalent to
-        `token.text.isspace()`.
-
-        RETURNS (bool): Whether the token consists of whitespace characters.
+        """RETURNS (bool): Whether the token consists of whitespace characters.
+        Equivalent to `token.text.isspace()`.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_SPACE)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_SPACE)

     property is_bracket:
-        """Is the token a bracket?
-
-        RETURNS (bool): Whether the token is a bracket.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_BRACKET)
+        """RETURNS (bool): Whether the token is a bracket."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_BRACKET)

     property is_quote:
-        """Is the token a quotation mark?
-
-        RETURNS (bool): Whether the token is a quotation mark.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_QUOTE)
+        """RETURNS (bool): Whether the token is a quotation mark."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_QUOTE)

     property is_left_punct:
-        """Is the token a left punctuation mark, e.g. "("?
-
-        RETURNS (bool): Whether the token is a left punctuation mark.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT)
+        """RETURNS (bool): Whether the token is a left punctuation mark."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT)

     property is_right_punct:
-        """Is the token a left punctuation mark, e.g. "("?
-
-        RETURNS (bool): Whether the token is a left punctuation mark.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT)
+        """RETURNS (bool): Whether the token is a left punctuation mark."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT)

     property like_url:
-        """Does the token resemble a URL?
-
-        RETURNS (bool): Whether the token resembles a URL.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_URL)
+        """RETURNS (bool): Whether the token resembles a URL."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, LIKE_URL)

     property like_num:
-        """Does the token represent a number? e.g. "10.9", "10", "ten", etc.
-
-        RETURNS (bool): Whether the token resembles a number.
+        """RETURNS (bool): Whether the token resembles a number, e.g. "10.9",
+        "10", "ten", etc.
         """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)

     property like_email:
-        """Does the token resemble an email address?
-
-        RETURNS (bool): Whether the token resembles an email address.
-        """
-        def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)
+        """RETURNS (bool): Whether the token resembles an email address."""
+        def __get__(self):
+            return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)

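A few of the string-valued attributes and lexeme flags above, sketched (token positions assume the usual tokenisation):

    doc = nlp(u'Visit example.com now!')
    assert doc[1].like_url               # 'example.com' kept as one token
    assert doc[3].is_punct
    assert doc[0].is_title and doc[0].is_alpha
    assert doc[0].shape_ == u'Xxxxx'
    assert doc[0].whitespace_ == u' '    # trailing space, or u''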
@@ -784,3 +784,10 @@ p
         +cell
             | A dictionary that allows customisation of properties of
             | #[code Span] children.
+
+    +row
+        +cell #[code _]
+        +cell #[code Underscore]
+        +cell
+            | User space for adding custom
+            | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions].

@@ -369,7 +369,7 @@ p
 +tag property
 +tag-model("parse")

-p Tokens that are to the left of the span, whose head is within the span.
+p Tokens that are to the left of the span, whose heads are within the span.

 +aside-code("Example").
     doc = nlp(u'I like New York in Autumn.')
@@ -386,7 +386,7 @@ p Tokens that are to the left of the span, whose head is within the span.
 +tag property
 +tag-model("parse")

-p Tokens that are to the right of the span, whose head is within the span.
+p Tokens that are to the right of the span, whose heads are within the span.

 +aside-code("Example").
     doc = nlp(u'I like New York in Autumn.')
@@ -399,6 +399,42 @@ p Tokens that are to the right of the span, whose head is within the span.
         +cell #[code Token]
         +cell A right-child of a token of the span.

++h(2, "n_lefts") Span.n_lefts
+    +tag property
+    +tag-model("parse")
+
+p
+    | The number of tokens that are to the left of the span, whose heads are
+    | within the span.
+
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    assert doc[3:7].n_lefts == 1
+
++table(["Name", "Type", "Description"])
+    +row("foot")
+        +cell returns
+        +cell int
+        +cell The number of left-child tokens.
+
++h(2, "n_rights") Span.n_rights
+    +tag property
+    +tag-model("parse")
+
+p
+    | The number of tokens that are to the right of the span, whose heads are
+    | within the span.
+
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    assert doc[2:4].n_rights == 1
+
++table(["Name", "Type", "Description"])
+    +row("foot")
+        +cell returns
+        +cell int
+        +cell The number of right-child tokens.
+
 +h(2, "subtree") Span.subtree
     +tag property
     +tag-model("parse")
@@ -553,3 +589,17 @@ p
         +cell #[code ent_id_]
         +cell unicode
         +cell The string ID of the named entity the token is an instance of.
+
+    +row
+        +cell #[code sentiment]
+        +cell float
+        +cell
+            | A scalar value indicating the positivity or negativity of the
+            | span.
+
+    +row
+        +cell #[code _]
+        +cell #[code Underscore]
+        +cell
+            | User space for adding custom
+            | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions].

@@ -302,6 +302,80 @@ p A sequence of the token's immediate syntactic children.
         +cell #[code Token]
         +cell A child token such that #[code child.head==self].

++h(2, "lefts") Token.lefts
+    +tag property
+    +tag-model("parse")
+
+p
+    | The leftward immediate children of the word, in the syntactic dependency
+    | parse.
+
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    lefts = [t.text for t in doc[3].lefts]
+    assert lefts == [u'New']
+
++table(["Name", "Type", "Description"])
+    +row("foot")
+        +cell yields
+        +cell #[code Token]
+        +cell A left-child of the token.
+
++h(2, "rights") Token.rights
+    +tag property
+    +tag-model("parse")
+
+p
+    | The rightward immediate children of the word, in the syntactic
+    | dependency parse.
+
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    rights = [t.text for t in doc[3].rights]
+    assert rights == [u'in']
+
++table(["Name", "Type", "Description"])
+    +row("foot")
+        +cell yields
+        +cell #[code Token]
+        +cell A right-child of the token.
+
++h(2, "n_lefts") Token.n_lefts
+    +tag property
+    +tag-model("parse")
+
+p
+    | The number of leftward immediate children of the word, in the syntactic
+    | dependency parse.
+
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    assert doc[3].n_lefts == 1
+
++table(["Name", "Type", "Description"])
+    +row("foot")
+        +cell returns
+        +cell int
+        +cell The number of left-child tokens.
+
++h(2, "n_rights") Token.n_rights
+    +tag property
+    +tag-model("parse")
+
+p
+    | The number of rightward immediate children of the word, in the syntactic
+    | dependency parse.
+
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    assert doc[3].n_rights == 1
+
++table(["Name", "Type", "Description"])
+    +row("foot")
+        +cell returns
+        +cell int
+        +cell The number of right-child tokens.
+
 +h(2, "subtree") Token.subtree
     +tag property
     +tag-model("parse")
@@ -713,9 +787,30 @@ p The L2 norm of the token's vector representation.
     +row
         +cell #[code sentiment]
         +cell float
-        +cell A scalar value indicating the positivity or negativity of the token.
+        +cell
+            | A scalar value indicating the positivity or negativity of the
+            | token.

     +row
         +cell #[code lex_id]
         +cell int
-        +cell ID of the token's lexical type.
+        +cell Sequential ID of the token's lexical type.
+
+    +row
+        +cell #[code rank]
+        +cell int
+        +cell
+            | Sequential ID of the token's lexical type, used to index into
+            | tagles, e.g. for word vectors.
+
+    +row
+        +cell #[code cluster]
+        +cell int
+        +cell Brown cluster ID.
+
+    +row
+        +cell #[code _]
+        +cell #[code Underscore]
+        +cell
+            | User space for adding custom
+            | #[+a("/usage/processing-pipelines#custom-components-attributes") attribute extensions].

@@ -111,11 +111,13 @@ p
 p
     | A few more convenience attributes are provided for iterating around the
-    | local tree from the token. The #[code .lefts] and #[code .rights]
-    | attributes provide sequences of syntactic children that occur before and
-    | after the token. Both sequences are in sentences order. There are also
-    | two integer-typed attributes, #[code .n_rights] and #[code .n_lefts],
-    | that give the number of left and right children.
+    | local tree from the token. The #[+api("token#lefts") #[code Token.lefts]]
+    | and #[+api("token#rights") #[code Token.rights]] attributes provide
+    | sequences of syntactic children that occur before and after the token.
+    | Both sequences are in sentence order. There are also two integer-typed
+    | attributes, #[+api("token#n_rights") #[code Token.n_rights]] and
+    | #[+api("token#n_lefts") #[code Token.n_lefts]], that give the number of
+    | left and right children.

 +code.
     doc = nlp(u'bright red apples on the tree')
@@ -126,10 +128,11 @@ p
 p
     | You can get a whole phrase by its syntactic head using the
-    | #[code .subtree] attribute. This returns an ordered sequence of tokens.
-    | You can walk up the tree with the #[code .ancestors] attribute, and
-    | check dominance with the #[+api("token#is_ancestor") #[code .is_ancestor()]]
-    | method.
+    | #[+api("token#subtree") #[code Token.subtree]] attribute. This returns an
+    | ordered sequence of tokens. You can walk up the tree with the
+    | #[+api("token#ancestors") #[code Token.ancestors]] attribute, and
+    | check dominance with
+    | #[+api("token#is_ancestor") #[code Token.is_ancestor()]].

 +aside("Projective vs. non-projective")
     | For the #[+a("/models/en") default English model], the
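Pulling the renamed references together, a sketch that extends the page's own example, assuming the usual parse from the English model:

    doc = nlp(u'bright red apples on the tree')
    apples = doc[2]
    assert [t.text for t in apples.lefts] == [u'bright', u'red']
    assert apples.n_lefts == 2 and apples.n_rights == 1
    # subtree of "on" covers the prepositional phrase; "apples" dominates it
    assert [t.text for t in doc[3].subtree] == [u'on', u'the', u'tree']
    assert apples.is_ancestor(doc[5])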