Tidy up property code style (#3391)

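Use the `@property` decorator for properties that only have a getter, and keep the existing Cython `property name:` block syntax (with `__get__`/`__set__`) for properties that have both a getter and a setter.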
This commit is contained in:
Ines Montani 2019-03-11 15:59:09 +01:00 committed by Matthew Honnibal
parent c3df4d1108
commit 47e9c274ef
7 changed files with 490 additions and 491 deletions

View File

@ -161,17 +161,17 @@ cdef class Lexeme:
Lexeme.c_from_bytes(self.c, lex_data) Lexeme.c_from_bytes(self.c, lex_data)
self.orth = self.c.orth self.orth = self.c.orth
property has_vector: @property
def has_vector(self):
"""RETURNS (bool): Whether a word vector is associated with the object. """RETURNS (bool): Whether a word vector is associated with the object.
""" """
def __get__(self): return self.vocab.has_vector(self.c.orth)
return self.vocab.has_vector(self.c.orth)
property vector_norm: @property
def vector_norm(self):
"""RETURNS (float): The L2 norm of the vector representation.""" """RETURNS (float): The L2 norm of the vector representation."""
def __get__(self): vector = self.vector
vector = self.vector return numpy.sqrt((vector**2).sum())
return numpy.sqrt((vector**2).sum())
property vector: property vector:
"""A real-valued meaning representation. """A real-valued meaning representation.
@ -209,17 +209,17 @@ cdef class Lexeme:
def __set__(self, float sentiment): def __set__(self, float sentiment):
self.c.sentiment = sentiment self.c.sentiment = sentiment
property orth_: @property
def orth_(self):
"""RETURNS (unicode): The original verbatim text of the lexeme """RETURNS (unicode): The original verbatim text of the lexeme
(identical to `Lexeme.text`). Exists mostly for consistency with (identical to `Lexeme.text`). Exists mostly for consistency with
the other attributes.""" the other attributes."""
def __get__(self): return self.vocab.strings[self.c.orth]
return self.vocab.strings[self.c.orth]
property text: @property
def text(self):
"""RETURNS (unicode): The original verbatim text of the lexeme.""" """RETURNS (unicode): The original verbatim text of the lexeme."""
def __get__(self): return self.orth_
return self.orth_
property lower: property lower:
"""RETURNS (unicode): Lowercase form of the lexeme.""" """RETURNS (unicode): Lowercase form of the lexeme."""

View File

@ -369,9 +369,9 @@ cdef class ArcEager(TransitionSystem):
actions[LEFT].setdefault('dep', 0) actions[LEFT].setdefault('dep', 0)
return actions return actions
property action_types: @property
def __get__(self): def action_types(self):
return (SHIFT, REDUCE, LEFT, RIGHT, BREAK) return (SHIFT, REDUCE, LEFT, RIGHT, BREAK)
def get_cost(self, StateClass state, GoldParse gold, action): def get_cost(self, StateClass state, GoldParse gold, action):
cdef Transition t = self.lookup_transition(action) cdef Transition t = self.lookup_transition(action)

View File

@ -80,9 +80,9 @@ cdef class BiluoPushDown(TransitionSystem):
actions[action][label] += 1 actions[action][label] += 1
return actions return actions
property action_types: @property
def __get__(self): def action_types(self):
return (BEGIN, IN, LAST, UNIT, OUT) return (BEGIN, IN, LAST, UNIT, OUT)
def move_name(self, int move, attr_t label): def move_name(self, int move, attr_t label):
if move == OUT: if move == OUT:

View File

@ -384,7 +384,8 @@ cdef class Doc:
xp = get_array_module(vector) xp = get_array_module(vector)
return xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm) return xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)
property has_vector: @property
def has_vector(self):
"""A boolean value indicating whether a word vector is associated with """A boolean value indicating whether a word vector is associated with
the object. the object.
@ -392,15 +393,14 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#has_vector DOCS: https://spacy.io/api/doc#has_vector
""" """
def __get__(self): if "has_vector" in self.user_hooks:
if "has_vector" in self.user_hooks: return self.user_hooks["has_vector"](self)
return self.user_hooks["has_vector"](self) elif self.vocab.vectors.data.size:
elif self.vocab.vectors.data.size: return True
return True elif self.tensor.size:
elif self.tensor.size: return True
return True else:
else: return False
return False
property vector: property vector:
"""A real-valued meaning representation. Defaults to an average of the """A real-valued meaning representation. Defaults to an average of the
@ -453,22 +453,22 @@ cdef class Doc:
def __set__(self, value): def __set__(self, value):
self._vector_norm = value self._vector_norm = value
property text: @property
def text(self):
"""A unicode representation of the document text. """A unicode representation of the document text.
RETURNS (unicode): The original verbatim text of the document. RETURNS (unicode): The original verbatim text of the document.
""" """
def __get__(self): return "".join(t.text_with_ws for t in self)
return "".join(t.text_with_ws for t in self)
property text_with_ws: @property
def text_with_ws(self):
"""An alias of `Doc.text`, provided for duck-type compatibility with """An alias of `Doc.text`, provided for duck-type compatibility with
`Span` and `Token`. `Span` and `Token`.
RETURNS (unicode): The original verbatim text of the document. RETURNS (unicode): The original verbatim text of the document.
""" """
def __get__(self): return self.text
return self.text
property ents: property ents:
"""The named entities in the document. Returns a tuple of named entity """The named entities in the document. Returns a tuple of named entity
@ -545,7 +545,8 @@ cdef class Doc:
# Set start as B # Set start as B
self.c[start].ent_iob = 3 self.c[start].ent_iob = 3
property noun_chunks: @property
def noun_chunks(self):
"""Iterate over the base noun phrases in the document. Yields base """Iterate over the base noun phrases in the document. Yields base
noun-phrase #[code Span] objects, if the document has been noun-phrase #[code Span] objects, if the document has been
syntactically parsed. A base noun phrase, or "NP chunk", is a noun syntactically parsed. A base noun phrase, or "NP chunk", is a noun
@ -557,22 +558,22 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#noun_chunks DOCS: https://spacy.io/api/doc#noun_chunks
""" """
def __get__(self): if not self.is_parsed:
if not self.is_parsed: raise ValueError(Errors.E029)
raise ValueError(Errors.E029) # Accumulate the result before beginning to iterate over it. This
# Accumulate the result before beginning to iterate over it. This # prevents the tokenisation from being changed out from under us
# prevents the tokenisation from being changed out from under us # during the iteration. The tricky thing here is that Span accepts
# during the iteration. The tricky thing here is that Span accepts # its tokenisation changing, so it's okay once we have the Span
# its tokenisation changing, so it's okay once we have the Span # objects. See Issue #375.
# objects. See Issue #375. spans = []
spans = [] if self.noun_chunks_iterator is not None:
if self.noun_chunks_iterator is not None: for start, end, label in self.noun_chunks_iterator(self):
for start, end, label in self.noun_chunks_iterator(self): spans.append(Span(self, start, end, label=label))
spans.append(Span(self, start, end, label=label)) for span in spans:
for span in spans: yield span
yield span
property sents: @property
def sents(self):
"""Iterate over the sentences in the document. Yields sentence `Span` """Iterate over the sentences in the document. Yields sentence `Span`
objects. Sentence spans have no label. To improve accuracy on informal objects. Sentence spans have no label. To improve accuracy on informal
texts, spaCy calculates sentence boundaries from the syntactic texts, spaCy calculates sentence boundaries from the syntactic
@ -583,19 +584,18 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#sents DOCS: https://spacy.io/api/doc#sents
""" """
def __get__(self): if not self.is_sentenced:
if not self.is_sentenced: raise ValueError(Errors.E030)
raise ValueError(Errors.E030) if "sents" in self.user_hooks:
if "sents" in self.user_hooks: yield from self.user_hooks["sents"](self)
yield from self.user_hooks["sents"](self) else:
else: start = 0
start = 0 for i in range(1, self.length):
for i in range(1, self.length): if self.c[i].sent_start == 1:
if self.c[i].sent_start == 1: yield Span(self, start, i)
yield Span(self, start, i) start = i
start = i if start != self.length:
if start != self.length: yield Span(self, start, self.length)
yield Span(self, start, self.length)
@property @property
def lang(self): def lang(self):

View File

@ -322,46 +322,47 @@ cdef class Span:
self.start = start self.start = start
self.end = end + 1 self.end = end + 1
property vocab: @property
def vocab(self):
"""RETURNS (Vocab): The Span's Doc's vocab.""" """RETURNS (Vocab): The Span's Doc's vocab."""
def __get__(self): return self.doc.vocab
return self.doc.vocab
property sent: @property
def sent(self):
"""RETURNS (Span): The sentence span that the span is a part of.""" """RETURNS (Span): The sentence span that the span is a part of."""
def __get__(self): if "sent" in self.doc.user_span_hooks:
if "sent" in self.doc.user_span_hooks: return self.doc.user_span_hooks["sent"](self)
return self.doc.user_span_hooks["sent"](self) # This should raise if not parsed / no custom sentence boundaries
# This should raise if not parsed / no custom sentence boundaries self.doc.sents
self.doc.sents # If doc is parsed we can use the deps to find the sentence
# If doc is parsed we can use the deps to find the sentence # otherwise we use the `sent_start` token attribute
# otherwise we use the `sent_start` token attribute cdef int n = 0
cdef int n = 0 cdef int i
cdef int i if self.doc.is_parsed:
if self.doc.is_parsed: root = &self.doc.c[self.start]
root = &self.doc.c[self.start] while root.head != 0:
while root.head != 0: root += root.head
root += root.head n += 1
n += 1 if n >= self.doc.length:
if n >= self.doc.length: raise RuntimeError(Errors.E038)
raise RuntimeError(Errors.E038) return self.doc[root.l_edge:root.r_edge + 1]
return self.doc[root.l_edge:root.r_edge + 1] elif self.doc.is_sentenced:
elif self.doc.is_sentenced: # Find start of the sentence
# Find start of the sentence start = self.start
start = self.start while self.doc.c[start].sent_start != 1 and start > 0:
while self.doc.c[start].sent_start != 1 and start > 0: start += -1
start += -1 # Find end of the sentence
# Find end of the sentence end = self.end
end = self.end n = 0
n = 0 while end < self.doc.length and self.doc.c[end].sent_start != 1:
while end < self.doc.length and self.doc.c[end].sent_start != 1: end += 1
end += 1 n += 1
n += 1 if n >= self.doc.length:
if n >= self.doc.length: break
break return self.doc[start:end]
return self.doc[start:end]
property ents: @property
def ents(self):
"""The named entities in the span. Returns a tuple of named entity """The named entities in the span. Returns a tuple of named entity
`Span` objects, if the entity recognizer has been applied. `Span` objects, if the entity recognizer has been applied.
@ -369,14 +370,14 @@ cdef class Span:
DOCS: https://spacy.io/api/span#ents DOCS: https://spacy.io/api/span#ents
""" """
def __get__(self): ents = []
ents = [] for ent in self.doc.ents:
for ent in self.doc.ents: if ent.start >= self.start and ent.end <= self.end:
if ent.start >= self.start and ent.end <= self.end: ents.append(ent)
ents.append(ent) return ents
return ents
property has_vector: @property
def has_vector(self):
"""A boolean value indicating whether a word vector is associated with """A boolean value indicating whether a word vector is associated with
the object. the object.
@ -384,17 +385,17 @@ cdef class Span:
DOCS: https://spacy.io/api/span#has_vector DOCS: https://spacy.io/api/span#has_vector
""" """
def __get__(self): if "has_vector" in self.doc.user_span_hooks:
if "has_vector" in self.doc.user_span_hooks: return self.doc.user_span_hooks["has_vector"](self)
return self.doc.user_span_hooks["has_vector"](self) elif self.vocab.vectors.data.size > 0:
elif self.vocab.vectors.data.size > 0: return any(token.has_vector for token in self)
return any(token.has_vector for token in self) elif self.doc.tensor.size > 0:
elif self.doc.tensor.size > 0: return True
return True else:
else: return False
return False
property vector: @property
def vector(self):
"""A real-valued meaning representation. Defaults to an average of the """A real-valued meaning representation. Defaults to an average of the
token vectors. token vectors.
@ -403,61 +404,61 @@ cdef class Span:
DOCS: https://spacy.io/api/span#vector DOCS: https://spacy.io/api/span#vector
""" """
def __get__(self): if "vector" in self.doc.user_span_hooks:
if "vector" in self.doc.user_span_hooks: return self.doc.user_span_hooks["vector"](self)
return self.doc.user_span_hooks["vector"](self) if self._vector is None:
if self._vector is None: self._vector = sum(t.vector for t in self) / len(self)
self._vector = sum(t.vector for t in self) / len(self) return self._vector
return self._vector
property vector_norm: @property
def vector_norm(self):
"""The L2 norm of the span's vector representation. """The L2 norm of the span's vector representation.
RETURNS (float): The L2 norm of the vector representation. RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/span#vector_norm DOCS: https://spacy.io/api/span#vector_norm
""" """
def __get__(self): if "vector_norm" in self.doc.user_span_hooks:
if "vector_norm" in self.doc.user_span_hooks: return self.doc.user_span_hooks["vector"](self)
return self.doc.user_span_hooks["vector"](self) cdef float value
cdef float value cdef double norm = 0
cdef double norm = 0 if self._vector_norm is None:
if self._vector_norm is None: norm = 0
norm = 0 for value in self.vector:
for value in self.vector: norm += value * value
norm += value * value self._vector_norm = sqrt(norm) if norm != 0 else 0
self._vector_norm = sqrt(norm) if norm != 0 else 0 return self._vector_norm
return self._vector_norm
property sentiment: @property
def sentiment(self):
"""RETURNS (float): A scalar value indicating the positivity or """RETURNS (float): A scalar value indicating the positivity or
negativity of the span. negativity of the span.
""" """
def __get__(self): if "sentiment" in self.doc.user_span_hooks:
if "sentiment" in self.doc.user_span_hooks: return self.doc.user_span_hooks["sentiment"](self)
return self.doc.user_span_hooks["sentiment"](self) else:
else: return sum([token.sentiment for token in self]) / len(self)
return sum([token.sentiment for token in self]) / len(self)
property text: @property
def text(self):
"""RETURNS (unicode): The original verbatim text of the span.""" """RETURNS (unicode): The original verbatim text of the span."""
def __get__(self): text = self.text_with_ws
text = self.text_with_ws if self[-1].whitespace_:
if self[-1].whitespace_: text = text[:-1]
text = text[:-1] return text
return text
property text_with_ws: @property
def text_with_ws(self):
"""The text content of the span with a trailing whitespace character if """The text content of the span with a trailing whitespace character if
the last token has one. the last token has one.
RETURNS (unicode): The text content of the span (with trailing RETURNS (unicode): The text content of the span (with trailing
whitespace). whitespace).
""" """
def __get__(self): return "".join([t.text_with_ws for t in self])
return "".join([t.text_with_ws for t in self])
property noun_chunks: @property
def noun_chunks(self):
"""Yields base noun-phrase `Span` objects, if the document has been """Yields base noun-phrase `Span` objects, if the document has been
syntactically parsed. A base noun phrase, or "NP chunk", is a noun syntactically parsed. A base noun phrase, or "NP chunk", is a noun
phrase that does not permit other NPs to be nested within it so no phrase that does not permit other NPs to be nested within it so no
@ -468,23 +469,23 @@ cdef class Span:
DOCS: https://spacy.io/api/span#noun_chunks DOCS: https://spacy.io/api/span#noun_chunks
""" """
def __get__(self): if not self.doc.is_parsed:
if not self.doc.is_parsed: raise ValueError(Errors.E029)
raise ValueError(Errors.E029) # Accumulate the result before beginning to iterate over it. This
# Accumulate the result before beginning to iterate over it. This # prevents the tokenisation from being changed out from under us
# prevents the tokenisation from being changed out from under us # during the iteration. The tricky thing here is that Span accepts
# during the iteration. The tricky thing here is that Span accepts # its tokenisation changing, so it's okay once we have the Span
# its tokenisation changing, so it's okay once we have the Span # objects. See Issue #375
# objects. See Issue #375 spans = []
spans = [] cdef attr_t label
cdef attr_t label if self.doc.noun_chunks_iterator is not None:
if self.doc.noun_chunks_iterator is not None: for start, end, label in self.doc.noun_chunks_iterator(self):
for start, end, label in self.doc.noun_chunks_iterator(self): spans.append(Span(self.doc, start, end, label=label))
spans.append(Span(self.doc, start, end, label=label)) for span in spans:
for span in spans: yield span
yield span
property root: @property
def root(self):
"""The token with the shortest path to the root of the """The token with the shortest path to the root of the
sentence (or the root itself). If multiple tokens are equally sentence (or the root itself). If multiple tokens are equally
high in the tree, the first token is taken. high in the tree, the first token is taken.
@ -493,41 +494,41 @@ cdef class Span:
DOCS: https://spacy.io/api/span#root DOCS: https://spacy.io/api/span#root
""" """
def __get__(self): self._recalculate_indices()
self._recalculate_indices() if "root" in self.doc.user_span_hooks:
if "root" in self.doc.user_span_hooks: return self.doc.user_span_hooks["root"](self)
return self.doc.user_span_hooks["root"](self) # This should probably be called 'head', and the other one called
# This should probably be called 'head', and the other one called # 'gov'. But we went with 'head' elsewhere, and now we're stuck =/
# 'gov'. But we went with 'head' elsewhere, and now we're stuck =/ cdef int i
cdef int i # First, we scan through the Span, and check whether there's a word
# First, we scan through the Span, and check whether there's a word # with head==0, i.e. a sentence root. If so, we can return it. The
# with head==0, i.e. a sentence root. If so, we can return it. The # longer the span, the more likely it contains a sentence root, and
# longer the span, the more likely it contains a sentence root, and # in this case we return in linear time.
# in this case we return in linear time. for i in range(self.start, self.end):
for i in range(self.start, self.end): if self.doc.c[i].head == 0:
if self.doc.c[i].head == 0: return self.doc[i]
return self.doc[i] # If we don't have a sentence root, we do something that's not so
# If we don't have a sentence root, we do something that's not so # algorithmically clever, but I think should be quite fast,
# algorithmically clever, but I think should be quite fast, # especially for short spans.
# especially for short spans. # For each word, we count the path length, and arg min this measure.
# For each word, we count the path length, and arg min this measure. # We could use better tree logic to save steps here...But I
# We could use better tree logic to save steps here...But I # think this should be okay.
# think this should be okay. cdef int current_best = self.doc.length
cdef int current_best = self.doc.length cdef int root = -1
cdef int root = -1 for i in range(self.start, self.end):
for i in range(self.start, self.end): if self.start <= (i+self.doc.c[i].head) < self.end:
if self.start <= (i+self.doc.c[i].head) < self.end: continue
continue words_to_root = _count_words_to_root(&self.doc.c[i], self.doc.length)
words_to_root = _count_words_to_root(&self.doc.c[i], self.doc.length) if words_to_root < current_best:
if words_to_root < current_best: current_best = words_to_root
current_best = words_to_root root = i
root = i if root == -1:
if root == -1: return self.doc[self.start]
return self.doc[self.start] else:
else: return self.doc[root]
return self.doc[root]
property lefts: @property
def lefts(self):
"""Tokens that are to the left of the span, whose head is within the """Tokens that are to the left of the span, whose head is within the
`Span`. `Span`.
@ -535,13 +536,13 @@ cdef class Span:
DOCS: https://spacy.io/api/span#lefts DOCS: https://spacy.io/api/span#lefts
""" """
def __get__(self): for token in reversed(self): # Reverse, so we get tokens in order
for token in reversed(self): # Reverse, so we get tokens in order for left in token.lefts:
for left in token.lefts: if left.i < self.start:
if left.i < self.start: yield left
yield left
property rights: @property
def rights(self):
"""Tokens that are to the right of the Span, whose head is within the """Tokens that are to the right of the Span, whose head is within the
`Span`. `Span`.
@ -549,13 +550,13 @@ cdef class Span:
DOCS: https://spacy.io/api/span#rights DOCS: https://spacy.io/api/span#rights
""" """
def __get__(self): for token in self:
for token in self: for right in token.rights:
for right in token.rights: if right.i >= self.end:
if right.i >= self.end: yield right
yield right
property n_lefts: @property
def n_lefts(self):
"""The number of tokens that are to the left of the span, whose """The number of tokens that are to the left of the span, whose
heads are within the span. heads are within the span.
@ -564,10 +565,10 @@ cdef class Span:
DOCS: https://spacy.io/api/span#n_lefts DOCS: https://spacy.io/api/span#n_lefts
""" """
def __get__(self): return len(list(self.lefts))
return len(list(self.lefts))
property n_rights: @property
def n_rights(self):
"""The number of tokens that are to the right of the span, whose """The number of tokens that are to the right of the span, whose
heads are within the span. heads are within the span.
@ -576,22 +577,21 @@ cdef class Span:
DOCS: https://spacy.io/api/span#n_rights DOCS: https://spacy.io/api/span#n_rights
""" """
def __get__(self): return len(list(self.rights))
return len(list(self.rights))
property subtree: @property
def subtree(self):
"""Tokens within the span and tokens which descend from them. """Tokens within the span and tokens which descend from them.
YIELDS (Token): A token within the span, or a descendant from it. YIELDS (Token): A token within the span, or a descendant from it.
DOCS: https://spacy.io/api/span#subtree DOCS: https://spacy.io/api/span#subtree
""" """
def __get__(self): for word in self.lefts:
for word in self.lefts: yield from word.subtree
yield from word.subtree yield from self
yield from self for word in self.rights:
for word in self.rights: yield from word.subtree
yield from word.subtree
property ent_id: property ent_id:
"""RETURNS (uint64): The entity ID.""" """RETURNS (uint64): The entity ID."""
@ -609,33 +609,33 @@ cdef class Span:
def __set__(self, hash_t key): def __set__(self, hash_t key):
raise NotImplementedError(TempErrors.T007.format(attr="ent_id_")) raise NotImplementedError(TempErrors.T007.format(attr="ent_id_"))
property orth_: @property
def orth_(self):
"""Verbatim text content (identical to `Span.text`). Exists mostly for """Verbatim text content (identical to `Span.text`). Exists mostly for
consistency with other attributes. consistency with other attributes.
RETURNS (unicode): The span's text.""" RETURNS (unicode): The span's text."""
def __get__(self): return self.text
return self.text
property lemma_: @property
def lemma_(self):
"""RETURNS (unicode): The span's lemma.""" """RETURNS (unicode): The span's lemma."""
def __get__(self): return " ".join([t.lemma_ for t in self]).strip()
return " ".join([t.lemma_ for t in self]).strip()
property upper_: @property
def upper_(self):
"""Deprecated. Use `Span.text.upper()` instead.""" """Deprecated. Use `Span.text.upper()` instead."""
def __get__(self): return "".join([t.text_with_ws.upper() for t in self]).strip()
return "".join([t.text_with_ws.upper() for t in self]).strip()
property lower_: @property
def lower_(self):
"""Deprecated. Use `Span.text.lower()` instead.""" """Deprecated. Use `Span.text.lower()` instead."""
def __get__(self): return "".join([t.text_with_ws.lower() for t in self]).strip()
return "".join([t.text_with_ws.lower() for t in self]).strip()
property string: @property
def string(self):
"""Deprecated: Use `Span.text_with_ws` instead.""" """Deprecated: Use `Span.text_with_ws` instead."""
def __get__(self): return "".join([t.text_with_ws for t in self])
return "".join([t.text_with_ws for t in self])
property label_: property label_:
"""RETURNS (unicode): The span's label.""" """RETURNS (unicode): The span's label."""

View File

@ -218,111 +218,111 @@ cdef class Token:
xp = get_array_module(vector) xp = get_array_module(vector)
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)) return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
property lex_id: @property
def lex_id(self):
"""RETURNS (int): Sequential ID of the token's lexical type.""" """RETURNS (int): Sequential ID of the token's lexical type."""
def __get__(self): return self.c.lex.id
return self.c.lex.id
property rank: @property
def rank(self):
"""RETURNS (int): Sequential ID of the token's lexical type, used to """RETURNS (int): Sequential ID of the token's lexical type, used to
index into tables, e.g. for word vectors.""" index into tables, e.g. for word vectors."""
def __get__(self): return self.c.lex.id
return self.c.lex.id
property string: @property
def string(self):
"""Deprecated: Use Token.text_with_ws instead.""" """Deprecated: Use Token.text_with_ws instead."""
def __get__(self): return self.text_with_ws
return self.text_with_ws
property text: @property
def text(self):
"""RETURNS (unicode): The original verbatim text of the token.""" """RETURNS (unicode): The original verbatim text of the token."""
def __get__(self): return self.orth_
return self.orth_
property text_with_ws: @property
def text_with_ws(self):
"""RETURNS (unicode): The text content of the span (with trailing """RETURNS (unicode): The text content of the span (with trailing
whitespace). whitespace).
""" """
def __get__(self): cdef unicode orth = self.vocab.strings[self.c.lex.orth]
cdef unicode orth = self.vocab.strings[self.c.lex.orth] if self.c.spacy:
if self.c.spacy: return orth + " "
return orth + " " else:
else: return orth
return orth
property prob: @property
def prob(self):
"""RETURNS (float): Smoothed log probability estimate of token type.""" """RETURNS (float): Smoothed log probability estimate of token type."""
def __get__(self): return self.c.lex.prob
return self.c.lex.prob
property sentiment: @property
def sentiment(self):
"""RETURNS (float): A scalar value indicating the positivity or """RETURNS (float): A scalar value indicating the positivity or
negativity of the token.""" negativity of the token."""
def __get__(self): if "sentiment" in self.doc.user_token_hooks:
if "sentiment" in self.doc.user_token_hooks: return self.doc.user_token_hooks["sentiment"](self)
return self.doc.user_token_hooks["sentiment"](self) return self.c.lex.sentiment
return self.c.lex.sentiment
property lang: @property
def lang(self):
"""RETURNS (uint64): ID of the language of the parent document's """RETURNS (uint64): ID of the language of the parent document's
vocabulary. vocabulary.
""" """
def __get__(self): return self.c.lex.lang
return self.c.lex.lang
property idx: @property
def idx(self):
"""RETURNS (int): The character offset of the token within the parent """RETURNS (int): The character offset of the token within the parent
document. document.
""" """
def __get__(self): return self.c.idx
return self.c.idx
property cluster: @property
def cluster(self):
"""RETURNS (int): Brown cluster ID.""" """RETURNS (int): Brown cluster ID."""
def __get__(self): return self.c.lex.cluster
return self.c.lex.cluster
property orth: @property
def orth(self):
"""RETURNS (uint64): ID of the verbatim text content.""" """RETURNS (uint64): ID of the verbatim text content."""
def __get__(self): return self.c.lex.orth
return self.c.lex.orth
property lower: @property
def lower(self):
"""RETURNS (uint64): ID of the lowercase token text.""" """RETURNS (uint64): ID of the lowercase token text."""
def __get__(self): return self.c.lex.lower
return self.c.lex.lower
property norm: @property
def norm(self):
"""RETURNS (uint64): ID of the token's norm, i.e. a normalised form of """RETURNS (uint64): ID of the token's norm, i.e. a normalised form of
the token text. Usually set in the language's tokenizer exceptions the token text. Usually set in the language's tokenizer exceptions
or norm exceptions. or norm exceptions.
""" """
def __get__(self): if self.c.norm == 0:
if self.c.norm == 0: return self.c.lex.norm
return self.c.lex.norm else:
else: return self.c.norm
return self.c.norm
property shape: @property
def shape(self):
"""RETURNS (uint64): ID of the token's shape, a transform of the """RETURNS (uint64): ID of the token's shape, a transform of the
tokens's string, to show orthographic features (e.g. "Xxxx", "dd"). tokens's string, to show orthographic features (e.g. "Xxxx", "dd").
""" """
def __get__(self): return self.c.lex.shape
return self.c.lex.shape
property prefix: @property
def prefix(self):
"""RETURNS (uint64): ID of a length-N substring from the start of the """RETURNS (uint64): ID of a length-N substring from the start of the
token. Defaults to `N=1`. token. Defaults to `N=1`.
""" """
def __get__(self): return self.c.lex.prefix
return self.c.lex.prefix
property suffix: @property
def suffix(self):
"""RETURNS (uint64): ID of a length-N substring from the end of the """RETURNS (uint64): ID of a length-N substring from the end of the
token. Defaults to `N=3`. token. Defaults to `N=3`.
""" """
def __get__(self): return self.c.lex.suffix
return self.c.lex.suffix
property lemma: property lemma:
"""RETURNS (uint64): ID of the base form of the word, with no """RETURNS (uint64): ID of the base form of the word, with no
@ -362,7 +362,8 @@ cdef class Token:
def __set__(self, attr_t label): def __set__(self, attr_t label):
self.c.dep = label self.c.dep = label
property has_vector: @property
def has_vector(self):
"""A boolean value indicating whether a word vector is associated with """A boolean value indicating whether a word vector is associated with
the object. the object.
@ -370,14 +371,14 @@ cdef class Token:
DOCS: https://spacy.io/api/token#has_vector DOCS: https://spacy.io/api/token#has_vector
""" """
def __get__(self): if "has_vector" in self.doc.user_token_hooks:
if 'has_vector' in self.doc.user_token_hooks: return self.doc.user_token_hooks["has_vector"](self)
return self.doc.user_token_hooks["has_vector"](self) if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0: return True
return True return self.vocab.has_vector(self.c.lex.orth)
return self.vocab.has_vector(self.c.lex.orth)
property vector: @property
def vector(self):
"""A real-valued meaning representation. """A real-valued meaning representation.
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
@ -385,28 +386,28 @@ cdef class Token:
DOCS: https://spacy.io/api/token#vector DOCS: https://spacy.io/api/token#vector
""" """
def __get__(self): if "vector" in self.doc.user_token_hooks:
if 'vector' in self.doc.user_token_hooks: return self.doc.user_token_hooks["vector"](self)
return self.doc.user_token_hooks["vector"](self) if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0: return self.doc.tensor[self.i]
return self.doc.tensor[self.i] else:
else: return self.vocab.get_vector(self.c.lex.orth)
return self.vocab.get_vector(self.c.lex.orth)
property vector_norm: @property
def vector_norm(self):
"""The L2 norm of the token's vector representation. """The L2 norm of the token's vector representation.
RETURNS (float): The L2 norm of the vector representation. RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/token#vector_norm DOCS: https://spacy.io/api/token#vector_norm
""" """
def __get__(self): if "vector_norm" in self.doc.user_token_hooks:
if 'vector_norm' in self.doc.user_token_hooks: return self.doc.user_token_hooks["vector_norm"](self)
return self.doc.user_token_hooks["vector_norm"](self) vector = self.vector
vector = self.vector return numpy.sqrt((vector ** 2).sum())
return numpy.sqrt((vector ** 2).sum())
property n_lefts: @property
def n_lefts(self):
"""The number of leftward immediate children of the word, in the """The number of leftward immediate children of the word, in the
syntactic dependency parse. syntactic dependency parse.
@ -415,10 +416,10 @@ cdef class Token:
DOCS: https://spacy.io/api/token#n_lefts DOCS: https://spacy.io/api/token#n_lefts
""" """
def __get__(self): return self.c.l_kids
return self.c.l_kids
property n_rights: @property
def n_rights(self):
"""The number of rightward immediate children of the word, in the """The number of rightward immediate children of the word, in the
syntactic dependency parse. syntactic dependency parse.
@ -427,15 +428,14 @@ cdef class Token:
DOCS: https://spacy.io/api/token#n_rights DOCS: https://spacy.io/api/token#n_rights
""" """
def __get__(self): return self.c.r_kids
return self.c.r_kids
property sent: @property
def sent(self):
"""RETURNS (Span): The sentence span that the token is a part of.""" """RETURNS (Span): The sentence span that the token is a part of."""
def __get__(self): if 'sent' in self.doc.user_token_hooks:
if 'sent' in self.doc.user_token_hooks: return self.doc.user_token_hooks["sent"](self)
return self.doc.user_token_hooks["sent"](self) return self.doc[self.i : self.i+1].sent
return self.doc[self.i : self.i+1].sent
property sent_start: property sent_start:
def __get__(self): def __get__(self):
@ -479,7 +479,8 @@ cdef class Token:
else: else:
raise ValueError(Errors.E044.format(value=value)) raise ValueError(Errors.E044.format(value=value))
property lefts: @property
def lefts(self):
"""The leftward immediate children of the word, in the syntactic """The leftward immediate children of the word, in the syntactic
dependency parse. dependency parse.
@ -487,19 +488,19 @@ cdef class Token:
DOCS: https://spacy.io/api/token#lefts DOCS: https://spacy.io/api/token#lefts
""" """
def __get__(self): cdef int nr_iter = 0
cdef int nr_iter = 0 cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge) while ptr < self.c:
while ptr < self.c: if ptr + ptr.head == self.c:
if ptr + ptr.head == self.c: yield self.doc[ptr - (self.c - self.i)]
yield self.doc[ptr - (self.c - self.i)] ptr += 1
ptr += 1 nr_iter += 1
nr_iter += 1 # This is ugly, but it's a way to guard out infinite loops
# This is ugly, but it's a way to guard out infinite loops if nr_iter >= 10000000:
if nr_iter >= 10000000: raise RuntimeError(Errors.E045.format(attr="token.lefts"))
raise RuntimeError(Errors.E045.format(attr="token.lefts"))
property rights: @property
def rights(self):
"""The rightward immediate children of the word, in the syntactic """The rightward immediate children of the word, in the syntactic
dependency parse. dependency parse.
@ -507,33 +508,33 @@ cdef class Token:
DOCS: https://spacy.io/api/token#rights DOCS: https://spacy.io/api/token#rights
""" """
def __get__(self): cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i) tokens = []
tokens = [] cdef int nr_iter = 0
cdef int nr_iter = 0 while ptr > self.c:
while ptr > self.c: if ptr + ptr.head == self.c:
if ptr + ptr.head == self.c: tokens.append(self.doc[ptr - (self.c - self.i)])
tokens.append(self.doc[ptr - (self.c - self.i)]) ptr -= 1
ptr -= 1 nr_iter += 1
nr_iter += 1 if nr_iter >= 10000000:
if nr_iter >= 10000000: raise RuntimeError(Errors.E045.format(attr="token.rights"))
raise RuntimeError(Errors.E045.format(attr="token.rights")) tokens.reverse()
tokens.reverse() for t in tokens:
for t in tokens: yield t
yield t
property children: @property
def children(self):
"""A sequence of the token's immediate syntactic children. """A sequence of the token's immediate syntactic children.
YIELDS (Token): A child token such that `child.head==self`. YIELDS (Token): A child token such that `child.head==self`.
DOCS: https://spacy.io/api/token#children DOCS: https://spacy.io/api/token#children
""" """
def __get__(self): yield from self.lefts
yield from self.lefts yield from self.rights
yield from self.rights
property subtree: @property
def subtree(self):
"""A sequence containing the token and all the token's syntactic """A sequence containing the token and all the token's syntactic
descendants. descendants.
@ -542,30 +543,30 @@ cdef class Token:
DOCS: https://spacy.io/api/token#subtree DOCS: https://spacy.io/api/token#subtree
""" """
def __get__(self): for word in self.lefts:
for word in self.lefts: yield from word.subtree
yield from word.subtree yield self
yield self for word in self.rights:
for word in self.rights: yield from word.subtree
yield from word.subtree
property left_edge: @property
def left_edge(self):
"""The leftmost token of this token's syntactic descendants. """The leftmost token of this token's syntactic descendants.
RETURNS (Token): The first token such that `self.is_ancestor(token)`. RETURNS (Token): The first token such that `self.is_ancestor(token)`.
""" """
def __get__(self): return self.doc[self.c.l_edge]
return self.doc[self.c.l_edge]
property right_edge: @property
def right_edge(self):
"""The rightmost token of this token's syntactic descendants. """The rightmost token of this token's syntactic descendants.
RETURNS (Token): The last token such that `self.is_ancestor(token)`. RETURNS (Token): The last token such that `self.is_ancestor(token)`.
""" """
def __get__(self): return self.doc[self.c.r_edge]
return self.doc[self.c.r_edge]
property ancestors: @property
def ancestors(self):
"""A sequence of this token's syntactic ancestors. """A sequence of this token's syntactic ancestors.
YIELDS (Token): A sequence of ancestor tokens such that YIELDS (Token): A sequence of ancestor tokens such that
@ -573,15 +574,14 @@ cdef class Token:
DOCS: https://spacy.io/api/token#ancestors DOCS: https://spacy.io/api/token#ancestors
""" """
def __get__(self): cdef const TokenC* head_ptr = self.c
cdef const TokenC* head_ptr = self.c # Guard against infinite loop, no token can have
# Guard against infinite loop, no token can have # more ancestors than tokens in the tree.
# more ancestors than tokens in the tree. cdef int i = 0
cdef int i = 0 while head_ptr.head != 0 and i < self.doc.length:
while head_ptr.head != 0 and i < self.doc.length: head_ptr += head_ptr.head
head_ptr += head_ptr.head yield self.doc[head_ptr - (self.c - self.i)]
yield self.doc[head_ptr - (self.c - self.i)] i += 1
i += 1
def is_ancestor(self, descendant): def is_ancestor(self, descendant):
"""Check whether this token is a parent, grandparent, etc. of another """Check whether this token is a parent, grandparent, etc. of another
@ -685,23 +685,23 @@ cdef class Token:
# Set new head # Set new head
self.c.head = rel_newhead_i self.c.head = rel_newhead_i
property conjuncts: @property
def conjuncts(self):
"""A sequence of coordinated tokens, not including the token itself. """A sequence of coordinated tokens, not including the token itself.
YIELDS (Token): A coordinated token. YIELDS (Token): A coordinated token.
DOCS: https://spacy.io/api/token#conjuncts DOCS: https://spacy.io/api/token#conjuncts
""" """
def __get__(self): cdef Token word
cdef Token word if "conjuncts" in self.doc.user_token_hooks:
if "conjuncts" in self.doc.user_token_hooks: yield from self.doc.user_token_hooks["conjuncts"](self)
yield from self.doc.user_token_hooks["conjuncts"](self) else:
else: if self.dep != conj:
if self.dep != conj: for word in self.rights:
for word in self.rights: if word.dep == conj:
if word.dep == conj: yield word
yield word yield from word.conjuncts
yield from word.conjuncts
property ent_type: property ent_type:
"""RETURNS (uint64): Named entity type.""" """RETURNS (uint64): Named entity type."""
@ -711,15 +711,6 @@ cdef class Token:
def __set__(self, ent_type): def __set__(self, ent_type):
self.c.ent_type = ent_type self.c.ent_type = ent_type
property ent_iob:
"""IOB code of named entity tag. `1="I", 2="O", 3="B"`. 0 means no tag
is assigned.
RETURNS (uint64): IOB code of named entity tag.
"""
def __get__(self):
return self.c.ent_iob
property ent_type_: property ent_type_:
"""RETURNS (unicode): Named entity type.""" """RETURNS (unicode): Named entity type."""
def __get__(self): def __get__(self):
@ -728,16 +719,25 @@ cdef class Token:
def __set__(self, ent_type): def __set__(self, ent_type):
self.c.ent_type = self.vocab.strings.add(ent_type) self.c.ent_type = self.vocab.strings.add(ent_type)
property ent_iob_: @property
def ent_iob(self):
"""IOB code of named entity tag. `1="I", 2="O", 3="B"`. 0 means no tag
is assigned.
RETURNS (uint64): IOB code of named entity tag.
"""
return self.c.ent_iob
@property
def ent_iob_(self):
"""IOB code of named entity tag. "B" means the token begins an entity, """IOB code of named entity tag. "B" means the token begins an entity,
"I" means it is inside an entity, "O" means it is outside an entity, "I" means it is inside an entity, "O" means it is outside an entity,
and "" means no entity tag is set. and "" means no entity tag is set.
RETURNS (unicode): IOB code of named entity tag. RETURNS (unicode): IOB code of named entity tag.
""" """
def __get__(self): iob_strings = ("", "I", "O", "B")
iob_strings = ("", "I", "O", "B") return iob_strings[self.c.ent_iob]
return iob_strings[self.c.ent_iob]
property ent_id: property ent_id:
"""RETURNS (uint64): ID of the entity the token is an instance of, """RETURNS (uint64): ID of the entity the token is an instance of,
@ -759,26 +759,25 @@ cdef class Token:
def __set__(self, name): def __set__(self, name):
self.c.ent_id = self.vocab.strings.add(name) self.c.ent_id = self.vocab.strings.add(name)
property whitespace_: @property
"""RETURNS (unicode): The trailing whitespace character, if present. def whitespace_(self):
""" """RETURNS (unicode): The trailing whitespace character, if present."""
def __get__(self): return " " if self.c.spacy else ""
return " " if self.c.spacy else ""
property orth_: @property
def orth_(self):
"""RETURNS (unicode): Verbatim text content (identical to """RETURNS (unicode): Verbatim text content (identical to
`Token.text`). Exists mostly for consistency with the other `Token.text`). Exists mostly for consistency with the other
attributes. attributes.
""" """
def __get__(self): return self.vocab.strings[self.c.lex.orth]
return self.vocab.strings[self.c.lex.orth]
property lower_: @property
def lower_(self):
"""RETURNS (unicode): The lowercase token text. Equivalent to """RETURNS (unicode): The lowercase token text. Equivalent to
`Token.text.lower()`. `Token.text.lower()`.
""" """
def __get__(self): return self.vocab.strings[self.c.lex.lower]
return self.vocab.strings[self.c.lex.lower]
property norm_: property norm_:
"""RETURNS (unicode): The token's norm, i.e. a normalised form of the """RETURNS (unicode): The token's norm, i.e. a normalised form of the
@ -791,33 +790,33 @@ cdef class Token:
def __set__(self, unicode norm_): def __set__(self, unicode norm_):
self.c.norm = self.vocab.strings.add(norm_) self.c.norm = self.vocab.strings.add(norm_)
property shape_: @property
def shape_(self):
"""RETURNS (unicode): Transform of the token's string, to show """RETURNS (unicode): Transform of the token's string, to show
orthographic features. For example, "Xxxx" or "dd". orthographic features. For example, "Xxxx" or "dd".
""" """
def __get__(self): return self.vocab.strings[self.c.lex.shape]
return self.vocab.strings[self.c.lex.shape]
property prefix_: @property
def prefix_(self):
"""RETURNS (unicode): A length-N substring from the start of the token. """RETURNS (unicode): A length-N substring from the start of the token.
Defaults to `N=1`. Defaults to `N=1`.
""" """
def __get__(self): return self.vocab.strings[self.c.lex.prefix]
return self.vocab.strings[self.c.lex.prefix]
property suffix_: @property
def suffix_(self):
"""RETURNS (unicode): A length-N substring from the end of the token. """RETURNS (unicode): A length-N substring from the end of the token.
Defaults to `N=3`. Defaults to `N=3`.
""" """
def __get__(self): return self.vocab.strings[self.c.lex.suffix]
return self.vocab.strings[self.c.lex.suffix]
property lang_: @property
def lang_(self):
"""RETURNS (unicode): Language of the parent document's vocabulary, """RETURNS (unicode): Language of the parent document's vocabulary,
e.g. 'en'. e.g. 'en'.
""" """
def __get__(self): return self.vocab.strings[self.c.lex.lang]
return self.vocab.strings[self.c.lex.lang]
property lemma_: property lemma_:
"""RETURNS (unicode): The token lemma, i.e. the base form of the word, """RETURNS (unicode): The token lemma, i.e. the base form of the word,
@ -856,110 +855,110 @@ cdef class Token:
def __set__(self, unicode label): def __set__(self, unicode label):
self.c.dep = self.vocab.strings.add(label) self.c.dep = self.vocab.strings.add(label)
property is_oov: @property
def is_oov(self):
"""RETURNS (bool): Whether the token is out-of-vocabulary.""" """RETURNS (bool): Whether the token is out-of-vocabulary."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_OOV)
return Lexeme.c_check_flag(self.c.lex, IS_OOV)
property is_stop: @property
def is_stop(self):
"""RETURNS (bool): Whether the token is a stop word, i.e. part of a """RETURNS (bool): Whether the token is a stop word, i.e. part of a
"stop list" defined by the language data. "stop list" defined by the language data.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_STOP)
return Lexeme.c_check_flag(self.c.lex, IS_STOP)
property is_alpha: @property
def is_alpha(self):
"""RETURNS (bool): Whether the token consists of alpha characters. """RETURNS (bool): Whether the token consists of alpha characters.
Equivalent to `token.text.isalpha()`. Equivalent to `token.text.isalpha()`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)
return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)
property is_ascii: @property
def is_ascii(self):
"""RETURNS (bool): Whether the token consists of ASCII characters. """RETURNS (bool): Whether the token consists of ASCII characters.
Equivalent to `all(ord(c) < 128 for c in token.text)`. Equivalent to `all(ord(c) < 128 for c in token.text)`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_ASCII)
return Lexeme.c_check_flag(self.c.lex, IS_ASCII)
property is_digit: @property
def is_digit(self):
"""RETURNS (bool): Whether the token consists of digits. Equivalent to """RETURNS (bool): Whether the token consists of digits. Equivalent to
`token.text.isdigit()`. `token.text.isdigit()`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)
return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)
property is_lower: @property
def is_lower(self):
"""RETURNS (bool): Whether the token is in lowercase. Equivalent to """RETURNS (bool): Whether the token is in lowercase. Equivalent to
`token.text.islower()`. `token.text.islower()`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LOWER)
return Lexeme.c_check_flag(self.c.lex, IS_LOWER)
property is_upper: @property
def is_upper(self):
"""RETURNS (bool): Whether the token is in uppercase. Equivalent to """RETURNS (bool): Whether the token is in uppercase. Equivalent to
`token.text.isupper()`. `token.text.isupper()`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_UPPER)
return Lexeme.c_check_flag(self.c.lex, IS_UPPER)
property is_title: @property
def is_title(self):
"""RETURNS (bool): Whether the token is in titlecase. Equivalent to """RETURNS (bool): Whether the token is in titlecase. Equivalent to
`token.text.istitle()`. `token.text.istitle()`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_TITLE)
return Lexeme.c_check_flag(self.c.lex, IS_TITLE)
property is_punct: @property
def is_punct(self):
"""RETURNS (bool): Whether the token is punctuation.""" """RETURNS (bool): Whether the token is punctuation."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)
return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)
property is_space: @property
def is_space(self):
"""RETURNS (bool): Whether the token consists of whitespace characters. """RETURNS (bool): Whether the token consists of whitespace characters.
Equivalent to `token.text.isspace()`. Equivalent to `token.text.isspace()`.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_SPACE)
return Lexeme.c_check_flag(self.c.lex, IS_SPACE)
property is_bracket: @property
def is_bracket(self):
"""RETURNS (bool): Whether the token is a bracket.""" """RETURNS (bool): Whether the token is a bracket."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_BRACKET)
return Lexeme.c_check_flag(self.c.lex, IS_BRACKET)
property is_quote: @property
def is_quote(self):
"""RETURNS (bool): Whether the token is a quotation mark.""" """RETURNS (bool): Whether the token is a quotation mark."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_QUOTE)
return Lexeme.c_check_flag(self.c.lex, IS_QUOTE)
property is_left_punct: @property
def is_left_punct(self):
"""RETURNS (bool): Whether the token is a left punctuation mark.""" """RETURNS (bool): Whether the token is a left punctuation mark."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT)
return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT)
property is_right_punct: @property
def is_right_punct(self):
"""RETURNS (bool): Whether the token is a right punctuation mark.""" """RETURNS (bool): Whether the token is a right punctuation mark."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT)
return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT)
property is_currency: @property
def is_currency(self):
"""RETURNS (bool): Whether the token is a currency symbol.""" """RETURNS (bool): Whether the token is a currency symbol."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_CURRENCY)
return Lexeme.c_check_flag(self.c.lex, IS_CURRENCY)
property like_url: @property
def like_url(self):
"""RETURNS (bool): Whether the token resembles a URL.""" """RETURNS (bool): Whether the token resembles a URL."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_URL)
return Lexeme.c_check_flag(self.c.lex, LIKE_URL)
property like_num: @property
def like_num(self):
"""RETURNS (bool): Whether the token resembles a number, e.g. "10.9", """RETURNS (bool): Whether the token resembles a number, e.g. "10.9",
"10", "ten", etc. "10", "ten", etc.
""" """
def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)
return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)
property like_email: @property
def like_email(self):
"""RETURNS (bool): Whether the token resembles an email address.""" """RETURNS (bool): Whether the token resembles an email address."""
def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)
return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)

View File

@ -60,12 +60,12 @@ cdef class Vocab:
self.morphology = Morphology(self.strings, tag_map, lemmatizer) self.morphology = Morphology(self.strings, tag_map, lemmatizer)
self.vectors = Vectors() self.vectors = Vectors()
property lang: @property
def __get__(self): def lang(self):
langfunc = None langfunc = None
if self.lex_attr_getters: if self.lex_attr_getters:
langfunc = self.lex_attr_getters.get(LANG, None) langfunc = self.lex_attr_getters.get(LANG, None)
return langfunc("_") if langfunc else "" return langfunc("_") if langfunc else ""
property writing_system: property writing_system:
"""A dict with information about the language's writing system. To get """A dict with information about the language's writing system. To get