Convert properties to decorator syntax (#13390)

This commit is contained in:
Joe Schiff 2024-04-16 05:51:14 -04:00 committed by GitHub
parent f5e85fa05a
commit 2e96797696
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 665 additions and 584 deletions

View File

@@ -164,41 +164,44 @@ cdef class Lexeme:
vector = self.vector
return numpy.sqrt((vector**2).sum())
property vector:
@property
def vector(self):
"""A real-valued meaning representation.
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the lexeme's semantics.
"""
def __get__(self):
cdef int length = self.vocab.vectors_length
if length == 0:
raise ValueError(Errors.E010)
return self.vocab.get_vector(self.c.orth)
def __set__(self, vector):
@vector.setter
def vector(self, vector):
if len(vector) != self.vocab.vectors_length:
raise ValueError(Errors.E073.format(new_length=len(vector),
length=self.vocab.vectors_length))
self.vocab.set_vector(self.c.orth, vector)
property rank:
@property
def rank(self):
"""RETURNS (str): Sequential ID of the lexeme's lexical type, used
to index into tables, e.g. for word vectors."""
def __get__(self):
return self.c.id
def __set__(self, value):
@rank.setter
def rank(self, value):
self.c.id = value
property sentiment:
@property
def sentiment(self):
"""RETURNS (float): A scalar value indicating the positivity or
negativity of the lexeme."""
def __get__(self):
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
return sentiment_table.get(self.c.orth, 0.0)
def __set__(self, float x):
@sentiment.setter
def sentiment(self, float x):
if "lexeme_sentiment" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_sentiment")
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
@@ -216,151 +219,166 @@ cdef class Lexeme:
"""RETURNS (str): The original verbatim text of the lexeme."""
return self.orth_
property lower:
@property
def lower(self):
"""RETURNS (uint64): Lowercase form of the lexeme."""
def __get__(self):
return self.c.lower
def __set__(self, attr_t x):
@lower.setter
def lower(self, attr_t x):
self.c.lower = x
property norm:
@property
def norm(self):
"""RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the
lexeme text.
"""
def __get__(self):
return self.c.norm
def __set__(self, attr_t x):
@norm.setter
def norm(self, attr_t x):
if "lexeme_norm" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_norm")
norm_table = self.vocab.lookups.get_table("lexeme_norm")
norm_table[self.c.orth] = self.vocab.strings[x]
self.c.norm = x
property shape:
@property
def shape(self):
"""RETURNS (uint64): Transform of the word's string, to show
orthographic features.
"""
def __get__(self):
return self.c.shape
def __set__(self, attr_t x):
@shape.setter
def shape(self, attr_t x):
self.c.shape = x
property prefix:
@property
def prefix(self):
"""RETURNS (uint64): Length-N substring from the start of the word.
Defaults to `N=1`.
"""
def __get__(self):
return self.c.prefix
def __set__(self, attr_t x):
@prefix.setter
def prefix(self, attr_t x):
self.c.prefix = x
property suffix:
@property
def suffix(self):
"""RETURNS (uint64): Length-N substring from the end of the word.
Defaults to `N=3`.
"""
def __get__(self):
return self.c.suffix
def __set__(self, attr_t x):
@suffix.setter
def suffix(self, attr_t x):
self.c.suffix = x
property cluster:
@property
def cluster(self):
"""RETURNS (int): Brown cluster ID."""
def __get__(self):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
return cluster_table.get(self.c.orth, 0)
def __set__(self, int x):
@cluster.setter
def cluster(self, int x):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
cluster_table[self.c.orth] = x
property lang:
@property
def lang(self):
"""RETURNS (uint64): Language of the parent vocabulary."""
def __get__(self):
return self.c.lang
def __set__(self, attr_t x):
@lang.setter
def lang(self, attr_t x):
self.c.lang = x
property prob:
@property
def prob(self):
"""RETURNS (float): Smoothed log probability estimate of the lexeme's
type."""
def __get__(self):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
default_oov_prob = settings_table.get("oov_prob", -20.0)
return prob_table.get(self.c.orth, default_oov_prob)
def __set__(self, float x):
@prob.setter
def prob(self, float x):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
prob_table[self.c.orth] = x
property lower_:
@property
def lower_(self):
"""RETURNS (str): Lowercase form of the word."""
def __get__(self):
return self.vocab.strings[self.c.lower]
def __set__(self, str x):
@lower_.setter
def lower_(self, str x):
self.c.lower = self.vocab.strings.add(x)
property norm_:
@property
def norm_(self):
"""RETURNS (str): The lexeme's norm, i.e. a normalised form of the
lexeme text.
"""
def __get__(self):
return self.vocab.strings[self.c.norm]
def __set__(self, str x):
@norm_.setter
def norm_(self, str x):
self.norm = self.vocab.strings.add(x)
property shape_:
@property
def shape_(self):
"""RETURNS (str): Transform of the word's string, to show
orthographic features.
"""
def __get__(self):
return self.vocab.strings[self.c.shape]
def __set__(self, str x):
@shape_.setter
def shape_(self, str x):
self.c.shape = self.vocab.strings.add(x)
property prefix_:
@property
def prefix_(self):
"""RETURNS (str): Length-N substring from the start of the word.
Defaults to `N=1`.
"""
def __get__(self):
return self.vocab.strings[self.c.prefix]
def __set__(self, str x):
@prefix_.setter
def prefix_(self, str x):
self.c.prefix = self.vocab.strings.add(x)
property suffix_:
@property
def suffix_(self):
"""RETURNS (str): Length-N substring from the end of the word.
Defaults to `N=3`.
"""
def __get__(self):
return self.vocab.strings[self.c.suffix]
def __set__(self, str x):
@suffix_.setter
def suffix_(self, str x):
self.c.suffix = self.vocab.strings.add(x)
property lang_:
@property
def lang_(self):
"""RETURNS (str): Language of the parent vocabulary."""
def __get__(self):
return self.vocab.strings[self.c.lang]
def __set__(self, str x):
@lang_.setter
def lang_(self, str x):
self.c.lang = self.vocab.strings.add(x)
property flags:
@property
def flags(self):
"""RETURNS (uint64): Container of the lexeme's binary flags."""
def __get__(self):
return self.c.flags
def __set__(self, flags_t x):
@flags.setter
def flags(self, flags_t x):
self.c.flags = x
@property
@@ -368,154 +386,171 @@ cdef class Lexeme:
"""RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
return self.orth not in self.vocab.vectors
property is_stop:
@property
def is_stop(self):
"""RETURNS (bool): Whether the lexeme is a stop word."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_STOP)
def __set__(self, bint x):
@is_stop.setter
def is_stop(self, bint x):
Lexeme.c_set_flag(self.c, IS_STOP, x)
property is_alpha:
@property
def is_alpha(self):
"""RETURNS (bool): Whether the lexeme consists of alphabetic
characters. Equivalent to `lexeme.text.isalpha()`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_ALPHA)
def __set__(self, bint x):
@is_alpha.setter
def is_alpha(self, bint x):
Lexeme.c_set_flag(self.c, IS_ALPHA, x)
property is_ascii:
@property
def is_ascii(self):
"""RETURNS (bool): Whether the lexeme consists of ASCII characters.
Equivalent to `[any(ord(c) >= 128 for c in lexeme.text)]`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_ASCII)
def __set__(self, bint x):
@is_ascii.setter
def is_ascii(self, bint x):
Lexeme.c_set_flag(self.c, IS_ASCII, x)
property is_digit:
@property
def is_digit(self):
"""RETURNS (bool): Whether the lexeme consists of digits. Equivalent
to `lexeme.text.isdigit()`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_DIGIT)
def __set__(self, bint x):
@is_digit.setter
def is_digit(self, bint x):
Lexeme.c_set_flag(self.c, IS_DIGIT, x)
property is_lower:
@property
def is_lower(self):
"""RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to
`lexeme.text.islower()`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_LOWER)
def __set__(self, bint x):
@is_lower.setter
def is_lower(self, bint x):
Lexeme.c_set_flag(self.c, IS_LOWER, x)
property is_upper:
@property
def is_upper(self):
"""RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to
`lexeme.text.isupper()`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_UPPER)
def __set__(self, bint x):
@is_upper.setter
def is_upper(self, bint x):
Lexeme.c_set_flag(self.c, IS_UPPER, x)
property is_title:
@property
def is_title(self):
"""RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to
`lexeme.text.istitle()`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_TITLE)
def __set__(self, bint x):
@is_title.setter
def is_title(self, bint x):
Lexeme.c_set_flag(self.c, IS_TITLE, x)
property is_punct:
@property
def is_punct(self):
"""RETURNS (bool): Whether the lexeme is punctuation."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_PUNCT)
def __set__(self, bint x):
@is_punct.setter
def is_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_PUNCT, x)
property is_space:
@property
def is_space(self):
"""RETURNS (bool): Whether the lexeme consist of whitespace characters.
Equivalent to `lexeme.text.isspace()`.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_SPACE)
def __set__(self, bint x):
@is_space.setter
def is_space(self, bint x):
Lexeme.c_set_flag(self.c, IS_SPACE, x)
property is_bracket:
@property
def is_bracket(self):
"""RETURNS (bool): Whether the lexeme is a bracket."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_BRACKET)
def __set__(self, bint x):
@is_bracket.setter
def is_bracket(self, bint x):
Lexeme.c_set_flag(self.c, IS_BRACKET, x)
property is_quote:
@property
def is_quote(self):
"""RETURNS (bool): Whether the lexeme is a quotation mark."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_QUOTE)
def __set__(self, bint x):
@is_quote.setter
def is_quote(self, bint x):
Lexeme.c_set_flag(self.c, IS_QUOTE, x)
property is_left_punct:
@property
def is_left_punct(self):
"""RETURNS (bool): Whether the lexeme is left punctuation, e.g. (."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
def __set__(self, bint x):
@is_left_punct.setter
def is_left_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
property is_right_punct:
@property
def is_right_punct(self):
"""RETURNS (bool): Whether the lexeme is right punctuation, e.g. )."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
def __set__(self, bint x):
@is_right_punct.setter
def is_right_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
property is_currency:
@property
def is_currency(self):
"""RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_CURRENCY)
def __set__(self, bint x):
@is_currency.setter
def is_currency(self, bint x):
Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
property like_url:
@property
def like_url(self):
"""RETURNS (bool): Whether the lexeme resembles a URL."""
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_URL)
def __set__(self, bint x):
@like_url.setter
def like_url(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_URL, x)
property like_num:
@property
def like_num(self):
"""RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9",
"10", "ten", etc.
"""
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_NUM)
def __set__(self, bint x):
@like_num.setter
def like_num(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_NUM, x)
property like_email:
@property
def like_email(self):
"""RETURNS (bool): Whether the lexeme resembles an email address."""
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
def __set__(self, bint x):
@like_email.setter
def like_email(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)

View File

@@ -70,51 +70,57 @@ cdef class Tokenizer:
self._special_matcher = PhraseMatcher(self.vocab)
self._load_special_cases(rules)
property token_match:
def __get__(self):
@property
def token_match(self):
return self._token_match
def __set__(self, token_match):
@token_match.setter
def token_match(self, token_match):
self._token_match = token_match
self._reload_special_cases()
property url_match:
def __get__(self):
@property
def url_match(self):
return self._url_match
def __set__(self, url_match):
@url_match.setter
def url_match(self, url_match):
self._url_match = url_match
self._reload_special_cases()
property prefix_search:
def __get__(self):
@property
def prefix_search(self):
return self._prefix_search
def __set__(self, prefix_search):
@prefix_search.setter
def prefix_search(self, prefix_search):
self._prefix_search = prefix_search
self._reload_special_cases()
property suffix_search:
def __get__(self):
@property
def suffix_search(self):
return self._suffix_search
def __set__(self, suffix_search):
@suffix_search.setter
def suffix_search(self, suffix_search):
self._suffix_search = suffix_search
self._reload_special_cases()
property infix_finditer:
def __get__(self):
@property
def infix_finditer(self):
return self._infix_finditer
def __set__(self, infix_finditer):
@infix_finditer.setter
def infix_finditer(self, infix_finditer):
self._infix_finditer = infix_finditer
self._reload_special_cases()
property rules:
def __get__(self):
@property
def rules(self):
return self._rules
def __set__(self, rules):
@rules.setter
def rules(self, rules):
self._rules = {}
self._flush_cache()
self._flush_specials()
@@ -122,11 +128,12 @@ cdef class Tokenizer:
self._specials = PreshMap()
self._load_special_cases(rules)
property faster_heuristics:
def __get__(self):
@property
def faster_heuristics(self):
return bool(self._faster_heuristics)
def __set__(self, faster_heuristics):
@faster_heuristics.setter
def faster_heuristics(self, faster_heuristics):
self._faster_heuristics = bool(faster_heuristics)
self._reload_special_cases()

View File

@@ -667,7 +667,8 @@ cdef class Doc:
else:
return False
property vector:
@property
def vector(self):
"""A real-valued meaning representation. Defaults to an average of the
token vectors.
@@ -676,7 +677,6 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#vector
"""
def __get__(self):
if "vector" in self.user_hooks:
return self.user_hooks["vector"](self)
if self._vector is not None:
@@ -694,17 +694,18 @@ cdef class Doc:
else:
return xp.zeros((self.vocab.vectors_length,), dtype="float32")
def __set__(self, value):
@vector.setter
def vector(self, value):
self._vector = value
property vector_norm:
@property
def vector_norm(self):
"""The L2 norm of the document's vector representation.
RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/doc#vector_norm
"""
def __get__(self):
if "vector_norm" in self.user_hooks:
return self.user_hooks["vector_norm"](self)
cdef float value
@@ -716,7 +717,8 @@ cdef class Doc:
self._vector_norm = sqrt(norm) if norm != 0 else 0
return self._vector_norm
def __set__(self, value):
@vector_norm.setter
def vector_norm(self, value):
self._vector_norm = value
@property
@@ -736,7 +738,8 @@ cdef class Doc:
"""
return self.text
property ents:
@property
def ents(self):
"""The named entities in the document. Returns a tuple of named entity
`Span` objects, if the entity recognizer has been applied.
@@ -744,7 +747,6 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#ents
"""
def __get__(self):
cdef int i
cdef const TokenC* token
cdef int start = -1
@@ -779,7 +781,8 @@ cdef class Doc:
output = [o for o in output if o.label_ != ""]
return tuple(output)
def __set__(self, ents):
@ents.setter
def ents(self, ents):
# TODO:
# 1. Test basic data-driven ORTH gazetteer
# 2. Test more nuanced date and currency regex

View File

@@ -757,77 +757,86 @@ cdef class Span:
for word in self.rights:
yield from word.subtree
property start:
def __get__(self):
@property
def start(self):
return self.c.start
def __set__(self, int start):
@start.setter
def start(self, int start):
if start < 0:
raise IndexError(Errors.E1032.format(var="start", forbidden="< 0", value=start))
self.c.start = start
property end:
def __get__(self):
@property
def end(self):
return self.c.end
def __set__(self, int end):
@end.setter
def end(self, int end):
if end < 0:
raise IndexError(Errors.E1032.format(var="end", forbidden="< 0", value=end))
self.c.end = end
property start_char:
def __get__(self):
@property
def start_char(self):
return self.c.start_char
def __set__(self, int start_char):
@start_char.setter
def start_char(self, int start_char):
if start_char < 0:
raise IndexError(Errors.E1032.format(var="start_char", forbidden="< 0", value=start_char))
self.c.start_char = start_char
property end_char:
def __get__(self):
@property
def end_char(self):
return self.c.end_char
def __set__(self, int end_char):
@end_char.setter
def end_char(self, int end_char):
if end_char < 0:
raise IndexError(Errors.E1032.format(var="end_char", forbidden="< 0", value=end_char))
self.c.end_char = end_char
property label:
def __get__(self):
@property
def label(self):
return self.c.label
def __set__(self, attr_t label):
@label.setter
def label(self, attr_t label):
self.c.label = label
property kb_id:
def __get__(self):
@property
def kb_id(self):
return self.c.kb_id
def __set__(self, attr_t kb_id):
@kb_id.setter
def kb_id(self, attr_t kb_id):
self.c.kb_id = kb_id
property id:
def __get__(self):
@property
def id(self):
return self.c.id
def __set__(self, attr_t id):
@id.setter
def id(self, attr_t id):
self.c.id = id
property ent_id:
@property
def ent_id(self):
"""RETURNS (uint64): The entity ID."""
def __get__(self):
return self.root.ent_id
def __set__(self, hash_t key):
@ent_id.setter
def ent_id(self, hash_t key):
raise NotImplementedError(Errors.E200.format(attr="ent_id"))
property ent_id_:
@property
def ent_id_(self):
"""RETURNS (str): The (string) entity ID."""
def __get__(self):
return self.root.ent_id_
def __set__(self, str key):
@ent_id_.setter
def ent_id_(self, str key):
raise NotImplementedError(Errors.E200.format(attr="ent_id_"))
@property
@@ -843,28 +852,31 @@ cdef class Span:
"""RETURNS (str): The span's lemma."""
return "".join([t.lemma_ + t.whitespace_ for t in self]).strip()
property label_:
@property
def label_(self):
"""RETURNS (str): The span's label."""
def __get__(self):
return self.doc.vocab.strings[self.label]
def __set__(self, str label_):
@label_.setter
def label_(self, str label_):
self.label = self.doc.vocab.strings.add(label_)
property kb_id_:
@property
def kb_id_(self):
"""RETURNS (str): The span's KB ID."""
def __get__(self):
return self.doc.vocab.strings[self.kb_id]
def __set__(self, str kb_id_):
@kb_id_.setter
def kb_id_(self, str kb_id_):
self.kb_id = self.doc.vocab.strings.add(kb_id_)
property id_:
@property
def id_(self):
"""RETURNS (str): The span's ID."""
def __get__(self):
return self.doc.vocab.strings[self.id]
def __set__(self, str id_):
@id_.setter
def id_(self, str id_):
self.id = self.doc.vocab.strings.add(id_)

View File

@@ -249,11 +249,12 @@ cdef class Token:
"""
return not self.c.morph == 0
property morph:
def __get__(self):
@property
def morph(self):
return MorphAnalysis.from_id(self.vocab, self.c.morph)
def __set__(self, MorphAnalysis morph):
@morph.setter
def morph(self, MorphAnalysis morph):
# Check that the morph has the same vocab
if self.vocab != morph.vocab:
raise ValueError(Errors.E1013)
@@ -377,38 +378,42 @@ cdef class Token:
"""
return self.c.lex.suffix
property lemma:
@property
def lemma(self):
"""RETURNS (uint64): ID of the base form of the word, with no
inflectional suffixes.
"""
def __get__(self):
return self.c.lemma
def __set__(self, attr_t lemma):
@lemma.setter
def lemma(self, attr_t lemma):
self.c.lemma = lemma
property pos:
@property
def pos(self):
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
def __get__(self):
return self.c.pos
def __set__(self, pos):
@pos.setter
def pos(self, pos):
self.c.pos = pos
property tag:
@property
def tag(self):
"""RETURNS (uint64): ID of fine-grained part-of-speech tag."""
def __get__(self):
return self.c.tag
def __set__(self, attr_t tag):
@tag.setter
def tag(self, attr_t tag):
self.c.tag = tag
property dep:
@property
def dep(self):
"""RETURNS (uint64): ID of syntactic dependency label."""
def __get__(self):
return self.c.dep
def __set__(self, attr_t label):
@dep.setter
def dep(self, attr_t label):
self.c.dep = label
@property
@@ -494,8 +499,8 @@ cdef class Token:
return self.doc.user_token_hooks["sent"](self)
return self.doc[self.i : self.i+1].sent
property sent_start:
def __get__(self):
@property
def sent_start(self):
"""Deprecated: use Token.is_sent_start instead."""
# Raising a deprecation warning here causes errors for autocomplete
# Handle broken backwards compatibility case: doc[0].sent_start
@@ -505,17 +510,18 @@ cdef class Token:
else:
return self.c.sent_start
def __set__(self, value):
@sent_start.setter
def sent_start(self, value):
self.is_sent_start = value
property is_sent_start:
@property
def is_sent_start(self):
"""A boolean value indicating whether the token starts a sentence.
`None` if unknown. Defaults to `True` for the first token in the `Doc`.
RETURNS (bool / None): Whether the token starts a sentence.
None if unknown.
"""
def __get__(self):
if self.c.sent_start == 0:
return None
elif self.c.sent_start < 0:
@@ -523,7 +529,8 @@ cdef class Token:
else:
return True
def __set__(self, value):
@is_sent_start.setter
def is_sent_start(self, value):
if self.doc.has_annotation("DEP"):
raise ValueError(Errors.E043)
if value is None:
@@ -535,7 +542,8 @@ cdef class Token:
else:
raise ValueError(Errors.E044.format(value=value))
property is_sent_end:
@property
def is_sent_end(self):
"""A boolean value indicating whether the token ends a sentence.
`None` if unknown. Defaults to `True` for the last token in the `Doc`.
@@ -544,7 +552,6 @@ cdef class Token:
DOCS: https://spacy.io/api/token#is_sent_end
"""
def __get__(self):
if self.i + 1 == len(self.doc):
return True
elif self.doc[self.i+1].is_sent_start is None:
@@ -554,7 +561,8 @@ cdef class Token:
else:
return False
def __set__(self, value):
@is_sent_end.setter
def is_sent_end(self, value):
raise ValueError(Errors.E196)
@property
@@ -682,20 +690,21 @@ cdef class Token:
"""
return not Token.missing_head(self.c)
property head:
@property
def head(self):
"""The syntactic parent, or "governor", of this token.
If token.has_head() is `False`, this method will return itself.
RETURNS (Token): The token predicted by the parser to be the head of
the current token.
"""
def __get__(self):
if not self.has_head():
return self
else:
return self.doc[self.i + self.c.head]
def __set__(self, Token new_head):
@head.setter
def head(self, Token new_head):
# This function sets the head of self to new_head and updates the
# counters for left/right dependents and left/right corner for the
# new and the old head
@@ -744,20 +753,22 @@ cdef class Token:
queue.append(child)
return tuple([w for w in output if w.i != self.i])
property ent_type:
@property
def ent_type(self):
"""RETURNS (uint64): Named entity type."""
def __get__(self):
return self.c.ent_type
def __set__(self, ent_type):
@ent_type.setter
def ent_type(self, ent_type):
self.c.ent_type = ent_type
property ent_type_:
@property
def ent_type_(self):
"""RETURNS (str): Named entity type."""
def __get__(self):
return self.vocab.strings[self.c.ent_type]
def __set__(self, ent_type):
@ent_type_.setter
def ent_type_(self, ent_type):
self.c.ent_type = self.vocab.strings.add(ent_type)
@property
@@ -784,40 +795,44 @@ cdef class Token:
"""
return self.iob_strings()[self.c.ent_iob]
property ent_id:
@property
def ent_id(self):
"""RETURNS (uint64): ID of the entity the token is an instance of,
if any.
"""
def __get__(self):
return self.c.ent_id
def __set__(self, hash_t key):
@ent_id.setter
def ent_id(self, hash_t key):
self.c.ent_id = key
property ent_id_:
@property
def ent_id_(self):
"""RETURNS (str): ID of the entity the token is an instance of,
if any.
"""
def __get__(self):
return self.vocab.strings[self.c.ent_id]
def __set__(self, name):
@ent_id_.setter
def ent_id_(self, name):
self.c.ent_id = self.vocab.strings.add(name)
property ent_kb_id:
@property
def ent_kb_id(self):
"""RETURNS (uint64): Named entity KB ID."""
def __get__(self):
return self.c.ent_kb_id
def __set__(self, attr_t ent_kb_id):
@ent_kb_id.setter
def ent_kb_id(self, attr_t ent_kb_id):
self.c.ent_kb_id = ent_kb_id
property ent_kb_id_:
@property
def ent_kb_id_(self):
"""RETURNS (str): Named entity KB ID."""
def __get__(self):
return self.vocab.strings[self.c.ent_kb_id]
def __set__(self, ent_kb_id):
@ent_kb_id_.setter
def ent_kb_id_(self, ent_kb_id):
self.c.ent_kb_id = self.vocab.strings.add(ent_kb_id)
@property
@@ -840,15 +855,16 @@ cdef class Token:
"""
return self.vocab.strings[self.c.lex.lower]
property norm_:
@property
def norm_(self):
"""RETURNS (str): The token's norm, i.e. a normalised form of the
token text. Usually set in the language's tokenizer exceptions or
norm exceptions.
"""
def __get__(self):
return self.vocab.strings[self.norm]
def __set__(self, str norm_):
@norm_.setter
def norm_(self, str norm_):
self.c.norm = self.vocab.strings.add(norm_)
@property
@@ -879,32 +895,35 @@ cdef class Token:
"""
return self.vocab.strings[self.c.lex.lang]
property lemma_:
@property
def lemma_(self):
"""RETURNS (str): The token lemma, i.e. the base form of the word,
with no inflectional suffixes.
"""
def __get__(self):
return self.vocab.strings[self.c.lemma]
def __set__(self, str lemma_):
@lemma_.setter
def lemma_(self, str lemma_):
self.c.lemma = self.vocab.strings.add(lemma_)
property pos_:
@property
def pos_(self):
"""RETURNS (str): Coarse-grained part-of-speech tag."""
def __get__(self):
return parts_of_speech.NAMES[self.c.pos]
def __set__(self, pos_name):
@pos_.setter
def pos_(self, pos_name):
if pos_name not in parts_of_speech.IDS:
raise ValueError(Errors.E1021.format(pp=pos_name))
self.c.pos = parts_of_speech.IDS[pos_name]
property tag_:
@property
def tag_(self):
"""RETURNS (str): Fine-grained part-of-speech tag."""
def __get__(self):
return self.vocab.strings[self.c.tag]
def __set__(self, tag):
@tag_.setter
def tag_(self, tag):
self.tag = self.vocab.strings.add(tag)
def has_dep(self):
@@ -915,12 +934,13 @@ cdef class Token:
"""
return not Token.missing_dep(self.c)
property dep_:
@property
def dep_(self):
"""RETURNS (str): The syntactic dependency label."""
def __get__(self):
return self.vocab.strings[self.c.dep]
def __set__(self, str label):
@dep_.setter
def dep_(self, str label):
self.c.dep = self.vocab.strings.add(label)
@property

View File

@@ -88,20 +88,22 @@ cdef class Example:
def __len__(self):
return len(self.predicted)
property predicted:
def __get__(self):
@property
def predicted(self):
return self.x
def __set__(self, doc):
@predicted.setter
def predicted(self, doc):
self.x = doc
self._cached_alignment = None
self._cached_words_x = [t.text for t in doc]
property reference:
def __get__(self):
@property
def reference(self):
return self.y
def __set__(self, doc):
@reference.setter
def reference(self, doc):
self.y = doc
self._cached_alignment = None
self._cached_words_y = [t.text for t in doc]
@@ -420,8 +422,8 @@ cdef class Example:
seen_indices.update(indices)
return output
property text:
def __get__(self):
@property
def text(self):
return self.x.text
def __str__(self):

View File

@@ -88,11 +88,12 @@ cdef class Vocab:
self.writing_system = writing_system
self.get_noun_chunks = get_noun_chunks
property vectors:
def __get__(self):
@property
def vectors(self):
return self._vectors
def __set__(self, vectors):
@vectors.setter
def vectors(self, vectors):
if hasattr(vectors, "strings"):
for s in vectors.strings:
self.strings.add(s)
@@ -464,11 +465,12 @@ cdef class Vocab:
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
return key in self.vectors
property lookups:
def __get__(self):
@property
def lookups(self):
return self._lookups
def __set__(self, lookups):
@lookups.setter
def lookups(self, lookups):
self._lookups = lookups
if lookups.has_table("lexeme_norm"):
self.lex_attr_getters[NORM] = util.add_lookups(