Convert properties to decorator syntax (#13390)

This commit is contained in:
Joe Schiff 2024-04-16 05:51:14 -04:00 committed by GitHub
parent f5e85fa05a
commit 2e96797696
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 665 additions and 584 deletions

View File

@ -164,41 +164,44 @@ cdef class Lexeme:
vector = self.vector vector = self.vector
return numpy.sqrt((vector**2).sum()) return numpy.sqrt((vector**2).sum())
property vector: @property
def vector(self):
"""A real-valued meaning representation. """A real-valued meaning representation.
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the lexeme's semantics. representing the lexeme's semantics.
""" """
def __get__(self):
cdef int length = self.vocab.vectors_length cdef int length = self.vocab.vectors_length
if length == 0: if length == 0:
raise ValueError(Errors.E010) raise ValueError(Errors.E010)
return self.vocab.get_vector(self.c.orth) return self.vocab.get_vector(self.c.orth)
def __set__(self, vector): @vector.setter
def vector(self, vector):
if len(vector) != self.vocab.vectors_length: if len(vector) != self.vocab.vectors_length:
raise ValueError(Errors.E073.format(new_length=len(vector), raise ValueError(Errors.E073.format(new_length=len(vector),
length=self.vocab.vectors_length)) length=self.vocab.vectors_length))
self.vocab.set_vector(self.c.orth, vector) self.vocab.set_vector(self.c.orth, vector)
property rank: @property
def rank(self):
"""RETURNS (str): Sequential ID of the lexeme's lexical type, used """RETURNS (str): Sequential ID of the lexeme's lexical type, used
to index into tables, e.g. for word vectors.""" to index into tables, e.g. for word vectors."""
def __get__(self):
return self.c.id return self.c.id
def __set__(self, value): @rank.setter
def rank(self, value):
self.c.id = value self.c.id = value
property sentiment: @property
def sentiment(self):
"""RETURNS (float): A scalar value indicating the positivity or """RETURNS (float): A scalar value indicating the positivity or
negativity of the lexeme.""" negativity of the lexeme."""
def __get__(self):
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {}) sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
return sentiment_table.get(self.c.orth, 0.0) return sentiment_table.get(self.c.orth, 0.0)
def __set__(self, float x): @sentiment.setter
def sentiment(self, float x):
if "lexeme_sentiment" not in self.vocab.lookups: if "lexeme_sentiment" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_sentiment") self.vocab.lookups.add_table("lexeme_sentiment")
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment") sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
@ -216,151 +219,166 @@ cdef class Lexeme:
"""RETURNS (str): The original verbatim text of the lexeme.""" """RETURNS (str): The original verbatim text of the lexeme."""
return self.orth_ return self.orth_
property lower: @property
def lower(self):
"""RETURNS (uint64): Lowercase form of the lexeme.""" """RETURNS (uint64): Lowercase form of the lexeme."""
def __get__(self):
return self.c.lower return self.c.lower
def __set__(self, attr_t x): @lower.setter
def lower(self, attr_t x):
self.c.lower = x self.c.lower = x
property norm: @property
def norm(self):
"""RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the """RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the
lexeme text. lexeme text.
""" """
def __get__(self):
return self.c.norm return self.c.norm
def __set__(self, attr_t x): @norm.setter
def norm(self, attr_t x):
if "lexeme_norm" not in self.vocab.lookups: if "lexeme_norm" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_norm") self.vocab.lookups.add_table("lexeme_norm")
norm_table = self.vocab.lookups.get_table("lexeme_norm") norm_table = self.vocab.lookups.get_table("lexeme_norm")
norm_table[self.c.orth] = self.vocab.strings[x] norm_table[self.c.orth] = self.vocab.strings[x]
self.c.norm = x self.c.norm = x
property shape: @property
def shape(self):
"""RETURNS (uint64): Transform of the word's string, to show """RETURNS (uint64): Transform of the word's string, to show
orthographic features. orthographic features.
""" """
def __get__(self):
return self.c.shape return self.c.shape
def __set__(self, attr_t x): @shape.setter
def shape(self, attr_t x):
self.c.shape = x self.c.shape = x
property prefix: @property
def prefix(self):
"""RETURNS (uint64): Length-N substring from the start of the word. """RETURNS (uint64): Length-N substring from the start of the word.
Defaults to `N=1`. Defaults to `N=1`.
""" """
def __get__(self):
return self.c.prefix return self.c.prefix
def __set__(self, attr_t x): @prefix.setter
def prefix(self, attr_t x):
self.c.prefix = x self.c.prefix = x
property suffix: @property
def suffix(self):
"""RETURNS (uint64): Length-N substring from the end of the word. """RETURNS (uint64): Length-N substring from the end of the word.
Defaults to `N=3`. Defaults to `N=3`.
""" """
def __get__(self):
return self.c.suffix return self.c.suffix
def __set__(self, attr_t x): @suffix.setter
def suffix(self, attr_t x):
self.c.suffix = x self.c.suffix = x
property cluster: @property
def cluster(self):
"""RETURNS (int): Brown cluster ID.""" """RETURNS (int): Brown cluster ID."""
def __get__(self):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {}) cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
return cluster_table.get(self.c.orth, 0) return cluster_table.get(self.c.orth, 0)
def __set__(self, int x): @cluster.setter
def cluster(self, int x):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {}) cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
cluster_table[self.c.orth] = x cluster_table[self.c.orth] = x
property lang: @property
def lang(self):
"""RETURNS (uint64): Language of the parent vocabulary.""" """RETURNS (uint64): Language of the parent vocabulary."""
def __get__(self):
return self.c.lang return self.c.lang
def __set__(self, attr_t x): @lang.setter
def lang(self, attr_t x):
self.c.lang = x self.c.lang = x
property prob: @property
def prob(self):
"""RETURNS (float): Smoothed log probability estimate of the lexeme's """RETURNS (float): Smoothed log probability estimate of the lexeme's
type.""" type."""
def __get__(self):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {}) prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
settings_table = self.vocab.lookups.get_table("lexeme_settings", {}) settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
default_oov_prob = settings_table.get("oov_prob", -20.0) default_oov_prob = settings_table.get("oov_prob", -20.0)
return prob_table.get(self.c.orth, default_oov_prob) return prob_table.get(self.c.orth, default_oov_prob)
def __set__(self, float x): @prob.setter
def prob(self, float x):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {}) prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
prob_table[self.c.orth] = x prob_table[self.c.orth] = x
property lower_: @property
def lower_(self):
"""RETURNS (str): Lowercase form of the word.""" """RETURNS (str): Lowercase form of the word."""
def __get__(self):
return self.vocab.strings[self.c.lower] return self.vocab.strings[self.c.lower]
def __set__(self, str x): @lower_.setter
def lower_(self, str x):
self.c.lower = self.vocab.strings.add(x) self.c.lower = self.vocab.strings.add(x)
property norm_: @property
def norm_(self):
"""RETURNS (str): The lexeme's norm, i.e. a normalised form of the """RETURNS (str): The lexeme's norm, i.e. a normalised form of the
lexeme text. lexeme text.
""" """
def __get__(self):
return self.vocab.strings[self.c.norm] return self.vocab.strings[self.c.norm]
def __set__(self, str x): @norm_.setter
def norm_(self, str x):
self.norm = self.vocab.strings.add(x) self.norm = self.vocab.strings.add(x)
property shape_: @property
def shape_(self):
"""RETURNS (str): Transform of the word's string, to show """RETURNS (str): Transform of the word's string, to show
orthographic features. orthographic features.
""" """
def __get__(self):
return self.vocab.strings[self.c.shape] return self.vocab.strings[self.c.shape]
def __set__(self, str x): @shape_.setter
def shape_(self, str x):
self.c.shape = self.vocab.strings.add(x) self.c.shape = self.vocab.strings.add(x)
property prefix_: @property
def prefix_(self):
"""RETURNS (str): Length-N substring from the start of the word. """RETURNS (str): Length-N substring from the start of the word.
Defaults to `N=1`. Defaults to `N=1`.
""" """
def __get__(self):
return self.vocab.strings[self.c.prefix] return self.vocab.strings[self.c.prefix]
def __set__(self, str x): @prefix_.setter
def prefix_(self, str x):
self.c.prefix = self.vocab.strings.add(x) self.c.prefix = self.vocab.strings.add(x)
property suffix_: @property
def suffix_(self):
"""RETURNS (str): Length-N substring from the end of the word. """RETURNS (str): Length-N substring from the end of the word.
Defaults to `N=3`. Defaults to `N=3`.
""" """
def __get__(self):
return self.vocab.strings[self.c.suffix] return self.vocab.strings[self.c.suffix]
def __set__(self, str x): @suffix_.setter
def suffix_(self, str x):
self.c.suffix = self.vocab.strings.add(x) self.c.suffix = self.vocab.strings.add(x)
property lang_: @property
def lang_(self):
"""RETURNS (str): Language of the parent vocabulary.""" """RETURNS (str): Language of the parent vocabulary."""
def __get__(self):
return self.vocab.strings[self.c.lang] return self.vocab.strings[self.c.lang]
def __set__(self, str x): @lang_.setter
def lang_(self, str x):
self.c.lang = self.vocab.strings.add(x) self.c.lang = self.vocab.strings.add(x)
property flags: @property
def flags(self):
"""RETURNS (uint64): Container of the lexeme's binary flags.""" """RETURNS (uint64): Container of the lexeme's binary flags."""
def __get__(self):
return self.c.flags return self.c.flags
def __set__(self, flags_t x): @flags.setter
def flags(self, flags_t x):
self.c.flags = x self.c.flags = x
@property @property
@ -368,154 +386,171 @@ cdef class Lexeme:
"""RETURNS (bool): Whether the lexeme is out-of-vocabulary.""" """RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
return self.orth not in self.vocab.vectors return self.orth not in self.vocab.vectors
property is_stop: @property
def is_stop(self):
"""RETURNS (bool): Whether the lexeme is a stop word.""" """RETURNS (bool): Whether the lexeme is a stop word."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_STOP) return Lexeme.c_check_flag(self.c, IS_STOP)
def __set__(self, bint x): @is_stop.setter
def is_stop(self, bint x):
Lexeme.c_set_flag(self.c, IS_STOP, x) Lexeme.c_set_flag(self.c, IS_STOP, x)
property is_alpha: @property
def is_alpha(self):
"""RETURNS (bool): Whether the lexeme consists of alphabetic """RETURNS (bool): Whether the lexeme consists of alphabetic
characters. Equivalent to `lexeme.text.isalpha()`. characters. Equivalent to `lexeme.text.isalpha()`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_ALPHA) return Lexeme.c_check_flag(self.c, IS_ALPHA)
def __set__(self, bint x): @is_alpha.setter
def is_alpha(self, bint x):
Lexeme.c_set_flag(self.c, IS_ALPHA, x) Lexeme.c_set_flag(self.c, IS_ALPHA, x)
property is_ascii: @property
def is_ascii(self):
"""RETURNS (bool): Whether the lexeme consists of ASCII characters. """RETURNS (bool): Whether the lexeme consists of ASCII characters.
Equivalent to `[any(ord(c) >= 128 for c in lexeme.text)]`. Equivalent to `[any(ord(c) >= 128 for c in lexeme.text)]`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_ASCII) return Lexeme.c_check_flag(self.c, IS_ASCII)
def __set__(self, bint x): @is_ascii.setter
def is_ascii(self, bint x):
Lexeme.c_set_flag(self.c, IS_ASCII, x) Lexeme.c_set_flag(self.c, IS_ASCII, x)
property is_digit: @property
def is_digit(self):
"""RETURNS (bool): Whether the lexeme consists of digits. Equivalent """RETURNS (bool): Whether the lexeme consists of digits. Equivalent
to `lexeme.text.isdigit()`. to `lexeme.text.isdigit()`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_DIGIT) return Lexeme.c_check_flag(self.c, IS_DIGIT)
def __set__(self, bint x): @is_digit.setter
def is_digit(self, bint x):
Lexeme.c_set_flag(self.c, IS_DIGIT, x) Lexeme.c_set_flag(self.c, IS_DIGIT, x)
property is_lower: @property
def is_lower(self):
"""RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to """RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to
`lexeme.text.islower()`. `lexeme.text.islower()`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_LOWER) return Lexeme.c_check_flag(self.c, IS_LOWER)
def __set__(self, bint x): @is_lower.setter
def is_lower(self, bint x):
Lexeme.c_set_flag(self.c, IS_LOWER, x) Lexeme.c_set_flag(self.c, IS_LOWER, x)
property is_upper: @property
def is_upper(self):
"""RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to """RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to
`lexeme.text.isupper()`. `lexeme.text.isupper()`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_UPPER) return Lexeme.c_check_flag(self.c, IS_UPPER)
def __set__(self, bint x): @is_upper.setter
def is_upper(self, bint x):
Lexeme.c_set_flag(self.c, IS_UPPER, x) Lexeme.c_set_flag(self.c, IS_UPPER, x)
property is_title: @property
def is_title(self):
"""RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to """RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to
`lexeme.text.istitle()`. `lexeme.text.istitle()`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_TITLE) return Lexeme.c_check_flag(self.c, IS_TITLE)
def __set__(self, bint x): @is_title.setter
def is_title(self, bint x):
Lexeme.c_set_flag(self.c, IS_TITLE, x) Lexeme.c_set_flag(self.c, IS_TITLE, x)
property is_punct: @property
def is_punct(self):
"""RETURNS (bool): Whether the lexeme is punctuation.""" """RETURNS (bool): Whether the lexeme is punctuation."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_PUNCT) return Lexeme.c_check_flag(self.c, IS_PUNCT)
def __set__(self, bint x): @is_punct.setter
def is_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_PUNCT, x) Lexeme.c_set_flag(self.c, IS_PUNCT, x)
property is_space: @property
def is_space(self):
"""RETURNS (bool): Whether the lexeme consist of whitespace characters. """RETURNS (bool): Whether the lexeme consist of whitespace characters.
Equivalent to `lexeme.text.isspace()`. Equivalent to `lexeme.text.isspace()`.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_SPACE) return Lexeme.c_check_flag(self.c, IS_SPACE)
def __set__(self, bint x): @is_space.setter
def is_space(self, bint x):
Lexeme.c_set_flag(self.c, IS_SPACE, x) Lexeme.c_set_flag(self.c, IS_SPACE, x)
property is_bracket: @property
def is_bracket(self):
"""RETURNS (bool): Whether the lexeme is a bracket.""" """RETURNS (bool): Whether the lexeme is a bracket."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_BRACKET) return Lexeme.c_check_flag(self.c, IS_BRACKET)
def __set__(self, bint x): @is_bracket.setter
def is_bracket(self, bint x):
Lexeme.c_set_flag(self.c, IS_BRACKET, x) Lexeme.c_set_flag(self.c, IS_BRACKET, x)
property is_quote: @property
def is_quote(self):
"""RETURNS (bool): Whether the lexeme is a quotation mark.""" """RETURNS (bool): Whether the lexeme is a quotation mark."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_QUOTE) return Lexeme.c_check_flag(self.c, IS_QUOTE)
def __set__(self, bint x): @is_quote.setter
def is_quote(self, bint x):
Lexeme.c_set_flag(self.c, IS_QUOTE, x) Lexeme.c_set_flag(self.c, IS_QUOTE, x)
property is_left_punct: @property
def is_left_punct(self):
"""RETURNS (bool): Whether the lexeme is left punctuation, e.g. (.""" """RETURNS (bool): Whether the lexeme is left punctuation, e.g. (."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT) return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
def __set__(self, bint x): @is_left_punct.setter
def is_left_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x) Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
property is_right_punct: @property
def is_right_punct(self):
"""RETURNS (bool): Whether the lexeme is right punctuation, e.g. ).""" """RETURNS (bool): Whether the lexeme is right punctuation, e.g. )."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT) return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
def __set__(self, bint x): @is_right_punct.setter
def is_right_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x) Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
property is_currency: @property
def is_currency(self):
"""RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €.""" """RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €."""
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_CURRENCY) return Lexeme.c_check_flag(self.c, IS_CURRENCY)
def __set__(self, bint x): @is_currency.setter
def is_currency(self, bint x):
Lexeme.c_set_flag(self.c, IS_CURRENCY, x) Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
property like_url: @property
def like_url(self):
"""RETURNS (bool): Whether the lexeme resembles a URL.""" """RETURNS (bool): Whether the lexeme resembles a URL."""
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_URL) return Lexeme.c_check_flag(self.c, LIKE_URL)
def __set__(self, bint x): @like_url.setter
def like_url(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_URL, x) Lexeme.c_set_flag(self.c, LIKE_URL, x)
property like_num: @property
def like_num(self):
"""RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9", """RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9",
"10", "ten", etc. "10", "ten", etc.
""" """
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_NUM) return Lexeme.c_check_flag(self.c, LIKE_NUM)
def __set__(self, bint x): @like_num.setter
def like_num(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_NUM, x) Lexeme.c_set_flag(self.c, LIKE_NUM, x)
property like_email: @property
def like_email(self):
"""RETURNS (bool): Whether the lexeme resembles an email address.""" """RETURNS (bool): Whether the lexeme resembles an email address."""
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_EMAIL) return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
def __set__(self, bint x): @like_email.setter
def like_email(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_EMAIL, x) Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)

View File

@ -70,51 +70,57 @@ cdef class Tokenizer:
self._special_matcher = PhraseMatcher(self.vocab) self._special_matcher = PhraseMatcher(self.vocab)
self._load_special_cases(rules) self._load_special_cases(rules)
property token_match: @property
def __get__(self): def token_match(self):
return self._token_match return self._token_match
def __set__(self, token_match): @token_match.setter
def token_match(self, token_match):
self._token_match = token_match self._token_match = token_match
self._reload_special_cases() self._reload_special_cases()
property url_match: @property
def __get__(self): def url_match(self):
return self._url_match return self._url_match
def __set__(self, url_match): @url_match.setter
def url_match(self, url_match):
self._url_match = url_match self._url_match = url_match
self._reload_special_cases() self._reload_special_cases()
property prefix_search: @property
def __get__(self): def prefix_search(self):
return self._prefix_search return self._prefix_search
def __set__(self, prefix_search): @prefix_search.setter
def prefix_search(self, prefix_search):
self._prefix_search = prefix_search self._prefix_search = prefix_search
self._reload_special_cases() self._reload_special_cases()
property suffix_search: @property
def __get__(self): def suffix_search(self):
return self._suffix_search return self._suffix_search
def __set__(self, suffix_search): @suffix_search.setter
def suffix_search(self, suffix_search):
self._suffix_search = suffix_search self._suffix_search = suffix_search
self._reload_special_cases() self._reload_special_cases()
property infix_finditer: @property
def __get__(self): def infix_finditer(self):
return self._infix_finditer return self._infix_finditer
def __set__(self, infix_finditer): @infix_finditer.setter
def infix_finditer(self, infix_finditer):
self._infix_finditer = infix_finditer self._infix_finditer = infix_finditer
self._reload_special_cases() self._reload_special_cases()
property rules: @property
def __get__(self): def rules(self):
return self._rules return self._rules
def __set__(self, rules): @rules.setter
def rules(self, rules):
self._rules = {} self._rules = {}
self._flush_cache() self._flush_cache()
self._flush_specials() self._flush_specials()
@ -122,11 +128,12 @@ cdef class Tokenizer:
self._specials = PreshMap() self._specials = PreshMap()
self._load_special_cases(rules) self._load_special_cases(rules)
property faster_heuristics: @property
def __get__(self): def faster_heuristics(self):
return bool(self._faster_heuristics) return bool(self._faster_heuristics)
def __set__(self, faster_heuristics): @faster_heuristics.setter
def faster_heuristics(self, faster_heuristics):
self._faster_heuristics = bool(faster_heuristics) self._faster_heuristics = bool(faster_heuristics)
self._reload_special_cases() self._reload_special_cases()

View File

@ -667,7 +667,8 @@ cdef class Doc:
else: else:
return False return False
property vector: @property
def vector(self):
"""A real-valued meaning representation. Defaults to an average of the """A real-valued meaning representation. Defaults to an average of the
token vectors. token vectors.
@ -676,7 +677,6 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#vector DOCS: https://spacy.io/api/doc#vector
""" """
def __get__(self):
if "vector" in self.user_hooks: if "vector" in self.user_hooks:
return self.user_hooks["vector"](self) return self.user_hooks["vector"](self)
if self._vector is not None: if self._vector is not None:
@ -694,17 +694,18 @@ cdef class Doc:
else: else:
return xp.zeros((self.vocab.vectors_length,), dtype="float32") return xp.zeros((self.vocab.vectors_length,), dtype="float32")
def __set__(self, value): @vector.setter
def vector(self, value):
self._vector = value self._vector = value
property vector_norm: @property
def vector_norm(self):
"""The L2 norm of the document's vector representation. """The L2 norm of the document's vector representation.
RETURNS (float): The L2 norm of the vector representation. RETURNS (float): The L2 norm of the vector representation.
DOCS: https://spacy.io/api/doc#vector_norm DOCS: https://spacy.io/api/doc#vector_norm
""" """
def __get__(self):
if "vector_norm" in self.user_hooks: if "vector_norm" in self.user_hooks:
return self.user_hooks["vector_norm"](self) return self.user_hooks["vector_norm"](self)
cdef float value cdef float value
@ -716,7 +717,8 @@ cdef class Doc:
self._vector_norm = sqrt(norm) if norm != 0 else 0 self._vector_norm = sqrt(norm) if norm != 0 else 0
return self._vector_norm return self._vector_norm
def __set__(self, value): @vector_norm.setter
def vector_norm(self, value):
self._vector_norm = value self._vector_norm = value
@property @property
@ -736,7 +738,8 @@ cdef class Doc:
""" """
return self.text return self.text
property ents: @property
def ents(self):
"""The named entities in the document. Returns a tuple of named entity """The named entities in the document. Returns a tuple of named entity
`Span` objects, if the entity recognizer has been applied. `Span` objects, if the entity recognizer has been applied.
@ -744,7 +747,6 @@ cdef class Doc:
DOCS: https://spacy.io/api/doc#ents DOCS: https://spacy.io/api/doc#ents
""" """
def __get__(self):
cdef int i cdef int i
cdef const TokenC* token cdef const TokenC* token
cdef int start = -1 cdef int start = -1
@ -779,7 +781,8 @@ cdef class Doc:
output = [o for o in output if o.label_ != ""] output = [o for o in output if o.label_ != ""]
return tuple(output) return tuple(output)
def __set__(self, ents): @ents.setter
def ents(self, ents):
# TODO: # TODO:
# 1. Test basic data-driven ORTH gazetteer # 1. Test basic data-driven ORTH gazetteer
# 2. Test more nuanced date and currency regex # 2. Test more nuanced date and currency regex

View File

@ -757,77 +757,86 @@ cdef class Span:
for word in self.rights: for word in self.rights:
yield from word.subtree yield from word.subtree
property start: @property
def __get__(self): def start(self):
return self.c.start return self.c.start
def __set__(self, int start): @start.setter
def start(self, int start):
if start < 0: if start < 0:
raise IndexError(Errors.E1032.format(var="start", forbidden="< 0", value=start)) raise IndexError(Errors.E1032.format(var="start", forbidden="< 0", value=start))
self.c.start = start self.c.start = start
property end: @property
def __get__(self): def end(self):
return self.c.end return self.c.end
def __set__(self, int end): @end.setter
def end(self, int end):
if end < 0: if end < 0:
raise IndexError(Errors.E1032.format(var="end", forbidden="< 0", value=end)) raise IndexError(Errors.E1032.format(var="end", forbidden="< 0", value=end))
self.c.end = end self.c.end = end
property start_char: @property
def __get__(self): def start_char(self):
return self.c.start_char return self.c.start_char
def __set__(self, int start_char): @start_char.setter
def start_char(self, int start_char):
if start_char < 0: if start_char < 0:
raise IndexError(Errors.E1032.format(var="start_char", forbidden="< 0", value=start_char)) raise IndexError(Errors.E1032.format(var="start_char", forbidden="< 0", value=start_char))
self.c.start_char = start_char self.c.start_char = start_char
property end_char: @property
def __get__(self): def end_char(self):
return self.c.end_char return self.c.end_char
def __set__(self, int end_char): @end_char.setter
def end_char(self, int end_char):
if end_char < 0: if end_char < 0:
raise IndexError(Errors.E1032.format(var="end_char", forbidden="< 0", value=end_char)) raise IndexError(Errors.E1032.format(var="end_char", forbidden="< 0", value=end_char))
self.c.end_char = end_char self.c.end_char = end_char
property label: @property
def __get__(self): def label(self):
return self.c.label return self.c.label
def __set__(self, attr_t label): @label.setter
def label(self, attr_t label):
self.c.label = label self.c.label = label
property kb_id: @property
def __get__(self): def kb_id(self):
return self.c.kb_id return self.c.kb_id
def __set__(self, attr_t kb_id): @kb_id.setter
def kb_id(self, attr_t kb_id):
self.c.kb_id = kb_id self.c.kb_id = kb_id
property id: @property
def __get__(self): def id(self):
return self.c.id return self.c.id
def __set__(self, attr_t id): @id.setter
def id(self, attr_t id):
self.c.id = id self.c.id = id
property ent_id: @property
def ent_id(self):
"""RETURNS (uint64): The entity ID.""" """RETURNS (uint64): The entity ID."""
def __get__(self):
return self.root.ent_id return self.root.ent_id
def __set__(self, hash_t key): @ent_id.setter
def ent_id(self, hash_t key):
raise NotImplementedError(Errors.E200.format(attr="ent_id")) raise NotImplementedError(Errors.E200.format(attr="ent_id"))
property ent_id_: @property
def ent_id_(self):
"""RETURNS (str): The (string) entity ID.""" """RETURNS (str): The (string) entity ID."""
def __get__(self):
return self.root.ent_id_ return self.root.ent_id_
def __set__(self, str key): @ent_id_.setter
def ent_id_(self, str key):
raise NotImplementedError(Errors.E200.format(attr="ent_id_")) raise NotImplementedError(Errors.E200.format(attr="ent_id_"))
@property @property
@ -843,28 +852,31 @@ cdef class Span:
"""RETURNS (str): The span's lemma.""" """RETURNS (str): The span's lemma."""
return "".join([t.lemma_ + t.whitespace_ for t in self]).strip() return "".join([t.lemma_ + t.whitespace_ for t in self]).strip()
property label_: @property
def label_(self):
"""RETURNS (str): The span's label.""" """RETURNS (str): The span's label."""
def __get__(self):
return self.doc.vocab.strings[self.label] return self.doc.vocab.strings[self.label]
def __set__(self, str label_): @label_.setter
def label_(self, str label_):
self.label = self.doc.vocab.strings.add(label_) self.label = self.doc.vocab.strings.add(label_)
property kb_id_: @property
def kb_id_(self):
"""RETURNS (str): The span's KB ID.""" """RETURNS (str): The span's KB ID."""
def __get__(self):
return self.doc.vocab.strings[self.kb_id] return self.doc.vocab.strings[self.kb_id]
def __set__(self, str kb_id_): @kb_id_.setter
def kb_id_(self, str kb_id_):
self.kb_id = self.doc.vocab.strings.add(kb_id_) self.kb_id = self.doc.vocab.strings.add(kb_id_)
property id_: @property
def id_(self):
"""RETURNS (str): The span's ID.""" """RETURNS (str): The span's ID."""
def __get__(self):
return self.doc.vocab.strings[self.id] return self.doc.vocab.strings[self.id]
def __set__(self, str id_): @id_.setter
def id_(self, str id_):
self.id = self.doc.vocab.strings.add(id_) self.id = self.doc.vocab.strings.add(id_)

View File

@ -249,11 +249,12 @@ cdef class Token:
""" """
return not self.c.morph == 0 return not self.c.morph == 0
property morph: @property
def __get__(self): def morph(self):
return MorphAnalysis.from_id(self.vocab, self.c.morph) return MorphAnalysis.from_id(self.vocab, self.c.morph)
def __set__(self, MorphAnalysis morph): @morph.setter
def morph(self, MorphAnalysis morph):
# Check that the morph has the same vocab # Check that the morph has the same vocab
if self.vocab != morph.vocab: if self.vocab != morph.vocab:
raise ValueError(Errors.E1013) raise ValueError(Errors.E1013)
@ -377,38 +378,42 @@ cdef class Token:
""" """
return self.c.lex.suffix return self.c.lex.suffix
property lemma: @property
def lemma(self):
"""RETURNS (uint64): ID of the base form of the word, with no """RETURNS (uint64): ID of the base form of the word, with no
inflectional suffixes. inflectional suffixes.
""" """
def __get__(self):
return self.c.lemma return self.c.lemma
def __set__(self, attr_t lemma): @lemma.setter
def lemma(self, attr_t lemma):
self.c.lemma = lemma self.c.lemma = lemma
property pos: @property
def pos(self):
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag.""" """RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
def __get__(self):
return self.c.pos return self.c.pos
def __set__(self, pos): @pos.setter
def pos(self, pos):
self.c.pos = pos self.c.pos = pos
property tag: @property
def tag(self):
"""RETURNS (uint64): ID of fine-grained part-of-speech tag.""" """RETURNS (uint64): ID of fine-grained part-of-speech tag."""
def __get__(self):
return self.c.tag return self.c.tag
def __set__(self, attr_t tag): @tag.setter
def tag(self, attr_t tag):
self.c.tag = tag self.c.tag = tag
property dep: @property
def dep(self):
"""RETURNS (uint64): ID of syntactic dependency label.""" """RETURNS (uint64): ID of syntactic dependency label."""
def __get__(self):
return self.c.dep return self.c.dep
def __set__(self, attr_t label): @dep.setter
def dep(self, attr_t label):
self.c.dep = label self.c.dep = label
@property @property
@ -494,8 +499,8 @@ cdef class Token:
return self.doc.user_token_hooks["sent"](self) return self.doc.user_token_hooks["sent"](self)
return self.doc[self.i : self.i+1].sent return self.doc[self.i : self.i+1].sent
property sent_start: @property
def __get__(self): def sent_start(self):
"""Deprecated: use Token.is_sent_start instead.""" """Deprecated: use Token.is_sent_start instead."""
# Raising a deprecation warning here causes errors for autocomplete # Raising a deprecation warning here causes errors for autocomplete
# Handle broken backwards compatibility case: doc[0].sent_start # Handle broken backwards compatibility case: doc[0].sent_start
@ -505,17 +510,18 @@ cdef class Token:
else: else:
return self.c.sent_start return self.c.sent_start
def __set__(self, value): @sent_start.setter
def sent_start(self, value):
self.is_sent_start = value self.is_sent_start = value
property is_sent_start: @property
def is_sent_start(self):
"""A boolean value indicating whether the token starts a sentence. """A boolean value indicating whether the token starts a sentence.
`None` if unknown. Defaults to `True` for the first token in the `Doc`. `None` if unknown. Defaults to `True` for the first token in the `Doc`.
RETURNS (bool / None): Whether the token starts a sentence. RETURNS (bool / None): Whether the token starts a sentence.
None if unknown. None if unknown.
""" """
def __get__(self):
if self.c.sent_start == 0: if self.c.sent_start == 0:
return None return None
elif self.c.sent_start < 0: elif self.c.sent_start < 0:
@ -523,7 +529,8 @@ cdef class Token:
else: else:
return True return True
def __set__(self, value): @is_sent_start.setter
def is_sent_start(self, value):
if self.doc.has_annotation("DEP"): if self.doc.has_annotation("DEP"):
raise ValueError(Errors.E043) raise ValueError(Errors.E043)
if value is None: if value is None:
@ -535,7 +542,8 @@ cdef class Token:
else: else:
raise ValueError(Errors.E044.format(value=value)) raise ValueError(Errors.E044.format(value=value))
property is_sent_end: @property
def is_sent_end(self):
"""A boolean value indicating whether the token ends a sentence. """A boolean value indicating whether the token ends a sentence.
`None` if unknown. Defaults to `True` for the last token in the `Doc`. `None` if unknown. Defaults to `True` for the last token in the `Doc`.
@ -544,7 +552,6 @@ cdef class Token:
DOCS: https://spacy.io/api/token#is_sent_end DOCS: https://spacy.io/api/token#is_sent_end
""" """
def __get__(self):
if self.i + 1 == len(self.doc): if self.i + 1 == len(self.doc):
return True return True
elif self.doc[self.i+1].is_sent_start is None: elif self.doc[self.i+1].is_sent_start is None:
@ -554,7 +561,8 @@ cdef class Token:
else: else:
return False return False
@is_sent_end.setter
def is_sent_end(self, value):
    # is_sent_end is derived from the following token's is_sent_start and
    # can never be assigned directly.
    raise ValueError(Errors.E196)
@property @property
@ -682,20 +690,21 @@ cdef class Token:
""" """
return not Token.missing_head(self.c) return not Token.missing_head(self.c)
@property
def head(self):
    """The syntactic parent, or "governor", of this token.
    If token.has_head() is `False`, this method will return itself.

    RETURNS (Token): The token predicted by the parser to be the head of
        the current token.
    """
    if not self.has_head():
        # Tokens with no head annotation are their own head by convention.
        return self
    else:
        # c.head is stored as a relative offset from this token's index.
        return self.doc[self.i + self.c.head]
def __set__(self, Token new_head): @head.setter
def head(self, Token new_head):
# This function sets the head of self to new_head and updates the # This function sets the head of self to new_head and updates the
# counters for left/right dependents and left/right corner for the # counters for left/right dependents and left/right corner for the
# new and the old head # new and the old head
@ -744,20 +753,22 @@ cdef class Token:
queue.append(child) queue.append(child)
return tuple([w for w in output if w.i != self.i]) return tuple([w for w in output if w.i != self.i])
@property
def ent_type(self):
    """RETURNS (uint64): Named entity type."""
    return self.c.ent_type

@ent_type.setter
def ent_type(self, ent_type):
    # Write the entity-type ID straight to the token struct.
    self.c.ent_type = ent_type
@property
def ent_type_(self):
    """RETURNS (str): Named entity type."""
    return self.vocab.strings[self.c.ent_type]

@ent_type_.setter
def ent_type_(self, ent_type):
    # Intern the string in the vocab's string store and keep the hash ID.
    self.c.ent_type = self.vocab.strings.add(ent_type)
@property @property
@ -784,40 +795,44 @@ cdef class Token:
""" """
return self.iob_strings()[self.c.ent_iob] return self.iob_strings()[self.c.ent_iob]
@property
def ent_id(self):
    """RETURNS (uint64): ID of the entity the token is an instance of,
        if any.
    """
    return self.c.ent_id

@ent_id.setter
def ent_id(self, hash_t key):
    # Write the entity ID hash straight to the token struct.
    self.c.ent_id = key
@property
def ent_id_(self):
    """RETURNS (str): ID of the entity the token is an instance of,
        if any.
    """
    return self.vocab.strings[self.c.ent_id]

@ent_id_.setter
def ent_id_(self, name):
    # Intern the string in the vocab's string store and keep the hash ID.
    self.c.ent_id = self.vocab.strings.add(name)
@property
def ent_kb_id(self):
    """RETURNS (uint64): Named entity KB ID."""
    return self.c.ent_kb_id

@ent_kb_id.setter
def ent_kb_id(self, attr_t ent_kb_id):
    # Write the KB ID straight to the token struct.
    self.c.ent_kb_id = ent_kb_id
@property
def ent_kb_id_(self):
    """RETURNS (str): Named entity KB ID."""
    return self.vocab.strings[self.c.ent_kb_id]

@ent_kb_id_.setter
def ent_kb_id_(self, ent_kb_id):
    # Intern the string in the vocab's string store and keep the hash ID.
    self.c.ent_kb_id = self.vocab.strings.add(ent_kb_id)
@property @property
@ -840,15 +855,16 @@ cdef class Token:
""" """
return self.vocab.strings[self.c.lex.lower] return self.vocab.strings[self.c.lex.lower]
@property
def norm_(self):
    """RETURNS (str): The token's norm, i.e. a normalised form of the
        token text. Usually set in the language's tokenizer exceptions or
        norm exceptions.
    """
    # self.norm resolves the norm hash (falling back elsewhere if unset).
    return self.vocab.strings[self.norm]

@norm_.setter
def norm_(self, str norm_):
    # Intern the string in the vocab's string store and keep the hash ID.
    self.c.norm = self.vocab.strings.add(norm_)
@property @property
@ -879,32 +895,35 @@ cdef class Token:
""" """
return self.vocab.strings[self.c.lex.lang] return self.vocab.strings[self.c.lex.lang]
@property
def lemma_(self):
    """RETURNS (str): The token lemma, i.e. the base form of the word,
        with no inflectional suffixes.
    """
    return self.vocab.strings[self.c.lemma]

@lemma_.setter
def lemma_(self, str lemma_):
    # Intern the string in the vocab's string store and keep the hash ID.
    self.c.lemma = self.vocab.strings.add(lemma_)
@property
def pos_(self):
    """RETURNS (str): Coarse-grained part-of-speech tag."""
    return parts_of_speech.NAMES[self.c.pos]

@pos_.setter
def pos_(self, pos_name):
    # Only names from the closed Universal POS tag set are accepted.
    if pos_name not in parts_of_speech.IDS:
        raise ValueError(Errors.E1021.format(pp=pos_name))
    self.c.pos = parts_of_speech.IDS[pos_name]
@property
def tag_(self):
    """RETURNS (str): Fine-grained part-of-speech tag."""
    return self.vocab.strings[self.c.tag]

@tag_.setter
def tag_(self, tag):
    # Intern the string, then delegate to the `tag` property so the ID is
    # stored through the same code path.
    self.tag = self.vocab.strings.add(tag)
def has_dep(self): def has_dep(self):
@ -915,12 +934,13 @@ cdef class Token:
""" """
return not Token.missing_dep(self.c) return not Token.missing_dep(self.c)
@property
def dep_(self):
    """RETURNS (str): The syntactic dependency label."""
    return self.vocab.strings[self.c.dep]

@dep_.setter
def dep_(self, str label):
    # Intern the string in the vocab's string store and keep the hash ID.
    self.c.dep = self.vocab.strings.add(label)
@property @property

View File

@ -88,20 +88,22 @@ cdef class Example:
def __len__(self): def __len__(self):
return len(self.predicted) return len(self.predicted)
@property
def predicted(self):
    """The predicted `Doc` (`Example.x`)."""
    return self.x

@predicted.setter
def predicted(self, doc):
    self.x = doc
    # The cached alignment and token texts depend on the predicted doc,
    # so they must be invalidated/refreshed on assignment.
    self._cached_alignment = None
    self._cached_words_x = [t.text for t in doc]
@property
def reference(self):
    """The reference (gold-standard) `Doc` (`Example.y`)."""
    return self.y

@reference.setter
def reference(self, doc):
    self.y = doc
    # The cached alignment and token texts depend on the reference doc,
    # so they must be invalidated/refreshed on assignment.
    self._cached_alignment = None
    self._cached_words_y = [t.text for t in doc]
@ -420,8 +422,8 @@ cdef class Example:
seen_indices.update(indices) seen_indices.update(indices)
return output return output
@property
def text(self):
    """RETURNS (str): The text of `self.x` (the predicted doc)."""
    return self.x.text
def __str__(self): def __str__(self):

View File

@ -88,11 +88,12 @@ cdef class Vocab:
self.writing_system = writing_system self.writing_system = writing_system
self.get_noun_chunks = get_noun_chunks self.get_noun_chunks = get_noun_chunks
@property
def vectors(self):
    """The vector table backing this vocab (stored on `self._vectors`)."""
    return self._vectors
def __set__(self, vectors): @vectors.setter
def vectors(self, vectors):
if hasattr(vectors, "strings"): if hasattr(vectors, "strings"):
for s in vectors.strings: for s in vectors.strings:
self.strings.add(s) self.strings.add(s)
@ -464,11 +465,12 @@ cdef class Vocab:
key = Lexeme.get_struct_attr(lex.c, self.vectors.attr) key = Lexeme.get_struct_attr(lex.c, self.vectors.attr)
return key in self.vectors return key in self.vectors
@property
def lookups(self):
    """The lookup tables attached to this vocab (stored on `self._lookups`)."""
    return self._lookups
def __set__(self, lookups): @lookups.setter
def lookups(self, lookups):
self._lookups = lookups self._lookups = lookups
if lookups.has_table("lexeme_norm"): if lookups.has_table("lexeme_norm"):
self.lex_attr_getters[NORM] = util.add_lookups( self.lex_attr_getters[NORM] = util.add_lookups(