mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Update docstrings and API docs for Lexeme
This commit is contained in:
parent
7ed8a92ed1
commit
27de0834b2
|
@ -30,19 +30,16 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
|
|||
|
||||
|
||||
cdef class Lexeme:
|
||||
"""
|
||||
An entry in the vocabulary. A Lexeme has no string context --- it's a
|
||||
"""An entry in the vocabulary. A `Lexeme` has no string context – it's a
|
||||
word-type, as opposed to a word token. It therefore has no part-of-speech
|
||||
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
|
||||
tag).
|
||||
"""
|
||||
def __init__(self, Vocab vocab, int orth):
|
||||
"""
|
||||
Create a Lexeme object.
|
||||
"""Create a Lexeme object.
|
||||
|
||||
Arguments:
|
||||
vocab (Vocab): The parent vocabulary
|
||||
orth (int): The orth id of the lexeme.
|
||||
vocab (Vocab): The parent vocabulary
|
||||
orth (int): The orth id of the lexeme.
|
||||
Returns (Lexeme): The newly constructed object.
|
||||
"""
|
||||
self.vocab = vocab
|
||||
|
@ -82,35 +79,28 @@ cdef class Lexeme:
|
|||
return self.c.orth
|
||||
|
||||
def set_flag(self, attr_id_t flag_id, bint value):
|
||||
"""
|
||||
Change the value of a boolean flag.
|
||||
"""Change the value of a boolean flag.
|
||||
|
||||
Arguments:
|
||||
flag_id (int): The attribute ID of the flag to set.
|
||||
value (bool): The new value of the flag.
|
||||
flag_id (int): The attribute ID of the flag to set.
|
||||
value (bool): The new value of the flag.
|
||||
"""
|
||||
Lexeme.c_set_flag(self.c, flag_id, value)
|
||||
|
||||
def check_flag(self, attr_id_t flag_id):
|
||||
"""
|
||||
Check the value of a boolean flag.
|
||||
"""Check the value of a boolean flag.
|
||||
|
||||
Arguments:
|
||||
flag_id (int): The attribute ID of the flag to query.
|
||||
Returns (bool): The value of the flag.
|
||||
flag_id (int): The attribute ID of the flag to query.
|
||||
RETURNS (bool): The value of the flag.
|
||||
"""
|
||||
return True if Lexeme.c_check_flag(self.c, flag_id) else False
|
||||
|
||||
def similarity(self, other):
|
||||
"""
|
||||
Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
"""Compute a semantic similarity estimate. Defaults to cosine over
|
||||
vectors.
|
||||
|
||||
Arguments:
|
||||
other:
|
||||
The object to compare with. By default, accepts Doc, Span,
|
||||
Token and Lexeme objects.
|
||||
Returns:
|
||||
score (float): A scalar similarity score. Higher is more similar.
|
||||
other (object): The object to compare with. By default, accepts `Doc`,
|
||||
`Span`, `Token` and `Lexeme` objects.
|
||||
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||
"""
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
return 0.0
|
||||
|
@ -140,6 +130,11 @@ cdef class Lexeme:
|
|||
self.orth = self.c.orth
|
||||
|
||||
property has_vector:
|
||||
"""A boolean value indicating whether a word vector is associated with
|
||||
the object.
|
||||
|
||||
RETURNS (bool): Whether a word vector is associated with the object.
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef int i
|
||||
for i in range(self.vocab.vectors_length):
|
||||
|
@ -149,6 +144,10 @@ cdef class Lexeme:
|
|||
return False
|
||||
|
||||
property vector_norm:
|
||||
"""The L2 norm of the lexeme's vector representation.
|
||||
|
||||
RETURNS (float): The L2 norm of the vector representation.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.c.l2_norm
|
||||
|
||||
|
@ -156,6 +155,11 @@ cdef class Lexeme:
|
|||
self.c.l2_norm = value
|
||||
|
||||
property vector:
|
||||
"""A real-valued meaning representation.
|
||||
|
||||
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||
representing the lexeme's semantics.
|
||||
"""
|
||||
def __get__(self):
|
||||
cdef int length = self.vocab.vectors_length
|
||||
if length == 0:
|
||||
|
@ -196,6 +200,14 @@ cdef class Lexeme:
|
|||
def __get__(self):
|
||||
return self.vocab.strings[self.c.orth]
|
||||
|
||||
property text:
|
||||
"""A unicode representation of the lexeme text.
|
||||
|
||||
RETURNS (unicode): The original verbatim text of the lexeme.
|
||||
"""
|
||||
def __get__(self):
|
||||
return self.orth_
|
||||
|
||||
property lower:
|
||||
def __get__(self): return self.c.lower
|
||||
def __set__(self, int x): self.c.lower = x
|
||||
|
|
|
@ -2,7 +2,154 @@
|
|||
|
||||
include ../../_includes/_mixins
|
||||
|
||||
p An entry in the vocabulary.
|
||||
p
|
||||
| An entry in the vocabulary. A #[code Lexeme] has no string context – it's
|
||||
| a word-type, as opposed to a word token. It therefore has no
|
||||
| part-of-speech tag, dependency parse, or lemma (if lemmatization depends
|
||||
| on the part-of-speech tag).
|
||||
|
||||
+h(2, "init") Lexeme.__init__
|
||||
+tag method
|
||||
|
||||
p Create a #[code Lexeme] object.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code vocab]
|
||||
+cell #[code Vocab]
|
||||
+cell The parent vocabulary.
|
||||
|
||||
+row
|
||||
+cell #[code orth]
|
||||
+cell int
|
||||
+cell The orth id of the lexeme.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Lexeme]
|
||||
+cell The newly constructed object.
|
||||
|
||||
+h(2, "set_flag") Lexeme.set_flag
|
||||
+tag method
|
||||
|
||||
p Change the value of a boolean flag.
|
||||
|
||||
+aside-code("Example").
|
||||
COOL_FLAG = nlp.vocab.add_flag(lambda text: False)
|
||||
nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True)
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code flag_id]
|
||||
+cell int
|
||||
+cell The attribute ID of the flag to set.
|
||||
|
||||
+row
|
||||
+cell #[code value]
|
||||
+cell bool
|
||||
+cell The new value of the flag.
|
||||
|
||||
+h(2, "check_flag") Lexeme.check_flag
|
||||
+tag method
|
||||
|
||||
p Check the value of a boolean flag.
|
||||
|
||||
+aside-code("Example").
|
||||
is_my_library = lambda text: text in ['spaCy', 'Thinc']
|
||||
MY_LIBRARY = nlp.vocab.add_flag(is_my_library)
|
||||
assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code flag_id]
|
||||
+cell int
|
||||
+cell The attribute ID of the flag to query.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell The value of the flag.
|
||||
|
||||
+h(2, "similarity") Lexeme.similarity
|
||||
+tag method
|
||||
+tag-model("vectors")
|
||||
|
||||
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
|
||||
+aside-code("Example").
|
||||
apple = nlp.vocab[u'apple']
|
||||
orange = nlp.vocab[u'orange']
|
||||
apple_orange = apple.similarity(orange)
|
||||
orange_apple = orange.similarity(apple)
|
||||
assert apple_orange == orange_apple
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell other
|
||||
+cell -
|
||||
+cell
|
||||
| The object to compare with. By default, accepts #[code Doc],
|
||||
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell float
|
||||
+cell A scalar similarity score. Higher is more similar.
|
||||
|
||||
|
||||
+h(2, "has_vector") Lexeme.has_vector
|
||||
+tag property
|
||||
+tag-model("vectors")
|
||||
|
||||
p
|
||||
| A boolean value indicating whether a word vector is associated with the
|
||||
| lexeme.
|
||||
|
||||
+aside-code("Example").
|
||||
apple = nlp.vocab[u'apple']
|
||||
assert apple.has_vector
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether the lexeme has vector data attached.
|
||||
|
||||
+h(2, "vector") Lexeme.vector
|
||||
+tag property
|
||||
+tag-model("vectors")
|
||||
|
||||
p A real-valued meaning representation.
|
||||
|
||||
+aside-code("Example").
|
||||
apple = nlp.vocab[u'apple']
|
||||
assert apple.vector.dtype == 'float32'
|
||||
assert apple.vector.shape == (300,)
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A 1D numpy array representing the lexeme's semantics.
|
||||
|
||||
+h(2, "vector_norm") Lexeme.vector_norm
|
||||
+tag property
|
||||
+tag-model("vectors")
|
||||
|
||||
p The L2 norm of the lexeme's vector representation.
|
||||
|
||||
+aside-code("Example").
|
||||
apple = nlp.vocab[u'apple']
|
||||
pasta = nlp.vocab[u'pasta']
|
||||
apple.vector_norm # 7.1346845626831055
|
||||
pasta.vector_norm # 7.759851932525635
|
||||
assert apple.vector_norm != pasta.vector_norm
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell float
|
||||
+cell The L2 norm of the vector representation.
|
||||
|
||||
+h(2, "attributes") Attributes
|
||||
|
||||
|
@ -12,6 +159,16 @@ p An entry in the vocabulary.
|
|||
+cell #[code Vocab]
|
||||
+cell
|
||||
|
||||
+row
|
||||
+cell #[code text]
|
||||
+cell unicode
|
||||
+cell Verbatim text content.
|
||||
|
||||
+row
|
||||
+cell #[code lex_id]
|
||||
+cell int
|
||||
+cell ID of the lexeme's lexical type.
|
||||
|
||||
+row
|
||||
+cell #[code lower]
|
||||
+cell int
|
||||
|
@ -124,116 +281,9 @@ p An entry in the vocabulary.
|
|||
+row
|
||||
+cell #[code prob]
|
||||
+cell float
|
||||
+cell Smoothed log probability estimate of token's type.
|
||||
+cell Smoothed log probability estimate of lexeme's type.
|
||||
|
||||
+row
|
||||
+cell #[code sentiment]
|
||||
+cell float
|
||||
+cell A scalar value indicating the positivity or negativity of the token.
|
||||
+row
|
||||
+cell #[code lex_id]
|
||||
+cell int
|
||||
+cell ID of the token's lexical type.
|
||||
|
||||
+row
|
||||
+cell #[code text]
|
||||
+cell unicode
|
||||
+cell Verbatim text content.
|
||||
|
||||
+h(2, "init") Lexeme.__init__
|
||||
+tag method
|
||||
|
||||
p Create a #[code Lexeme] object.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code vocab]
|
||||
+cell #[code Vocab]
|
||||
+cell The parent vocabulary.
|
||||
|
||||
+row
|
||||
+cell #[code orth]
|
||||
+cell int
|
||||
+cell The orth id of the lexeme.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code Lexeme]
|
||||
+cell The newly constructed object.
|
||||
|
||||
+h(2, "set_flag") Lexeme.set_flag
|
||||
+tag method
|
||||
|
||||
p Change the value of a boolean flag.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code flag_id]
|
||||
+cell int
|
||||
+cell The attribute ID of the flag to set.
|
||||
|
||||
+row
|
||||
+cell #[code value]
|
||||
+cell bool
|
||||
+cell The new value of the flag.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code None]
|
||||
+cell -
|
||||
|
||||
+h(2, "check_flag") Lexeme.check_flag
|
||||
+tag method
|
||||
|
||||
p Check the value of a boolean flag.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code flag_id]
|
||||
+cell int
|
||||
+cell The attribute ID of the flag to query.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell The value of the flag.
|
||||
|
||||
+h(2, "similarity") Lexeme.similarity
|
||||
+tag method
|
||||
|
||||
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code other]
|
||||
+cell -
|
||||
+cell
|
||||
| The object to compare with. By default, accepts #[code Doc],
|
||||
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
||||
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell float
|
||||
+cell A scalar similarity score. Higher is more similar.
|
||||
|
||||
+h(2, "vector") Lexeme.vector
|
||||
+tag property
|
||||
|
||||
p A real-valued meaning representation.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||
+cell A real-valued meaning representation.
|
||||
|
||||
+h(2, "has_vector") Lexeme.has_vector
|
||||
+tag property
|
||||
|
||||
p A boolean value indicating whether a word vector is associated with the object.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether a word vector is associated with the object.
|
||||
+cell A scalar value indicating the positivity or negativity of the lexeme.
|
||||
|
|
Loading…
Reference in New Issue
Block a user