mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Update docstrings and API docs for Lexeme
This commit is contained in:
parent
7ed8a92ed1
commit
27de0834b2
|
@ -30,19 +30,16 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
|
||||||
|
|
||||||
|
|
||||||
cdef class Lexeme:
|
cdef class Lexeme:
|
||||||
"""
|
"""An entry in the vocabulary. A `Lexeme` has no string context – it's a
|
||||||
An entry in the vocabulary. A Lexeme has no string context --- it's a
|
|
||||||
word-type, as opposed to a word token. It therefore has no part-of-speech
|
word-type, as opposed to a word token. It therefore has no part-of-speech
|
||||||
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
|
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
|
||||||
tag).
|
tag).
|
||||||
"""
|
"""
|
||||||
def __init__(self, Vocab vocab, int orth):
|
def __init__(self, Vocab vocab, int orth):
|
||||||
"""
|
"""Create a Lexeme object.
|
||||||
Create a Lexeme object.
|
|
||||||
|
|
||||||
Arguments:
|
vocab (Vocab): The parent vocabulary
|
||||||
vocab (Vocab): The parent vocabulary
|
orth (int): The orth id of the lexeme.
|
||||||
orth (int): The orth id of the lexeme.
|
|
||||||
Returns (Lexeme): The newly constructed object.
|
Returns (Lexeme): The newly constructed object.
|
||||||
"""
|
"""
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
|
@ -82,35 +79,28 @@ cdef class Lexeme:
|
||||||
return self.c.orth
|
return self.c.orth
|
||||||
|
|
||||||
def set_flag(self, attr_id_t flag_id, bint value):
|
def set_flag(self, attr_id_t flag_id, bint value):
|
||||||
"""
|
"""Change the value of a boolean flag.
|
||||||
Change the value of a boolean flag.
|
|
||||||
|
|
||||||
Arguments:
|
flag_id (int): The attribute ID of the flag to set.
|
||||||
flag_id (int): The attribute ID of the flag to set.
|
value (bool): The new value of the flag.
|
||||||
value (bool): The new value of the flag.
|
|
||||||
"""
|
"""
|
||||||
Lexeme.c_set_flag(self.c, flag_id, value)
|
Lexeme.c_set_flag(self.c, flag_id, value)
|
||||||
|
|
||||||
def check_flag(self, attr_id_t flag_id):
|
def check_flag(self, attr_id_t flag_id):
|
||||||
"""
|
"""Check the value of a boolean flag.
|
||||||
Check the value of a boolean flag.
|
|
||||||
|
|
||||||
Arguments:
|
flag_id (int): The attribute ID of the flag to query.
|
||||||
flag_id (int): The attribute ID of the flag to query.
|
RETURNS (bool): The value of the flag.
|
||||||
Returns (bool): The value of the flag.
|
|
||||||
"""
|
"""
|
||||||
return True if Lexeme.c_check_flag(self.c, flag_id) else False
|
return True if Lexeme.c_check_flag(self.c, flag_id) else False
|
||||||
|
|
||||||
def similarity(self, other):
|
def similarity(self, other):
|
||||||
"""
|
"""Compute a semantic similarity estimate. Defaults to cosine over
|
||||||
Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
vectors.
|
||||||
|
|
||||||
Arguments:
|
other (object): The object to compare with. By default, accepts `Doc`,
|
||||||
other:
|
`Span`, `Token` and `Lexeme` objects.
|
||||||
The object to compare with. By default, accepts Doc, Span,
|
RETURNS (float): A scalar similarity score. Higher is more similar.
|
||||||
Token and Lexeme objects.
|
|
||||||
Returns:
|
|
||||||
score (float): A scalar similarity score. Higher is more similar.
|
|
||||||
"""
|
"""
|
||||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||||
return 0.0
|
return 0.0
|
||||||
|
@ -140,6 +130,11 @@ cdef class Lexeme:
|
||||||
self.orth = self.c.orth
|
self.orth = self.c.orth
|
||||||
|
|
||||||
property has_vector:
|
property has_vector:
|
||||||
|
"""A boolean value indicating whether a word vector is associated with
|
||||||
|
the object.
|
||||||
|
|
||||||
|
RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(self.vocab.vectors_length):
|
for i in range(self.vocab.vectors_length):
|
||||||
|
@ -149,6 +144,10 @@ cdef class Lexeme:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
property vector_norm:
|
property vector_norm:
|
||||||
|
"""The L2 norm of the lexeme's vector representation.
|
||||||
|
|
||||||
|
RETURNS (float): The L2 norm of the vector representation.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.l2_norm
|
return self.c.l2_norm
|
||||||
|
|
||||||
|
@ -156,6 +155,11 @@ cdef class Lexeme:
|
||||||
self.c.l2_norm = value
|
self.c.l2_norm = value
|
||||||
|
|
||||||
property vector:
|
property vector:
|
||||||
|
"""A real-valued meaning representation.
|
||||||
|
|
||||||
|
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
|
||||||
|
representing the lexeme's semantics.
|
||||||
|
"""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
cdef int length = self.vocab.vectors_length
|
cdef int length = self.vocab.vectors_length
|
||||||
if length == 0:
|
if length == 0:
|
||||||
|
@ -196,6 +200,14 @@ cdef class Lexeme:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.vocab.strings[self.c.orth]
|
return self.vocab.strings[self.c.orth]
|
||||||
|
|
||||||
|
property text:
|
||||||
|
"""A unicode representation of the token text.
|
||||||
|
|
||||||
|
RETURNS (unicode): The original verbatim text of the lexeme.
|
||||||
|
"""
|
||||||
|
def __get__(self):
|
||||||
|
return self.orth_
|
||||||
|
|
||||||
property lower:
|
property lower:
|
||||||
def __get__(self): return self.c.lower
|
def __get__(self): return self.c.lower
|
||||||
def __set__(self, int x): self.c.lower = x
|
def __set__(self, int x): self.c.lower = x
|
||||||
|
|
|
@ -2,7 +2,154 @@
|
||||||
|
|
||||||
include ../../_includes/_mixins
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
p An entry in the vocabulary.
|
p
|
||||||
|
| An entry in the vocabulary. A #[code Lexeme] has no string context – it's
|
||||||
|
| a word-type, as opposed to a word token. It therefore has no
|
||||||
|
| part-of-speech tag, dependency parse, or lemma (if lemmatization depends
|
||||||
|
| on the part-of-speech tag).
|
||||||
|
|
||||||
|
+h(2, "init") Lexeme.__init__
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Create a #[code Lexeme] object.
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code vocab]
|
||||||
|
+cell #[code Vocab]
|
||||||
|
+cell The parent vocabulary.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code orth]
|
||||||
|
+cell int
|
||||||
|
+cell The orth id of the lexeme.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code Lexeme]
|
||||||
|
+cell The newly constructed object.
|
||||||
|
|
||||||
|
+h(2, "set_flag") Lexeme.set_flag
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Change the value of a boolean flag.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
COOL_FLAG = nlp.vocab.add_flag(lambda text: False)
|
||||||
|
nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True)
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code flag_id]
|
||||||
|
+cell int
|
||||||
|
+cell The attribute ID of the flag to set.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code value]
|
||||||
|
+cell bool
|
||||||
|
+cell The new value of the flag.
|
||||||
|
|
||||||
|
+h(2, "check_flag") Lexeme.check_flag
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Check the value of a boolean flag.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
is_my_library = lambda text: text in ['spaCy', 'Thinc']
|
||||||
|
MY_LIBRARY = nlp.vocab.add_flag(is_my_library)
|
||||||
|
assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code flag_id]
|
||||||
|
+cell int
|
||||||
|
+cell The attribute ID of the flag to query.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell The value of the flag.
|
||||||
|
|
||||||
|
+h(2, "similarity") Lexeme.similarity
|
||||||
|
+tag method
|
||||||
|
+tag-model("vectors")
|
||||||
|
|
||||||
|
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp.vocab[u'apple']
|
||||||
|
orange = nlp.vocab[u'orange']
|
||||||
|
apple_orange = apple.similarity(orange)
|
||||||
|
orange_apple = orange.similarity(apple)
|
||||||
|
assert apple_orange == orange_apple
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell other
|
||||||
|
+cell -
|
||||||
|
+cell
|
||||||
|
| The object to compare with. By default, accepts #[code Doc],
|
||||||
|
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell float
|
||||||
|
+cell A scalar similarity score. Higher is more similar.
|
||||||
|
|
||||||
|
|
||||||
|
+h(2, "has_vector") Lexeme.has_vector
|
||||||
|
+tag property
|
||||||
|
+tag-model("vectors")
|
||||||
|
|
||||||
|
p
|
||||||
|
| A boolean value indicating whether a word vector is associated with the
|
||||||
|
| lexeme.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp.vocab[u'apple']
|
||||||
|
assert apple.has_vector
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether the lexeme has vector data attached.
|
||||||
|
|
||||||
|
+h(2, "vector") Lexeme.vector
|
||||||
|
+tag property
|
||||||
|
+tag-model("vectors")
|
||||||
|
|
||||||
|
p A real-valued meaning representation.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp.vocab[u'apple']
|
||||||
|
assert apple.vector.dtype == 'float32'
|
||||||
|
assert apple.vector.shape == (300,)
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
||||||
|
+cell A 1D numpy array representing the lexeme's semantics.
|
||||||
|
|
||||||
|
+h(2, "vector_norm") Lexeme.vector_norm
|
||||||
|
+tag property
|
||||||
|
+tag-model("vectors")
|
||||||
|
|
||||||
|
p The L2 norm of the lexeme's vector representation.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
apple = nlp.vocab[u'apple']
|
||||||
|
pasta = nlp.vocab[u'pasta']
|
||||||
|
apple.vector_norm # 7.1346845626831055
|
||||||
|
pasta.vector_norm # 7.759851932525635
|
||||||
|
assert apple.vector_norm != pasta.vector_norm
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell float
|
||||||
|
+cell The L2 norm of the vector representation.
|
||||||
|
|
||||||
+h(2, "attributes") Attributes
|
+h(2, "attributes") Attributes
|
||||||
|
|
||||||
|
@ -12,6 +159,16 @@ p An entry in the vocabulary.
|
||||||
+cell #[code Vocab]
|
+cell #[code Vocab]
|
||||||
+cell
|
+cell
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code text]
|
||||||
|
+cell unicode
|
||||||
|
+cell Verbatim text content.
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code lex_id]
|
||||||
|
+cell int
|
||||||
|
+cell ID of the lexeme's lexical type.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code lower]
|
+cell #[code lower]
|
||||||
+cell int
|
+cell int
|
||||||
|
@ -124,116 +281,9 @@ p An entry in the vocabulary.
|
||||||
+row
|
+row
|
||||||
+cell #[code prob]
|
+cell #[code prob]
|
||||||
+cell float
|
+cell float
|
||||||
+cell Smoothed log probability estimate of token's type.
|
+cell Smoothed log probability estimate of lexeme's type.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code sentiment]
|
+cell #[code sentiment]
|
||||||
+cell float
|
+cell float
|
||||||
+cell A scalar value indicating the positivity or negativity of the token.
|
+cell A scalar value indicating the positivity or negativity of the lexeme.
|
||||||
+row
|
|
||||||
+cell #[code lex_id]
|
|
||||||
+cell int
|
|
||||||
+cell ID of the token's lexical type.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code text]
|
|
||||||
+cell unicode
|
|
||||||
+cell Verbatim text content.
|
|
||||||
|
|
||||||
+h(2, "init") Lexeme.__init__
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Create a #[code Lexeme] object.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code vocab]
|
|
||||||
+cell #[code Vocab]
|
|
||||||
+cell The parent vocabulary.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code orth]
|
|
||||||
+cell int
|
|
||||||
+cell The orth id of the lexeme.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code Lexeme]
|
|
||||||
+cell The newly constructed object.
|
|
||||||
|
|
||||||
+h(2, "set_flag") Lexeme.set_flag
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Change the value of a boolean flag.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code flag_id]
|
|
||||||
+cell int
|
|
||||||
+cell The attribute ID of the flag to set.
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code value]
|
|
||||||
+cell bool
|
|
||||||
+cell The new value of the flag.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code None]
|
|
||||||
+cell -
|
|
||||||
|
|
||||||
+h(2, "check_flag") Lexeme.check_flag
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Check the value of a boolean flag.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code flag_id]
|
|
||||||
+cell int
|
|
||||||
+cell The attribute ID of the flag to query.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell bool
|
|
||||||
+cell The value of the flag.
|
|
||||||
|
|
||||||
+h(2, "similarity") Lexeme.similarity
|
|
||||||
+tag method
|
|
||||||
|
|
||||||
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code other]
|
|
||||||
+cell -
|
|
||||||
+cell
|
|
||||||
| The object to compare with. By default, accepts #[code Doc],
|
|
||||||
| #[code Span], #[code Token] and #[code Lexeme] objects.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell float
|
|
||||||
+cell A scalar similarity score. Higher is more similar.
|
|
||||||
|
|
||||||
+h(2, "vector") Lexeme.vector
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p A real-valued meaning representation.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
|
|
||||||
+cell A real-valued meaning representation.
|
|
||||||
|
|
||||||
+h(2, "has_vector") Lexeme.has_vector
|
|
||||||
+tag property
|
|
||||||
|
|
||||||
p A boolean value indicating whether a word vector is associated with the object.
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell bool
|
|
||||||
+cell Whether a word vector is associated with the object.
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user