Update docstrings and API docs for Lexeme

This commit is contained in:
ines 2017-05-20 15:13:42 +02:00
parent 7ed8a92ed1
commit 27de0834b2
2 changed files with 197 additions and 135 deletions

View File

@ -30,19 +30,16 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
cdef class Lexeme: cdef class Lexeme:
""" """An entry in the vocabulary. A `Lexeme` has no string context --- it's a
An entry in the vocabulary. A Lexeme has no string context --- it's a
word-type, as opposed to a word token. It therefore has no part-of-speech word-type, as opposed to a word token. It therefore has no part-of-speech
tag, dependency parse, or lemma (lemmatization depends on the part-of-speech tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
tag). tag).
""" """
def __init__(self, Vocab vocab, int orth): def __init__(self, Vocab vocab, int orth):
""" """Create a Lexeme object.
Create a Lexeme object.
Arguments: vocab (Vocab): The parent vocabulary
vocab (Vocab): The parent vocabulary orth (int): The orth id of the lexeme.
orth (int): The orth id of the lexeme.
Returns (Lexeme): The newly constructed object. Returns (Lexeme): The newly constructed object.
""" """
self.vocab = vocab self.vocab = vocab
@ -82,35 +79,28 @@ cdef class Lexeme:
return self.c.orth return self.c.orth
def set_flag(self, attr_id_t flag_id, bint value): def set_flag(self, attr_id_t flag_id, bint value):
""" """Change the value of a boolean flag.
Change the value of a boolean flag.
Arguments: flag_id (int): The attribute ID of the flag to set.
flag_id (int): The attribute ID of the flag to set. value (bool): The new value of the flag.
value (bool): The new value of the flag.
""" """
Lexeme.c_set_flag(self.c, flag_id, value) Lexeme.c_set_flag(self.c, flag_id, value)
def check_flag(self, attr_id_t flag_id): def check_flag(self, attr_id_t flag_id):
""" """Check the value of a boolean flag.
Check the value of a boolean flag.
Arguments: flag_id (int): The attribute ID of the flag to query.
flag_id (int): The attribute ID of the flag to query. RETURNS (bool): The value of the flag.
Returns (bool): The value of the flag.
""" """
return True if Lexeme.c_check_flag(self.c, flag_id) else False return True if Lexeme.c_check_flag(self.c, flag_id) else False
def similarity(self, other): def similarity(self, other):
""" """Compute a semantic similarity estimate. Defaults to cosine over
Compute a semantic similarity estimate. Defaults to cosine over vectors. vectors.
Arguments: other (object): The object to compare with. By default, accepts `Doc`,
other: `Span`, `Token` and `Lexeme` objects.
The object to compare with. By default, accepts Doc, Span, RETURNS (float): A scalar similarity score. Higher is more similar.
Token and Lexeme objects.
Returns:
score (float): A scalar similarity score. Higher is more similar.
""" """
if self.vector_norm == 0 or other.vector_norm == 0: if self.vector_norm == 0 or other.vector_norm == 0:
return 0.0 return 0.0
@ -140,6 +130,11 @@ cdef class Lexeme:
self.orth = self.c.orth self.orth = self.c.orth
property has_vector: property has_vector:
"""A boolean value indicating whether a word vector is associated with
the object.
RETURNS (bool): Whether a word vector is associated with the object.
"""
def __get__(self): def __get__(self):
cdef int i cdef int i
for i in range(self.vocab.vectors_length): for i in range(self.vocab.vectors_length):
@ -149,6 +144,10 @@ cdef class Lexeme:
return False return False
property vector_norm: property vector_norm:
"""The L2 norm of the lexeme's vector representation.
RETURNS (float): The L2 norm of the vector representation.
"""
def __get__(self): def __get__(self):
return self.c.l2_norm return self.c.l2_norm
@ -156,6 +155,11 @@ cdef class Lexeme:
self.c.l2_norm = value self.c.l2_norm = value
property vector: property vector:
"""A real-valued meaning representation.
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the lexeme's semantics.
"""
def __get__(self): def __get__(self):
cdef int length = self.vocab.vectors_length cdef int length = self.vocab.vectors_length
if length == 0: if length == 0:
@ -196,6 +200,14 @@ cdef class Lexeme:
def __get__(self): def __get__(self):
return self.vocab.strings[self.c.orth] return self.vocab.strings[self.c.orth]
property text:
"""A unicode representation of the lexeme text.
RETURNS (unicode): The original verbatim text of the lexeme.
"""
def __get__(self):
return self.orth_
property lower: property lower:
def __get__(self): return self.c.lower def __get__(self): return self.c.lower
def __set__(self, int x): self.c.lower = x def __set__(self, int x): self.c.lower = x

View File

@ -2,7 +2,154 @@
include ../../_includes/_mixins include ../../_includes/_mixins
p An entry in the vocabulary. p
| An entry in the vocabulary. A #[code Lexeme] has no string context – it's
| a word-type, as opposed to a word token. It therefore has no
| part-of-speech tag, dependency parse, or lemma (if lemmatization depends
| on the part-of-speech tag).
+h(2, "init") Lexeme.__init__
+tag method
p Create a #[code Lexeme] object.
+table(["Name", "Type", "Description"])
+row
+cell #[code vocab]
+cell #[code Vocab]
+cell The parent vocabulary.
+row
+cell #[code orth]
+cell int
+cell The orth id of the lexeme.
+footrow
+cell returns
+cell #[code Lexeme]
+cell The newly constructed object.
+h(2, "set_flag") Lexeme.set_flag
+tag method
p Change the value of a boolean flag.
+aside-code("Example").
COOL_FLAG = nlp.vocab.add_flag(lambda text: False)
nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True)
+table(["Name", "Type", "Description"])
+row
+cell #[code flag_id]
+cell int
+cell The attribute ID of the flag to set.
+row
+cell #[code value]
+cell bool
+cell The new value of the flag.
+h(2, "check_flag") Lexeme.check_flag
+tag method
p Check the value of a boolean flag.
+aside-code("Example").
is_my_library = lambda text: text in ['spaCy', 'Thinc']
MY_LIBRARY = nlp.vocab.add_flag(is_my_library)
assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True
+table(["Name", "Type", "Description"])
+row
+cell #[code flag_id]
+cell int
+cell The attribute ID of the flag to query.
+footrow
+cell returns
+cell bool
+cell The value of the flag.
+h(2, "similarity") Lexeme.similarity
+tag method
+tag-model("vectors")
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
+aside-code("Example").
apple = nlp.vocab[u'apple']
orange = nlp.vocab[u'orange']
apple_orange = apple.similarity(orange)
orange_apple = orange.similarity(apple)
assert apple_orange == orange_apple
+table(["Name", "Type", "Description"])
+row
+cell #[code other]
+cell -
+cell
| The object to compare with. By default, accepts #[code Doc],
| #[code Span], #[code Token] and #[code Lexeme] objects.
+footrow
+cell returns
+cell float
+cell A scalar similarity score. Higher is more similar.
+h(2, "has_vector") Lexeme.has_vector
+tag property
+tag-model("vectors")
p
| A boolean value indicating whether a word vector is associated with the
| lexeme.
+aside-code("Example").
apple = nlp.vocab[u'apple']
assert apple.has_vector
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell bool
+cell Whether the lexeme has vector data attached.
+h(2, "vector") Lexeme.vector
+tag property
+tag-model("vectors")
p A real-valued meaning representation.
+aside-code("Example").
apple = nlp.vocab[u'apple']
assert apple.vector.dtype == 'float32'
assert apple.vector.shape == (300,)
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+cell A 1D numpy array representing the lexeme's semantics.
+h(2, "vector_norm") Lexeme.vector_norm
+tag property
+tag-model("vectors")
p The L2 norm of the lexeme's vector representation.
+aside-code("Example").
apple = nlp.vocab[u'apple']
pasta = nlp.vocab[u'pasta']
apple.vector_norm # 7.1346845626831055
pasta.vector_norm # 7.759851932525635
assert apple.vector_norm != pasta.vector_norm
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell float
+cell The L2 norm of the vector representation.
+h(2, "attributes") Attributes +h(2, "attributes") Attributes
@ -12,6 +159,16 @@ p An entry in the vocabulary.
+cell #[code Vocab] +cell #[code Vocab]
+cell +cell
+row
+cell #[code text]
+cell unicode
+cell Verbatim text content.
+row
+cell #[code lex_id]
+cell int
+cell ID of the lexeme's lexical type.
+row +row
+cell #[code lower] +cell #[code lower]
+cell int +cell int
@ -124,116 +281,9 @@ p An entry in the vocabulary.
+row +row
+cell #[code prob] +cell #[code prob]
+cell float +cell float
+cell Smoothed log probability estimate of token's type. +cell Smoothed log probability estimate of lexeme's type.
+row +row
+cell #[code sentiment] +cell #[code sentiment]
+cell float +cell float
+cell A scalar value indicating the positivity or negativity of the token. +cell A scalar value indicating the positivity or negativity of the lexeme.
+row
+cell #[code lex_id]
+cell int
+cell ID of the token's lexical type.
+row
+cell #[code text]
+cell unicode
+cell Verbatim text content.
+h(2, "init") Lexeme.__init__
+tag method
p Create a #[code Lexeme] object.
+table(["Name", "Type", "Description"])
+row
+cell #[code vocab]
+cell #[code Vocab]
+cell The parent vocabulary.
+row
+cell #[code orth]
+cell int
+cell The orth id of the lexeme.
+footrow
+cell returns
+cell #[code Lexeme]
+cell The newly constructed object.
+h(2, "set_flag") Lexeme.set_flag
+tag method
p Change the value of a boolean flag.
+table(["Name", "Type", "Description"])
+row
+cell #[code flag_id]
+cell int
+cell The attribute ID of the flag to set.
+row
+cell #[code value]
+cell bool
+cell The new value of the flag.
+footrow
+cell returns
+cell #[code None]
+cell -
+h(2, "check_flag") Lexeme.check_flag
+tag method
p Check the value of a boolean flag.
+table(["Name", "Type", "Description"])
+row
+cell #[code flag_id]
+cell int
+cell The attribute ID of the flag to query.
+footrow
+cell returns
+cell bool
+cell The value of the flag.
+h(2, "similarity") Lexeme.similarity
+tag method
p Compute a semantic similarity estimate. Defaults to cosine over vectors.
+table(["Name", "Type", "Description"])
+row
+cell #[code other]
+cell -
+cell
| The object to compare with. By default, accepts #[code Doc],
| #[code Span], #[code Token] and #[code Lexeme] objects.
+footrow
+cell returns
+cell float
+cell A scalar similarity score. Higher is more similar.
+h(2, "vector") Lexeme.vector
+tag property
p A real-valued meaning representation.
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+cell A real-valued meaning representation.
+h(2, "has_vector") Lexeme.has_vector
+tag property
p A boolean value indicating whether a word vector is associated with the object.
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell bool
+cell Whether a word vector is associated with the object.