Update docstrings and API docs for Lexeme

2025-10-25 21:21:10 +03:00 · 2017-05-20 15:13:42 +02:00 · 2017-05-20 15:13:42 +02:00 · 27de0834b2
commit 27de0834b2
parent 7ed8a92ed1
2 changed files with 197 additions and 135 deletions
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@ -30,19 +30,16 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
 cdef class Lexeme:
-    """
+    """An entry in the vocabulary. A `Lexeme` has no string context – it's a
    An entry in the vocabulary.  A Lexeme has no string context --- it's a
    word-type, as opposed to a word token.  It therefore has no part-of-speech
    tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
    tag).
    """
    def __init__(self, Vocab vocab, int orth):
-        """
+        """Create a Lexeme object.
        Create a Lexeme object.
-        Arguments:
+        vocab (Vocab): The parent vocabulary.
-            vocab (Vocab): The parent vocabulary
+        orth (int): The orth id of the lexeme.
            orth (int): The orth id of the lexeme.
        Returns (Lexeme): The newly constructed object.
        """
        self.vocab = vocab
@ -82,35 +79,28 @@ cdef class Lexeme:
        return self.c.orth
    def set_flag(self, attr_id_t flag_id, bint value):
-        """
+        """Change the value of a boolean flag.
        Change the value of a boolean flag.
-        Arguments:
+        flag_id (int): The attribute ID of the flag to set.
-            flag_id (int): The attribute ID of the flag to set.
+        value (bool): The new value of the flag.
            value (bool): The new value of the flag.
        """
        Lexeme.c_set_flag(self.c, flag_id, value)
    def check_flag(self, attr_id_t flag_id):
-        """
+        """Check the value of a boolean flag.
        Check the value of a boolean flag.
-        Arguments:
+        flag_id (int): The attribute ID of the flag to query.
-            flag_id (int): The attribute ID of the flag to query.
+        RETURNS (bool): The value of the flag.
        Returns (bool): The value of the flag.
        """
        return True if Lexeme.c_check_flag(self.c, flag_id) else False
    def similarity(self, other):
-        """
+        """Compute a semantic similarity estimate. Defaults to cosine over
-        Compute a semantic similarity estimate. Defaults to cosine over vectors.
+        vectors.
-        Arguments:
+        other (object): The object to compare with. By default, accepts `Doc`,
-            other:
+            `Span`, `Token` and `Lexeme` objects.
-                The object to compare with. By default, accepts Doc, Span,
+        RETURNS (float): A scalar similarity score. Higher is more similar.
                Token and Lexeme objects.
        Returns:
            score (float): A scalar similarity score. Higher is more similar.
        """
        if self.vector_norm == 0 or other.vector_norm == 0:
            return 0.0
@ -140,6 +130,11 @@ cdef class Lexeme:
        self.orth = self.c.orth
    property has_vector:
        """A boolean value indicating whether a word vector is associated with
        the object.
        RETURNS (bool): Whether a word vector is associated with the object.
        """
        def __get__(self):
            cdef int i
            for i in range(self.vocab.vectors_length):
@ -149,6 +144,10 @@ cdef class Lexeme:
                return False
    property vector_norm:
        """The L2 norm of the lexeme's vector representation.
        RETURNS (float): The L2 norm of the vector representation.
        """
        def __get__(self):
            return self.c.l2_norm
@ -156,6 +155,11 @@ cdef class Lexeme:
            self.c.l2_norm = value
    property vector:
        """A real-valued meaning representation.
        RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
            representing the lexeme's semantics.
        """
        def __get__(self):
            cdef int length = self.vocab.vectors_length
            if length == 0:
@ -196,6 +200,14 @@ cdef class Lexeme:
        def __get__(self):
            return self.vocab.strings[self.c.orth]
    property text:
        """A unicode representation of the lexeme text.
        RETURNS (unicode): The original verbatim text of the lexeme.
        """
        def __get__(self):
            return self.orth_
    property lower:
        def __get__(self): return self.c.lower
        def __set__(self, int x): self.c.lower = x
--- a/website/docs/api/lexeme.jade
+++ b/website/docs/api/lexeme.jade
@ -2,7 +2,154 @@
 include ../../_includes/_mixins
-p An entry in the vocabulary.
+p
    |  An entry in the vocabulary. A #[code Lexeme] has no string context – it's
    |  a word-type, as opposed to a word token. It therefore has no
    |  part-of-speech tag, dependency parse, or lemma (if lemmatization depends
    |  on the part-of-speech tag).
 +h(2, "init") Lexeme.__init__
    +tag method
 p Create a #[code Lexeme] object.
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code vocab]
        +cell #[code Vocab]
        +cell The parent vocabulary.
    +row
        +cell #[code orth]
        +cell int
        +cell The orth id of the lexeme.
    +footrow
        +cell returns
        +cell #[code Lexeme]
        +cell The newly constructed object.
 +h(2, "set_flag") Lexeme.set_flag
    +tag method
 p Change the value of a boolean flag.
 +aside-code("Example").
    COOL_FLAG = nlp.vocab.add_flag(lambda text: False)
    nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True)
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code flag_id]
        +cell int
        +cell The attribute ID of the flag to set.
    +row
        +cell #[code value]
        +cell bool
        +cell The new value of the flag.
 +h(2, "check_flag") Lexeme.check_flag
    +tag method
 p Check the value of a boolean flag.
 +aside-code("Example").
    is_my_library = lambda text: text in ['spaCy', 'Thinc']
    MY_LIBRARY = nlp.vocab.add_flag(is_my_library)
    assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code flag_id]
        +cell int
        +cell The attribute ID of the flag to query.
    +footrow
        +cell returns
        +cell bool
        +cell The value of the flag.
 +h(2, "similarity") Lexeme.similarity
    +tag method
    +tag-model("vectors")
 p Compute a semantic similarity estimate. Defaults to cosine over vectors.
 +aside-code("Example").
    apple = nlp.vocab[u'apple']
    orange = nlp.vocab[u'orange']
    apple_orange = apple.similarity(orange)
    orange_apple = orange.similarity(apple)
    assert apple_orange == orange_apple
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code other]
        +cell -
        +cell
            |  The object to compare with. By default, accepts #[code Doc],
            |  #[code Span], #[code Token] and #[code Lexeme] objects.
    +footrow
        +cell returns
        +cell float
        +cell A scalar similarity score. Higher is more similar.
 +h(2, "has_vector") Lexeme.has_vector
    +tag property
    +tag-model("vectors")
 p
    |  A boolean value indicating whether a word vector is associated with the
    |  lexeme.
 +aside-code("Example").
    apple = nlp.vocab[u'apple']
    assert apple.has_vector
 +table(["Name", "Type", "Description"])
    +footrow
        +cell returns
        +cell bool
        +cell Whether the lexeme has vector data attached.
 +h(2, "vector") Lexeme.vector
    +tag property
    +tag-model("vectors")
 p A real-valued meaning representation.
 +aside-code("Example").
    apple = nlp.vocab[u'apple']
    assert apple.vector.dtype == 'float32'
    assert apple.vector.shape == (300,)
 +table(["Name", "Type", "Description"])
    +footrow
        +cell returns
        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
        +cell A 1D numpy array representing the lexeme's semantics.
 +h(2, "vector_norm") Lexeme.vector_norm
    +tag property
    +tag-model("vectors")
 p The L2 norm of the lexeme's vector representation.
 +aside-code("Example").
    apple = nlp.vocab[u'apple']
    pasta = nlp.vocab[u'pasta']
    apple.vector_norm # 7.1346845626831055
    pasta.vector_norm # 7.759851932525635
    assert apple.vector_norm != pasta.vector_norm
 +table(["Name", "Type", "Description"])
    +footrow
        +cell returns
        +cell float
        +cell The L2 norm of the vector representation.
 +h(2, "attributes") Attributes
@ -12,6 +159,16 @@ p An entry in the vocabulary.
        +cell #[code Vocab]
        +cell
    +row
        +cell #[code text]
        +cell unicode
        +cell Verbatim text content.
    +row
        +cell #[code lex_id]
        +cell int
        +cell ID of the lexeme's lexical type.
    +row
        +cell #[code lower]
        +cell int
@ -124,116 +281,9 @@ p An entry in the vocabulary.
    +row
        +cell #[code prob]
        +cell float
-        +cell Smoothed log probability estimate of token's type.
+        +cell Smoothed log probability estimate of lexeme's type.
    +row
        +cell #[code sentiment]
        +cell float
-        +cell A scalar value indicating the positivity or negativity of the token.
+        +cell A scalar value indicating the positivity or negativity of the lexeme.
    +row
        +cell #[code lex_id]
        +cell int
        +cell ID of the token's lexical type.
    +row
        +cell #[code text]
        +cell unicode
        +cell Verbatim text content.
 +h(2, "init") Lexeme.__init__
    +tag method
 p Create a #[code Lexeme] object.
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code vocab]
        +cell #[code Vocab]
        +cell The parent vocabulary.
    +row
        +cell #[code orth]
        +cell int
        +cell The orth id of the lexeme.
    +footrow
        +cell returns
        +cell #[code Lexeme]
        +cell The newly constructed object.
 +h(2, "set_flag") Lexeme.set_flag
    +tag method
 p Change the value of a boolean flag.
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code flag_id]
        +cell int
        +cell The attribute ID of the flag to set.
    +row
        +cell #[code value]
        +cell bool
        +cell The new value of the flag.
    +footrow
        +cell returns
        +cell #[code None]
        +cell -
 +h(2, "check_flag") Lexeme.check_flag
    +tag method
 p Check the value of a boolean flag.
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code flag_id]
        +cell int
        +cell The attribute ID of the flag to query.
    +footrow
        +cell returns
        +cell bool
        +cell The value of the flag.
 +h(2, "similarity") Lexeme.similarity
    +tag method
 p Compute a semantic similarity estimate. Defaults to cosine over vectors.
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code other]
        +cell -
        +cell
            |  The object to compare with. By default, accepts #[code Doc],
            |  #[code Span], #[code Token] and #[code Lexeme] objects.
    +footrow
        +cell returns
        +cell float
        +cell A scalar similarity score. Higher is more similar.
 +h(2, "vector") Lexeme.vector
    +tag property
 p A real-valued meaning representation.
 +table(["Name", "Type", "Description"])
    +footrow
        +cell returns
        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
        +cell A real-valued meaning representation.
 +h(2, "has_vector") Lexeme.has_vector
    +tag property
 p A boolean value indicating whether a word vector is associated with the object.
 +table(["Name", "Type", "Description"])
    +footrow
        +cell returns
        +cell bool
        +cell Whether a word vector is associated with the object.