From 27de0834b27442711e5aa2b6821a2182cca27b5b Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 20 May 2017 15:13:42 +0200
Subject: [PATCH] Update docstrings and API docs for Lexeme

---
 spacy/lexeme.pyx             |  62 ++++----
 website/docs/api/lexeme.jade | 270 +++++++++++++++++++++--------------
 2 files changed, 197 insertions(+), 135 deletions(-)

diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index effffbac8..a09a57261 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -30,19 +30,16 @@ memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
 
 
 cdef class Lexeme:
-    """
-    An entry in the vocabulary.  A Lexeme has no string context --- it's a
+    """An entry in the vocabulary. A `Lexeme` has no string context – it's a
     word-type, as opposed to a word token.  It therefore has no part-of-speech
     tag, dependency parse, or lemma (lemmatization depends on the part-of-speech
     tag).
     """
     def __init__(self, Vocab vocab, int orth):
-        """
-        Create a Lexeme object.
+        """Create a Lexeme object.
 
-        Arguments:
-            vocab (Vocab): The parent vocabulary
-            orth (int): The orth id of the lexeme.
+        vocab (Vocab): The parent vocabulary.
+        orth (int): The orth id of the lexeme.
         Returns (Lexeme): The newly constructd object.
         """
         self.vocab = vocab
@@ -82,35 +79,28 @@ cdef class Lexeme:
         return self.c.orth
 
     def set_flag(self, attr_id_t flag_id, bint value):
-        """
-        Change the value of a boolean flag.
+        """Change the value of a boolean flag.
 
-        Arguments:
-            flag_id (int): The attribute ID of the flag to set.
-            value (bool): The new value of the flag.
+        flag_id (int): The attribute ID of the flag to set.
+        value (bool): The new value of the flag.
         """
         Lexeme.c_set_flag(self.c, flag_id, value)
 
     def check_flag(self, attr_id_t flag_id):
-        """
-        Check the value of a boolean flag.
+        """Check the value of a boolean flag.
 
-        Arguments:
-            flag_id (int): The attribute ID of the flag to query.
-        Returns (bool): The value of the flag.
+        flag_id (int): The attribute ID of the flag to query.
+        RETURNS (bool): The value of the flag.
         """
         return True if Lexeme.c_check_flag(self.c, flag_id) else False
 
     def similarity(self, other):
-        """
-        Compute a semantic similarity estimate. Defaults to cosine over vectors.
+        """Compute a semantic similarity estimate. Defaults to cosine over
+        vectors.
 
-        Arguments:
-            other:
-                The object to compare with. By default, accepts Doc, Span,
-                Token and Lexeme objects.
-        Returns:
-            score (float): A scalar similarity score. Higher is more similar.
+        other (object): The object to compare with. By default, accepts `Doc`,
+            `Span`, `Token` and `Lexeme` objects.
+        RETURNS (float): A scalar similarity score. Higher is more similar.
         """
         if self.vector_norm == 0 or other.vector_norm == 0:
             return 0.0
@@ -140,6 +130,11 @@ cdef class Lexeme:
         self.orth = self.c.orth
 
     property has_vector:
+        """A boolean value indicating whether a word vector is associated with
+        the object.
+
+        RETURNS (bool): Whether a word vector is associated with the object.
+        """
         def __get__(self):
             cdef int i
             for i in range(self.vocab.vectors_length):
@@ -149,6 +144,10 @@ cdef class Lexeme:
                 return False
 
     property vector_norm:
+        """The L2 norm of the lexeme's vector representation.
+
+        RETURNS (float): The L2 norm of the vector representation.
+        """
         def __get__(self):
             return self.c.l2_norm
 
@@ -156,6 +155,11 @@ cdef class Lexeme:
             self.c.l2_norm = value
 
     property vector:
+        """A real-valued meaning representation.
+
+        RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
+            representing the lexeme's semantics.
+        """
         def __get__(self):
             cdef int length = self.vocab.vectors_length
             if length == 0:
@@ -196,6 +200,14 @@ cdef class Lexeme:
         def __get__(self):
             return self.vocab.strings[self.c.orth]
 
+    property text:
+        """A unicode representation of the lexeme text.
+
+        RETURNS (unicode): The original verbatim text of the lexeme.
+        """
+        def __get__(self):
+            return self.orth_
+
     property lower:
         def __get__(self): return self.c.lower
         def __set__(self, int x): self.c.lower = x
diff --git a/website/docs/api/lexeme.jade b/website/docs/api/lexeme.jade
index c23d7a27a..f23d37a94 100644
--- a/website/docs/api/lexeme.jade
+++ b/website/docs/api/lexeme.jade
@@ -2,7 +2,154 @@
 
 include ../../_includes/_mixins
 
-p An entry in the vocabulary.
+p
+    |  An entry in the vocabulary. A #[code Lexeme] has no string context – it's
+    |  a word-type, as opposed to a word token. It therefore has no
+    |  part-of-speech tag, dependency parse, or lemma (lemmatization depends
+    |  on the part-of-speech tag).
+
++h(2, "init") Lexeme.__init__
+    +tag method
+
+p Create a #[code Lexeme] object.
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code vocab]
+        +cell #[code Vocab]
+        +cell The parent vocabulary.
+
+    +row
+        +cell #[code orth]
+        +cell int
+        +cell The orth id of the lexeme.
+
+    +footrow
+        +cell returns
+        +cell #[code Lexeme]
+        +cell The newly constructed object.
+
++h(2, "set_flag") Lexeme.set_flag
+    +tag method
+
+p Change the value of a boolean flag.
+
++aside-code("Example").
+    COOL_FLAG = nlp.vocab.add_flag(lambda text: False)
+    nlp.vocab[u'spaCy'].set_flag(COOL_FLAG, True)
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code flag_id]
+        +cell int
+        +cell The attribute ID of the flag to set.
+
+    +row
+        +cell #[code value]
+        +cell bool
+        +cell The new value of the flag.
+
++h(2, "check_flag") Lexeme.check_flag
+    +tag method
+
+p Check the value of a boolean flag.
+
++aside-code("Example").
+    is_my_library = lambda text: text in ['spaCy', 'Thinc']
+    MY_LIBRARY = nlp.vocab.add_flag(is_my_library)
+    assert nlp.vocab[u'spaCy'].check_flag(MY_LIBRARY) == True
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code flag_id]
+        +cell int
+        +cell The attribute ID of the flag to query.
+
+    +footrow
+        +cell returns
+        +cell bool
+        +cell The value of the flag.
+
++h(2, "similarity") Lexeme.similarity
+    +tag method
+    +tag-model("vectors")
+
+p Compute a semantic similarity estimate. Defaults to cosine over vectors.
+
++aside-code("Example").
+    apple = nlp.vocab[u'apple']
+    orange = nlp.vocab[u'orange']
+    apple_orange = apple.similarity(orange)
+    orange_apple = orange.similarity(apple)
+    assert apple_orange == orange_apple
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code other]
+        +cell -
+        +cell
+            |  The object to compare with. By default, accepts #[code Doc],
+            |  #[code Span], #[code Token] and #[code Lexeme] objects.
+
+    +footrow
+        +cell returns
+        +cell float
+        +cell A scalar similarity score. Higher is more similar.
+
+
++h(2, "has_vector") Lexeme.has_vector
+    +tag property
+    +tag-model("vectors")
+
+p
+    |  A boolean value indicating whether a word vector is associated with the
+    |  lexeme.
+
++aside-code("Example").
+    apple = nlp.vocab[u'apple']
+    assert apple.has_vector
+
++table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell bool
+        +cell Whether the lexeme has vector data attached.
+
++h(2, "vector") Lexeme.vector
+    +tag property
+    +tag-model("vectors")
+
+p A real-valued meaning representation.
+
++aside-code("Example").
+    apple = nlp.vocab[u'apple']
+    assert apple.vector.dtype == 'float32'
+    assert apple.vector.shape == (300,)
+
++table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell A 1D numpy array representing the lexeme's semantics.
+
++h(2, "vector_norm") Lexeme.vector_norm
+    +tag property
+    +tag-model("vectors")
+
+p The L2 norm of the lexeme's vector representation.
+
++aside-code("Example").
+    apple = nlp.vocab[u'apple']
+    pasta = nlp.vocab[u'pasta']
+    apple.vector_norm # 7.1346845626831055
+    pasta.vector_norm # 7.759851932525635
+    assert apple.vector_norm != pasta.vector_norm
+
++table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell float
+        +cell The L2 norm of the vector representation.
 
 +h(2, "attributes") Attributes
 
@@ -12,6 +159,16 @@ p An entry in the vocabulary.
         +cell #[code Vocab]
         +cell
 
+    +row
+        +cell #[code text]
+        +cell unicode
+        +cell Verbatim text content.
+
+    +row
+        +cell #[code lex_id]
+        +cell int
+        +cell ID of the lexeme's lexical type.
+
     +row
         +cell #[code lower]
         +cell int
@@ -124,116 +281,9 @@ p An entry in the vocabulary.
     +row
         +cell #[code prob]
         +cell float
-        +cell Smoothed log probability estimate of token's type.
+        +cell Smoothed log probability estimate of lexeme's type.
 
     +row
         +cell #[code sentiment]
         +cell float
-        +cell A scalar value indicating the positivity or negativity of the token.
-    +row
-        +cell #[code lex_id]
-        +cell int
-        +cell ID of the token's lexical type.
-
-    +row
-        +cell #[code text]
-        +cell unicode
-        +cell Verbatim text content.
-
-+h(2, "init") Lexeme.__init__
-    +tag method
-
-p Create a #[code Lexeme] object.
-
-+table(["Name", "Type", "Description"])
-    +row
-        +cell #[code vocab]
-        +cell #[code Vocab]
-        +cell The parent vocabulary.
-
-    +row
-        +cell #[code orth]
-        +cell int
-        +cell The orth id of the lexeme.
-
-    +footrow
-        +cell returns
-        +cell #[code Lexeme]
-        +cell The newly constructed object.
-
-+h(2, "set_flag") Lexeme.set_flag
-    +tag method
-
-p Change the value of a boolean flag.
-
-+table(["Name", "Type", "Description"])
-    +row
-        +cell #[code flag_id]
-        +cell int
-        +cell The attribute ID of the flag to set.
-
-    +row
-        +cell #[code value]
-        +cell bool
-        +cell The new value of the flag.
-
-    +footrow
-        +cell returns
-        +cell #[code None]
-        +cell -
-
-+h(2, "check_flag") Lexeme.check_flag
-    +tag method
-
-p Check the value of a boolean flag.
-
-+table(["Name", "Type", "Description"])
-    +row
-        +cell #[code flag_id]
-        +cell int
-        +cell The attribute ID of the flag to query.
-
-    +footrow
-        +cell returns
-        +cell bool
-        +cell The value of the flag.
-
-+h(2, "similarity") Lexeme.similarity
-    +tag method
-
-p Compute a semantic similarity estimate. Defaults to cosine over vectors.
-
-+table(["Name", "Type", "Description"])
-    +row
-        +cell #[code other]
-        +cell -
-        +cell
-            |  The object to compare with. By default, accepts #[code Doc],
-            |  #[code Span], #[code Token] and #[code Lexeme] objects.
-
-    +footrow
-        +cell returns
-        +cell float
-        +cell A scalar similarity score. Higher is more similar.
-
-+h(2, "vector") Lexeme.vector
-    +tag property
-
-p A real-valued meaning representation.
-
-+table(["Name", "Type", "Description"])
-    +footrow
-        +cell returns
-        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
-        +cell A real-valued meaning representation.
-
-+h(2, "has_vector") Lexeme.has_vector
-    +tag property
-
-p A boolean value indicating whether a word vector is associated with the object.
-
-+table(["Name", "Type", "Description"])
-    +footrow
-        +cell returns
-        +cell bool
-        +cell Whether a word vector is associated with the object.
+        +cell A scalar value indicating the positivity or negativity of the lexeme.