From af348025ecbe0229b016e341c1c9dc43625957f4 Mon Sep 17 00:00:00 2001
From: ines
Date: Tue, 23 May 2017 23:19:09 +0200
Subject: [PATCH] Update word vectors & similarity workflow

---
 .../docs/usage/word-vectors-similarities.jade | 75 +++++++++----------
 1 file changed, 36 insertions(+), 39 deletions(-)

diff --git a/website/docs/usage/word-vectors-similarities.jade b/website/docs/usage/word-vectors-similarities.jade
index 3cc0a67a8..00e200f59 100644
--- a/website/docs/usage/word-vectors-similarities.jade
+++ b/website/docs/usage/word-vectors-similarities.jade
@@ -6,46 +6,40 @@ p
     | Dense, real valued vectors representing distributional similarity
     | information are now a cornerstone of practical NLP. The most common way
     | to train these vectors is the #[+a("https://en.wikipedia.org/wiki/Word2vec") word2vec]
-    | family of algorithms.
-
-+aside("Tip")
-    | If you need to train a word2vec model, we recommend the implementation in
-    | the Python library #[+a("https://radimrehurek.com/gensim/") Gensim].
-
-p
-    | spaCy makes using word vectors very easy. The
-    | #[+api("lexeme") #[code Lexeme]], #[+api("token") #[code Token]],
-    | #[+api("span") #[code Span]] and #[+api("doc") #[code Doc]] classes all
-    | have a #[code .vector] property, which is a 1-dimensional numpy array of
-    | 32-bit floats:
-
-+code.
-    import numpy
-
-    apples, and_, oranges = nlp(u'apples and oranges')
-    print(apples.vector.shape)
-    # (1,)
-    apples.similarity(oranges)
-
-p
-    | By default, #[code Token.vector] returns the vector for its underlying
-    | lexeme, while #[code Doc.vector] and #[code Span.vector] return an
-    | average of the vectors of their tokens. You can customize these
-    | behaviours by modifying the #[code doc.user_hooks],
-    | #[code doc.user_span_hooks] and #[code doc.user_token_hooks]
-    | dictionaries.
-
-+aside-code("Example").
-    # TODO
-
-p
-    | The default English model installs vectors for one million vocabulary
-    | entries, using the 300-dimensional vectors trained on the Common Crawl
+    | family of algorithms. The default
+    | #[+a("/docs/usage/models#available") English model] installs
+    | 300-dimensional vectors trained on the Common Crawl
     | corpus using the #[+a("http://nlp.stanford.edu/projects/glove/") GloVe]
     | algorithm. The GloVe common crawl vectors have become a de facto
     | standard for practical NLP.
 
-+aside-code("Example").
++aside("Tip: Training a word2vec model")
+    | If you need to train a word2vec model, we recommend the implementation in
+    | the Python library #[+a("https://radimrehurek.com/gensim/") Gensim].
+
++h(2, "101") Similarity and word vectors 101
+    +tag-model("vectors")
+
+include _spacy-101/_similarity
+include _spacy-101/_word-vectors
+
+
++h(2, "custom") Customising word vectors
+
+p
+    | By default, #[+api("token#vector") #[code Token.vector]] returns the
+    | vector for its underlying #[+api("lexeme") #[code Lexeme]], while
+    | #[+api("doc#vector") #[code Doc.vector]] and
+    | #[+api("span#vector") #[code Span.vector]] return an average of the
+    | vectors of their tokens.
+
+p
+    | You can customize these
+    | behaviours by modifying the #[code doc.user_hooks],
+    | #[code doc.user_span_hooks] and #[code doc.user_token_hooks]
+    | dictionaries.
+
++code("Example").
     # TODO
 
 p
@@ -56,11 +50,14 @@ p
     | can use the #[code vocab.vectors_from_bin_loc()] method, which accepts a
     | path to a binary file written by #[code vocab.dump_vectors()].
 
-+aside-code("Example").
++code("Example").
     # TODO
 
 p
-    | You can also load vectors from memory, by writing to the #[code lexeme.vector]
-    | property. If the vectors you are writing are of different dimensionality
+    | You can also load vectors from memory by writing to the
+    | #[+api("lexeme#vector") #[code Lexeme.vector]] property. If the vectors
+    | you are writing are of different dimensionality
     | from the ones currently loaded, you should first call
     | #[code vocab.resize_vectors(new_size)].
+
++h(2, "similarity") Similarity
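
The +code("Example") blocks in this patch are left as "# TODO". A minimal sketch of what the customisation example might contain, assuming the spaCy 1.x API described in the prose above: doc.user_hooks and Doc.vector come from the text, while the averaging hook, the 'en' shortcut and the 300-dimensional fallback are illustrative assumptions rather than part of the patch.

+code("Example").
    import numpy
    import spacy

    nlp = spacy.load('en')
    doc = nlp(u'apples and oranges')

    def vector_ignoring_oov(doc):
        # hypothetical hook: average only the tokens that actually have a
        # vector, instead of averaging over every token in the doc
        vectors = [token.vector for token in doc if token.has_vector]
        if not vectors:
            # assumed dimensionality of the bundled GloVe vectors
            return numpy.zeros((300,), dtype='float32')
        return numpy.mean(vectors, axis=0)

    # overriding the 'vector' entry in doc.user_hooks changes what
    # doc.vector returns for this document
    doc.user_hooks['vector'] = vector_ignoring_oov
    print(doc.vector.shape)  # (300,)

The same pattern applies to doc.user_span_hooks and doc.user_token_hooks, which control what Span.vector and Token.vector return.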
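For the vector-loading example, a similarly hedged sketch using only the methods named in the prose above (vocab.dump_vectors(), vocab.vectors_from_bin_loc(), vocab.resize_vectors() and the writable Lexeme.vector property); the file path, the 128-dimension resize and the random replacement vector are purely illustrative.

+code("Example").
    import numpy
    import spacy

    nlp = spacy.load('en')

    # round-trip the currently loaded vectors through a binary file
    nlp.vocab.dump_vectors('/tmp/vectors.bin')           # hypothetical path
    nlp.vocab.vectors_from_bin_loc('/tmp/vectors.bin')

    # or write vectors into the vocab from memory; resize first if the new
    # vectors differ in dimensionality from the ones currently loaded
    nlp.vocab.resize_vectors(128)
    nlp.vocab[u'apple'].vector = numpy.random.uniform(-1, 1, (128,)).astype('float32')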