From e0f9ccdaa317859d5b675ad5f404b93c16af8167 Mon Sep 17 00:00:00 2001
From: ines
Date: Sun, 28 May 2017 23:26:13 +0200
Subject: [PATCH] Update texts and rename vectorizer to tensorizer

---
 website/assets/img/docs/pipeline.svg             |  2 +-
 website/docs/usage/_spacy-101/_pipelines.jade    | 15 ++++++++++-----
 website/docs/usage/_spacy-101/_vocab.jade        |  8 ++++----
 .../docs/usage/language-processing-pipeline.jade | 12 ++++++------
 website/docs/usage/spacy-101.jade                |  6 +++---
 website/docs/usage/v2.jade                       |  4 +++-
 6 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/website/assets/img/docs/pipeline.svg b/website/assets/img/docs/pipeline.svg
index 8f9dc6dac..9c34636dc 100644
--- a/website/assets/img/docs/pipeline.svg
+++ b/website/assets/img/docs/pipeline.svg
@@ -18,7 +18,7 @@
     tokenizer
-    vectorizer
+    tensorizer
diff --git a/website/docs/usage/_spacy-101/_pipelines.jade b/website/docs/usage/_spacy-101/_pipelines.jade
index edf553805..654ca86e4 100644
--- a/website/docs/usage/_spacy-101/_pipelines.jade
+++ b/website/docs/usage/_spacy-101/_pipelines.jade
@@ -6,7 +6,7 @@ p
     | different steps – this is also referred to as the
     | #[strong processing pipeline]. The pipeline used by the
     | #[+a("/docs/usage/models") default models] consists of a
-    | vectorizer, a tagger, a parser and an entity recognizer. Each pipeline
+    | tensorizer, a tagger, a parser and an entity recognizer. Each pipeline
     | component returns the processed #[code Doc], which is then passed on to
     | the next component.
@@ -21,21 +21,24 @@ p
     | #[strong Creates:] Objects, attributes and properties modified and set by
     | the component.

-+table(["Name", "Component", "Creates"])
++table(["Name", "Component", "Creates", "Description"])
     +row
         +cell tokenizer
         +cell #[+api("tokenizer") #[code Tokenizer]]
         +cell #[code Doc]
+        +cell Segment text into tokens.

     +row("divider")
-        +cell vectorizer
-        +cell #[code Vectorizer]
+        +cell tensorizer
+        +cell #[code TokenVectorEncoder]
         +cell #[code Doc.tensor]
+        +cell Create feature representation tensor for #[code Doc].

     +row
         +cell tagger
         +cell #[+api("tagger") #[code Tagger]]
         +cell #[code Doc[i].tag]
+        +cell Assign part-of-speech tags.

     +row
         +cell parser
@@ -43,11 +46,13 @@ p
         +cell
             | #[code Doc[i].head], #[code Doc[i].dep], #[code Doc.sents],
             | #[code Doc.noun_chunks]
+        +cell Assign dependency labels.

     +row
         +cell ner
         +cell #[+api("entityrecognizer") #[code EntityRecognizer]]
         +cell #[code Doc.ents], #[code Doc[i].ent_iob], #[code Doc[i].ent_type]
+        +cell Detect and label named entities.

 p
     | The processing pipeline always #[strong depends on the statistical model]
@@ -57,4 +62,4 @@ p
     | in its meta data, as a simple list containing the component names:

 +code(false, "json").
-    "pipeline": ["vectorizer", "tagger", "parser", "ner"]
+    "pipeline": ["tensorizer", "tagger", "parser", "ner"]
diff --git a/website/docs/usage/_spacy-101/_vocab.jade b/website/docs/usage/_spacy-101/_vocab.jade
index 45a16af80..e59518a25 100644
--- a/website/docs/usage/_spacy-101/_vocab.jade
+++ b/website/docs/usage/_spacy-101/_vocab.jade
@@ -102,8 +102,8 @@ p
     assert doc.vocab.strings[3197928453018144401L] == u'coffee' # 👍

 p
-    | If the doc's vocabulary doesn't contain a hash for "coffee", spaCy will
+    | If the vocabulary doesn't contain a hash for "coffee", spaCy will
     | throw an error. So you either need to add it manually, or initialise the
-    | new #[code Doc] with the shared vocab. To prevent this problem, spaCy
-    | will ususally export the vocab when you save a #[code Doc] or #[code nlp]
-    | object.
+    | new #[code Doc] with the shared vocabulary. To prevent this problem,
+    | spaCy will also export the #[code Vocab] when you save a
+    | #[code Doc] or #[code nlp] object.
diff --git a/website/docs/usage/language-processing-pipeline.jade b/website/docs/usage/language-processing-pipeline.jade
index ffad01ead..e4df4bba5 100644
--- a/website/docs/usage/language-processing-pipeline.jade
+++ b/website/docs/usage/language-processing-pipeline.jade
@@ -10,7 +10,7 @@ include _spacy-101/_pipelines

 p
     | spaCy makes it very easy to create your own pipelines consisting of
-    | reusable components – this includes spaCy's default vectorizer, tagger,
+    | reusable components – this includes spaCy's default tensorizer, tagger,
     | parser and entity recognizer, but also your own custom processing
     | functions. A pipeline component can be added to an already existing
     | #[code nlp] object, specified when initialising a #[code Language] class,
@@ -56,7 +56,7 @@ p

 p
     | ... the model tells spaCy to use the pipeline
-    | #[code ["vectorizer", "tagger", "parser", "ner"]]. spaCy will then look
+    | #[code ["tensorizer", "tagger", "parser", "ner"]]. spaCy will then look
     | up each string in its internal factories registry and initialise the
     | individual components. It'll then load #[code spacy.lang.en.English],
     | pass it the path to the model's data directory, and return it for you
@@ -230,7 +230,7 @@ p

 p
     | Let's say you have trained your own document sentiment model on English
     | text. After tokenization, you want spaCy to first execute the
-    | #[strong default vectorizer], followed by a custom
+    | #[strong default tensorizer], followed by a custom
     | #[strong sentiment component] that adds a #[code .sentiment]
     | property to the #[code Doc], containing your model's sentiment prediction.
@@ -293,13 +293,13 @@ p
         "lang": "en",
         "version": "1.0.0",
         "spacy_version": ">=2.0.0,<3.0.0",
-        "pipeline": ["vectorizer", "sentiment"]
+        "pipeline": ["tensorizer", "sentiment"]
     }

 p
     | When you load your new model, spaCy will call the model's #[code load()]
     | method. This will return a #[code Language] object with a pipeline
-    | containing the default vectorizer, and the sentiment component returned
+    | containing the default tensorizer, and the sentiment component returned
     | by your custom #[code "sentiment"] factory.

 +code.
@@ -324,7 +324,7 @@ p

 +code.
     nlp = spacy.load('en', disable=['parser', 'tagger'])
-    nlp = English().from_disk('/model', disable=['vectorizer', 'ner'])
+    nlp = English().from_disk('/model', disable=['tensorizer', 'ner'])
     doc = nlp(u"I don't want parsed", disable=['parser'])

 p
diff --git a/website/docs/usage/spacy-101.jade b/website/docs/usage/spacy-101.jade
index 49ba1e64c..f3ce0ad83 100644
--- a/website/docs/usage/spacy-101.jade
+++ b/website/docs/usage/spacy-101.jade
@@ -303,9 +303,9 @@ include _spacy-101/_training

 p
     | We're very happy to see the spaCy community grow and include a mix of
     | people from all kinds of different backgrounds – computational
-    | linguistics, data science, deep learning and research. If you'd like to
-    | get involved, below are some answers to the most important questions and
-    | resources for further reading.
+    | linguistics, data science, deep learning, research and more. If you'd
+    | like to get involved, below are some answers to the most important
+    | questions and resources for further reading.

 +h(3, "faq-help-code") Help, my code isn't working!
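
Note: below is a minimal sketch of the custom sentiment component described in the language-processing-pipeline changes above. The toy scoring logic and the list-style pipeline manipulation are illustrative assumptions, not part of this patch:

+code.
    import spacy

    def sentiment_component(doc):
        # toy scorer: a real model would assign a learned score here
        positive = sum(token.lower_ in (u'good', u'great', u'love') for token in doc)
        doc.sentiment = float(positive) / max(len(doc), 1)
        return doc

    nlp = spacy.load('en')
    nlp.pipeline.append(sentiment_component)  # run after the default components
    doc = nlp(u'I love coffee')
    assert doc.sentiment > 0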
diff --git a/website/docs/usage/v2.jade b/website/docs/usage/v2.jade
index 7b9f282a6..944ed56f5 100644
--- a/website/docs/usage/v2.jade
+++ b/website/docs/usage/v2.jade
@@ -67,7 +67,9 @@ p
     | mapping #[strong no longer depends on the vocabulary state], making a lot
     | of workflows much simpler, especially during training. Unlike integer IDs
     | in spaCy v1.x, hash values will #[strong always match] – even across
-    | models. Strings can now be added explicitly using the new #[+api("stringstore#add") #[code StringStore.add]] method.
+    | models. Strings can now be added explicitly using the new
+    | #[+api("stringstore#add") #[code StringStore.add]] method. A token's hash
+    | is available via #[code token.orth].

 +infobox
     | #[strong API:] #[+api("stringstore") #[code StringStore]]
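
Note: the hash behaviour described in the v2.jade change above can be illustrated with a minimal sketch (this assumes an English model is installed):

+code.
    import spacy

    nlp = spacy.load('en')
    doc = nlp(u'I love coffee')
    # token.orth is the hash of the token's verbatim text
    assert doc[2].orth == nlp.vocab.strings[u'coffee']
    # StringStore.add returns the hash, which stays stable across models
    coffee_hash = nlp.vocab.strings.add(u'coffee')
    assert nlp.vocab.strings[coffee_hash] == u'coffee'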