From 1ddbeddca2df65947b900f63ccc04ce4dad152ae Mon Sep 17 00:00:00 2001
From: Ines Montani <ines.montani@gmail.com>
Date: Sat, 22 Jul 2017 15:00:58 +0200
Subject: [PATCH 1/8] Fix typo

---
 website/docs/api/annotation.jade | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/annotation.jade b/website/docs/api/annotation.jade
index 048e69897..ce18878b7 100644
--- a/website/docs/api/annotation.jade
+++ b/website/docs/api/annotation.jade
@@ -40,7 +40,7 @@ p
 +h(2, "pos-tagging") Part-of-speech Tagging
 
 +aside("Tip: Understanding tags")
-    |  You can also use #[code spacy.explain()] to get the escription for the
+    |  You can also use #[code spacy.explain()] to get the description for the
     |  string representation of a tag. For example,
     |  #[code spacy.explain("RB")] will return "adverb".
 

From 23d976ed00abb0d04ef1a35a7d42db0ef3e1942b Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:55:14 +0200
Subject: [PATCH 2/8] Add Doc.cats attribute and missing v2 tag

---
 website/docs/api/doc.jade | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/website/docs/api/doc.jade b/website/docs/api/doc.jade
index f82a26c9e..929985144 100644
--- a/website/docs/api/doc.jade
+++ b/website/docs/api/doc.jade
@@ -558,10 +558,20 @@ p
         +cell The store of lexical types.
 
     +row
-        +cell #[code tensor]
+        +cell #[code tensor] #[+tag-new(2)]
         +cell object
         +cell Container for dense vector representations.
 
+    +row
+        +cell #[code cats] #[+tag-new(2)]
+        +cell dictionary
+        +cell
+            |  Maps either a label to a score for categories applied to the whole
+            |  document, or #[code (start_char, end_char, label)] to a score for
+            |  categories applied to spans. #[code start_char] and #[code end_char]
+            |  should be character offsets, #[code label] can be either a string or
+            |  an integer ID, and #[code score] should be a float.
+
     +row
         +cell #[code user_data]
         +cell -

From d2a7e5b8e52ae8d90d365225ddc3eca918cf316a Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:55:35 +0200
Subject: [PATCH 3/8] Add GoldParse.cats attribute

---
 website/docs/api/goldparse.jade | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/website/docs/api/goldparse.jade b/website/docs/api/goldparse.jade
index 7818912c3..03118343d 100644
--- a/website/docs/api/goldparse.jade
+++ b/website/docs/api/goldparse.jade
@@ -103,6 +103,14 @@ p
         +cell list
         +cell The alignment from gold tokenization to candidate tokenization.
 
+    +row
+        +cell #[code cats] #[+tag-new(2)]
+        +cell list
+        +cell
+            |  Entries in the list should be either a label, or a
+            |  #[code (start, end, label)] triple. The tuple form is used for
+            |  categories applied to spans of the document.
+
 
 +h(2, "util") Utilities
 

From d05ab1b3a0f7ac4e586b4a295d7be8cccd49fa2c Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:55:53 +0200
Subject: [PATCH 4/8] Add text classification to 101 overview and change order

---
 website/docs/usage/spacy-101.jade | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/website/docs/usage/spacy-101.jade b/website/docs/usage/spacy-101.jade
index f657ebf11..a54e5cf66 100644
--- a/website/docs/usage/spacy-101.jade
+++ b/website/docs/usage/spacy-101.jade
@@ -129,13 +129,6 @@ p
             |  locations.
         +cell #[+procon("pro")]
 
-    +row
-        +cell #[strong Rule-based Matching]
-        +cell
-            |  Finding sequences of tokens based on their texts and linguistic
-            |  annotations, similar to regular expressions.
-        +cell #[+procon("con")]
-
     +row
         +cell #[strong Similarity]
         +cell
@@ -143,6 +136,18 @@ p
             |  are to each other.
         +cell #[+procon("pro")]
 
+    +row
+        +cell #[strong Text Classification]
+        +cell Assigning categories or labels to a whole document, or parts of a document.
+        +cell #[+procon("pro")]
+
+    +row
+        +cell #[strong Rule-based Matching]
+        +cell
+            |  Finding sequences of tokens based on their texts and linguistic
+            |  annotations, similar to regular expressions.
+        +cell #[+procon("con")]
+
     +row
         +cell #[strong Training]
         +cell Updating and improving a statistical model's predictions.

From 0fb89dd204a4caec3d48bff3b8d0ec5868415759 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:56:07 +0200
Subject: [PATCH 5/8] Add text classification usage guide template

---
 website/docs/usage/_data.json               | 6 ++++++
 website/docs/usage/text-classification.jade | 5 +++++
 2 files changed, 11 insertions(+)
 create mode 100644 website/docs/usage/text-classification.jade

diff --git a/website/docs/usage/_data.json b/website/docs/usage/_data.json
index 81deeb402..c8373a095 100644
--- a/website/docs/usage/_data.json
+++ b/website/docs/usage/_data.json
@@ -16,6 +16,7 @@
             "Rule-based matching": "rule-based-matching",
             "Adding languages": "adding-languages",
             "Processing pipelines": "language-processing-pipeline",
+            "Text classification": "text-classification",
             "Deep learning": "deep-learning",
             "Production use": "production-use",
             "Training": "training",
@@ -106,6 +107,11 @@
         "next": "production use"
     },
 
+    "text-classification": {
+        "title": "Text classification",
+        "next": "training"
+    },
+
     "production-use": {
         "title": "Production use",
         "next": "training"
diff --git a/website/docs/usage/text-classification.jade b/website/docs/usage/text-classification.jade
new file mode 100644
index 000000000..33e384dbd
--- /dev/null
+++ b/website/docs/usage/text-classification.jade
@@ -0,0 +1,5 @@
+//- 💫 DOCS > USAGE > TEXT CLASSIFICATION
+
+include ../../_includes/_mixins
+
++under-construction

From ab1a4e8b3c2426289ce0d0cb549e95494a53dfca Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:56:25 +0200
Subject: [PATCH 6/8] Add Tensorizer API docs stub

---
 website/docs/api/_data.json      | 7 +++++++
 website/docs/api/tensorizer.jade | 7 +++++++
 2 files changed, 14 insertions(+)
 create mode 100644 website/docs/api/tensorizer.jade

diff --git a/website/docs/api/_data.json b/website/docs/api/_data.json
index a2e447dc8..1102c679a 100644
--- a/website/docs/api/_data.json
+++ b/website/docs/api/_data.json
@@ -17,6 +17,7 @@
             "Span": "span",
             "Language": "language",
             "Tokenizer": "tokenizer",
+            "Tensorizer": "tensorizer",
             "Tagger": "tagger",
             "DependencyParser": "dependencyparser",
             "EntityRecognizer": "entityrecognizer",
@@ -147,6 +148,12 @@
         "source": "spacy/pipeline.pyx"
     },
 
+    "tensorizer": {
+        "title": "Tensorizer",
+        "tag": "class",
+        "source": "spacy/pipeline.pyx"
+    },
+
     "goldparse": {
         "title": "GoldParse",
         "tag": "class",
diff --git a/website/docs/api/tensorizer.jade b/website/docs/api/tensorizer.jade
new file mode 100644
index 000000000..9abd6793b
--- /dev/null
+++ b/website/docs/api/tensorizer.jade
@@ -0,0 +1,7 @@
+//- 💫 DOCS > API > TENSORIZER
+
+include ../../_includes/_mixins
+
+p Add a tensor with position-sensitive meaning representations to a #[code Doc].
+
++under-construction

From f085b88f9d61f701f4b7d937b584dfe6ef4fc35d Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:56:33 +0200
Subject: [PATCH 7/8] Add TextCategorizer API docs stub

---
 website/docs/api/_data.json           |  7 +++++++
 website/docs/api/textcategorizer.jade | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 website/docs/api/textcategorizer.jade

diff --git a/website/docs/api/_data.json b/website/docs/api/_data.json
index 1102c679a..e413f200c 100644
--- a/website/docs/api/_data.json
+++ b/website/docs/api/_data.json
@@ -21,6 +21,7 @@
             "Tagger": "tagger",
             "DependencyParser": "dependencyparser",
             "EntityRecognizer": "entityrecognizer",
+            "TextCategorizer": "textcategorizer",
             "Matcher": "matcher",
             "Lexeme": "lexeme",
             "Vocab": "vocab",
@@ -130,6 +131,12 @@
         "source": "spacy/pipeline.pyx"
     },
 
+    "textcategorizer": {
+        "title": "TextCategorizer",
+        "tag": "class",
+        "source": "spacy/pipeline.pyx"
+    },
+
     "dependencyparser": {
         "title": "DependencyParser",
         "tag": "class",
diff --git a/website/docs/api/textcategorizer.jade b/website/docs/api/textcategorizer.jade
new file mode 100644
index 000000000..926d957f7
--- /dev/null
+++ b/website/docs/api/textcategorizer.jade
@@ -0,0 +1,21 @@
+//- 💫 DOCS > API > TEXTCATEGORIZER
+
+include ../../_includes/_mixins
+
+p
+    |  Add text categorization models to spaCy pipelines. The model supports
+    |  classification with multiple, non-mutually exclusive labels.
+
+p
+    |  You can change the model architecture rather easily, but by default, the
+    |  #[code TextCategorizer] class uses a convolutional neural network to
+    |  assign position-sensitive vectors to each word in the document. This step
+    |  is similar to the #[+api("tensorizer") #[code Tensorizer]] component, but the
+    |  #[code TextCategorizer] uses its own CNN model, to avoid sharing weights
+    |  with the other pipeline components. The document tensor is then
+    |  summarized by concatenating max and mean pooling, and a multilayer
+    |  perceptron is used to predict an output vector of length #[code nr_class],
+    |  before a logistic activation is applied elementwise. The value of each
+    |  output neuron is the probability that some class is present.
+
++under-construction

From ab8ffbaab7f01524c6633e94d3c485704dbd4b2d Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 22 Jul 2017 17:56:51 +0200
Subject: [PATCH 8/8] Add text classification to v2 overview

---
 website/docs/usage/v2.jade | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/website/docs/usage/v2.jade b/website/docs/usage/v2.jade
index bbcfe865f..d9727c62b 100644
--- a/website/docs/usage/v2.jade
+++ b/website/docs/usage/v2.jade
@@ -38,6 +38,7 @@ p
         +item #[+a("#summary") Summary]
         +item #[+a("#features") New features]
         +item #[+a("#features-pipelines") Improved processing pipelines]
+        +item #[+a("#features-text-classification") Text classification]
         +item #[+a("#features-hash-ids") Hash values instead of integer IDs]
         +item #[+a("#features-serializer") Saving, loading and serialization]
         +item #[+a("#features-displacy") displaCy visualizer]
@@ -102,6 +103,26 @@ p
     |  #[strong API:] #[+api("language") #[code Language]]
     |  #[strong Usage:] #[+a("/docs/usage/language-processing-pipeline") Processing text]
 
++h(3, "features-text-classification") Text classification
+
++aside-code("Example").
+    from spacy.lang.en import English
+    nlp = English(pipeline=['tensorizer', 'tagger', 'textcat'])
+
+p
+    |  spaCy v2.0 lets you add text categorization models to spaCy pipelines.
+    |  The model supports classification with multiple, non-mutually exclusive
+    |  labels – so multiple labels can apply at once. You can change the model
+    |  architecture rather easily, but by default, the #[code TextCategorizer]
+    |  class uses a convolutional neural network to assign position-sensitive
+    |  vectors to each word in the document.
+
++infobox
+    |  #[strong API:] #[+api("textcategorizer") #[code TextCategorizer]],
+    |  #[+api("doc#attributes") #[code Doc.cats]],
+    |  #[+api("goldparse#attributes") #[code GoldParse.cats]]#[br]
+    |  #[strong Usage:] #[+a("/docs/usage/text-classification") Text classification]
+
 +h(3, "features-hash-ids") Hash values instead of integer IDs
 
 +aside-code("Example").