diff --git a/website/docs/api/doc.jade b/website/docs/api/doc.jade
index bb56331f7..9b8392fcb 100644
--- a/website/docs/api/doc.jade
+++ b/website/docs/api/doc.jade
@@ -278,7 +278,8 @@ p Loads state from a directory. Modifies the object in place and returns it.
 
 +aside-code("Example").
     from spacy.tokens import Doc
-    doc = Doc().from_disk('/path/to/doc')
+    from spacy.vocab import Vocab
+    doc = Doc(Vocab()).from_disk('/path/to/doc')
 
 +table(["Name", "Type", "Description"])
     +row
diff --git a/website/docs/api/lexeme.jade b/website/docs/api/lexeme.jade
index dba6fdf59..a0487be9b 100644
--- a/website/docs/api/lexeme.jade
+++ b/website/docs/api/lexeme.jade
@@ -212,62 +212,74 @@ p The L2 norm of the lexeme's vector representation.
     +row
         +cell #[code is_alpha]
         +cell bool
-        +cell Equivalent to #[code word.orth_.isalpha()].
+        +cell
+            | Does the lexeme consist of alphabetic characters? Equivalent to
+            | #[code lexeme.text.isalpha()].
 
     +row
         +cell #[code is_ascii]
         +cell bool
-        +cell Equivalent to #[code [any(ord(c) >= 128 for c in word.orth_)]].
+        +cell
+            | Does the lexeme consist of ASCII characters? Equivalent to
+            | #[code all(ord(c) < 128 for c in lexeme.text)].
 
     +row
         +cell #[code is_digit]
         +cell bool
-        +cell Equivalent to #[code word.orth_.isdigit()].
+        +cell
+            | Does the lexeme consist of digits? Equivalent to
+            | #[code lexeme.text.isdigit()].
 
     +row
         +cell #[code is_lower]
         +cell bool
-        +cell Equivalent to #[code word.orth_.islower()].
+        +cell
+            | Is the lexeme in lowercase? Equivalent to
+            | #[code lexeme.text.islower()].
 
     +row
         +cell #[code is_title]
         +cell bool
-        +cell Equivalent to #[code word.orth_.istitle()].
+        +cell
+            | Is the lexeme in titlecase? Equivalent to
+            | #[code lexeme.text.istitle()].
 
     +row
         +cell #[code is_punct]
         +cell bool
-        +cell Equivalent to #[code word.orth_.ispunct()].
+        +cell Is the lexeme punctuation?
 
     +row
         +cell #[code is_space]
         +cell bool
-        +cell Equivalent to #[code word.orth_.isspace()].
+        +cell
+            | Does the lexeme consist of whitespace characters? Equivalent to
+            | #[code lexeme.text.isspace()].
 
     +row
         +cell #[code like_url]
         +cell bool
-        +cell Does the word resemble a URL?
+        +cell Does the lexeme resemble a URL?
 
     +row
         +cell #[code like_num]
         +cell bool
-        +cell Does the word represent a number? e.g. “10.9”, “10”, “ten”, etc.
+        +cell Does the lexeme represent a number? e.g. "10.9", "10", "ten", etc.
 
     +row
         +cell #[code like_email]
         +cell bool
-        +cell Does the word resemble an email address?
+        +cell Does the lexeme resemble an email address?
 
     +row
         +cell #[code is_oov]
         +cell bool
-        +cell Is the word out-of-vocabulary?
+        +cell Is the lexeme out-of-vocabulary?
 
     +row
         +cell #[code is_stop]
         +cell bool
-        +cell Is the word part of a "stop list"?
+        +cell Is the lexeme part of a "stop list"?
 
     +row
         +cell #[code lang]
diff --git a/website/docs/api/matcher.jade b/website/docs/api/matcher.jade
index 541cceeda..e2972fdc0 100644
--- a/website/docs/api/matcher.jade
+++ b/website/docs/api/matcher.jade
@@ -5,13 +5,14 @@ include ../../_includes/_mixins
 p Match sequences of tokens, based on pattern rules.
 
 +infobox("⚠️ Deprecation note")
-    | As of spaCy 2.0, #[code Matcher.add_pattern] and #[code Matcher.add_entity]
-    | are deprecated and have been replaced with a simpler
-    | #[+api("matcher#add") #[code Matcher.add]] that lets you add a list of
-    | patterns and a callback for a given match ID. #[code Matcher.get_entity]
-    | is now called #[+api("matcher#get") #[code matcher.get]].
-    | #[code Matcher.load] (not useful, as it didn't allow specifying callbacks),
-    | and #[code Matcher.has_entity] (now redundant) have been removed.
+    .o-block
+        | As of spaCy 2.0, #[code Matcher.add_pattern] and #[code Matcher.add_entity]
+        | are deprecated and have been replaced with a simpler
+        | #[+api("matcher#add") #[code Matcher.add]] that lets you add a list of
+        | patterns and a callback for a given match ID. #[code Matcher.get_entity]
+        | is now called #[+api("matcher#get") #[code matcher.get]].
+        | #[code Matcher.load] (not useful, as it didn't allow specifying callbacks),
+        | and #[code Matcher.has_entity] (now redundant) have been removed.
 
 +h(2, "init") Matcher.__init__
     +tag method
@@ -56,17 +57,6 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
     doc = nlp(u'hello world!')
     matches = matcher(doc)
 
-+infobox("Important note")
-    | By default, the matcher #[strong does not perform any action] on matches,
-    | like tagging matched phrases with entity types. Instead, actions need to
-    | be specified when #[strong adding patterns or entities], by
-    | passing in a callback function as the #[code on_match] argument on
-    | #[+api("matcher#add") #[code add]]. This allows you to define custom
-    | actions per pattern within the same matcher. For example, you might only
-    | want to merge some entity types, and set custom flags for other matched
-    | patterns. For more details and examples, see the usage workflow on
-    | #[+a("/docs/usage/rule-based-matching") rule-based matching].
-
 +table(["Name", "Type", "Description"])
     +row
         +cell #[code doc]
@@ -81,6 +71,17 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
         | matches. A match tuple describes a span #[code doc[start:end]].
         | The #[code match_id] is the ID of the added match pattern.
 
++infobox("Important note")
+    | By default, the matcher #[strong does not perform any action] on matches,
+    | like tagging matched phrases with entity types. Instead, actions need to
+    | be specified when #[strong adding patterns or entities], by
+    | passing in a callback function as the #[code on_match] argument on
+    | #[+api("matcher#add") #[code add]]. This allows you to define custom
+    | actions per pattern within the same matcher. For example, you might only
+    | want to merge some entity types, and set custom flags for other matched
+    | patterns. For more details and examples, see the usage workflow on
+    | #[+a("/docs/usage/rule-based-matching") rule-based matching].
+
 +h(2, "pipe") Matcher.pipe
     +tag method
 
@@ -201,6 +202,20 @@ p
         | Match pattern. A pattern consists of a list of dicts, where each
         | dict describes a token.
 
++infobox("⚠️ Deprecation note")
+    .o-block
+        | As of spaCy 2.0, #[code Matcher.add_pattern] and #[code Matcher.add_entity]
+        | are deprecated and have been replaced with a simpler
+        | #[+api("matcher#add") #[code Matcher.add]] that lets you add a list of
+        | patterns and a callback for a given match ID.
+
+    +code-new.
+        matcher.add('GoogleNow', merge_phrases, [{ORTH: 'Google'}, {ORTH: 'Now'}])
+
+    +code-old.
+        matcher.add_entity('GoogleNow', on_match=merge_phrases)
+        matcher.add_pattern('GoogleNow', [{ORTH: 'Google'}, {ORTH: 'Now'}])
+
 +h(2, "remove") Matcher.remove
     +tag method
     +tag-new(2)
diff --git a/website/docs/api/spacy.jade b/website/docs/api/spacy.jade
index 6ad88c1a8..f2fcfde2c 100644
--- a/website/docs/api/spacy.jade
+++ b/website/docs/api/spacy.jade
@@ -20,12 +20,7 @@ p
     nlp = spacy.load('/path/to/en') # unicode path
     nlp = spacy.load(Path('/path/to/en')) # pathlib Path
 
-+infobox("⚠️ Deprecation note")
-    | As of spaCy 2.0, the #[code path] keyword argument is deprecated. spaCy
-    | will also raise an error if no model could be loaded and never just
-    | return an empty #[code Language] object. If you need a blank language,
-    | you need to import it explicitly (#[code from spacy.lang.en import English])
-    | or use #[+api("util#get_lang_class") #[code util.get_lang_class]].
+    nlp = spacy.load('en', disable=['parser', 'tagger'])
 
 +table(["Name", "Type", "Description"])
     +row
@@ -34,15 +29,28 @@ p
         +cell Model to load, i.e. shortcut link, package name or path.
 
     +row
-        +cell #[code **overrides]
-        +cell -
-        +cell Override or disable components.
+        +cell #[code disable]
+        +cell list
+        +cell
+            | Names of pipeline components to
+            | #[+a("/docs/usage/language-processing-pipeline#disabling") disable].
 
     +footrow
         +cell returns
         +cell #[code Language]
         +cell A #[code Language] object with the loaded model.
 
++infobox("⚠️ Deprecation note")
+    .o-block
+        | As of spaCy 2.0, the #[code path] keyword argument is deprecated. spaCy
+        | will also raise an error if no model could be loaded and never just
+        | return an empty #[code Language] object. If you need a blank language,
+        | you need to import it explicitly (#[code from spacy.lang.en import English])
+        | or use #[+api("util#get_lang_class") #[code util.get_lang_class]].
+
+    +code-new nlp = spacy.load('/model')
+    +code-old nlp = spacy.load('en', path='/model')
+
 +h(2, "info") spacy.info
     +tag function
 
@@ -98,3 +106,37 @@ p
     +footrow
         +cell returns
         +cell unicode
         +cell The explanation, or #[code None] if not found in the glossary.
+
++h(2, "set_factory") spacy.set_factory
+    +tag function
+    +tag-new(2)
+
+p
+    | Set a factory that returns a custom
+    | #[+a("/docs/usage/language-processing-pipeline") processing pipeline]
+    | component. Factories are useful for creating stateful components, especially ones which depend on shared data.
+
++aside-code("Example").
+    def my_factory(vocab):
+        def my_component(doc):
+            return doc
+        return my_component
+
+    spacy.set_factory('my_factory', my_factory)
+    nlp = Language(pipeline=['my_factory'])
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code factory_id]
+        +cell unicode
+        +cell
+            | Unique name of factory. If added to a new pipeline, spaCy will
+            | look up the factory for this ID and use it to create the
+            | component.
+
+    +row
+        +cell #[code factory]
+        +cell callable
+        +cell
+            | Callable that takes a #[code Vocab] object and returns a pipeline
+            | component.
diff --git a/website/docs/api/stringstore.jade b/website/docs/api/stringstore.jade
index f684d48ad..f09352c79 100644
--- a/website/docs/api/stringstore.jade
+++ b/website/docs/api/stringstore.jade
@@ -119,7 +119,7 @@ p Save the current state to a directory.
         | A path to a directory, which will be created if it doesn't exist.
         | Paths may be either strings or #[code Path]-like objects.
 
-+h(2, "from_disk") Tokenizer.from_disk
++h(2, "from_disk") StringStore.from_disk
     +tag method
     +tag-new(2)
 
@@ -139,10 +139,10 @@ p Loads state from a directory. Modifies the object in place and returns it.
 
     +footrow
         +cell returns
-        +cell #[code Tokenizer]
-        +cell The modified #[code Tokenizer] object.
+        +cell #[code StringStore]
+        +cell The modified #[code StringStore] object.
 
-+h(2, "to_bytes") Tokenizer.to_bytes
++h(2, "to_bytes") StringStore.to_bytes
     +tag method
 
 p Serialize the current state to a binary string.
@@ -159,9 +159,9 @@ p Serialize the current state to a binary string.
     +footrow
         +cell returns
         +cell bytes
-        +cell The serialized form of the #[code Tokenizer] object.
+        +cell The serialized form of the #[code StringStore] object.
 
-+h(2, "from_bytes") Tokenizer.from_bytes
++h(2, "from_bytes") StringStore.from_bytes
     +tag method
 
 p Load state from a binary string.
diff --git a/website/docs/api/token.jade b/website/docs/api/token.jade
index 744446ec2..ee989047c 100644
--- a/website/docs/api/token.jade
+++ b/website/docs/api/token.jade
@@ -370,116 +370,132 @@ p The L2 norm of the token's vector representation.
         +cell #[code lemma]
         +cell int
         +cell
-            | Base form of the word, with no inflectional suffixes.
+            | Base form of the token, with no inflectional suffixes.
 
     +row
         +cell #[code lemma_]
         +cell unicode
-        +cell Base form of the word, with no inflectional suffixes.
+        +cell Base form of the token, with no inflectional suffixes.
 
     +row
         +cell #[code lower]
         +cell int
-        +cell Lower-case form of the word.
+        +cell Lower-case form of the token.
 
     +row
         +cell #[code lower_]
         +cell unicode
-        +cell Lower-case form of the word.
+        +cell Lower-case form of the token.
 
     +row
         +cell #[code shape]
         +cell int
-        +cell Transform of the word's string, to show orthographic features.
+        +cell
+            | Transform of the token's string, to show orthographic features.
+            | For example, "Xxxx" or "dd".
 
     +row
         +cell #[code shape_]
         +cell unicode
-        +cell A transform of the word's string, to show orthographic features.
+        +cell
+            | Transform of the token's string, to show orthographic features.
+            | For example, "Xxxx" or "dd".
 
     +row
         +cell #[code prefix]
         +cell int
         +cell Integer ID of a length-N substring from the start of the
-            | word. Defaults to #[code N=1].
+            | token. Defaults to #[code N=1].
 
     +row
         +cell #[code prefix_]
         +cell unicode
         +cell
-            | A length-N substring from the start of the word. Defaults to
+            | A length-N substring from the start of the token. Defaults to
             | #[code N=1].
 
     +row
         +cell #[code suffix]
         +cell int
         +cell
-            | Length-N substring from the end of the word. Defaults to #[code N=3].
+            | Length-N substring from the end of the token. Defaults to #[code N=3].
 
     +row
         +cell #[code suffix_]
         +cell unicode
-        +cell Length-N substring from the end of the word. Defaults to #[code N=3].
+        +cell Length-N substring from the end of the token. Defaults to #[code N=3].
 
     +row
         +cell #[code is_alpha]
         +cell bool
-        +cell Equivalent to #[code word.orth_.isalpha()].
+        +cell
+            | Does the token consist of alphabetic characters? Equivalent to
+            | #[code token.text.isalpha()].
 
     +row
         +cell #[code is_ascii]
         +cell bool
-        +cell Equivalent to #[code [any(ord(c) >= 128 for c in word.orth_)]].
+        +cell
+            | Does the token consist of ASCII characters? Equivalent to
+            | #[code all(ord(c) < 128 for c in token.text)].
 
     +row
         +cell #[code is_digit]
         +cell bool
-        +cell Equivalent to #[code word.orth_.isdigit()].
+        +cell
+            | Does the token consist of digits? Equivalent to
+            | #[code token.text.isdigit()].
 
     +row
         +cell #[code is_lower]
         +cell bool
-        +cell Equivalent to #[code word.orth_.islower()].
+        +cell
+            | Is the token in lowercase? Equivalent to
+            | #[code token.text.islower()].
 
     +row
         +cell #[code is_title]
         +cell bool
-        +cell Equivalent to #[code word.orth_.istitle()].
+        +cell
+            | Is the token in titlecase? Equivalent to
+            | #[code token.text.istitle()].
 
     +row
         +cell #[code is_punct]
         +cell bool
-        +cell Equivalent to #[code word.orth_.ispunct()].
+        +cell Is the token punctuation?
 
     +row
         +cell #[code is_space]
         +cell bool
-        +cell Equivalent to #[code word.orth_.isspace()].
+        +cell
+            | Does the token consist of whitespace characters? Equivalent to
+            | #[code token.text.isspace()].
 
     +row
         +cell #[code like_url]
         +cell bool
-        +cell Does the word resemble a URL?
+        +cell Does the token resemble a URL?
 
     +row
         +cell #[code like_num]
         +cell bool
-        +cell Does the word represent a number? e.g. “10.9”, “10”, “ten”, etc.
+        +cell Does the token represent a number? e.g. "10.9", "10", "ten", etc.
 
     +row
         +cell #[code like_email]
         +cell bool
-        +cell Does the word resemble an email address?
+        +cell Does the token resemble an email address?
 
     +row
         +cell #[code is_oov]
         +cell bool
-        +cell Is the word out-of-vocabulary?
+        +cell Is the token out-of-vocabulary?
 
     +row
         +cell #[code is_stop]
         +cell bool
-        +cell Is the word part of a "stop list"?
+        +cell Is the token part of a "stop list"?
 
     +row
         +cell #[code pos]
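
A minimal end-to-end sketch of the v2 APIs documented above: `spacy.load` with the new `disable` keyword, `Matcher.add` with an `on_match` callback, and the token flag equivalences. It assumes spaCy 2.0 with the `en` model installed; `merge_phrases` is named in the docs above, but its body here is illustrative, not taken from them.

    import spacy
    from spacy.matcher import Matcher
    from spacy.attrs import ORTH

    # Load the model, disabling pipeline components we don't need here
    # (the `disable` keyword replaces the old `**overrides`).
    nlp = spacy.load('en', disable=['parser', 'tagger'])

    def merge_phrases(matcher, doc, i, matches):
        # on_match callbacks receive the matcher, the doc, the index of
        # the current match and the full list of (match_id, start, end)
        # tuples, so they can act on the matched span.
        match_id, start, end = matches[i]
        doc[start:end].merge()  # collapse the matched span into one token

    matcher = Matcher(nlp.vocab)
    # One call registers both the pattern and the callback for this ID.
    matcher.add('GoogleNow', merge_phrases, [{ORTH: 'Google'}, {ORTH: 'Now'}])

    doc = nlp(u'Google Now is assistant software.')
    matcher(doc)  # fires merge_phrases; 'Google Now' becomes one token

    # The boolean flags are documented as str-method equivalents:
    token = doc[0]
    assert token.is_alpha == token.text.isalpha()
    assert token.is_ascii == all(ord(c) < 128 for c in token.text)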