From 205c73a58914b3fd9aebdd0708582fb7a80fd625 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20B=C3=B6ing?=
Date: Wed, 10 Jul 2019 10:16:48 +0200
Subject: [PATCH] Update tokenizer and doc init example (#3939)

* Fix Doc.to_json hyperlink

* Update tokenizer and doc init examples

* Change "matchin rules" to "punctuation rules"

* Auto-format
---
 spacy/tokens/doc.pyx          | 5 +++--
 website/docs/api/doc.md       | 2 +-
 website/docs/api/tokenizer.md | 8 +++++++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 131c43d37..373771247 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -85,13 +85,14 @@ cdef class Doc:
     Python-level `Token` and `Span` objects are views of this array, i.e.
     they don't own the data themselves.
 
-    EXAMPLE: Construction 1
+    EXAMPLE:
+        Construction 1
         >>> doc = nlp(u'Some text')
 
         Construction 2
         >>> from spacy.tokens import Doc
         >>> doc = Doc(nlp.vocab, words=[u'hello', u'world', u'!'],
-                      spaces=[True, False, False])
+        >>>           spaces=[True, False, False])
 
     DOCS: https://spacy.io/api/doc
     """
diff --git a/website/docs/api/doc.md b/website/docs/api/doc.md
index f5a94335f..bf9801564 100644
--- a/website/docs/api/doc.md
+++ b/website/docs/api/doc.md
@@ -264,7 +264,7 @@ ancestor is found, e.g. if span excludes a necessary ancestor.
 | ----------- | -------------------------------------- | ----------------------------------------------- |
 | **RETURNS** | `numpy.ndarray[ndim=2, dtype='int32']` | The lowest common ancestor matrix of the `Doc`. |
 
-## Doc.to_json {#to_json, tag="method" new="2.1"}
+## Doc.to_json {#to_json tag="method" new="2.1"}
 
 Convert a Doc to JSON. The format it produces will be the new format for the
 [`spacy train`](/api/cli#train) command (not implemented yet). If custom
diff --git a/website/docs/api/tokenizer.md b/website/docs/api/tokenizer.md
index 5bc0df625..67e67f5c9 100644
--- a/website/docs/api/tokenizer.md
+++ b/website/docs/api/tokenizer.md
@@ -9,7 +9,10 @@ Segment text, and create `Doc` objects with the discovered segment boundaries.
 
 ## Tokenizer.\_\_init\_\_ {#init tag="method"}
 
-Create a `Tokenizer`, to create `Doc` objects given unicode text.
+Create a `Tokenizer`, to create `Doc` objects given unicode text. For examples
+of how to construct a custom tokenizer with different tokenization rules, see
+the
+[usage documentation](https://spacy.io/usage/linguistic-features#native-tokenizers).
 
 > #### Example
 >
@@ -18,11 +21,14 @@ Create a `Tokenizer`, to create `Doc` objects given unicode text.
 > from spacy.tokenizer import Tokenizer
 > from spacy.lang.en import English
 > nlp = English()
+> # Create a blank Tokenizer with just the English vocab
 > tokenizer = Tokenizer(nlp.vocab)
 >
 > # Construction 2
 > from spacy.lang.en import English
 > nlp = English()
+> # Create a Tokenizer with the default settings for English
+> # including punctuation rules and exceptions
 > tokenizer = nlp.Defaults.create_tokenizer(nlp)
 > ```
 
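The construction patterns this patch documents can be combined into one runnable script. A minimal sketch, assuming spaCy 2.x (the API targeted by this patch) with no trained models installed:

```python
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
from spacy.tokens import Doc

nlp = English()

# Construction 1: a blank Tokenizer with just the English vocab
tokenizer = Tokenizer(nlp.vocab)
doc = tokenizer(u"Some text")

# Construction 2: the default English tokenizer, including
# punctuation rules and exceptions
tokenizer = nlp.Defaults.create_tokenizer(nlp)

# A Doc built directly from words and per-token trailing-space flags,
# as in the updated doc.pyx docstring
doc = Doc(nlp.vocab, words=[u"hello", u"world", u"!"],
          spaces=[True, False, False])
assert doc.text == u"hello world!"
```

Each entry in `spaces` records whether the corresponding token is followed by whitespace, which is why the `Doc` round-trips to exactly `"hello world!"`.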