Start updating website for v3 [ci skip]

2025-11-10 04:47:51 +03:00 · 2020-07-01 21:26:39 +02:00 · 2020-07-01 21:26:39 +02:00 · fe4cfd0632
commit fe4cfd0632
parent 94a0cf46fd
38 changed files with 325 additions and 1314 deletions
--- a/README.md
+++ b/README.md
@ -194,7 +194,7 @@ pip install https://github.com/explosion/spacy-models/releases/download/en_core_
 ### Loading and using models
-To load a model, use `spacy.load()` with the model name, a shortcut link or a
+To load a model, use `spacy.load()` with the model name or a
 path to the model data directory.
 ```python
--- a/netlify.toml
+++ b/netlify.toml
@ -38,6 +38,8 @@ redirects = [
    {from = "/docs/usage/showcase", to = "/universe", force = true},
    {from = "/tutorials/load-new-word-vectors", to = "/usage/vectors-similarity#custom", force = true},
    {from = "/tutorials", to = "/usage/examples", force = true},
    # Old documentation pages (v2.x)
    {from = "/usage/adding-languages", to = "/usage/linguistic-features", force = true},
    # Rewrite all other docs pages to /
    {from = "/docs/*", to = "/:splat"},
    # Updated documentation pages
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@ -16,7 +16,7 @@ from ..util import is_package, get_base_version, run_command
 def download_cli(
    # fmt: off
    ctx: typer.Context,
-    model: str = Arg(..., help="Model to download (shortcut or name)"),
+    model: str = Arg(..., help="Name of model to download"),
    direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
    # fmt: on
 ):
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@ -4,7 +4,6 @@ teaser: Download, train and package models, and debug spaCy
 source: spacy/cli
 menu:
  - ['Download', 'download']
  - ['Link', 'link']
  - ['Info', 'info']
  - ['Validate', 'validate']
  - ['Convert', 'convert']
@ -16,18 +15,16 @@ menu:
  - ['Package', 'package']
 ---
-As of v1.7.0, spaCy comes with new command line helpers to download and link
+For a list of available commands, type `spacy --help`.
-models and show useful debugging information. For a list of available commands,
+
-type `spacy --help`.
+<!-- TODO: add notes on autocompletion etc. -->
 ## Download {#download}
 Download [models](/usage/models) for spaCy. The downloader finds the
-best-matching compatible version, uses `pip install` to download the model as a
+best-matching compatible version and uses `pip install` to download the model as
-package and creates a [shortcut link](/usage/models#usage) if the model was
+a package. Direct downloads don't perform any compatibility checks and require
-downloaded via a shortcut. Direct downloads don't perform any compatibility
+the model name to be specified with its version (e.g. `en_core_web_sm-2.2.0`).
 checks and require the model name to be specified with its version (e.g.
 `en_core_web_sm-2.2.0`).
 > #### Downloading best practices
 >
@ -43,42 +40,13 @@ checks and require the model name to be specified with its version (e.g.
 $ python -m spacy download [model] [--direct] [pip args]
 ```
-| Argument                              | Type               | Description                                                                                                                                                                                                    |
+| Argument                              | Type       | Description                                                                                                                                                                                                    |
-| ------------------------------------- | ------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ------------------------------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model`                               | positional         | Model name or shortcut (`en`, `de`, `en_core_web_sm`).                                                                                                                                                         |
+| `model`                               | positional | Model name, e.g. `en_core_web_sm`.                                                                                                                                                                             |
-| `--direct`, `-d`                      | flag               | Force direct download of exact model version.                                                                                                                                                                  |
+| `--direct`, `-d`                      | flag       | Force direct download of exact model version.                                                                                                                                                                  |
-| pip args <Tag variant="new">2.1</Tag> | -                  | Additional installation options to be passed to `pip install` when installing the model package. For example, `--user` to install to the user home directory or `--no-deps` to not install model dependencies. |
+| pip args <Tag variant="new">2.1</Tag> | -          | Additional installation options to be passed to `pip install` when installing the model package. For example, `--user` to install to the user home directory or `--no-deps` to not install model dependencies. |
-| `--help`, `-h`                        | flag               | Show help message and available arguments.                                                                                                                                                                     |
+| `--help`, `-h`                        | flag       | Show help message and available arguments.                                                                                                                                                                     |
-| **CREATES**                           | directory, symlink | The installed model package in your `site-packages` directory and a shortcut link as a symlink in `spacy/data` if installed via shortcut.                                                                      |
+| **CREATES**                           | directory  | The installed model package in your `site-packages` directory.                                                                                                                                                 |
 ## Link {#link}
 Create a [shortcut link](/usage/models#usage) for a model, either a Python
 package or a local directory. This will let you load models from any location
 using a custom name via [`spacy.load()`](/api/top-level#spacy.load).
 <Infobox title="Important note" variant="warning">
 In spaCy v1.x, you had to use the model data directory to set up a shortcut link
 for a local path. As of v2.0, spaCy expects all shortcut links to be **loadable
 model packages**. If you want to load a data directory, call
 [`spacy.load()`](/api/top-level#spacy.load) or
 [`Language.from_disk()`](/api/language#from_disk) with the path, or use the
 [`package`](/api/cli#package) command to create a model package.
 </Infobox>
 ```bash
 $ python -m spacy link [origin] [link_name] [--force]
 ```
 | Argument        | Type       | Description                                                     |
 | --------------- | ---------- | --------------------------------------------------------------- |
 | `origin`        | positional | Model name if package, or path to local directory.              |
 | `link_name`     | positional | Name of the shortcut link to create.                            |
 | `--force`, `-f` | flag       | Force overwriting of existing link.                             |
 | `--help`, `-h`  | flag       | Show help message and available arguments.                      |
 | **CREATES**     | symlink    | A shortcut link of the given name as a symlink in `spacy/data`. |
 ## Info {#info}
@ -94,30 +62,28 @@ $ python -m spacy info [--markdown] [--silent]
 $ python -m spacy info [model] [--markdown] [--silent]
 ```
-| Argument                                         | Type       | Description                                                   |
+| Argument                                         | Type       | Description                                    |
-| ------------------------------------------------ | ---------- | ------------------------------------------------------------- |
+| ------------------------------------------------ | ---------- | ---------------------------------------------- |
-| `model`                                          | positional | A model, i.e. shortcut link, package name or path (optional). |
+| `model`                                          | positional | A model, i.e. package name or path (optional). |
-| `--markdown`, `-md`                              | flag       | Print information as Markdown.                                |
+| `--markdown`, `-md`                              | flag       | Print information as Markdown.                 |
-| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | flag       | Don't print anything, just return the values.                 |
+| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | flag       | Don't print anything, just return the values.  |
-| `--help`, `-h`                                   | flag       | Show help message and available arguments.                    |
+| `--help`, `-h`                                   | flag       | Show help message and available arguments.     |
-| **PRINTS**                                       | `stdout`   | Information about your spaCy installation.                    |
+| **PRINTS**                                       | `stdout`   | Information about your spaCy installation.     |
 ## Validate {#validate new="2"}
-Find all models installed in the current environment (both packages and shortcut
+Find all models installed in the current environment and check whether they are
-links) and check whether they are compatible with the currently installed
+compatible with the currently installed version of spaCy. Should be run after
-version of spaCy. Should be run after upgrading spaCy via `pip install -U spacy`
+upgrading spaCy via `pip install -U spacy` to ensure that all installed models
-to ensure that all installed models are can be used with the new version. The
+can be used with the new version. It will show a list of models and their
-command is also useful to detect out-of-sync model links resulting from links
+installed versions. If any model is out of date, the latest compatible versions
-created in different virtual environments. It will show a list of models and
+and command for updating are shown.
 their installed versions. If any model is out of date, the latest compatible
 versions and command for updating are shown.
 > #### Automated validation
 >
 > You can also use the `validate` command as part of your build process or test
 > suite, to ensure all models are up to date before proceeding. If incompatible
-> models or shortcut links are found, it will return `1`.
+> models are found, it will return `1`.
 ```bash
 $ python -m spacy validate
@ -526,16 +492,6 @@ JSONL-formatted [vocabulary file](/api/annotation#vocab-jsonl) as
 `--jsonl-loc` with optional `id` values that correspond to the vectors table.
 Just loading in vectors will not automatically populate the vocab.
 <Infobox title="Deprecation note" variant="warning">
 As of v2.1.0, the `--freqs-loc` and `--clusters-loc` are deprecated and have
 been replaced with the `--jsonl-loc` argument, which lets you pass in a
 [JSONL](http://jsonlines.org/) file containing one lexical entry per line. For
 more details on the format, see the
 [annotation specs](/api/annotation#vocab-jsonl).
 </Infobox>
 ```bash
 $ python -m spacy init-model [lang] [output_dir] [--jsonl-loc] [--vectors-loc]
 [--prune-vectors]
@ -569,7 +525,7 @@ $ python -m spacy evaluate [model] [data_path] [--displacy-path] [--displacy-lim
 | Argument                  | Type           | Description                                                                                                                                              |
 | ------------------------- | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model`                   | positional     | Model to evaluate. Can be a package or shortcut link name, or a path to a model data directory.                                                          |
+| `model`                   | positional     | Model to evaluate. Can be a package or a path to a model data directory.                                                                                 |
 | `data_path`               | positional     | Location of JSON-formatted evaluation data.                                                                                                              |
 | `--displacy-path`, `-dp`  | option         | Directory to output rendered parses as HTML. If not set, no visualizations will be generated.                                                            |
 | `--displacy-limit`, `-dl` | option         | Number of parses to generate per file. Defaults to `25`. Keep in mind that a significantly higher number might cause the `.html` files to render slowly. |
--- a/website/docs/api/doc.md
+++ b/website/docs/api/doc.md
@ -202,8 +202,8 @@ the character indices don't map to a valid span.
 | ------------------------------------ | ---------------------------------------- | --------------------------------------------------------------------- |
 | `start`                              | int                                      | The index of the first character of the span.                         |
 | `end`                                | int                                      | The index of the first character after the span.                      |
-| `label`                              | uint64 / unicode                         | A label to attach to the span, e.g. for named entities.               |
+| `label`                              | uint64 / str                             | A label to attach to the span, e.g. for named entities.               |
-| `kb_id` <Tag variant="new">2.2</Tag> | uint64 / unicode                         | An ID from a knowledge base to capture the meaning of a named entity. |
+| `kb_id` <Tag variant="new">2.2</Tag> | uint64 / str                             | An ID from a knowledge base to capture the meaning of a named entity. |
 | `vector`                             | `numpy.ndarray[ndim=1, dtype='float32']` | A meaning representation of the span.                                 |
 | **RETURNS**                          | `Span`                                   | The newly constructed object or `None`.                               |
@ -297,15 +297,6 @@ They'll be added to an `"_"` key in the data, e.g. `"_": {"foo": "bar"}`.
 | `underscore` | list | Optional list of string names of custom JSON-serializable `doc._.` attributes. |
 | **RETURNS**  | dict | The JSON-formatted data.                                                       |
 <Infobox title="Deprecation note" variant="warning">
 spaCy previously implemented a `Doc.print_tree` method that returned a similar
 JSON-formatted representation of a `Doc`. As of v2.1, this method is deprecated
 in favor of `Doc.to_json`. If you need more complex nested representations, you
 might want to write your own function to extract the data.
 </Infobox>
 ## Doc.to_array {#to_array tag="method"}
 Export given token attributes to a numpy `ndarray`. If `attr_ids` is a sequence
@ -507,14 +498,6 @@ underlying lexeme (if they're context-independent lexical attributes like
 ## Doc.merge {#merge tag="method"}
 <Infobox title="Deprecation note" variant="danger">
 As of v2.1.0, `Doc.merge` still works but is considered deprecated. You should
 use the new and less error-prone [`Doc.retokenize`](/api/doc#retokenize)
 instead.
 </Infobox>
 Retokenize the document, such that the span at `doc.text[start_idx : end_idx]`
 is merged into a single token. If `start_idx` and `end_idx` do not mark start
 and end token boundaries, the document remains unchanged.
@ -646,26 +629,26 @@ The L2 norm of the document's vector representation.
 ## Attributes {#attributes}
-| Name                                    | Type         | Description                                                                                                                                                                                                                                                                                |
+| Name                                    | Type         | Description                                                                                                                                                                     |
-| --------------------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| --------------------------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `text`                                  | str          | A unicode representation of the document text.                                                                                                                                                                                                                                             |
+| `text`                                  | str          | A string representation of the document text.                                                                                                                                   |
-| `text_with_ws`                          | str          | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`.                                                                                                                                                                                                      |
+| `text_with_ws`                          | str          | An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`.                                                                                           |
-| `mem`                                   | `Pool`       | The document's local memory heap, for all C data it owns.                                                                                                                                                                                                                                  |
+| `mem`                                   | `Pool`       | The document's local memory heap, for all C data it owns.                                                                                                                       |
-| `vocab`                                 | `Vocab`      | The store of lexical types.                                                                                                                                                                                                                                                                |
+| `vocab`                                 | `Vocab`      | The store of lexical types.                                                                                                                                                     |
-| `tensor` <Tag variant="new">2</Tag>     | `ndarray`    | Container for dense vector representations.                                                                                                                                                                                                                                                |
+| `tensor` <Tag variant="new">2</Tag>     | `ndarray`    | Container for dense vector representations.                                                                                                                                     |
-| `cats` <Tag variant="new">2</Tag>       | dict         | Maps a label to a score for categories applied to the document. The label is a string and the score should be a float.                                                                                     |
+| `cats` <Tag variant="new">2</Tag>       | dict         | Maps a label to a score for categories applied to the document. The label is a string and the score should be a float.                                                          |
-| `user_data`                             | -            | A generic storage area, for user custom data.                                                                                                                                                                                                                                              |
+| `user_data`                             | -            | A generic storage area, for user custom data.                                                                                                                                   |
-| `lang` <Tag variant="new">2.1</Tag>     | int          | Language of the document's vocabulary.                                                                                                                                                                                                                                                     |
+| `lang` <Tag variant="new">2.1</Tag>     | int          | Language of the document's vocabulary.                                                                                                                                          |
-| `lang_` <Tag variant="new">2.1</Tag>    | str          | Language of the document's vocabulary.                                                                                                                                                                                                                                                     |
+| `lang_` <Tag variant="new">2.1</Tag>    | str          | Language of the document's vocabulary.                                                                                                                                          |
-| `is_tagged`                             | bool         | A flag indicating that the document has been part-of-speech tagged. Returns `True` if the `Doc` is empty.                                                                                                                                                                                  |
+| `is_tagged`                             | bool         | A flag indicating that the document has been part-of-speech tagged. Returns `True` if the `Doc` is empty.                                                                       |
-| `is_parsed`                             | bool         | A flag indicating that the document has been syntactically parsed. Returns `True` if the `Doc` is empty.                                                                                                                                                                                   |
+| `is_parsed`                             | bool         | A flag indicating that the document has been syntactically parsed. Returns `True` if the `Doc` is empty.                                                                        |
-| `is_sentenced`                          | bool         | A flag indicating that sentence boundaries have been applied to the document. Returns `True` if the `Doc` is empty.                                                                                                                                                                        |
+| `is_sentenced`                          | bool         | A flag indicating that sentence boundaries have been applied to the document. Returns `True` if the `Doc` is empty.                                                             |
-| `is_nered` <Tag variant="new">2.1</Tag> | bool         | A flag indicating that named entities have been set. Will return `True` if the `Doc` is empty, or if _any_ of the tokens has an entity tag set, even if the others are unknown.                                                                                                            |
+| `is_nered` <Tag variant="new">2.1</Tag> | bool         | A flag indicating that named entities have been set. Will return `True` if the `Doc` is empty, or if _any_ of the tokens has an entity tag set, even if the others are unknown. |
-| `sentiment`                             | float        | The document's positivity/negativity score, if available.                                                                                                                                                                                                                                  |
+| `sentiment`                             | float        | The document's positivity/negativity score, if available.                                                                                                                       |
-| `user_hooks`                            | dict         | A dictionary that allows customization of the `Doc`'s properties.                                                                                                                                                                                                                          |
+| `user_hooks`                            | dict         | A dictionary that allows customization of the `Doc`'s properties.                                                                                                               |
-| `user_token_hooks`                      | dict         | A dictionary that allows customization of properties of `Token` children.                                                                                                                                                                                                                  |
+| `user_token_hooks`                      | dict         | A dictionary that allows customization of properties of `Token` children.                                                                                                       |
-| `user_span_hooks`                       | dict         | A dictionary that allows customization of properties of `Span` children.                                                                                                                                                                                                                   |
+| `user_span_hooks`                       | dict         | A dictionary that allows customization of properties of `Span` children.                                                                                                        |
-| `_`                                     | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes).                                                                                                                                                                             |
+| `_`                                     | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes).                                                                  |
 ## Serialization fields {#serialization-fields}
--- a/website/docs/api/docbin.md
+++ b/website/docs/api/docbin.md
@ -22,7 +22,7 @@ the msgpack object has the following structure:
    "tokens": bytes,          # Serialized numpy uint64 array with the token data
    "spaces": bytes,          # Serialized numpy boolean array with spaces data
    "lengths": bytes,         # Serialized numpy int32 array with the doc lengths
-    "strings": List[unicode]  # List of unique strings in the token data
+    "strings": List[str]      # List of unique strings in the token data
 }
 ```
--- a/website/docs/api/entityruler.md
+++ b/website/docs/api/entityruler.md
@ -36,7 +36,7 @@ be a token pattern (list) or a phrase pattern (string). For example:
 | --------------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `nlp`                 | `Language`    | The shared nlp object to pass the vocab to the matchers and process phrase patterns.                                                                  |
 | `patterns`            | iterable      | Optional patterns to load in.                                                                                                                         |
-| `phrase_matcher_attr` | int / unicode | Optional attr to pass to the internal [`PhraseMatcher`](/api/phrasematcher). defaults to `None`                                                       |
+| `phrase_matcher_attr` | int / str     | Optional attr to pass to the internal [`PhraseMatcher`](/api/phrasematcher). Defaults to `None`.                                                      |
 | `validate`            | bool          | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`.                                         |
 | `overwrite_ents`      | bool          | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`.                      |
 | `**cfg`               | -             | Other config parameters. If pipeline component is loaded as part of a model pipeline, this will include all keyword arguments passed to `spacy.load`. |
--- a/website/docs/api/goldparse.md
+++ b/website/docs/api/goldparse.md
@ -15,7 +15,7 @@ missing – the gradient for those labels will be zero.
 | Name              | Type        | Description                                                                                                                                                                                                                            |
 | ----------------- | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `doc`             | `Doc`       | The document the annotations refer to.                                                                                                                                                                                                 |
-| `words`           | iterable    | A sequence of unicode word strings.                                                                                                                                                                                                    |
+| `words`           | iterable    | A sequence of word strings.                                                                                                                                                                                                            |
 | `tags`            | iterable    | A sequence of strings, representing tag annotations.                                                                                                                                                                                   |
 | `heads`           | iterable    | A sequence of integers, representing syntactic head offsets.                                                                                                                                                                           |
 | `deps`            | iterable    | A sequence of strings, representing the syntactic relation types.                                                                                                                                                                      |
@ -136,14 +136,13 @@ The returned tuple contains the following alignment information:
 Encode labelled spans into per-token tags, using the
 [BILUO scheme](/api/annotation#biluo) (Begin, In, Last, Unit, Out). Returns a
-list of unicode strings, describing the tags. Each tag string will be of the
+list of strings, describing the tags. Each tag string will be of the form of
-form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of
+either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`,
-`"B"`, `"I"`, `"L"`, `"U"`. The string `"-"` is used where the entity offsets
+`"L"`, `"U"`. The string `"-"` is used where the entity offsets don't align with
-don't align with the tokenization in the `Doc` object. The training algorithm
+the tokenization in the `Doc` object. The training algorithm will view these as
-will view these as missing values. `O` denotes a non-entity token. `B` denotes
+missing values. `O` denotes a non-entity token. `B` denotes the beginning of a
-the beginning of a multi-token entity, `I` the inside of an entity of three or
+multi-token entity, `I` the inside of an entity of three or more tokens, and `L`
-more tokens, and `L` the end of an entity of two or more tokens. `U` denotes a
+the end of an entity of two or more tokens. `U` denotes a single-token entity.
 single-token entity.
 > #### Example
 >
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@ -72,17 +72,6 @@ Pipeline components to prevent from being loaded can now be added as a list to
 Process texts as a stream, and yield `Doc` objects in order. This is usually
 more efficient than processing texts one-by-one.
 <Infobox title="Important note for spaCy v2.0.x" variant="danger">
 Early versions of spaCy used simple statistical models that could be efficiently
 multi-threaded, as we were able to entirely release Python's global interpreter
 lock. The multi-threading was controlled using the `n_threads` keyword argument
 to the `.pipe` method. This keyword argument is now deprecated as of v2.1.0. A
 new keyword argument, `n_process`, was introduced to control parallel inference
 via multiprocessing in v2.2.2.
 </Infobox>
 > #### Example
 >
 > ```python
@ -91,15 +80,15 @@ via multiprocessing in v2.2.2.
 >     assert doc.is_parsed
 > ```
-| Name                                         | Type  | Description                                                                                                                                                |
+| Name                                         | Type     | Description                                                                                                                                                |
-| -------------------------------------------- | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| -------------------------------------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `texts`                                      | -     | A sequence of unicode objects.                                                                                                                             |
+| `texts`                                      | iterable | A sequence of strings.                                                                                                                                     |
-| `as_tuples`                                  | bool  | If set to `True`, inputs should be a sequence of `(text, context)` tuples. Output will then be a sequence of `(doc, context)` tuples. Defaults to `False`. |
+| `as_tuples`                                  | bool     | If set to `True`, inputs should be a sequence of `(text, context)` tuples. Output will then be a sequence of `(doc, context)` tuples. Defaults to `False`. |
-| `batch_size`                                 | int   | The number of texts to buffer.                                                                                                                             |
+| `batch_size`                                 | int      | The number of texts to buffer.                                                                                                                             |
-| `disable`                                    | list  | Names of pipeline components to [disable](/usage/processing-pipelines#disabling).                                                                          |
+| `disable`                                    | list     | Names of pipeline components to [disable](/usage/processing-pipelines#disabling).                                                                          |
-| `component_cfg` <Tag variant="new">2.1</Tag> | dict  | Config parameters for specific pipeline components, keyed by component name.                                                                               |
+| `component_cfg` <Tag variant="new">2.1</Tag> | dict     | Config parameters for specific pipeline components, keyed by component name.                                                                               |
-| `n_process` <Tag variant="new">2.2.2</Tag>   | int   | Number of processors to use, only supported in Python 3. Defaults to `1`.                                                                                  |
+| `n_process` <Tag variant="new">2.2.2</Tag>   | int      | Number of processors to use, only supported in Python 3. Defaults to `1`.                                                                                  |
-| **YIELDS**                                   | `Doc` | Documents in the order of the original text.                                                                                                               |
+| **YIELDS**                                   | `Doc`    | Documents in the order of the original text.                                                                                                               |
 ## Language.update {#update tag="method"}
@ -116,7 +105,7 @@ Update the models in the pipeline.
 | Name                                         | Type     | Description                                                                                                                                                                                                         |
 | -------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `docs`                                       | iterable | A batch of `Doc` objects or unicode. If unicode, a `Doc` object will be created from the text.                                                                                                                      |
+| `docs`                                       | iterable | A batch of `Doc` objects or strings. If strings, a `Doc` object will be created from the text.                                                                                                                      |
 | `golds`                                      | iterable | A batch of `GoldParse` objects or dictionaries. Dictionaries will be used to create [`GoldParse`](/api/goldparse) objects. For the available keys and their usage, see [`GoldParse.__init__`](/api/goldparse#init). |
 | `drop`                                       | float    | The dropout rate.                                                                                                                                                                                                   |
 | `sgd`                                        | callable | An optimizer.                                                                                                                                                                                                       |
@ -134,14 +123,14 @@ Evaluate a model's pipeline components.
 > print(scorer.scores)
 > ```
-| Name                                         | Type     | Description                                                                                                                                                   |
+| Name                                         | Type     | Description                                                                                                                                                                                                                                                                                |
-| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| -------------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `docs_golds`                                 | iterable | Tuples of `Doc` and `GoldParse` objects, such that the `Doc` objects contain the predictions and the `GoldParse` objects the correct annotations. Alternatively, `(text, annotations)` tuples of raw text and a dict (see [simple training style](/usage/training#training-simple-style)). |
-| `verbose`                                    | bool     | Print debugging information.                                                                                                                                  |
+| `verbose`                                    | bool     | Print debugging information.                                                                                                                                                                                                                                                               |
-| `batch_size`                                 | int      | The batch size to use.                                                                                                                                        |
+| `batch_size`                                 | int      | The batch size to use.                                                                                                                                                                                                                                                                     |
-| `scorer`                                     | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created.                                                                         |
+| `scorer`                                     | `Scorer` | Optional [`Scorer`](/api/scorer) to use. If not passed in, a new one will be created.                                                                                                                                                                                                      |
-| `component_cfg` <Tag variant="new">2.1</Tag> | dict     | Config parameters for specific pipeline components, keyed by component name.                                                                                  |
+| `component_cfg` <Tag variant="new">2.1</Tag> | dict     | Config parameters for specific pipeline components, keyed by component name.                                                                                                                                                                                                               |
-| **RETURNS**                                  | Scorer   | The scorer containing the evaluation scores.                                                                                                                  |
+| **RETURNS**                                  | Scorer   | The scorer containing the evaluation scores.                                                                                                                                                                                                                                               |
 ## Language.begin_training {#begin_training tag="method"}
@ -400,20 +389,6 @@ loaded object.
 | `exclude`   | list         | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
 | **RETURNS** | `Language`   | The modified `Language` object.                                                           |
 <Infobox title="Changed in v2.0" variant="warning">
 As of spaCy v2.0, the `save_to_directory` method has been renamed to `to_disk`,
 to improve consistency across classes. Pipeline components to prevent from being
 loaded can now be added as a list to `disable` (v2.0) or `exclude` (v2.1),
 instead of specifying one keyword argument per component.
 ```diff
 - nlp = spacy.load("en", tagger=False, entity=False)
 + nlp = English().from_disk("/model", exclude=["tagger", "ner"])
 ```
 </Infobox>
 ## Language.to_bytes {#to_bytes tag="method"}
 Serialize the current state to a binary string.
@ -470,7 +445,7 @@ per component.
 | ------------------------------------------ | ----------- | ----------------------------------------------------------------------------------------------- |
 | `vocab`                                    | `Vocab`     | A container for the lexical types.                                                              |
 | `tokenizer`                                | `Tokenizer` | The tokenizer.                                                                                  |
-| `make_doc`                                 | `callable`  | Callable that takes a unicode text and returns a `Doc`.                                         |
+| `make_doc`                                 | `callable`  | Callable that takes a string and returns a `Doc`.                                               |
 | `pipeline`                                 | list        | List of `(name, component)` tuples describing the current processing pipeline, in order.        |
 | `pipe_names` <Tag variant="new">2</Tag>    | list        | List of pipeline component names, in order.                                                     |
 | `pipe_labels` <Tag variant="new">2.2</Tag> | dict        | List of labels set by the pipeline components, if available, keyed by component name.           |
--- a/website/docs/api/lemmatizer.md
+++ b/website/docs/api/lemmatizer.md
@ -31,20 +31,6 @@ when a `Language` subclass and its `Vocab` is initialized.
 | `lookups` <Tag variant="new">2.2</Tag> | [`Lookups`](/api/lookups) | The lookups object containing the (optional) tables `"lemma_rules"`, `"lemma_index"`, `"lemma_exc"` and `"lemma_lookup"`. |
 | **RETURNS**                            | `Lemmatizer`              | The newly created object.                                                                                                 |
 <Infobox title="Deprecation note" variant="danger">
 As of v2.2, the lemmatizer is initialized with a [`Lookups`](/api/lookups)
 object containing tables for the different components. This makes it easier for
 spaCy to share and serialize rules and lookup tables via the `Vocab`, and allows
 users to modify lemmatizer data at runtime by updating `nlp.vocab.lookups`.
 ```diff
 - lemmatizer = Lemmatizer(rules=lemma_rules)
 + lemmatizer = Lemmatizer(lookups)
 ```
 </Infobox>
 ## Lemmatizer.\_\_call\_\_ {#call tag="method"}
 Lemmatize a string.
--- a/website/docs/api/phrasematcher.md
+++ b/website/docs/api/phrasematcher.md
@ -39,7 +39,7 @@ be shown.
 | --------------------------------------- | --------------- | ------------------------------------------------------------------------------------------- |
 | `vocab`                                 | `Vocab`         | The vocabulary object, which must be shared with the documents the matcher will operate on. |
 | `max_length`                            | int             | Deprecated argument - the `PhraseMatcher` does not have a phrase length limit anymore.      |
-| `attr` <Tag variant="new">2.1</Tag>     | int / unicode   | The token attribute to match on. Defaults to `ORTH`, i.e. the verbatim token text.          |
+| `attr` <Tag variant="new">2.1</Tag>     | int / str       | The token attribute to match on. Defaults to `ORTH`, i.e. the verbatim token text.          |
 | `validate` <Tag variant="new">2.1</Tag> | bool            | Validate patterns added to the matcher.                                                     |
 | **RETURNS**                             | `PhraseMatcher` | The newly constructed object.                                                               |
--- a/website/docs/api/span.md
+++ b/website/docs/api/span.md
@ -18,15 +18,15 @@ Create a Span object from the slice `doc[start : end]`.
 > assert [t.text for t in span] ==  ["it", "back", "!"]
 > ```
-| Name        | Type                                     | Description                                                                                                       |
+| Name        | Type                                     | Description                                                                                               |
-| ----------- | ---------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| ----------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------- |
-| `doc`       | `Doc`                                    | The parent document.                                                                                              |
+| `doc`       | `Doc`                                    | The parent document.                                                                                      |
-| `start`     | int                                      | The index of the first token of the span.                                                                         |
+| `start`     | int                                      | The index of the first token of the span.                                                                 |
-| `end`       | int                                      | The index of the first token after the span.                                                                      |
+| `end`       | int                                      | The index of the first token after the span.                                                              |
-| `label`     | int / unicode                            | A label to attach to the span, e.g. for named entities. As of v2.1, the label can also be a unicode string.       |
+| `label`     | int / str                                | A label to attach to the span, e.g. for named entities. As of v2.1, the label can also be a string.       |
-| `kb_id`     | int / unicode                            | A knowledge base ID to attach to the span, e.g. for named entities. The ID can be an integer or a unicode string. |
+| `kb_id`     | int / str                                | A knowledge base ID to attach to the span, e.g. for named entities. The ID can be an integer or a string. |
-| `vector`    | `numpy.ndarray[ndim=1, dtype='float32']` | A meaning representation of the span.                                                                             |
+| `vector`    | `numpy.ndarray[ndim=1, dtype='float32']` | A meaning representation of the span.                                                                     |
-| **RETURNS** | `Span`                                   | The newly constructed object.                                                                                     |
+| **RETURNS** | `Span`                                   | The newly constructed object.                                                                             |
 ## Span.\_\_getitem\_\_ {#getitem tag="method"}
@ -189,8 +189,8 @@ the character indices don't map to a valid span.
 | ----------- | ---------------------------------------- | --------------------------------------------------------------------- |
 | `start`     | int                                      | The index of the first character of the span.                         |
 | `end`       | int                                      | The index of the first character after the span.                      |
-| `label`     | uint64 / unicode                         | A label to attach to the span, e.g. for named entities.               |
+| `label`     | uint64 / str                             | A label to attach to the span, e.g. for named entities.               |
-| `kb_id`     | uint64 / unicode                         | An ID from a knowledge base to capture the meaning of a named entity. |
+| `kb_id`     | uint64 / str                             | An ID from a knowledge base to capture the meaning of a named entity. |
 | `vector`    | `numpy.ndarray[ndim=1, dtype='float32']` | A meaning representation of the span.                                 |
 | **RETURNS** | `Span`                                   | The newly constructed object or `None`.                               |
@ -255,33 +255,6 @@ shape `(N, M)`, where `N` is the length of the document. The values will be
 | `attr_ids`  | list                          | A list of attribute ID ints.                                                                             |
 | **RETURNS** | `numpy.ndarray[long, ndim=2]` | A feature matrix, with one row per word, and one column per attribute indicated in the input `attr_ids`. |
 ## Span.merge {#merge tag="method"}
 <Infobox title="Deprecation note" variant="danger">
 As of v2.1.0, `Span.merge` still works but is considered deprecated. You should
 use the new and less error-prone [`Doc.retokenize`](/api/doc#retokenize)
 instead.
 </Infobox>
 Retokenize the document, such that the span is merged into a single token.
 > #### Example
 >
 > ```python
 > doc = nlp("I like New York in Autumn.")
 > span = doc[2:4]
 > span.merge()
 > assert len(doc) == 6
 > assert doc[2].text == "New York"
 > ```
 | Name           | Type    | Description                                                                                                               |
 | -------------- | ------- | ------------------------------------------------------------------------------------------------------------------------- |
 | `**attributes` | -       | Attributes to assign to the merged token. By default, attributes are inherited from the syntactic root token of the span. |
 | **RETURNS**    | `Token` | The newly merged token.                                                                                                   |
 ## Span.ents {#ents tag="property" new="2.0.13" model="ner"}
 The named entities in the span. Returns a tuple of named entity `Span` objects,
@ -497,7 +470,7 @@ The L2 norm of the span's vector representation.
 | `end`                                   | int          | The token offset for the end of the span.                                                                      |
 | `start_char`                            | int          | The character offset for the start of the span.                                                                |
 | `end_char`                              | int          | The character offset for the end of the span.                                                                  |
-| `text`                                  | str          | A unicode representation of the span text.                                                                     |
+| `text`                                  | str          | A string representation of the span text.                                                                      |
 | `text_with_ws`                          | str          | The text content of the span with a trailing whitespace character if the last token has one.                   |
 | `orth`                                  | int          | ID of the verbatim text content.                                                                               |
 | `orth_`                                 | str          | Verbatim text content (identical to `Span.text`). Exists mostly for consistency with the other attributes.     |
--- a/website/docs/api/stringstore.md
+++ b/website/docs/api/stringstore.md
@ -19,10 +19,10 @@ Create the `StringStore`.
 > stringstore = StringStore(["apple", "orange"])
 > ```
-| Name        | Type          | Description                                        |
+| Name        | Type          | Description                                |
-| ----------- | ------------- | -------------------------------------------------- |
+| ----------- | ------------- | ------------------------------------------ |
-| `strings`   | iterable      | A sequence of unicode strings to add to the store. |
+| `strings`   | iterable      | A sequence of strings to add to the store. |
-| **RETURNS** | `StringStore` | The newly constructed object.                      |
+| **RETURNS** | `StringStore` | The newly constructed object.              |
 ## StringStore.\_\_len\_\_ {#len tag="method"}
@ -52,10 +52,10 @@ Retrieve a string from a given hash, or vice versa.
 > assert stringstore[apple_hash] == "apple"
 > ```
-| Name           | Type                     | Description                |
+| Name           | Type                 | Description                |
-| -------------- | ------------------------ | -------------------------- |
+| -------------- | -------------------- | -------------------------- |
-| `string_or_id` | bytes, unicode or uint64 | The value to encode.       |
+| `string_or_id` | bytes, str or uint64 | The value to encode.       |
-| **RETURNS**    | str or int               | The value to be retrieved. |
+| **RETURNS**    | str or int           | The value to be retrieved. |
 ## StringStore.\_\_contains\_\_ {#contains tag="method"}
--- a/website/docs/api/token.md
+++ b/website/docs/api/token.md
@ -58,7 +58,7 @@ For details, see the documentation on
 | Name      | Type     | Description                                                                                                                             |
 | --------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------- |
-| `name`    | unicode  | Name of the attribute to set by the extension. For example, `'my_attr'` will be available as `token._.my_attr`.                         |
+| `name`    | str      | Name of the attribute to set by the extension. For example, `'my_attr'` will be available as `token._.my_attr`.                         |
 | `default` | -        | Optional default value of the attribute if no getter or method is defined.                                                              |
 | `method`  | callable | Set a custom method on the object, for example `token._.compare(other_token)`.                                                          |
 | `getter`  | callable | Getter function that takes the object and returns an attribute value. Is called when the user accesses the `._` attribute.              |
@ -80,10 +80,10 @@ Look up a previously registered extension by name. Returns a 4-tuple
 > assert extension == (False, None, None, None)
 > ```
-| Name        | Type    | Description                                                   |
+| Name        | Type  | Description                                                   |
-| ----------- | ------- | ------------------------------------------------------------- |
+| ----------- | ----- | ------------------------------------------------------------- |
-| `name`      | unicode | Name of the extension.                                        |
+| `name`      | str   | Name of the extension.                                        |
-| **RETURNS** | tuple   | A `(default, method, getter, setter)` tuple of the extension. |
+| **RETURNS** | tuple | A `(default, method, getter, setter)` tuple of the extension. |
 ## Token.has_extension {#has_extension tag="classmethod" new="2"}
@ -97,10 +97,10 @@ Check whether an extension has been registered on the `Token` class.
 > assert Token.has_extension("is_fruit")
 > ```
-| Name        | Type    | Description                                |
+| Name        | Type | Description                                |
-| ----------- | ------- | ------------------------------------------ |
+| ----------- | ---- | ------------------------------------------ |
-| `name`      | unicode | Name of the extension to check.            |
+| `name`      | str  | Name of the extension to check.            |
-| **RETURNS** | bool    | Whether the extension has been registered. |
+| **RETURNS** | bool | Whether the extension has been registered. |
 ## Token.remove_extension {#remove_extension tag="classmethod" new="2.0.11"}
@ -115,10 +115,10 @@ Remove a previously registered extension.
 > assert not Token.has_extension("is_fruit")
 > ```
-| Name        | Type    | Description                                                           |
+| Name        | Type  | Description                                                           |
-| ----------- | ------- | --------------------------------------------------------------------- |
+| ----------- | ----- | --------------------------------------------------------------------- |
-| `name`      | unicode | Name of the extension.                                                |
+| `name`      | str   | Name of the extension.                                                |
-| **RETURNS** | tuple   | A `(default, method, getter, setter)` tuple of the removed extension. |
+| **RETURNS** | tuple | A `(default, method, getter, setter)` tuple of the removed extension. |
 ## Token.check_flag {#check_flag tag="method"}
@ -339,21 +339,6 @@ unknown. Defaults to `True` for the first token in the `Doc`.
 | ----------- | ---- | ------------------------------------ |
 | **RETURNS** | bool | Whether the token starts a sentence. |
 <Infobox title="Changed in v2.0" variant="warning">
 As of spaCy v2.0, the `Token.sent_start` property is deprecated and has been
 replaced with `Token.is_sent_start`, which returns a boolean value instead of a
 misleading `0` for `False` and `1` for `True`. It also now returns `None` if the
 answer is unknown, and fixes a quirk in the old logic that would always set the
 property to `0` for the first word of the document.
 ```diff
 - assert doc[4].sent_start == 1
 + assert doc[4].is_sent_start == True
 ```
 </Infobox>
 ## Token.has_vector {#has_vector tag="property" model="vectors"}
 A boolean value indicating whether a word vector is associated with the token.
@ -412,11 +397,11 @@ The L2 norm of the token's vector representation.
 | -------------------------------------------- | ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `doc`                                        | `Doc`        | The parent document.                                                                                                                                                                                                                                           |
 | `sent` <Tag variant="new">2.0.12</Tag>       | `Span`       | The sentence span that this token is a part of.                                                                                                                                                                                                                |
-| `text`                                       | unicode      | Verbatim text content.                                                                                                                                                                                                                                         |
+| `text`                                       | str          | Verbatim text content.                                                                                                                                                                                                                                         |
-| `text_with_ws`                               | unicode      | Text content, with trailing space character if present.                                                                                                                                                                                                        |
+| `text_with_ws`                               | str          | Text content, with trailing space character if present.                                                                                                                                                                                                        |
-| `whitespace_`                                | unicode      | Trailing space character if present.                                                                                                                                                                                                                           |
+| `whitespace_`                                | str          | Trailing space character if present.                                                                                                                                                                                                                           |
 | `orth`                                       | int          | ID of the verbatim text content.                                                                                                                                                                                                                               |
-| `orth_`                                      | unicode      | Verbatim text content (identical to `Token.text`). Exists mostly for consistency with the other attributes.                                                                                                                                                    |
+| `orth_`                                      | str          | Verbatim text content (identical to `Token.text`). Exists mostly for consistency with the other attributes.                                                                                                                                                    |
 | `vocab`                                      | `Vocab`      | The vocab object of the parent `Doc`.                                                                                                                                                                                                                          |
 | `tensor` <Tag variant="new">2.1.7</Tag>      | `ndarray`    | The tokens's slice of the parent `Doc`'s tensor.                                                                                                                                                                                                               |
 | `head`                                       | `Token`      | The syntactic parent, or "governor", of this token.                                                                                                                                                                                                            |
@ -424,25 +409,25 @@ The L2 norm of the token's vector representation.
 | `right_edge`                                 | `Token`      | The rightmost token of this token's syntactic descendants.                                                                                                                                                                                                     |
 | `i`                                          | int          | The index of the token within the parent document.                                                                                                                                                                                                             |
 | `ent_type`                                   | int          | Named entity type.                                                                                                                                                                                                                                             |
-| `ent_type_`                                  | unicode      | Named entity type.                                                                                                                                                                                                                                             |
+| `ent_type_`                                  | str          | Named entity type.                                                                                                                                                                                                                                             |
 | `ent_iob`                                    | int          | IOB code of named entity tag. `3` means the token begins an entity, `2` means it is outside an entity, `1` means it is inside an entity, and `0` means no entity tag is set.                                                                                   |
-| `ent_iob_`                                   | unicode      | IOB code of named entity tag. "B" means the token begins an entity, "I" means it is inside an entity, "O" means it is outside an entity, and "" means no entity tag is set.                                                                                    |
+| `ent_iob_`                                   | str          | IOB code of named entity tag. "B" means the token begins an entity, "I" means it is inside an entity, "O" means it is outside an entity, and "" means no entity tag is set.                                                                                    |
 | `ent_kb_id` <Tag variant="new">2.2</Tag>     | int          | Knowledge base ID that refers to the named entity this token is a part of, if any.                                                                                                                                                                             |
-| `ent_kb_id_` <Tag variant="new">2.2</Tag>    | unicode      | Knowledge base ID that refers to the named entity this token is a part of, if any.                                                                                                                                                                             |
+| `ent_kb_id_` <Tag variant="new">2.2</Tag>    | str          | Knowledge base ID that refers to the named entity this token is a part of, if any.                                                                                                                                                                             |
 | `ent_id`                                     | int          | ID of the entity the token is an instance of, if any. Currently not used, but potentially for coreference resolution.                                                                                                                                          |
-| `ent_id_`                                    | unicode      | ID of the entity the token is an instance of, if any. Currently not used, but potentially for coreference resolution.                                                                                                                                          |
+| `ent_id_`                                    | str          | ID of the entity the token is an instance of, if any. Currently not used, but potentially for coreference resolution.                                                                                                                                          |
 | `lemma`                                      | int          | Base form of the token, with no inflectional suffixes.                                                                                                                                                                                                         |
-| `lemma_`                                     | unicode      | Base form of the token, with no inflectional suffixes.                                                                                                                                                                                                         |
+| `lemma_`                                     | str          | Base form of the token, with no inflectional suffixes.                                                                                                                                                                                                         |
 | `norm`                                       | int          | The token's norm, i.e. a normalized form of the token text. Usually set in the language's [tokenizer exceptions](/usage/adding-languages#tokenizer-exceptions) or [norm exceptions](/usage/adding-languages#norm-exceptions).                                  |
-| `norm_`                                      | unicode      | The token's norm, i.e. a normalized form of the token text. Usually set in the language's [tokenizer exceptions](/usage/adding-languages#tokenizer-exceptions) or [norm exceptions](/usage/adding-languages#norm-exceptions).                                  |
+| `norm_`                                      | str          | The token's norm, i.e. a normalized form of the token text. Usually set in the language's [tokenizer exceptions](/usage/adding-languages#tokenizer-exceptions) or [norm exceptions](/usage/adding-languages#norm-exceptions).                                  |
 | `lower`                                      | int          | Lowercase form of the token.                                                                                                                                                                                                                                   |
-| `lower_`                                     | unicode      | Lowercase form of the token text. Equivalent to `Token.text.lower()`.                                                                                                                                                                                          |
+| `lower_`                                     | str          | Lowercase form of the token text. Equivalent to `Token.text.lower()`.                                                                                                                                                                                          |
 | `shape`                                      | int          | Transform of the token's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example, `"Xxxx"` or `"dd"`. |
-| `shape_`                                     | unicode      | Transform of the tokens's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. |
+| `shape_`                                     | str          | Transform of the token's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example, `"Xxxx"` or `"dd"`. |
 | `prefix`                                     | int          | Hash value of a length-N substring from the start of the token. Defaults to `N=1`.                                                                                                                                                                             |
-| `prefix_`                                    | unicode      | A length-N substring from the start of the token. Defaults to `N=1`.                                                                                                                                                                                           |
+| `prefix_`                                    | str          | A length-N substring from the start of the token. Defaults to `N=1`.                                                                                                                                                                                           |
 | `suffix`                                     | int          | Hash value of a length-N substring from the end of the token. Defaults to `N=3`.                                                                                                                                                                               |
-| `suffix_`                                    | unicode      | Length-N substring from the end of the token. Defaults to `N=3`.                                                                                                                                                                                               |
+| `suffix_`                                    | str          | Length-N substring from the end of the token. Defaults to `N=3`.                                                                                                                                                                                               |
 | `is_alpha`                                   | bool         | Does the token consist of alphabetic characters? Equivalent to `token.text.isalpha()`.                                                                                                                                                                         |
 | `is_ascii`                                   | bool         | Does the token consist of ASCII characters? Equivalent to `all(ord(c) < 128 for c in token.text)`.                                                                                                                                                             |
 | `is_digit`                                   | bool         | Does the token consist of digits? Equivalent to `token.text.isdigit()`.                                                                                                                                                                                        |
@ -459,16 +444,16 @@ The L2 norm of the token's vector representation.
 | `like_url`                                   | bool         | Does the token resemble a URL?                                                                                                                                                                                                                                 |
 | `like_num`                                   | bool         | Does the token represent a number? e.g. "10.9", "10", "ten", etc.                                                                                                                                                                                              |
 | `like_email`                                 | bool         | Does the token resemble an email address?                                                                                                                                                                                                                      |
-| `is_oov`                                     | bool         | Does the token have a word vector?                                                                                                                                                                                                                              |
+| `is_oov`                                     | bool         | Does the token have a word vector?                                                                                                                                                                                                                             |
 | `is_stop`                                    | bool         | Is the token part of a "stop list"?                                                                                                                                                                                                                            |
 | `pos`                                        | int          | Coarse-grained part-of-speech from the [Universal POS tag set](https://universaldependencies.org/docs/u/pos/).                                                                                                                                                 |
-| `pos_`                                       | unicode      | Coarse-grained part-of-speech from the [Universal POS tag set](https://universaldependencies.org/docs/u/pos/).                                                                                                                                                 |
+| `pos_`                                       | str          | Coarse-grained part-of-speech from the [Universal POS tag set](https://universaldependencies.org/docs/u/pos/).                                                                                                                                                 |
 | `tag`                                        | int          | Fine-grained part-of-speech.                                                                                                                                                                                                                                   |
-| `tag_`                                       | unicode      | Fine-grained part-of-speech.                                                                                                                                                                                                                                   |
+| `tag_`                                       | str          | Fine-grained part-of-speech.                                                                                                                                                                                                                                   |
 | `dep`                                        | int          | Syntactic dependency relation.                                                                                                                                                                                                                                 |
-| `dep_`                                       | unicode      | Syntactic dependency relation.                                                                                                                                                                                                                                 |
+| `dep_`                                       | str          | Syntactic dependency relation.                                                                                                                                                                                                                                 |
 | `lang`                                       | int          | Language of the parent document's vocabulary.                                                                                                                                                                                                                  |
-| `lang_`                                      | unicode      | Language of the parent document's vocabulary.                                                                                                                                                                                                                  |
+| `lang_`                                      | str          | Language of the parent document's vocabulary.                                                                                                                                                                                                                  |
 | `prob`                                       | float        | Smoothed log probability estimate of token's word type (context-independent entry in the vocabulary).                                                                                                                                                          |
 | `idx`                                        | int          | The character offset of the token within the parent document.                                                                                                                                                                                                  |
 | `sentiment`                                  | float        | A scalar value indicating the positivity or negativity of the token.                                                                                                                                                                                           |
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@ -11,22 +11,20 @@ menu:
 ### spacy.load {#spacy.load tag="function" model="any"}
-Load a model via its [shortcut link](/usage/models#usage), the name of an
+Load a model using the name of an installed
-installed [model package](/usage/training#models-generating), a unicode path or
+[model package](/usage/training#models-generating), a string path or a
-a `Path`-like object. spaCy will try resolving the load argument in this order.
+`Path`-like object. spaCy will try resolving the load argument in this order. If
-If a model is loaded from a shortcut link or package name, spaCy will assume
+a model is loaded from a model name, spaCy will assume it's a Python package and
-it's a Python package and import it and call the model's own `load()` method. If
+import it and call the model's own `load()` method. If a model is loaded from a
-a model is loaded from a path, spaCy will assume it's a data directory, read the
+path, spaCy will assume it's a data directory, read the language and pipeline
-language and pipeline settings off the meta.json and initialize the `Language`
+settings off the meta.json and initialize the `Language` class. The data will be
-class. The data will be loaded in via
+loaded in via [`Language.from_disk`](/api/language#from_disk).
-[`Language.from_disk`](/api/language#from_disk).
 > #### Example
 >
 > ```python
-> nlp = spacy.load("en") # shortcut link
 > nlp = spacy.load("en_core_web_sm") # package
-> nlp = spacy.load("/path/to/en") # unicode path
+> nlp = spacy.load("/path/to/en") # string path
 > nlp = spacy.load(Path("/path/to/en")) # pathlib Path
 >
 > nlp = spacy.load("en_core_web_sm", disable=["parser", "tagger"])
@ -34,7 +32,7 @@ class. The data will be loaded in via
 | Name        | Type         | Description                                                                       |
 | ----------- | ------------ | --------------------------------------------------------------------------------- |
-| `name`      | str / `Path` | Model to load, i.e. shortcut link, package name or path.                          |
+| `name`      | str / `Path` | Model to load, i.e. package name or path.                                         |
 | `disable`   | list         | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). |
 | **RETURNS** | `Language`   | A `Language` object with the loaded model.                                        |
@ -98,10 +96,10 @@ meta data as a dictionary instead, you can use the `meta` attribute on your
 > spacy.info("de", markdown=True)
 > ```
-| Name       | Type | Description                                                   |
+| Name       | Type | Description                                      |
-| ---------- | ---- | ------------------------------------------------------------- |
+| ---------- | ---- | ------------------------------------------------ |
-| `model`    | str  | A model, i.e. shortcut link, package name or path (optional). |
+| `model`    | str  | A model, i.e. a package name or path (optional). |
-| `markdown` | bool | Print information as Markdown.                                |
+| `markdown` | bool | Print information as Markdown.                   |
 ### spacy.explain {#spacy.explain tag="function"}
@ -375,12 +373,12 @@ loaded lazily, to avoid expensive setup code associated with the language data.
 ### util.load_model {#util.load_model tag="function" new="2"}
-Load a model from a shortcut link, package or data path. If called with a
+Load a model from a package or data path. If called with a package name, spaCy
-shortcut link or package name, spaCy will assume the model is a Python package
+will assume the model is a Python package and import and call its `load()`
-and import and call its `load()` method. If called with a path, spaCy will
+method. If called with a path, spaCy will assume it's a data directory, read the
-assume it's a data directory, read the language and pipeline settings from the
+language and pipeline settings from the meta.json and initialize a `Language`
-meta.json and initialize a `Language` class. The model data will then be loaded
+class. The model data will then be loaded in via
-in via [`Language.from_disk()`](/api/language#from_disk).
+[`Language.from_disk()`](/api/language#from_disk).
 > #### Example
 >
@ -392,7 +390,7 @@ in via [`Language.from_disk()`](/api/language#from_disk).
 | Name          | Type       | Description                                              |
 | ------------- | ---------- | -------------------------------------------------------- |
-| `name`        | str        | Package name, shortcut link or model path.               |
+| `name`        | str        | Package name or model path.                              |
 | `**overrides` | -          | Specific overrides, like pipeline components to disable. |
 | **RETURNS**   | `Language` | `Language` class with the loaded model.                  |
--- a/website/docs/api/vectors.md
+++ b/website/docs/api/vectors.md
@ -124,7 +124,7 @@ Check whether a key has been mapped to a vector entry in the table.
 Add a key to the table, optionally setting a vector value as well. Keys can be
 mapped to an existing vector by setting `row`, or a new vector can be added.
-When adding unicode keys, keep in mind that the `Vectors` class itself has no
+When adding string keys, keep in mind that the `Vectors` class itself has no
 [`StringStore`](/api/stringstore), so you have to store the hash-to-string
 mapping separately. If you need to manage the strings, you should use the
 `Vectors` via the [`Vocab`](/api/vocab) class, e.g. `vocab.vectors`.
--- a/website/docs/api/vocab.md
+++ b/website/docs/api/vocab.md
@ -30,7 +30,7 @@ Create the vocabulary.
 | `lookups`                                    | `Lookups`            | A [`Lookups`](/api/lookups) that stores the `lemma_\*`, `lexeme_norm` and other large lookup tables. Defaults to `None`.                                    |
 | `lookups_extra` <Tag variant="new">2.3</Tag> | `Lookups`            | A [`Lookups`](/api/lookups) that stores the optional `lexeme_cluster`/`lexeme_prob`/`lexeme_sentiment`/`lexeme_settings` lookup tables. Defaults to `None`. |
 | `oov_prob`                                   | float                | The default OOV probability. Defaults to `-20.0`.                                                                                                           |
-| `vectors_name` <Tag variant="new">2.2</Tag>  | unicode              | A name to identify the vectors table.                                                                                                                       |
+| `vectors_name` <Tag variant="new">2.2</Tag>  | str                  | A name to identify the vectors table.                                                                                                                       |
 | **RETURNS**                                  | `Vocab`              | The newly constructed object.                                                                                                                               |
 ## Vocab.\_\_len\_\_ {#len tag="method"}
@ -50,8 +50,8 @@ Get the current number of lexemes in the vocabulary.
 ## Vocab.\_\_getitem\_\_ {#getitem tag="method"}
-Retrieve a lexeme, given an int ID or a unicode string. If a previously unseen
+Retrieve a lexeme, given an int ID or a string. If a previously unseen string is
-unicode string is given, a new lexeme is created and stored.
+given, a new lexeme is created and stored.
 > #### Example
 >
@ -60,10 +60,10 @@ unicode string is given, a new lexeme is created and stored.
 > assert nlp.vocab[apple] == nlp.vocab["apple"]
 > ```
-| Name           | Type          | Description                                      |
+| Name           | Type      | Description                              |
-| -------------- | ------------- | ------------------------------------------------ |
+| -------------- | --------- | ---------------------------------------- |
-| `id_or_string` | int / unicode | The hash value of a word, or its unicode string. |
+| `id_or_string` | int / str | The hash value of a word, or its string. |
-| **RETURNS**    | `Lexeme`      | The lexeme indicated by the given ID.            |
+| **RETURNS**    | `Lexeme`  | The lexeme indicated by the given ID.    |
 ## Vocab.\_\_iter\_\_ {#iter tag="method"}
@ -182,7 +182,7 @@ subword features by average over ngrams of `orth` (introduced in spaCy `v2.1`).
 | Name                                | Type                                     | Description                                                                                    |
 | ----------------------------------- | ---------------------------------------- | ---------------------------------------------------------------------------------------------- |
-| `orth`                              | int / unicode                            | The hash value of a word, or its unicode string.                                               |
+| `orth`                              | int / str                                | The hash value of a word, or its string.                                                       |
 | `minn` <Tag variant="new">2.1</Tag> | int                                      | Minimum n-gram length used for FastText's ngram computation. Defaults to the length of `orth`. |
 | `maxn` <Tag variant="new">2.1</Tag> | int                                      | Maximum n-gram length used for FastText's ngram computation. Defaults to the length of `orth`. |
 | **RETURNS**                         | `numpy.ndarray[ndim=1, dtype='float32']` | A word vector. Size and shape are determined by the `Vocab.vectors` instance.                  |
@ -200,7 +200,7 @@ or hash value.
 | Name     | Type                                     | Description                                      |
 | -------- | ---------------------------------------- | ------------------------------------------------ |
-| `orth`   | int / unicode                            | The hash value of a word, or its unicode string. |
+| `orth`   | int / str                                | The hash value of a word, or its string.         |
 | `vector` | `numpy.ndarray[ndim=1, dtype='float32']` | The vector to set.                               |
 ## Vocab.has_vector {#has_vector tag="method" new="2"}
@ -215,10 +215,10 @@ Words can be looked up by string or hash value.
 >     vector = nlp.vocab.get_vector("apple")
 > ```
-| Name        | Type          | Description                                      |
+| Name        | Type      | Description                                      |
-| ----------- | ------------- | ------------------------------------------------ |
+| ----------- | --------- | ------------------------------------------------ |
-| `orth`      | int / unicode | The hash value of a word, or its unicode string. |
+| `orth`      | int / str | The hash value of a word, or its string.         |
-| **RETURNS** | bool          | Whether the word has a vector.                   |
+| **RETURNS** | bool      | Whether the word has a vector.                   |
 ## Vocab.to_disk {#to_disk tag="method" new="2"}
--- a/website/docs/images/spacy-streamlit.png
+++ b/website/docs/images/spacy-streamlit.png
--- a/website/docs/usage/adding-languages.md
+++ b/website/docs/usage/adding-languages.md
@ -1,675 +0,0 @@
 ---
 title: Adding Languages
 next: /usage/training
 menu:
  - ['Language Data', 'language-data']
  - ['Testing', 'testing']
  - ['Training', 'training']
 ---
 Adding full support for a language touches many different parts of the spaCy
 library. This guide explains how to fit everything together, and points you to
 the specific workflows for each component.
 > #### Working on spaCy's source
 >
 > To add a new language to spaCy, you'll need to **modify the library's code**.
 > The easiest way to do this is to clone the
 > [repository](https://github.com/explosion/spaCy/tree/master/) and **build
 > spaCy from source**. For more information on this, see the
 > [installation guide](/usage). Unlike spaCy's core, which is mostly written in
 > Cython, all language data is stored in regular Python files. This means that
 > you won't have to rebuild anything in between – you can simply make edits and
 > reload spaCy to test them.
 <Grid cols={2}>
 <div>
 Obviously, there are lots of ways you can organize your code when you implement
 your own language data. This guide will focus on how it's done within spaCy. For
 full language support, you'll need to create a `Language` subclass, define
 custom **language data**, like a stop list and tokenizer exceptions, and test the
 new tokenizer. Once the language is set up, you can **build the vocabulary**,
 including word frequencies, Brown clusters and word vectors. Finally, you can
 **train the tagger and parser**, and save the model to a directory.
 For some languages, you may also want to develop a solution for lemmatization
 and morphological analysis.
 </div>
 <Infobox title="Table of Contents" id="toc">
 - [Language data 101](#101)
 - [The Language subclass](#language-subclass)
 - [Stop words](#stop-words)
 - [Tokenizer exceptions](#tokenizer-exceptions)
 - [Norm exceptions](#norm-exceptions)
 - [Lexical attributes](#lex-attrs)
 - [Syntax iterators](#syntax-iterators)
 - [Lemmatizer](#lemmatizer)
 - [Tag map](#tag-map)
 - [Morph rules](#morph-rules)
 - [Testing the language](#testing)
 - [Training](#training)
 </Infobox>
 </Grid>
 ## Language data {#language-data}
 import LanguageData101 from 'usage/101/\_language-data.md'
 <LanguageData101 />
 The individual components **expose variables** that can be imported within a
 language module, and added to the language's `Defaults`. Some components, like
 the punctuation rules, usually don't need much customization and can be imported
 from the global rules. Others, like the tokenizer and norm exceptions, are very
 specific and will make a big difference to spaCy's performance on the particular
 language and training a language model.
 | Variable               | Type  | Description                                                                                                |
 | ---------------------- | ----- | ---------------------------------------------------------------------------------------------------------- |
 | `STOP_WORDS`           | set   | Individual words.                                                                                          |
 | `TOKENIZER_EXCEPTIONS` | dict  | Keyed by strings mapped to list of one dict per token with token attributes.                               |
 | `TOKEN_MATCH`          | regex | Regexes to match complex tokens, e.g. URLs.                                                                |
 | `NORM_EXCEPTIONS`      | dict  | Keyed by strings, mapped to their norms.                                                                   |
 | `TOKENIZER_PREFIXES`   | list  | Strings or regexes, usually not customized.                                                                |
 | `TOKENIZER_SUFFIXES`   | list  | Strings or regexes, usually not customized.                                                                |
 | `TOKENIZER_INFIXES`    | list  | Strings or regexes, usually not customized.                                                                |
 | `LEX_ATTRS`            | dict  | Attribute ID mapped to function.                                                                           |
 | `SYNTAX_ITERATORS`     | dict  | Iterator ID mapped to function. Currently only supports `'noun_chunks'`.                                   |
 | `TAG_MAP`              | dict  | Keyed by strings mapped to [Universal Dependencies](http://universaldependencies.org/u/pos/all.html) tags. |
 | `MORPH_RULES`          | dict  | Keyed by strings mapped to a dict of their morphological features.                                         |
 > #### Should I ever update the global data?
 >
 > Reusable language data is collected as atomic pieces in the root of the
 > [`spacy.lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang)
 > module. Often, when a new language is added, you'll find a pattern or symbol
 > that's missing. Even if it isn't common in other languages, it might be best
 > to add it to the shared language data, unless it has some conflicting
 > interpretation. For instance, we don't expect to see guillemet quotation
 > symbols (`»` and `«`) in English text. But if we do see them, we'd probably
 > prefer the tokenizer to split them off.
 <Infobox title="For languages with non-latin characters">
 In order for the tokenizer to split suffixes, prefixes and infixes, spaCy needs
 to know the language's character set. If the language you're adding uses
 non-latin characters, you might need to define the required character classes in
 the global
 [`char_classes.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/char_classes.py).
 For efficiency, spaCy uses hard-coded unicode ranges to define character
 classes, the definitions of which can be found on
 [Wikipedia](https://en.wikipedia.org/wiki/Unicode_block). If the language
 requires very specific punctuation rules, you should consider overwriting the
 default regular expressions with your own in the language's `Defaults`.
 </Infobox>
 ### Creating a language subclass {#language-subclass}
 Language-specific code and resources should be organized into a sub-package of
 spaCy, named according to the language's
 [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). For instance,
 code and resources specific to Spanish are placed into a directory
 `spacy/lang/es`, which can be imported as `spacy.lang.es`.
 To get started, you can check out the
 [existing languages](https://github.com/explosion/spacy/tree/master/spacy/lang).
 Here's what the class could look like:
 ```python
 ### __init__.py (excerpt)
 # import language-specific data
 from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .lex_attrs import LEX_ATTRS
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...language import Language
 from ...attrs import LANG
 from ...util import update_exc
 # Create Defaults class in the module scope (necessary for pickling!)
 class XxxxxDefaults(Language.Defaults):
    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
    lex_attr_getters[LANG] = lambda text: "xx" # language ISO code
    # Optional: replace flags with custom functions, e.g. like_num()
    lex_attr_getters.update(LEX_ATTRS)
    # Merge base exceptions and custom tokenizer exceptions
    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
    stop_words = STOP_WORDS
 # Create actual Language class
 class Xxxxx(Language):
    lang = "xx" # Language ISO code
    Defaults = XxxxxDefaults # Override defaults
 # Set default export – this allows the language class to be lazy-loaded
 __all__ = ["Xxxxx"]
 ```
 <Infobox title="Why lazy-loading?">
 Some languages contain large volumes of custom data, like lemmatizer lookup
 tables, or complex regular expressions that are expensive to compute. As of spaCy
 v2.0, `Language` classes are not imported on initialization and are only loaded
 when you import them directly, or load a model that requires a language to be
 loaded. To lazy-load languages in your application, you can use the
 [`util.get_lang_class`](/api/top-level#util.get_lang_class) helper function with
 the two-letter language code as its argument.
 </Infobox>
 ### Stop words {#stop-words}
 A ["stop list"](https://en.wikipedia.org/wiki/Stop_words) is a classic trick
 from the early days of information retrieval when search was largely about
 keyword presence and absence. It is still sometimes useful today to filter out
 common words from a bag-of-words model. To improve readability, `STOP_WORDS` are
 separated by spaces and newlines, and added as a multiline string.
 > #### What does spaCy consider a stop word?
 >
 > There's no particularly principled logic behind what words should be added to
 > the stop list. Make a list that you think might be useful to people and is
 > likely to be unsurprising. As a rule of thumb, words that are very rare are
 > unlikely to be useful stop words.
 ```python
 ### Example
 STOP_WORDS = set("""
 a about above across after afterwards again against all almost alone along
 already also although always am among amongst amount an and another any anyhow
 anyone anything anyway anywhere are around as at
 back be became because become becomes becoming been before beforehand behind
 being below beside besides between beyond both bottom but by
 """.split())
 ```
 <Infobox title="Important note" variant="warning">
 When adding stop words from an online source, always **include the link** in a
 comment. Make sure to **proofread** and double-check the words carefully. A lot
 of the lists available online have been passed around for years and often
 contain mistakes, like unicode errors or random words that have once been added
 for a specific use case, but don't actually qualify.
 </Infobox>
 ### Tokenizer exceptions {#tokenizer-exceptions}
 spaCy's [tokenization algorithm](/usage/linguistic-features#how-tokenizer-works)
 lets you deal with whitespace-delimited chunks separately. This makes it easy to
 define special-case rules, without worrying about how they interact with the
 rest of the tokenizer. Whenever the key string is matched, the special-case rule
 is applied, giving the defined sequence of tokens.
 Tokenizer exceptions can be added in the following format:
 ```python
 ### tokenizer_exceptions.py (excerpt)
 TOKENIZER_EXCEPTIONS = {
    "don't": [
        {ORTH: "do"},
        {ORTH: "n't", NORM: "not"}]
 }
 ```
 <Infobox title="Important note" variant="warning">
 If an exception consists of more than one token, the `ORTH` values combined
 always need to **match the original string**. The way the original string is
 split up can be pretty arbitrary sometimes – for example `"gonna"` is split into
 `"gon"` (norm "going") and `"na"` (norm "to"). Because of how the tokenizer
 works, it's currently not possible to split single-letter strings into multiple
 tokens.
 </Infobox>
 > #### Generating tokenizer exceptions
 >
 > Keep in mind that generating exceptions only makes sense if there's a clearly
 > defined and **finite number** of them, like common contractions in English.
 > This is not always the case – in Spanish for instance, infinitive or
 > imperative reflexive verbs and pronouns are one token (e.g. "vestirme"). In
 > cases like this, spaCy shouldn't be generating exceptions for _all verbs_.
 > Instead, this will be handled at a later stage after part-of-speech tagging
 > and lemmatization.
 When adding the tokenizer exceptions to the `Defaults`, you can use the
 [`update_exc`](/api/top-level#util.update_exc) helper function to merge them
 with the global base exceptions (including one-letter abbreviations and
 emoticons). The function performs a basic check to make sure exceptions are
 provided in the correct format. It can take any number of exceptions dicts as
 its arguments, and will update and overwrite the exception in this order. For
 example, if your language's tokenizer exceptions include a custom tokenization
 pattern for "a.", it will overwrite the base exceptions with the language's
 custom one.
 ```python
 ### Example
 from ...util import update_exc
 BASE_EXCEPTIONS =  {"a.": [{ORTH: "a."}], ":)": [{ORTH: ":)"}]}
 TOKENIZER_EXCEPTIONS = {"a.": [{ORTH: "a.", NORM: "all"}]}
 tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
 # {"a.": [{ORTH: "a.", NORM: "all"}], ":)": [{ORTH: ":)"}]}
 ```
 ### Norm exceptions {#norm-exceptions new="2"}
 In addition to `ORTH`, tokenizer exceptions can also set a `NORM` attribute.
 This is useful to specify a normalized version of the token – for example, the
 norm of "n't" is "not". By default, a token's norm equals its lowercase text. If
 the lowercase spelling of a word exists, norms should always be in lowercase.
 > #### Norms vs. lemmas
 >
 > ```python
 > doc = nlp("I'm gonna realise")
 > norms = [token.norm_ for token in doc]
 > lemmas = [token.lemma_ for token in doc]
 > assert norms == ["i", "am", "going", "to", "realize"]
 > assert lemmas == ["i", "be", "go", "to", "realise"]
 > ```
 spaCy usually tries to normalize words with different spellings to a single,
 common spelling. This has no effect on any other token attributes, or
 tokenization in general, but it ensures that **equivalent tokens receive similar
 representations**. This can improve the model's predictions on words that
 weren't common in the training data, but are equivalent to other words – for
 example, "realise" and "realize", or "thx" and "thanks".
 Similarly, spaCy also includes
 [global base norms](https://github.com/explosion/spaCy/tree/master/spacy/lang/norm_exceptions.py)
 for normalizing different styles of quotation marks and currency symbols. Even
 though `$` and `€` are very different, spaCy normalizes them both to `$`. This
 way, they'll always be seen as similar, no matter how common they were in the
 training data.
 As of spaCy v2.3, language-specific norm exceptions are provided as a
 JSON dictionary in the package
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) rather
 than in the main library. For a full example, see
 [`en_lexeme_norm.json`](https://github.com/explosion/spacy-lookups-data/blob/master/spacy_lookups_data/data/en_lexeme_norm.json).
 ```json
 ### Example
 {
    "cos": "because",
    "fav": "favorite",
    "accessorise": "accessorize",
    "accessorised": "accessorized"
 }
 ```
 If you're adding tables for a new language, be sure to add the tables to
 [`spacy_lookups_data/__init__.py`](https://github.com/explosion/spacy-lookups-data/blob/master/spacy_lookups_data/__init__.py)
 and register the entry point under `spacy_lookups` in
 [`setup.cfg`](https://github.com/explosion/spacy-lookups-data/blob/master/setup.cfg).
 Alternatively, you can initialize your language [`Vocab`](/api/vocab) with a
 [`Lookups`](/api/lookups) object that includes the table `lexeme_norm`.
 <Accordion title="Norm exceptions in spaCy v2.0-v2.2" id="norm-exceptions-v2.2">
 Previously in spaCy v2.0-v2.2, norm exceptions were provided as a simple Python
 dictionary. For more examples, see the English
 [`norm_exceptions.py`](https://github.com/explosion/spaCy/tree/v2.2.x/spacy/lang/en/norm_exceptions.py).
 ```python
 ### Example
 NORM_EXCEPTIONS = {
    "cos": "because",
    "fav": "favorite",
    "accessorise": "accessorize",
    "accessorised": "accessorized"
 }
 ```
 To add the custom norm exceptions lookup table, you can use the `add_lookups()`
 helper function. It takes the default attribute getter function as its first
 argument, plus a variable list of dictionaries. If a string's norm is found in
 one of the dictionaries, that value is used – otherwise, the default function is
 called and the token is assigned its default norm.
 ```python
 lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM],
                                     NORM_EXCEPTIONS, BASE_NORMS)
 ```
 The order of the dictionaries is also the lookup order – so if your language's
 norm exceptions overwrite any of the global exceptions, they should be added
 first. Also note that the tokenizer exceptions will always have priority over
 the attribute getters.
 </Accordion>
 ### Lexical attributes {#lex-attrs new="2"}
 spaCy provides a range of [`Token` attributes](/api/token#attributes) that
 return useful information on that token – for example, whether it's uppercase or
 lowercase, a left or right punctuation mark, or whether it resembles a number or
 email address. Most of these functions, like `is_lower` or `like_url` should be
 language-independent. Others, like `like_num` (which includes both digits and
 number words), require some customization.
 > #### Best practices
 >
 > Keep in mind that those functions are only intended to be an approximation.
 > It's always better to prioritize simplicity and performance over covering very
 > specific edge cases.
 >
 > English number words are pretty simple, because even large numbers consist of
 > individual tokens, and we can get away with splitting and matching strings
 > against a list. In other languages, like German, "two hundred and thirty-four"
 > is one word, and thus one token. Here, it's best to match a string against a
 > list of number word fragments (instead of a technically almost infinite list
 > of possible number words).
 Here's an example from the English
 [`lex_attrs.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py):
 ```python
 ### lex_attrs.py
 _num_words = ["zero", "one", "two", "three", "four", "five", "six", "seven",
              "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
              "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty",
              "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
              "hundred", "thousand", "million", "billion", "trillion", "quadrillion",
              "gajillion", "bazillion"]
 def like_num(text):
    text = text.replace(",", "").replace(".", "")
    if text.isdigit():
        return True
    if text.count("/") == 1:
        num, denom = text.split("/")
        if num.isdigit() and denom.isdigit():
            return True
    if text.lower() in _num_words:
        return True
    return False
 LEX_ATTRS = {
    LIKE_NUM: like_num
 }
 ```
 By updating the default lexical attributes with a custom `LEX_ATTRS` dictionary
 in the language's defaults via `lex_attr_getters.update(LEX_ATTRS)`, only the
 new custom functions are overwritten.
 ### Syntax iterators {#syntax-iterators}
 Syntax iterators are functions that compute views of a `Doc` object based on its
 syntax. At the moment, this data is only used for extracting
 [noun chunks](/usage/linguistic-features#noun-chunks), which are available as
 the [`Doc.noun_chunks`](/api/doc#noun_chunks) property. Because base noun
 phrases work differently across languages, the rules to compute them are part of
 the individual language's data. If a language does not include a noun chunks
 iterator, the property won't be available. For examples, see the existing syntax
 iterators:
 > #### Noun chunks example
 >
 > ```python
 > doc = nlp("A phrase with another phrase occurs.")
 > chunks = list(doc.noun_chunks)
 > assert chunks[0].text == "A phrase"
 > assert chunks[1].text == "another phrase"
 > ```
 | Language         | Code | Source                                                                                                            |
 | ---------------- | ---- | ----------------------------------------------------------------------------------------------------------------- |
 | English          | `en` | [`lang/en/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py) |
 | German           | `de` | [`lang/de/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/de/syntax_iterators.py) |
 | French           | `fr` | [`lang/fr/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/syntax_iterators.py) |
 | Spanish          | `es` | [`lang/es/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/es/syntax_iterators.py) |
 | Greek            | `el` | [`lang/el/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/el/syntax_iterators.py) |
 | Norwegian Bokmål | `nb` | [`lang/nb/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/nb/syntax_iterators.py) |
 | Swedish          | `sv` | [`lang/sv/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/sv/syntax_iterators.py) |
 | Indonesian       | `id` | [`lang/id/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/id/syntax_iterators.py) |
 | Persian          | `fa` | [`lang/fa/syntax_iterators.py`](https://github.com/explosion/spaCy/tree/master/spacy/lang/fa/syntax_iterators.py) |
 ### Lemmatizer {#lemmatizer new="2"}
 As of v2.0, spaCy supports simple lookup-based lemmatization. This is usually
 the quickest and easiest way to get started. The data is stored in a dictionary
 mapping a string to its lemma. To determine a token's lemma, spaCy simply looks
 it up in the table. Here's an example from the Spanish language data:
 ```json
 ### es_lemma_lookup.json (excerpt)
 {
  "aba": "abar",
  "ababa": "abar",
  "ababais": "abar",
  "ababan": "abar",
  "ababanes": "ababán",
  "ababas": "abar",
  "ababoles": "ababol",
  "ababábites": "ababábite"
 }
 ```
 #### Adding JSON resources {#lemmatizer-resources new="2.2"}
 As of v2.2, resources for the lemmatizer are stored as JSON and have been moved
 to a separate repository and package,
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). The
 package exposes the data files via language-specific
 [entry points](/usage/saving-loading#entry-points) that spaCy reads when
 constructing the `Vocab` and [`Lookups`](/api/lookups). This allows easier
 access to the data, serialization with the models and file compression on disk
 (so your spaCy installation is smaller). If you want to use the lookup tables
 without a pretrained model, you have to explicitly install spaCy with lookups
 via `pip install spacy[lookups]` or by installing
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) in the
 same environment as spaCy.
 ### Tag map {#tag-map}
 Most treebanks define a custom part-of-speech tag scheme, striking a balance
 between level of detail and ease of prediction. While it's useful to have custom
 tagging schemes, it's also useful to have a common scheme, to which the more
 specific tags can be related. The tagger can learn a tag scheme with any
 arbitrary symbols. However, you need to define how those symbols map down to the
 [Universal Dependencies tag set](http://universaldependencies.org/u/pos/all.html).
 This is done by providing a tag map.
 The keys of the tag map should be **strings in your tag set**. The values should
 be a dictionary. The dictionary must have an entry POS whose value is one of the
 [Universal Dependencies](http://universaldependencies.org/u/pos/all.html) tags.
 Optionally, you can also include morphological features or other token
 attributes in the tag map as well. This allows you to do simple
 [rule-based morphological analysis](/usage/linguistic-features#rule-based-morphology).
 ```python
 ### Example
 from ..symbols import POS, NOUN, VERB, DET
 TAG_MAP = {
    "NNS":  {POS: NOUN, "Number": "plur"},
    "VBG":  {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
    "DT":   {POS: DET}
 }
 ```
 ### Morph rules {#morph-rules}
 The morphology rules let you set token attributes such as lemmas, keyed by the
 extended part-of-speech tag and token text. The morphological features and their
 possible values are language-specific and based on the
 [Universal Dependencies scheme](http://universaldependencies.org).
 ```python
 ### Example
 from ..symbols import LEMMA
 MORPH_RULES = {
    "VBZ": {
        "am": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
        "are": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
        "is": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
        "'re": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
        "'s": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}
    }
 }
 ```
 In the example of `"am"`, the attributes look like this:
 | Attribute           | Description                                                                                                                    |
 | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
 | `LEMMA: "be"`       | Base form, e.g. "to be".                                                                                                       |
 | `"VerbForm": "Fin"` | Finite verb. Finite verbs have a subject and can be the root of an independent clause – "I am." is a valid, complete sentence. |
 | `"Person": "One"`   | First person, i.e. "**I** am".                                                                                                 |
 | `"Tense": "Pres"`   | Present tense, i.e. actions that are happening right now or actions that usually happen.                                       |
 | `"Mood": "Ind"`     | Indicative, i.e. something happens, has happened or will happen (as opposed to imperative or conditional).                     |
 <Infobox title="Important note" variant="warning">
 The morphological attributes are currently **not all used by spaCy**. Full
 integration is still being developed. In the meantime, it can still be useful to
 add them, especially if the language you're adding includes important
 distinctions and special cases. This ensures that as soon as full support is
 introduced, your language will be able to assign all possible attributes.
 </Infobox>
 ## Testing the new language {#testing}
 Before using the new language or submitting a
 [pull request](https://github.com/explosion/spaCy/pulls) to spaCy, you should
 make sure it works as expected. This is especially important if you've added
 custom regular expressions for token matching or punctuation – you don't want to
 be causing regressions.
 <Infobox title="spaCy's test suite">
 spaCy uses the [pytest framework](https://docs.pytest.org/en/latest/) for
 testing. For more details on how the tests are structured and best practices for
 writing your own tests, see our
 [tests documentation](https://github.com/explosion/spaCy/tree/master/spacy/tests).
 </Infobox>
 ### Writing language-specific tests {#testing-custom}
 It's recommended to always add at least some tests with examples specific to the
 language. Language tests should be located in
 [`tests/lang`](https://github.com/explosion/spaCy/tree/master/spacy/tests/lang)
 in a directory named after the language ID. You'll also need to create a fixture
 for your tokenizer in the
 [`conftest.py`](https://github.com/explosion/spaCy/tree/master/spacy/tests/conftest.py).
 Always use the [`get_lang_class`](/api/top-level#util.get_lang_class) helper
 function within the fixture, instead of importing the class at the top of the
 file. This will load the language data only when it's needed. (Otherwise, _all
 data_ would be loaded every time you run a test.)
 ```python
@pytest.fixture
 def en_tokenizer():
    return util.get_lang_class("en").Defaults.create_tokenizer()
 ```
 When adding test cases, always
 [`parametrize`](https://github.com/explosion/spaCy/tree/master/spacy/tests#parameters)
 them – this will make it easier for others to add more test cases without having
 to modify the test itself. You can also add parameter tuples, for example, a
 test sentence and its expected length, or a list of expected tokens. Here's an
 example of an English tokenizer test for combinations of punctuation and
 abbreviations:
 ```python
 ### Example test
@pytest.mark.parametrize('text,length', [
    ("The U.S. Army likes Shock and Awe.", 8),
    ("U.N. regulations are not a part of their concern.", 10),
    ("“Isn't it?”", 6)])
 def test_en_tokenizer_handles_punct_abbrev(en_tokenizer, text, length):
    tokens = en_tokenizer(text)
    assert len(tokens) == length
 ```
 ## Training a language model {#training}
 Much of spaCy's functionality requires models to be trained from labeled data.
 For instance, in order to use the named entity recognizer, you need to first
 train a model on text annotated with examples of the entities you want to
 recognize. The parser, part-of-speech tagger and text categorizer all also
 require models to be trained from labeled examples. The word vectors, word
 probabilities and word clusters also require training, although these can be
 trained from unlabeled text, which tends to be much easier to collect.
 ### Creating a vocabulary file {#vocab-file}
 spaCy expects that common words will be cached in a [`Vocab`](/api/vocab)
 instance. The vocabulary caches lexical features. spaCy loads the vocabulary
 from binary data, in order to keep loading efficient. The easiest way to save
 out a new binary vocabulary file is to use the `spacy init-model` command, which
 expects a JSONL file with words and their lexical attributes. See the docs on
 the [vocab JSONL format](/api/annotation#vocab-jsonl) for details.
 #### Training the word vectors {#word-vectors}
 [Word2vec](https://en.wikipedia.org/wiki/Word2vec) and related algorithms let
 you train useful word similarity models from unlabeled text. This is a key part
 of using deep learning for NLP with limited labeled data. The vectors are also
 useful by themselves – they power the `.similarity` methods in spaCy. For best
 results, you should pre-process the text with spaCy before training the Word2vec
 model. This ensures your tokenization will match. You can use our
 [word vectors training script](https://github.com/explosion/spacy/tree/master/bin/train_word_vectors.py),
 which pre-processes the text with your language-specific tokenizer and trains
 the model using [Gensim](https://radimrehurek.com/gensim/). The `vectors.bin`
 file should consist of one word and vector per line.
 ```python
 https://github.com/explosion/spacy/tree/master/bin/train_word_vectors.py
 ```
 If you don't have a large sample of text available, you can also convert word
 vectors produced by a variety of other tools into spaCy's format. See the docs
 on [converting word vectors](/usage/vectors-similarity#converting) for details.
 ### Creating or converting a training corpus {#training-corpus}
 The easiest way to train spaCy's tagger, parser, entity recognizer or text
 categorizer is to use the [`spacy train`](/api/cli#train) command-line utility.
 In order to use this, you'll need training and evaluation data in the
 [JSON format](/api/annotation#json-input) spaCy expects for training.
 If your data is in one of the supported formats, the easiest solution might be
 to use the [`spacy convert`](/api/cli#convert) command-line utility. This
 supports several popular formats, including the IOB format for named entity
 recognition, the JSONL format produced by our annotation tool
 [Prodigy](https://prodi.gy), and the
 [CoNLL-U](http://universaldependencies.org/docs/format.html) format used by the
 [Universal Dependencies](http://universaldependencies.org/) corpus.
 One thing to keep in mind is that spaCy expects to train its models from **whole
 documents**, not just single sentences. If your corpus only contains single
 sentences, spaCy's models will never learn to expect multi-sentence documents,
 leading to low performance on real text. To mitigate this problem, you can use
 the `-n` argument to the `spacy convert` command, to merge some of the sentences
 into longer pseudo-documents.
 ### Training the tagger and parser {#train-tagger-parser}
 Once you have your training and evaluation data in the format spaCy expects, you
 can train your model using spaCy's [`train`](/api/cli#train) command.
 Note that training statistical models still involves a degree of
 trial-and-error. You may need to tune one or more settings, also called
 "hyper-parameters", to achieve optimal performance. See the
 [usage guide on training](/usage/training#tagger-parser) for more details.
--- a/website/docs/usage/index.md
+++ b/website/docs/usage/index.md
@ -15,21 +15,9 @@ spaCy is compatible with **64-bit CPython 3.6+** and runs on **Unix/Linux**,
 > #### 📖 Looking for the old docs?
 >
-> To help you make the transition from v1.x to v2.0, we've uploaded the old
+> To help you make the transition from v2.x to v3.0, we've uploaded the old
-> website to [**legacy.spacy.io**](https://legacy.spacy.io/docs). Wherever
+> website to [**v2.spacy.io**](https://v2.spacy.io/docs). To see what's changed
-> possible, the new docs also include notes on features that have changed in
+> and how to migrate, see the [v3.0 guide](/usage/v3).
 > v2.0, and features that were introduced in the new version.
 <Infobox variant="warning" title="Important note for Python 3.8">
 We can't yet ship pre-compiled binary wheels for spaCy that work on Python 3.8,
 as we're still waiting for our CI providers and other tooling to support it.
 This means that in order to run spaCy on Python 3.8, you'll need
 [a compiler installed](#source) and compile the library and its Cython
 dependencies locally. If this is causing problems for you, the easiest solution
 is to **use Python 3.7** in the meantime.
 </Infobox>
 ## Quickstart {hidden="true"}
@ -95,29 +83,29 @@ and pull requests to the recipe and setup are always appreciated.
 ### Upgrading spaCy {#upgrading}
-> #### Upgrading from v1 to v2
+> #### Upgrading from v2 to v3
 >
 > Although we've tried to keep breaking changes to a minimum, upgrading from
-> spaCy v1.x to v2.x may still require some changes to your code base. For
+> spaCy v2.x to v3.x may still require some changes to your code base. For
-> details see the sections on [backwards incompatibilities](/usage/v2#incompat)
+> details see the sections on [backwards incompatibilities](/usage/v3#incompat)
-> and [migrating](/usage/v2#migrating). Also remember to download the new
+> and [migrating](/usage/v3#migrating). Also remember to download the new
 > models, and retrain your own models.
 When updating to a newer version of spaCy, it's generally recommended to start
 with a clean virtual environment. If you're upgrading to a new major version,
 make sure you have the latest **compatible models** installed, and that there
-are no old shortcut links or incompatible model packages left over in your
+are no old and incompatible model packages left over in your environment, as
-environment, as this can often lead to unexpected results and errors. If you've
+this can often lead to unexpected results and errors. If you've trained your own
-trained your own models, keep in mind that your train and runtime inputs must
+models, keep in mind that your train and runtime inputs must match. This means
-match. This means you'll have to **retrain your models** with the new version.
+you'll have to **retrain your models** with the new version.
-As of v2.0, spaCy also provides a [`validate`](/api/cli#validate) command, which
+spaCy also provides a [`validate`](/api/cli#validate) command, which lets you
-lets you verify that all installed models are compatible with your spaCy
+verify that all installed models are compatible with your spaCy version. If
-version. If incompatible models are found, tips and installation instructions
+incompatible models are found, tips and installation instructions are printed.
-are printed. The command is also useful to detect out-of-sync model links
+The command is also useful to detect out-of-sync model links resulting from
-resulting from links created in different virtual environments. It's recommended
+links created in different virtual environments. It's recommended to run the
-to run the command with `python -m` to make sure you're executing the correct
+command with `python -m` to make sure you're executing the correct version of
-version of spaCy.
+spaCy.
 ```bash
 pip install -U spacy
@ -268,24 +256,6 @@ language's `Language` class instead, for example
 </Accordion>
 <Accordion title="Symbolic link privilege not held" id="symlink-privilege">
 ```
 OSError: symbolic link privilege not held
 ```
 To create [shortcut links](/usage/models#usage) that let you load models by
 name, spaCy creates a symbolic link in the `spacy/data` directory. This means
 your user needs permission to do this. The above error mostly occurs when doing
 a system-wide installation, which will create the symlinks in a system
 directory. Run the `download` or `link` command as administrator (on Windows,
 you can either right-click on your terminal or shell and select "Run as
 Administrator"), set the `--user` flag when installing a model or use a virtual
 environment to install spaCy in a user directory, instead of doing a system-wide
 installation.
 </Accordion>
 <Accordion title="No such option: --no-cache-dir" id="no-cache-dir">
 ```
@ -363,14 +333,12 @@ ImportError: No module named 'en_core_web_sm'
 ```
 As of spaCy v1.7, all models can be installed as Python packages. This means
-that they'll become importable modules of your application. When creating
+that they'll become importable modules of your application. If this fails, it's
-[shortcut links](/usage/models#usage), spaCy will also try to import the model
+usually a sign that the package is not installed in the current environment. Run
-to load its meta data. If this fails, it's usually a sign that the package is
+`pip list` or `pip freeze` to check which model packages you have installed, and
-not installed in the current environment. Run `pip list` or `pip freeze` to
+install the [correct models](/models) if necessary. If you're importing a model
-check which model packages you have installed, and install the
+manually at the top of a file, make sure to use the name of the package, not the
-[correct models](/models) if necessary. If you're importing a model manually at
+shortcut link you've created.
 the top of a file, make sure to use the name of the package, not the shortcut
 link you've created.
 </Accordion>
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@ -2,13 +2,14 @@
 title: Linguistic Features
 next: /usage/rule-based-matching
 menu:
  - ['Tokenization', 'tokenization']
  - ['POS Tagging', 'pos-tagging']
  - ['Dependency Parse', 'dependency-parse']
  - ['Named Entities', 'named-entities']
  - ['Entity Linking', 'entity-linking']
  - ['Tokenization', 'tokenization']
  - ['Merging & Splitting', 'retokenization']
  - ['Sentence Segmentation', 'sbd']
  - ['Language data', 'language-data']
 ---
 Processing raw text intelligently is difficult: most words are rare, and it's
@ -297,8 +298,8 @@ different languages, see the
 ### Visualizing dependencies {#displacy}
 The best way to understand spaCy's dependency parser is interactively. To make
-this easier, spaCy v2.0+ comes with a visualization module. You can pass a `Doc`
+this easier, spaCy comes with a visualization module. You can pass a `Doc` or a
-or a list of `Doc` objects to displaCy and run
+list of `Doc` objects to displaCy and run
 [`displacy.serve`](/api/top-level#displacy.serve) to run the web server, or
 [`displacy.render`](/api/top-level#displacy.render) to generate the raw markup.
 If you want to know how to write rules that hook into some type of syntactic
@ -339,25 +340,6 @@ nlp = English().from_disk("/model", disable=["parser"])
 doc = nlp("I don't want parsed", disable=["parser"])
 ```
 <Infobox title="Important note: disabling pipeline components" variant="warning">
 Since spaCy v2.0 comes with better support for customizing the processing
 pipeline components, the `parser` keyword argument has been replaced with
 `disable`, which takes a list of
 [pipeline component names](/usage/processing-pipelines). This lets you disable
 both default and custom components when loading a model, or initializing a
 Language class via [`from_disk`](/api/language#from_disk).
 ```diff
 + nlp = spacy.load("en_core_web_sm", disable=["parser"])
 + doc = nlp("I don't want parsed", disable=["parser"])
 - nlp = spacy.load("en_core_web_sm", parser=False)
 - doc = nlp("I don't want parsed", parse=False)
 ```
 </Infobox>
 ## Named Entity Recognition {#named-entities}
 spaCy features an extremely fast statistical entity recognition system, that
@ -551,8 +533,8 @@ The
 [displaCy <sup>ENT</sup> visualizer](https://explosion.ai/demos/displacy-ent)
 lets you explore an entity recognition model's behavior interactively. If you're
 training a model, it's very useful to run the visualization yourself. To help
-you do that, spaCy v2.0+ comes with a visualization module. You can pass a `Doc`
+you do that, spaCy comes with a visualization module. You can pass a `Doc` or a
-or a list of `Doc` objects to displaCy and run
+list of `Doc` objects to displaCy and run
 [`displacy.serve`](/api/top-level#displacy.serve) to run the web server, or
 [`displacy.render`](/api/top-level#displacy.render) to generate the raw markup.
@ -789,8 +771,8 @@ The algorithm can be summarized as follows:
   token.
 3. Check whether we have an explicitly defined special case for this substring.
   If we do, use it.
-4. Otherwise, try to consume one prefix. If we consumed a prefix, go back to
+4. Otherwise, try to consume one prefix. If we consumed a prefix, go back to #2,
-   #2, so that the token match and special cases always get priority.
+   so that the token match and special cases always get priority.
 5. If we didn't consume a prefix, try to consume a suffix and then go back to
   #2.
 6. If we can't consume a prefix or a suffix, look for a URL match.
@ -843,7 +825,7 @@ domain. There are six things you may need to define:
   be split, overriding the infix rules. Useful for things like numbers.
 6. An optional boolean function `url_match`, which is similar to `token_match`
   except that prefixes and suffixes are removed before applying the match.
- 
+
 <Infobox title="Important note: token match in spaCy v2.2" variant="warning">
 In spaCy v2.2.2-v2.2.4, the `token_match` was equivalent to the `url_match`
@ -1470,13 +1452,8 @@ doc = nlp(text)
 print("After:", [sent.text for sent in doc.sents])
 ```
-## Rule-based matching {#rule-based-matching hidden="true"}
+## Language data {#language-data}
-<div id="rule-based-matching">
+import LanguageData101 from 'usage/101/\_language-data.md'
 <Infobox title="📖 Rule-based matching" id="rule-based-matching">
-The documentation on rule-based matching
+<LanguageData101 />
 [has moved to its own page](/usage/rule-based-matching).
 </Infobox>
 </div>
--- a/website/docs/usage/models.md
+++ b/website/docs/usage/models.md
@ -17,10 +17,10 @@ your file system.
 > #### Important note
 >
-> If you're upgrading to spaCy v1.7.x or v2.x, you need to **download the new
+> If you're upgrading to spaCy v3.x, you need to **download the new models**. If
-> models**. If you've trained statistical models that use spaCy's annotations,
+> you've trained statistical models that use spaCy's annotations, you should
-> you should **retrain your models** after updating spaCy. If you don't retrain,
+> **retrain your models** after updating spaCy. If you don't retrain, you may
-> you may suffer train/test skew, which might decrease your accuracy.
+> suffer train/test skew, which might decrease your accuracy.
 ## Quickstart {hidden="true"}
@ -74,10 +74,10 @@ import Languages from 'widgets/languages.js'
 > nlp = get_lang_class('xx')
 > ```
-As of v2.0, spaCy supports models trained on more than one language. This is
+spaCy also supports models trained on more than one language. This is especially
-especially useful for named entity recognition. The language ID used for
+useful for named entity recognition. The language ID used for multi-language or
-multi-language or language-neutral models is `xx`. The language class, a generic
+language-neutral models is `xx`. The language class, a generic subclass
-subclass containing only the base language data, can be found in
+containing only the base language data, can be found in
 [`lang/xx`](https://github.com/explosion/spaCy/tree/master/spacy/lang/xx).
 To load your model with the neutral, multi-language class, simply set
@ -134,11 +134,11 @@ $ pip install https://github.com/honnibal/pkuseg-python/archive/master.zip
 The `meta` argument of the `Chinese` language class supports the following
 tokenizer config settings:
-| Name               | Type    | Description                                                                                          |
+| Name               | Type | Description                                                                                          |
-| ------------------ | ------- | ---------------------------------------------------------------------------------------------------- |
+| ------------------ | ---- | ---------------------------------------------------------------------------------------------------- |
-| `pkuseg_model`     | unicode | **Required:** Name of a model provided by `pkuseg` or the path to a local model directory.           |
+| `pkuseg_model`     | str  | **Required:** Name of a model provided by `pkuseg` or the path to a local model directory.           |
-| `pkuseg_user_dict` | unicode | Optional path to a file with one word per line which overrides the default `pkuseg` user dictionary. |
+| `pkuseg_user_dict` | str  | Optional path to a file with one word per line which overrides the default `pkuseg` user dictionary. |
-| `require_pkuseg`   | bool    | Overrides all `jieba` settings (optional but strongly recommended).                                  |
+| `require_pkuseg`   | bool | Overrides all `jieba` settings (optional but strongly recommended).                                  |
 ```python
 ### Examples
@ -209,10 +209,9 @@ nlp = Chinese(meta={"tokenizer": {"config": {"pkuseg_model": "/path/to/pkuseg_mo
 The Japanese language class uses
 [SudachiPy](https://github.com/WorksApplications/SudachiPy) for word
 segmentation and part-of-speech tagging. The default Japanese language class and
-the provided Japanese models use SudachiPy split mode `A`.
+the provided Japanese models use SudachiPy split mode `A`. The `meta` argument
-
+of the `Japanese` language class can be used to configure the split mode to `A`,
-The `meta` argument of the `Japanese` language class can be used to configure
+`B` or `C`.
 the split mode to `A`, `B` or `C`.
 <Infobox variant="warning">
@ -224,34 +223,31 @@ used for training the current [Japanese models](/models/ja).
 ## Installing and using models {#download}
 > #### Downloading models in spaCy < v1.7
 >
 > In older versions of spaCy, you can still use the old download commands. This
 > will download and install the models into the `spacy/data` directory.
 >
 > ```bash
 >  python -m spacy.en.download all
 >  python -m spacy.de.download all
 >  python -m spacy.en.download glove
 > ```
 >
 > The old models are also
 > [attached to the v1.6.0 release](https://github.com/explosion/spaCy/tree/v1.6.0).
 > To download and install them manually, unpack the archive, drop the contained
 > directory into `spacy/data`.
 The easiest way to download a model is via spaCy's
 [`download`](/api/cli#download) command. It takes care of finding the
 best-matching model compatible with your spaCy installation.
 > #### Important note for v3.0
 >
 > Note that as of spaCy v3.0, model shortcut links that create (potentially
 > brittle) symlinks in your spaCy installation are **deprecated**. To download
 > and load an installed model, use its full name:
 >
 > ```diff
 > - python -m spacy download en
 > + python -m spacy download en_core_web_sm
 > ```
 >
 > ```diff
 > - nlp = spacy.load("en")
 > + nlp = spacy.load("en_core_web_sm")
 > ```
 ```bash
 # Download best-matching version of specific model for your spaCy installation
 python -m spacy download en_core_web_sm
-# Out-of-the-box: download best-matching default model and create shortcut link
+# Download exact model version
 python -m spacy download en
 # Download exact model version (doesn't create shortcut link)
 python -m spacy download en_core_web_sm-2.2.0 --direct
 ```
@ -269,18 +265,6 @@ nlp = spacy.load("en_core_web_sm")
 doc = nlp("This is a sentence.")
 ```
 <Infobox title="Important note" variant="warning">
 If you're downloading the models using a shortcut like `"en"`, spaCy will create
 a symlink within the `spacy/data` directory. This means that your user needs the
 **required permissions**. If you've installed spaCy to a system directory and
 don't have admin privileges, the model linking may fail. The easiest solution is
 to re-run the command as admin, set the `--user` flag or use a virtual
 environment. For more info on this, see the
 [troubleshooting guide](/usage/#symlink-privilege).
 </Infobox>
 ### Installation via pip {#download-pip}
 To download a model directly using [pip](https://pypi.python.org/pypi/pip),
@ -291,15 +275,14 @@ click on the archive link and copy it to your clipboard.
 ```bash
 # With external URL
-pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
+pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
 # With local file
-pip install /Users/you/en_core_web_sm-2.2.0.tar.gz
+pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
 ```
 By default, this will install the model into your `site-packages` directory. You
-can then use `spacy.load()` to load it via its package name, create a
+can then use `spacy.load()` to load it via its package name or
 [shortcut link](#usage-link) to assign it a custom name, or
 [import it](#usage-import) explicitly as a module. If you need to download
 models as part of an automated process, we recommend using pip with a direct
 link, instead of relying on spaCy's [`download`](/api/cli#download) command.
@ -319,29 +302,38 @@ model data.
 ```yaml
 ### Directory structure {highlight="7"}
-└── en_core_web_md-2.2.0.tar.gz       # downloaded archive
+└── en_core_web_md-3.0.0.tar.gz       # downloaded archive
    ├── meta.json                     # model meta data
    ├── setup.py                      # setup file for pip installation
    └── en_core_web_md                # 📦 model package
        ├── __init__.py               # init for pip installation
        ├── meta.json                 # model meta data
-        └── en_core_web_md-2.2.0      # model data
+        └── en_core_web_md-3.0.0      # model data
 ```
 You can place the **model package directory** anywhere on your local file
-system. To use it with spaCy, assign it a name by creating a shortcut link for
+system.
 the data directory.
 ### Using models with spaCy {#usage}
 To load a model, use [`spacy.load`](/api/top-level#spacy.load) with the model's
-shortcut link, package name or a path to the data directory:
+package name or a path to the data directory:
 > #### Important note for v3.0
 >
 > Note that as of spaCy v3.0, model shortcut links that create (potentially
 > brittle) symlinks in your spaCy installation are **deprecated**. To load an
 > installed model, use its full name:
 >
 > ```diff
 > - nlp = spacy.load("en")
 > + nlp = spacy.load("en_core_web_sm")
 > ```
 ```python
 import spacy
 nlp = spacy.load("en_core_web_sm")           # load model package "en_core_web_sm"
 nlp = spacy.load("/path/to/en_core_web_sm")  # load package from a directory
 nlp = spacy.load("en")                       # load model with shortcut link "en"
 doc = nlp("This is a sentence.")
 ```
@ -356,55 +348,6 @@ will return the model's version.
 </Infobox>
 ### Using custom shortcut links {#usage-link}
 While previous versions of spaCy required you to maintain a data directory
 containing the models for each installation, you can now choose **how and where
 you want to keep your data**. For example, you could download all models
 manually and put them into a local directory. Whenever your spaCy projects need
 a model, you create a shortcut link to tell spaCy to load it from there. This
 means you'll never end up with duplicate data.
 The [`link`](/api/cli#link) command will create a symlink in the `spacy/data`
 directory.
 > #### Why does spaCy use symlinks?
 >
 > Symlinks were originally introduced to maintain backwards compatibility, as
 > older versions expected model data to live within `spacy/data`. However, we
 > decided to keep using them in v2.0 instead of opting for a config file.
 > There'll always be a need for assigning and saving custom model names or IDs.
 > And your system already comes with a native solution to mapping unicode
 > aliases to file paths: symbolic links.
 ```bash
 $ python -m spacy link [package name or path] [shortcut] [--force]
 ```
 The first argument is the **package name** (if the model was installed via pip),
 or a local path to the **model package**. The second argument is the
 internal name you want to use for the model. Setting the `--force` flag will
 overwrite any existing links.
 ```bash
 ### Examples
 # set up shortcut link to load installed package as "en_default"
 python -m spacy link en_core_web_md en_default
 # set up shortcut link to load local model as "my_amazing_model"
 python -m spacy link /Users/you/model my_amazing_model
 ```
 <Infobox title="Important note" variant="warning">
 In order to create a symlink, your user needs the **required permissions**. If
 you've installed spaCy to a system directory and don't have admin privileges,
 the `spacy link` command may fail. The easiest solution is to re-run the command
 as admin, set the `--user` flag or use a virtual environment. For more info on
 this, see the [troubleshooting guide](/usage/#symlink-privilege).
 </Infobox>
 ### Importing models as modules {#usage-import}
 If you've installed a model via spaCy's downloader, or directly via pip, you can
@ -488,10 +431,9 @@ turn it into a loadable package.
 ### Loading and testing models {#models-loading}
-Downloading models directly via pip won't call spaCy's link
+Models are regular Python packages, so you can also import them as a package
-[`package`](/api/cli#link) command, which creates symlinks for model shortcuts.
+using Python's native `import` syntax, and then call the `load` method to load
-This means that you'll have to run this command separately, or use the native
+the model data and return an `nlp` object:
 `import` syntax to load the models:
 ```python
 import en_core_web_sm
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@ -295,25 +295,6 @@ nlp.rename_pipe("ner", "entityrecognizer")
 nlp.replace_pipe("tagger", my_custom_tagger)
 ```
 <Infobox title="Important note: disabling pipeline components" variant="warning">
 Since spaCy v2.0 comes with better support for customizing the processing
 pipeline components, the `parser`, `tagger` and `entity` keyword arguments have
 been replaced with `disable`, which takes a list of pipeline component names.
 This lets you disable pre-defined components when loading a model, or
 initializing a Language class via [`from_disk`](/api/language#from_disk).
 ```diff
 - nlp = spacy.load('en', tagger=False, entity=False)
 - doc = nlp("I don't want parsed", parse=False)
 + nlp = spacy.load("en", disable=["ner"])
 + nlp.remove_pipe("parser")
 + doc = nlp("I don't want parsed")
 ```
 </Infobox>
 ## Creating custom pipeline components {#custom-components}
 A component receives a `Doc` object and can modify it – for example, by using
@ -532,13 +513,13 @@ nlp = spacy.load("your_custom_model", terms=["tree kangaroo"], label="ANIMAL")
 <Infobox title="Important note" variant="warning">
-When you load a model via its shortcut or package name, like `en_core_web_sm`,
+When you load a model via its package name, like `en_core_web_sm`, spaCy will
-spaCy will import the package and then call its `load()` method. This means that
+import the package and then call its `load()` method. This means that custom
-custom code in the model's `__init__.py` will be executed, too. This is **not
+code in the model's `__init__.py` will be executed, too. This is **not the
-the case** if you're loading a model from a path containing the model data.
+case** if you're loading a model from a path containing the model data. Here,
-Here, spaCy will only read in the `meta.json`. If you want to use custom
+spaCy will only read in the `meta.json`. If you want to use custom factories
-factories with a model loaded from a path, you need to add them to
+with a model loaded from a path, you need to add them to `Language.factories`
-`Language.factories` _before_ you load the model.
+_before_ you load the model.
 </Infobox>
@ -719,8 +700,8 @@ class SimilarityModel(object):
 ## Developing plugins and wrappers {#plugins}
 We're very excited about all the new possibilities for community extensions and
-plugins in spaCy v2.0, and we can't wait to see what you build with it! To get
+plugins in spaCy, and we can't wait to see what you build with it! To get you
-you started, here are a few tips, tricks and best
+started, here are a few tips, tricks and best
 practices. [See here](/universe/?category=pipeline) for examples of other spaCy
 extensions.
--- a/website/docs/usage/projects.md
+++ b/website/docs/usage/projects.md
@ -0,0 +1,5 @@
 ---
 title: Projects
 ---
 TODO: write
--- a/website/docs/usage/saving-loading.md
+++ b/website/docs/usage/saving-loading.md
@ -13,15 +13,6 @@ import Serialization101 from 'usage/101/\_serialization.md'
 <Serialization101 />
 <Infobox title="Important note" variant="warning">
 In spaCy v2.0, the API for saving and loading has changed to only use the four
 methods listed above consistently across objects and classes. For an overview of
 the changes, see [this table](/usage/v2#incompat) and the notes on
 [migrating](/usage/v2#migrating-saving-loading).
 </Infobox>
 ### Serializing the pipeline {#pipeline}
 When serializing the pipeline, keep in mind that this will only save out the
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@ -1,6 +1,6 @@
 ---
-title: Training spaCy's Statistical Models
+title: Training Models
-next: /usage/adding-languages
+next: /usage/projects
 menu:
  - ['Basics', 'basics']
  - ['NER', 'ner']
--- a/website/docs/usage/v2.md
+++ b/website/docs/usage/v2.md
@ -253,11 +253,10 @@ have a `to_bytes()`, `from_bytes()`, `to_disk()` and `from_disk()` method that
 supports the Pickle protocol.
 The improved `spacy.load` makes loading models easier and more transparent. You
-can load a model by supplying its [shortcut link](/usage/models#usage), the name
+can load a model by supplying its shortcut link, the name of an installed
-of an installed [model package](/models) or a path. The `Language` class to
+[model package](/models) or a path. The `Language` class to initialize will be
-initialize will be determined based on the model's settings. For a blank
+determined based on the model's settings. For a blank language, you can import
-language, you can import the class directly, e.g.
+the class directly, e.g. `from spacy.lang.en import English` or use
 `from spacy.lang.en import English` or use
 [`spacy.blank()`](/api/top-level#spacy.blank).
 <Infobox>
--- a/website/docs/usage/visualizers.md
+++ b/website/docs/usage/visualizers.md
@ -7,35 +7,26 @@ menu:
  - ['Entities', 'ent']
  - ['Jupyter Notebooks', 'jupyter']
  - ['Rendering HTML', 'html']
  - ['Web app usage', 'webapp']
 ---
-As of v2.0, our popular visualizers,
+Visualizing a dependency parse or named entities in a text is not only a fun NLP
 demo – it can also be incredibly helpful in speeding up development and
 debugging your code and training process. That's why our popular visualizers,
 [displaCy](https://explosion.ai/demos/displacy) and
-[displaCy <sup>ENT</sup>](https://explosion.ai/demos/displacy-ent) are finally
+[displaCy <sup>ENT</sup>](https://explosion.ai/demos/displacy-ent) are also an
-an official part of the library. Visualizing a dependency parse or named
+official part of the core library. If you're running a
-entities in a text is not only a fun NLP demo – it can also be incredibly
+[Jupyter](https://jupyter.org) notebook, displaCy will detect this and return
-helpful in speeding up development and debugging your code and training process.
+the markup in a format [ready to be rendered and exported](#jupyter).
 If you're running a [Jupyter](https://jupyter.org) notebook, displaCy will
 detect this and return the markup in a format
 [ready to be rendered and exported](#jupyter).
 > #### What about the old visualizers?
 >
 > Our JavaScript-based visualizers
 > [`displacy.js`](https://github.com/explosion/displacy) and
 > [`displacy-ent.js`](https://github.com/explosion/displacy-ent) will still be
 > available on GitHub. If you're looking to implement web-based visualizations,
 > we generally recommend using those instead of spaCy's built-in `displacy`
 > module. It'll allow your application to perform all rendering on the client
 > and only rely on the server for the text processing. The generated markup is
 > also more compatible with modern web standards.
 The quickest way to visualize `Doc` is to use
 [`displacy.serve`](/api/top-level#displacy.serve). This will spin up a simple
 web server and let you view the result straight from your browser. displaCy can
 either take a single `Doc` or a list of `Doc` objects as its first argument.
 This lets you construct them however you like – using any model or modifications
-you like.
+you like. If you're using [Streamlit](https://streamlit.io), check out the
 [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit) package that
 helps you integrate spaCy visualizations into your apps!
 ## Visualizing the dependency parse {#dep}
@ -338,7 +329,7 @@ position.
 }
 ```
-### Using displaCy in a web application {#webapp}
+## Using displaCy in a web application {#webapp}
 If you want to use the visualizers as part of a web application, for example to
 create something like our [online demo](https://explosion.ai/demos/displacy),
@ -359,40 +350,13 @@ JSON-formatted output.
 > on the client in JavaScript. displaCy.js creates the markup as DOM nodes and
 > will never insert raw HTML.
-The `parse_deps` function takes a `Doc` object and returns a dictionary in a
+<Grid cols={2}>
 format that can be rendered by displaCy.
-```python
+Alternatively, if you're using [Streamlit](https://streamlit.io), check out the
-### Example
+[`spacy-streamlit`](https://github.com/explosion/spacy-streamlit) package that
-import spacy
+helps you integrate spaCy visualizations into your apps. It includes a full
-from spacy import displacy
+embedded visualizer, as well as individual components.
-nlp = spacy.load("en_core_web_sm")
+![](../images/spacy-streamlit.png)
-def displacy_service(text):
+</Grid>
    doc = nlp(text)
    return displacy.parse_deps(doc)
 ```
 Using a library like [Flask](http://flask.pocoo.org/) or
 [Hug](http://www.hug.rest/), you can easily turn the above code into a simple
 REST API that receives a text and returns a JSON-formatted parse. In your
 front-end, include [`displacy.js`](https://github.com/explosion/displacy) and
 initialize it with the API URL and the ID or query selector of the container to
 render the visualization in, e.g. `'#displacy'` for `<div id="displacy">`.
 ```javascript
 /// script.js
 var displacy = new displaCy('http://localhost:8080', {
  container: '#displacy',
 })
 function parse(text) {
  displacy.parse(text)
 }
 ```
 When you call `parse`, it will make a request to your API, receive the
 JSON-formatted parse and render it in your container. To create an interactive
 experience, you could trigger this function by a button and read the text from
 an `<input>` field.
--- a/website/meta/sidebars.json
+++ b/website/meta/sidebars.json
@ -8,6 +8,7 @@
                    { "text": "Installation", "url": "/usage" },
                    { "text": "Models & Languages", "url": "/usage/models" },
                    { "text": "Facts & Figures", "url": "/usage/facts-figures" },
                    { "text": "spaCy 101", "url": "/usage/spacy-101" },
                    { "text": "New in v3.0", "url": "/usage/v3" }
                ]
            },
@ -19,8 +20,8 @@
                    { "text": "Processing Pipelines", "url": "/usage/processing-pipelines" },
                    { "text": "Vectors & Similarity", "url": "/usage/vectors-similarity" },
                    { "text": "Training Models", "url": "/usage/training" },
                    { "text": "spaCy Projects", "url": "/usage/projects", "tag": "new" },
                    { "text": "Saving & Loading", "url": "/usage/saving-loading" },
                    { "text": "Adding Languages", "url": "/usage/adding-languages" },
                    { "text": "Visualizers", "url": "/usage/visualizers" }
                ]
            },
--- a/website/src/components/sidebar.js
+++ b/website/src/components/sidebar.js
@ -4,6 +4,7 @@ import classNames from 'classnames'
 import { window } from 'browser-monads'
 import Link from './link'
 import Tag from './tag'
 import Dropdown from './dropdown'
 import classes from '../styles/sidebar.module.sass'
@ -65,7 +66,7 @@ const Sidebar = ({ items, pageMenu, slug }) => {
            {items.map((section, i) => (
                <ul className={classes.section} key={i}>
                    <li className={classes.label}>{section.label}</li>
-                    {section.items.map(({ text, url, onClick, menu, isActive }, j) => {
+                    {section.items.map(({ text, url, tag, onClick, menu, isActive }, j) => {
                        const currentMenu = menu || pageMenu || []
                        const active = isActive || slug === url
                        const itemClassNames = classNames(classes.link, {
@ -82,6 +83,7 @@ const Sidebar = ({ items, pageMenu, slug }) => {
                                    hideIcon
                                >
                                    {text}
                                    {tag && <Tag spaced>{tag}</Tag>}
                                </Link>
                                {active && !!currentMenu.length && (
                                    <ul className={classes.crumbs}>
--- a/website/src/components/tag.js
+++ b/website/src/components/tag.js
@ -6,6 +6,8 @@ import { isString } from './util'
 import Icon from './icon'
 import classes from '../styles/tag.module.sass'
 const MIN_VERSION = 3
 const Tag = ({ spaced, variant, tooltip, children }) => {
    if (variant === 'new') {
        const isValid = isString(children) && !isNaN(children)
@ -13,8 +15,8 @@ const Tag = ({ spaced, variant, tooltip, children }) => {
        const tooltipText = `This feature is new and was introduced in spaCy v${version}`
        // TODO: we probably want to handle this more elegantly, but the idea is
        // that we can hide tags referring to old versions
-        // const hideTag = version.startsWith('2')
+        const major = isString(version) ? Number(version.split('.')[0]) : version
-        return (
+        return major < MIN_VERSION ? null : (
            <TagTemplate spaced={spaced} tooltip={tooltipText}>
                v{version}
            </TagTemplate>
--- a/website/src/fonts/jetbrainsmono-regular.woff
+++ b/website/src/fonts/jetbrainsmono-regular.woff
--- a/website/src/fonts/jetbrainsmono-regular.woff2
+++ b/website/src/fonts/jetbrainsmono-regular.woff2
--- a/website/src/styles/code.module.sass
+++ b/website/src/styles/code.module.sass
@ -16,7 +16,7 @@
 .code,
 .juniper-input pre,
 .juniper-output
-    font: var(--font-size-xs)/var(--line-height-lg) var(--font-code) !important
+    font: var(--font-size-code)/var(--line-height-code) var(--font-code) !important
    -webkit-font-smoothing: subpixel-antialiased
    -moz-osx-font-smoothing: auto
--- a/website/src/styles/layout.sass
+++ b/website/src/styles/layout.sass
@ -12,18 +12,20 @@
    // Fonts
    --font-primary: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'
    --font-secondary: 'HK Grotesk', Roboto, Helvetica, Arial, sans-serif
-    --font-code: Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace
+    --font-code: "JetBrains Mono", Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace
    // Font Sizes
    --font-size-xs: 1.1rem
    --font-size-sm: 1.3rem
    --font-size-md: 1.35rem
    --font-size-lg: 1.4rem
    --font-size-code: 1.2rem
    --line-height-xs: 1.25
    --line-height-sm: 1.375
    --line-height-md: 1.5
    --line-height-lg: 1.9
    --line-height-code: 1.8
    // Spacing
    --spacing-xs: 1rem
@ -148,6 +150,13 @@
    src: url("../fonts/hkgrotesk-bolditalic.woff2") format("woff2"), url("../fonts/hkgrotesk-bolditalic.woff") format("woff")
    font-display: swap
@font-face
    font-family: "JetBrains Mono"
    font-style: normal
    font-weight: 500
    font-display: fallback
    src: url("../fonts/jetbrainsmono-regular.woff2") format("woff2"), url("../fonts/jetbrainsmono-regular.woff") format("woff")
 /* Reset */
 *, *:before, *:after
--- a/website/src/templates/docs.js
+++ b/website/src/templates/docs.js
@ -154,6 +154,7 @@ const query = graphql`
                        items {
                            text
                            url
                            tag
                        }
                    }
                }
--- a/website/src/widgets/changelog.js
+++ b/website/src/widgets/changelog.js
@ -1,4 +1,4 @@
-import React, { useState, useEffect } from 'react'
+import React, { useState, useEffect, Fragment } from 'react'
 import { window } from 'browser-monads'
 import Link from '../components/link'
@ -101,12 +101,12 @@ const Changelog = () => {
            </p>
            <p>
-                {prereleases.map(({ title, date, url, tag }) => (
+                {prereleases.map(({ title, date, url, tag }, i) => (
-                    <>
+                    <Fragment key={i}>
                        <Link to={url} hideIcon data-tooltip={`${date}: ${title}`}>
                            <InlineCode>{tag}</InlineCode>
                        </Link>{' '}
-                    </>
+                    </Fragment>
                ))}
            </p>
        </>
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@ -23,11 +23,6 @@ const DATA = [
            { id: 'source', title: 'from source' },
        ],
    },
    {
        id: 'python',
        title: 'Python version',
        options: [{ id: '2', title: '2.x' }, { id: '3', title: '3.x', checked: true }],
    },
    {
        id: 'config',
        title: 'Configuration',
@ -70,15 +65,7 @@ const QuickstartInstall = ({ id, title }) => (
            ]
            return (
                <Quickstart data={data} title={title} id={id}>
-                    <QS config="venv" python="2">
+                    <QS config="venv">python -m venv .env</QS>
                        python -m pip install -U virtualenv
                    </QS>
                    <QS config="venv" python="2">
                        virtualenv .env
                    </QS>
                    <QS config="venv" python="3">
                        python -m venv .env
                    </QS>
                    <QS config="venv" os="mac">
                        source .env/bin/activate
                    </QS>