Use consistent spelling (explosion/spaCy)

parent 208629615d
commit b6670bf0c2
@@ -3,9 +3,9 @@
 # spaCy: Industrial-strength NLP
 
 spaCy is a library for advanced Natural Language Processing in Python and
 Cython. It's built on the very latest research, and was designed from day one to
 be used in real products. spaCy comes with
-[pre-trained statistical models](https://spacy.io/models) and word vectors, and
+[pretrained statistical models](https://spacy.io/models) and word vectors, and
 currently supports tokenization for **50+ languages**. It features
 state-of-the-art speed, convolutional **neural network models** for tagging,
 parsing and **named entity recognition** and easy **deep learning** integration.
@@ -73,7 +73,7 @@ it.
 - Non-destructive **tokenization**
 - **Named entity** recognition
 - Support for **50+ languages**
-- Pre-trained [statistical models](https://spacy.io/models) and word vectors
+- pretrained [statistical models](https://spacy.io/models) and word vectors
 - State-of-the-art speed
 - Easy **deep learning** integration
 - Part-of-speech tagging
@@ -376,7 +376,7 @@ def initialize_pipeline(nlp, docs, golds, config, device):
 
 
 def _load_pretrained_tok2vec(nlp, loc):
-    """Load pre-trained weights for the 'token-to-vector' part of the component
+    """Load pretrained weights for the 'token-to-vector' part of the component
     models, which is typically a CNN. See 'spacy pretrain'. Experimental.
     """
    with Path(loc).open("rb") as file_:
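The hunk cuts the helper off after its first body line. As a rough sketch of what a loader like this typically does (the component iteration and attribute checks below are assumptions for illustration, not lines from the commit), it reads the serialized weights and hands them to each component's tok2vec layer:

```python
from pathlib import Path


def _load_pretrained_tok2vec(nlp, loc):
    """Load pretrained weights for the 'token-to-vector' part of the component
    models, which is typically a CNN. See 'spacy pretrain'. Experimental.
    """
    with Path(loc).open("rb") as file_:
        weights_data = file_.read()  # raw bytes written by `spacy pretrain`
    loaded = []
    for name, component in nlp.pipeline:
        # Assumption: trainable components expose the embedding sublayer
        # as `model.tok2vec`, which can deserialize the saved weights.
        if hasattr(component, "model") and hasattr(component.model, "tok2vec"):
            component.model.tok2vec.from_bytes(weights_data)
            loaded.append(name)
    return loaded
```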
@@ -472,7 +472,7 @@ class TreebankPaths(object):
     gpu_device=("Use GPU", "option", "g", int),
     use_oracle_segments=("Use oracle segments", "flag", "G", int),
     vectors_dir=(
-        "Path to directory with pre-trained vectors, named e.g. en/",
+        "Path to directory with pretrained vectors, named e.g. en/",
         "option",
         "v",
         Path,
@@ -38,10 +38,10 @@ def create_kb(
     # check the length of the nlp vectors
     if "vectors" in nlp.meta and nlp.vocab.vectors.size:
         input_dim = nlp.vocab.vectors_length
-        logger.info("Loaded pre-trained vectors of size %s" % input_dim)
+        logger.info("Loaded pretrained vectors of size %s" % input_dim)
     else:
         raise ValueError(
-            "The `nlp` object should have access to pre-trained word vectors, "
+            "The `nlp` object should have access to pretrained word vectors, "
             " cf. https://spacy.io/usage/models#languages."
         )
 
@@ -83,7 +83,7 @@ def main(
     # check the length of the nlp vectors
     if "vectors" not in nlp.meta or not nlp.vocab.vectors.size:
         raise ValueError(
-            "The `nlp` object should have access to pre-trained word vectors, "
+            "The `nlp` object should have access to pretrained word vectors, "
             " cf. https://spacy.io/usage/models#languages."
         )
 
@@ -65,7 +65,7 @@ def main(
 
     # check that there is a NER component in the pipeline
     if "ner" not in nlp.pipe_names:
-        raise ValueError("The `nlp` object should have a pre-trained `ner` component.")
+        raise ValueError("The `nlp` object should have a pretrained `ner` component.")
 
     # STEP 2: create a training dataset from WP
     logger.info("STEP 2: reading training dataset from {}".format(training_path))
@@ -27,7 +27,7 @@ from bin.wiki_entity_linking.train_descriptions import EntityEncoder
 # Q7381115 (Russ Cochran): publisher
 ENTITIES = {"Q2146908": ("American golfer", 342), "Q7381115": ("publisher", 17)}
 
-INPUT_DIM = 300  # dimension of pre-trained input vectors
+INPUT_DIM = 300  # dimension of pretrained input vectors
 DESC_WIDTH = 64  # dimension of output entity vectors
 
 
@@ -39,7 +39,7 @@ DESC_WIDTH = 64  # dimension of output entity vectors
 )
 def main(vocab_path=None, model=None, output_dir=None, n_iter=50):
     """Load the model, create the KB and pretrain the entity encodings.
-    Either an nlp model or a vocab is needed to provide access to pre-trained word embeddings.
+    Either an nlp model or a vocab is needed to provide access to pretrained word embeddings.
     If an output_dir is provided, the KB will be stored there in a file 'kb'.
     When providing an nlp model, the updated vocab will also be written to a directory in the output_dir."""
     if model is None and vocab_path is None:
@@ -1,9 +1,9 @@
 """This script is experimental.
 
 Try pre-training the CNN component of the text categorizer using a cheap
-language modelling-like objective. Specifically, we load pre-trained vectors
+language modelling-like objective. Specifically, we load pretrained vectors
 (from something like word2vec, GloVe, FastText etc), and use the CNN to
-predict the tokens' pre-trained vectors. This isn't as easy as it sounds:
+predict the tokens' pretrained vectors. This isn't as easy as it sounds:
 we're not merely doing compression here, because heavy dropout is applied,
 including over the input words. This means the model must often (50% of the time)
 use the context in order to predict the word.
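The objective the docstring describes boils down to regressing the model's output vectors onto the tokens' pretrained vectors. A minimal sketch of such a loss (illustrative only, not the script's actual code):

```python
import numpy


def get_vector_loss(predicted, target):
    """Squared-error loss between predicted vectors and the tokens'
    pretrained vectors. With heavy dropout on the inputs, the model is
    forced to use context rather than simply copying each word's vector.
    """
    d_vectors = predicted - target  # gradient of the squared error
    loss = (d_vectors ** 2).sum()
    return loss, d_vectors


# Toy usage: 4 tokens, 300-dimensional vectors
predicted = numpy.zeros((4, 300), dtype="f")
target = numpy.ones((4, 300), dtype="f")
loss, d_vectors = get_vector_loss(predicted, target)
```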
@@ -2,7 +2,7 @@
 # coding: utf8
 """Example of training an additional entity type
 
-This script shows how to add a new entity type to an existing pre-trained NER
+This script shows how to add a new entity type to an existing pretrained NER
 model. To keep the example short and simple, only four sentences are provided
 as examples. In practice, you'll need many more — a few hundred would be a
 good start. You will also likely need to mix in examples of other entity
@@ -96,9 +96,9 @@ def pretrain(
     """
     Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
     using an approximate language-modelling objective. Specifically, we load
-    pre-trained vectors, and train a component like a CNN, BiLSTM, etc to predict
-    vectors which match the pre-trained ones. The weights are saved to a directory
-    after each epoch. You can then pass a path to one of these pre-trained weights
+    pretrained vectors, and train a component like a CNN, BiLSTM, etc to predict
+    vectors which match the pretrained ones. The weights are saved to a directory
+    after each epoch. You can then pass a path to one of these pretrained weights
     files to the 'spacy train' command.
 
     This technique may be especially helpful if you have little labelled data.
@@ -156,7 +156,7 @@ def pretrain(
             subword_features=True,  # Set to False for Chinese etc
         ),
     )
-    # Load in pre-trained weights
+    # Load in pretrained weights
     if init_tok2vec is not None:
         components = _load_pretrained_tok2vec(nlp, init_tok2vec)
         msg.text("Loaded pretrained tok2vec for: {}".format(components))
@@ -241,7 +241,7 @@ def train(
 
     nlp._optimizer = None
 
-    # Load in pre-trained weights
+    # Load in pretrained weights
     if init_tok2vec is not None:
         components = _load_pretrained_tok2vec(nlp, init_tok2vec)
         msg.text("Loaded pretrained tok2vec for: {}".format(components))
@@ -529,7 +529,7 @@ def _load_vectors(nlp, vectors):
 
 
 def _load_pretrained_tok2vec(nlp, loc):
-    """Load pre-trained weights for the 'token-to-vector' part of the component
+    """Load pretrained weights for the 'token-to-vector' part of the component
     models, which is typically a CNN. See 'spacy pretrain'. Experimental.
     """
     with loc.open("rb") as file_:
@@ -356,7 +356,7 @@ class Errors(object):
     E113 = ("The newly split token can only have one root (head = 0).")
     E114 = ("The newly split token needs to have a root (head = 0).")
     E115 = ("All subtokens must have associated heads.")
-    E116 = ("Cannot currently add labels to pre-trained text classifier. Add "
+    E116 = ("Cannot currently add labels to pretrained text classifier. Add "
             "labels before training begins. This functionality was available "
             "in previous versions, but had significant bugs that led to poor "
             "performance.")
@@ -482,7 +482,7 @@ class Errors(object):
             "Current DocBin: {current}\nOther DocBin: {other}")
     E167 = ("Unknown morphological feature: '{feat}' ({feat_id}). This can "
             "happen if the tagger was trained with a different set of "
-            "morphological features. If you're using a pre-trained model, make "
+            "morphological features. If you're using a pretrained model, make "
             "sure that your models are up to date:\npython -m spacy validate")
     E168 = ("Unknown field: {field}")
     E169 = ("Can't find module: {module}")
@@ -499,13 +499,13 @@ class Errors(object):
 
 @add_codes
 class TempErrors(object):
-    T003 = ("Resizing pre-trained Tagger models is not currently supported.")
+    T003 = ("Resizing pretrained Tagger models is not currently supported.")
     T004 = ("Currently parser depth is hard-coded to 1. Received: {value}.")
     T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
             "issue tracker: http://github.com/explosion/spaCy/issues")
     T008 = ("Bad configuration of Tagger. This is probably a bug within "
             "spaCy. We changed the name of an internal attribute for loading "
-            "pre-trained vectors, and the class has been passed the old name "
+            "pretrained vectors, and the class has been passed the old name "
             "(pretrained_dims) but not the new name (pretrained_vectors).")
 
 
@@ -521,7 +521,7 @@ class Language(object):
         """Make a "rehearsal" update to the models in the pipeline, to prevent
         forgetting. Rehearsal updates run an initial copy of the model over some
         data, and update the model so its current predictions are more like the
-        initial ones. This is useful for keeping a pre-trained model on-track,
+        initial ones. This is useful for keeping a pretrained model on-track,
         even if you're updating it with a smaller set of examples.
 
         docs (iterable): A batch of `Doc` objects.
@@ -627,7 +627,7 @@ class Language(object):
         return self._optimizer
 
     def resume_training(self, sgd=None, **cfg):
-        """Continue training a pre-trained model.
+        """Continue training a pretrained model.
 
         Create and return an optimizer, and initialize "rehearsal" for any pipeline
         component that has a .rehearse() method. Rehearsal is used to prevent
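Together, the two docstrings above suggest a pattern along these lines (a sketch; the texts and annotations are illustrative placeholders):

```python
import spacy

nlp = spacy.load("en_core_web_sm")
optimizer = nlp.resume_training()  # also initializes rehearsal for components

# Hypothetical data: new annotated examples, plus unlabelled text from the
# model's original domain to rehearse on.
train_data = [("Uber blew through $1 million a week", {"entities": [(0, 4, "ORG")]})]
raw_docs = [nlp.make_doc("The weather was nice and the talk went well.")]

losses = {}
for text, annotations in train_data:
    nlp.update([text], [annotations], sgd=optimizer, losses=losses)
    # Nudge predictions on unlabelled text back towards the initial,
    # pretrained model's predictions to limit forgetting.
    nlp.rehearse(raw_docs, sgd=optimizer, losses=losses)
```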
@@ -125,7 +125,7 @@ class Pipe(object):
     def add_label(self, label):
         """Add an output label, to be predicted by the model.
 
-        It's possible to extend pre-trained models with new labels,
+        It's possible to extend pretrained models with new labels,
         but care should be taken to avoid the "catastrophic forgetting"
         problem.
        """
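For example, a short sketch of extending a pretrained pipeline with a new label (the label name is made up for illustration):

```python
import spacy

nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe("ner")
ner.add_label("ANIMAL")  # hypothetical new entity label

# Resuming training (rather than starting from scratch) keeps the existing
# weights and enables rehearsal, which helps against catastrophic forgetting.
optimizer = nlp.resume_training()
```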
@@ -439,10 +439,10 @@ $ token_vector_width=256 learn_rate=0.0001 spacy train [...]
 ## Pretrain {#pretrain new="2.1" tag="experimental"}
 
 Pre-train the "token to vector" (`tok2vec`) layer of pipeline components, using
-an approximate language-modeling objective. Specifically, we load pre-trained
+an approximate language-modeling objective. Specifically, we load pretrained
 vectors, and train a component like a CNN, BiLSTM, etc to predict vectors which
-match the pre-trained ones. The weights are saved to a directory after each
-epoch. You can then pass a path to one of these pre-trained weights files to the
+match the pretrained ones. The weights are saved to a directory after each
+epoch. You can then pass a path to one of these pretrained weights files to the
 `spacy train` command.
 
 This technique may be especially helpful if you have little labelled data.
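As a usage sketch of the handoff this section describes, the CLI entry points can also be called from Python (the paths and vectors package below are illustrative, and the importable wrapper is an assumption about the v2.1+ `spacy.cli` module):

```python
# Sketch of the pretrain -> train handoff; file names are placeholders.
from spacy.cli import pretrain

pretrain(
    "texts.jsonl",        # raw text, one JSON object per line
    "en_vectors_web_lg",  # package whose pretrained vectors we predict
    "pretrain_out",       # weights are saved here after each epoch
)
# Afterwards, initialize supervised training with one of the weight files:
#   python -m spacy train en models train.json dev.json --init-tok2vec pretrain_out/model9.bin
```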
@@ -476,7 +476,7 @@ $ python -m spacy pretrain [texts_loc] [vectors_model] [output_dir]
 | `--n-save-every`, `-se` | option | Save model every X batches. |
 | `--init-tok2vec`, `-t2v` <Tag variant="new">2.1</Tag> | option | Path to pretrained weights for the token-to-vector parts of the models. See `spacy pretrain`. Experimental. |
 | `--epoch-start`, `-es` <Tag variant="new">2.1.5</Tag> | option | The epoch to start counting at. Only relevant when using `--init-tok2vec` and the given weight file has been renamed. Prevents unintended overwriting of existing weight files. |
-| **CREATES** | weights | The pre-trained weights that can be used to initialize `spacy train`. |
+| **CREATES** | weights | The pretrained weights that can be used to initialize `spacy train`. |
 
 ### JSONL format for raw text {#pretrain-jsonl}
 
@@ -6,11 +6,11 @@ source: spacy/kb.pyx
 new: 2.2
 ---
 
-The `KnowledgeBase` object provides a method to generate [`Candidate`](/api/kb/#candidate_init)
-objects, which are plausible external identifiers given a certain textual mention.
+The `KnowledgeBase` object provides a method to generate [`Candidate`](/api/kb/#candidate_init)
+objects, which are plausible external identifiers given a certain textual mention.
 Each such `Candidate` holds information from the relevant KB entities,
-such as its frequency in text and possible aliases.
-Each entity in the knowledge base also has a pre-trained entity vector of a fixed size.
+such as its frequency in text and possible aliases.
+Each entity in the knowledge base also has a pretrained entity vector of a fixed size.
 
 ## KnowledgeBase.\_\_init\_\_ {#init tag="method"}
 
@@ -26,9 +26,9 @@ Create the knowledge base.
 | Name                   | Type            | Description                              |
 | ---------------------- | --------------- | ---------------------------------------- |
-| `vocab`                | `Vocab`         | A `Vocab` object.                        |
-| `entity_vector_length` | int             | Length of the fixed-size entity vectors. |
-| **RETURNS**            | `KnowledgeBase` | The newly constructed object.            |
+| `vocab`                | `Vocab`         | A `Vocab` object.                        |
+| `entity_vector_length` | int             | Length of the fixed-size entity vectors. |
+| **RETURNS**            | `KnowledgeBase` | The newly constructed object.            |
 
 
 ## KnowledgeBase.entity_vector_length {#entity_vector_length tag="property"}
 
|
|||
|
||||
## KnowledgeBase.add_entity {#add_entity tag="method"}
|
||||
|
||||
Add an entity to the knowledge base, specifying its corpus frequency
|
||||
Add an entity to the knowledge base, specifying its corpus frequency
|
||||
and entity vector, which should be of length [`entity_vector_length`](/api/kb#entity_vector_length).
|
||||
|
||||
> #### Example
|
||||
|
@@ -55,11 +55,11 @@ and entity vector, which should be of length [`entity_vector_length`](/api/kb#en
 | --------------- | ------- | ------------------------------------------------ |
 | `entity`        | unicode | The unique entity identifier                     |
 | `freq`          | float   | The frequency of the entity in a typical corpus  |
-| `entity_vector` | vector  | The pre-trained vector of the entity             |
+| `entity_vector` | vector  | The pretrained vector of the entity              |
 
 ## KnowledgeBase.set_entities {#set_entities tag="method"}
 
-Define the full list of entities in the knowledge base, specifying the corpus frequency
+Define the full list of entities in the knowledge base, specifying the corpus frequency
 and entity vector for each entity.
 
 > #### Example
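A short sketch tying the two methods together (the IDs, frequencies and vector values are illustrative):

```python
from spacy.kb import KnowledgeBase
from spacy.vocab import Vocab

vocab = Vocab()
kb = KnowledgeBase(vocab=vocab, entity_vector_length=3)

# Add entities one at a time ...
kb.add_entity(entity="Q2146908", freq=342, entity_vector=[1.0, 2.0, 3.0])

# ... or define the full list in one call.
kb.set_entities(
    entity_list=["Q2146908", "Q7381115"],
    freq_list=[342, 17],
    vector_list=[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
)
```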
@@ -76,9 +76,9 @@ and entity vector for each entity.
 
 ## KnowledgeBase.add_alias {#add_alias tag="method"}
 
-Add an alias or mention to the knowledge base, specifying its potential KB identifiers
+Add an alias or mention to the knowledge base, specifying its potential KB identifiers
 and their prior probabilities. The entity identifiers should refer to entities previously
-added with [`add_entity`](/api/kb#add_entity) or [`set_entities`](/api/kb#set_entities).
+added with [`add_entity`](/api/kb#add_entity) or [`set_entities`](/api/kb#set_entities).
 The sum of the prior probabilities should not exceed 1.
 
 > #### Example
|
|||
|
||||
## KnowledgeBase.get_candidates {#get_candidates tag="method"}
|
||||
|
||||
Given a certain textual mention as input, retrieve a list of candidate entities
|
||||
of type [`Candidate`](/api/kb/#candidate_init).
|
||||
Given a certain textual mention as input, retrieve a list of candidate entities
|
||||
of type [`Candidate`](/api/kb/#candidate_init).
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@@ -167,7 +167,7 @@ of type [`Candidate`](/api/kb/#candidate_init).
 
 ## KnowledgeBase.get_vector {#get_vector tag="method"}
 
-Given a certain entity ID, retrieve its pre-trained entity vector.
+Given a certain entity ID, retrieve its pretrained entity vector.
 
 > #### Example
 >
@@ -182,7 +182,7 @@ Given a certain entity ID, retrieve its pre-trained entity vector.
 
 ## KnowledgeBase.get_prior_prob {#get_prior_prob tag="method"}
 
-Given a certain entity ID and a certain textual mention, retrieve
+Given a certain entity ID and a certain textual mention, retrieve
 the prior probability of the fact that the mention links to the entity ID.
 
 > #### Example
@@ -213,7 +213,7 @@ Save the current state of the knowledge base to a directory.
 
 ## KnowledgeBase.load_bulk {#load_bulk tag="method"}
 
-Restore the state of the knowledge base from a given directory. Note that the [`Vocab`](/api/vocab)
+Restore the state of the knowledge base from a given directory. Note that the [`Vocab`](/api/vocab)
 should also be the same as the one used to create the KB.
 
 > #### Example
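For instance, a round-trip sketch (the path is a placeholder, and the vocab must match the one used when the KB was built):

```python
# Continues the KnowledgeBase sketch above.
kb.dump("/path/to/kb")

kb_restored = KnowledgeBase(vocab=vocab, entity_vector_length=3)
kb_restored.load_bulk("/path/to/kb")  # vocab must match the original KB's
```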
@@ -265,4 +265,4 @@ of a `KnowledgeBase`.
 | `alias_`        | unicode | The alias or textual mention                                    |
 | `prior_prob`    | long    | The prior probability of the `alias` referring to the `entity` |
 | `entity_freq`   | long    | The frequency of the entity in a typical corpus                |
-| `entity_vector` | vector  | The pre-trained vector of the entity                           |
+| `entity_vector` | vector  | The pretrained vector of the entity                            |
@@ -440,7 +440,7 @@ package exposes the data files via language-specific
 constructing the `Vocab` and [`Lookups`](/api/lookups). This allows easier
 access to the data, serialization with the models and file compression on disk
 (so your spaCy installation is smaller). If you want to use the lookup tables
-without a pre-trained model, you have to explicitly install spaCy with lookups
+without a pretrained model, you have to explicitly install spaCy with lookups
 via `pip install spacy[lookups]` or by installing
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) in the
 same environment as spaCy.
@@ -93,7 +93,7 @@ https://github.com/explosion/spaCy/tree/master/examples/pipeline/multi_processin
 ### Training spaCy's Named Entity Recognizer {#training-ner}
 
 This example shows how to update spaCy's entity recognizer with your own
-examples, starting off with an existing, pre-trained model, or from scratch
+examples, starting off with an existing, pretrained model, or from scratch
 using a blank `Language` class.
 
 ```python
|
|||
|
||||
### Training an additional entity type {#new-entity-type}
|
||||
|
||||
This script shows how to add a new entity type to an existing pre-trained NER
|
||||
This script shows how to add a new entity type to an existing pretrained NER
|
||||
model. To keep the example short and simple, only four sentences are provided as
|
||||
examples. In practice, you'll need many more — a few hundred would be a good
|
||||
start.
|
||||
|
@ -114,7 +114,7 @@ https://github.com/explosion/spaCy/tree/master/examples/training/train_new_entit
|
|||
### Training spaCy's Dependency Parser {#parser}
|
||||
|
||||
This example shows how to update spaCy's dependency parser, starting off with an
|
||||
existing, pre-trained model, or from scratch using a blank `Language` class.
|
||||
existing, pretrained model, or from scratch using a blank `Language` class.
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/tree/master/examples/training/train_parser.py
|
||||
|
|
|
@@ -137,7 +137,7 @@ pre-processing.
 
 ### Model comparison {#spacy-models}
 
-In this section, we provide benchmark accuracies for the pre-trained model
+In this section, we provide benchmark accuracies for the pretrained model
 pipelines we distribute with spaCy. Evaluations are conducted end-to-end from
 raw text, with no "gold standard" pre-processing, over text from a mix of genres
 where possible.
@@ -56,7 +56,7 @@ run `pip install spacy[lookups]` or install
 [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data)
 separately. The lookups package is needed to create blank models with
 lemmatization data, and to lemmatize in languages that don't yet come with
-pre-trained models and aren't powered by third-party libraries.
+pretrained models and aren't powered by third-party libraries.
 
 </Infobox>
 
@@ -508,7 +508,7 @@ responsibility for ensuring that the data is left in a consistent state.
 
 <Infobox title="Annotation scheme">
 
-For details on the entity types available in spaCy's pre-trained models, see the
+For details on the entity types available in spaCy's pretrained models, see the
 [NER annotation scheme](/api/annotation#named-entities).
 
 </Infobox>
@@ -998,7 +998,7 @@ can sometimes tokenize things differently – for example, `"I'm"` →
 In situations like that, you often want to align the tokenization so that you
 can merge annotations from different sources together, or take vectors predicted
 by a
-[pre-trained BERT model](https://github.com/huggingface/pytorch-transformers)
+[pretrained BERT model](https://github.com/huggingface/pytorch-transformers)
 and apply them to spaCy tokens. spaCy's [`gold.align`](/api/goldparse#align)
 helper returns a `(cost, a2b, b2a, a2b_multi, b2a_multi)` tuple describing the
 number of misaligned tokens, the one-to-one mappings of token indices in both
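For example, a small sketch of the alignment helper on two tokenizations of the same string (the token lists are illustrative):

```python
from spacy.gold import align

other_tokens = ["i", "listened", "to", "obama", "'", "s", "podcasts", "."]
spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts", "."]

# cost: number of misaligned tokens; a2b/b2a: one-to-one index mappings;
# a2b_multi/b2a_multi: one-to-many mappings for split or merged tokens.
cost, a2b, b2a, a2b_multi, b2a_multi = align(other_tokens, spacy_tokens)
```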
@@ -860,7 +860,7 @@ def custom_ner_wrapper(doc):
 
 The `custom_ner_wrapper` can then be added to the pipeline of a blank model
 using [`nlp.add_pipe`](/api/language#add_pipe). You can also replace the
-existing entity recognizer of a pre-trained model with
+existing entity recognizer of a pretrained model with
 [`nlp.replace_pipe`](/api/language#replace_pipe).
 
 Here's another example of a custom model, `your_custom_model`, that takes a list
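The two options look like this in code (assuming `custom_ner_wrapper` is the function defined in the hunk above):

```python
import spacy

# Option 1: append the wrapper to a blank pipeline.
nlp = spacy.blank("en")
nlp.add_pipe(custom_ner_wrapper)

# Option 2: swap out the entity recognizer of a pretrained model.
nlp_pretrained = spacy.load("en_core_web_sm")
nlp_pretrained.replace_pipe("ner", custom_ner_wrapper)
```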
@@ -1078,7 +1078,7 @@ order to implement more abstract logic.
 
 ### Example: Expanding named entities {#models-rules-ner}
 
-When using the a pre-trained
+When using the a pretrained
 [named entity recognition](/usage/linguistic-features/#named-entities) model to
 extract information from your texts, you may find that the predicted span only
 includes parts of the entity you're looking for. Sometimes, this happens if
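A hedged sketch of the kind of rule-based expansion this section goes on to build (the title list and component wiring are illustrative):

```python
import spacy
from spacy.tokens import Span


def expand_person_entities(doc):
    """Widen PERSON entities to include an immediately preceding title."""
    new_ents = []
    for ent in doc.ents:
        if ent.label_ == "PERSON" and ent.start != 0:
            prev_token = doc[ent.start - 1]
            if prev_token.text in ("Dr", "Dr.", "Mr", "Mr.", "Ms", "Ms."):
                new_ents.append(Span(doc, ent.start - 1, ent.end, label=ent.label))
                continue
        new_ents.append(ent)
    doc.ents = new_ents
    return doc


nlp = spacy.load("en_core_web_sm")
nlp.add_pipe(expand_person_entities, after="ner")
```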
@@ -321,7 +321,7 @@ the `drop` keyword argument. See the [`Language`](/api/language) and
 ## Training the named entity recognizer {#ner}
 
 All [spaCy models](/models) support online learning, so you can update a
-pre-trained model with new examples. You'll usually need to provide many
+pretrained model with new examples. You'll usually need to provide many
 **examples** to meaningfully improve the system — a few hundred is a good start,
 although more is better.
 
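The online-learning loop this section describes looks roughly like the following (the training examples are placeholders; real use needs far more data):

```python
import random
import spacy

nlp = spacy.load("en_core_web_sm")

# Placeholder examples; a few hundred or more are needed in practice.
TRAIN_DATA = [
    ("Uber blew through $1 million a week", {"entities": [(0, 4, "ORG")]}),
    ("Google rebrands its business apps", {"entities": [(0, 6, "ORG")]}),
]

optimizer = nlp.resume_training()
for itn in range(20):
    random.shuffle(TRAIN_DATA)
    for text, annotations in TRAIN_DATA:
        # update() steps through the example, adjusting weights via the optimizer
        nlp.update([text], [annotations], drop=0.35, sgd=optimizer)
```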
@@ -347,7 +347,7 @@ your data** to find a solution that works best for you.
 ### Updating the Named Entity Recognizer {#example-train-ner}
 
 This example shows how to update spaCy's entity recognizer with your own
-examples, starting off with an existing, pre-trained model, or from scratch
+examples, starting off with an existing, pretrained model, or from scratch
 using a blank `Language` class. To do this, you'll need **example texts** and
 the **character offsets** and **labels** of each entity contained in the texts.
 
@@ -376,7 +376,7 @@ https://github.com/explosion/spaCy/tree/master/examples/training/train_ner.py
 ### Training an additional entity type {#example-new-entity-type}
 
 This script shows how to add a new entity type `ANIMAL` to an existing
-pre-trained NER model, or an empty `Language` class. To keep the example short
+pretrained NER model, or an empty `Language` class. To keep the example short
 and simple, only a few sentences are provided as examples. In practice, you'll
 need many more — a few hundred would be a good start. You will also likely need
 to mix in examples of other entity types, which might be obtained by running the
@@ -440,7 +440,7 @@ https://github.com/explosion/spaCy/tree/master/examples/training/train_parser.py
    training the parser.
 2. **Add the dependency labels** to the parser using the
    [`add_label`](/api/dependencyparser#add_label) method. If you're starting off
-   with a pre-trained spaCy model, this is usually not necessary – but it
+   with a pretrained spaCy model, this is usually not necessary – but it
    doesn't hurt either, just to be safe.
 3. **Shuffle and loop over** the examples. For each example, **update the
    model** by calling [`nlp.update`](/api/language#update), which steps through
@@ -624,7 +624,7 @@ https://github.com/explosion/spaCy/tree/master/examples/training/pretrain_kb.py
    a pre-defined [`vocab`](/api/vocab) object.
 2. **Pretrain the entity embeddings** by running the descriptions of the
    entities through a simple encoder-decoder network. The current implementation
-   requires the `nlp` model to have access to pre-trained word embeddings, but a
+   requires the `nlp` model to have access to pretrained word embeddings, but a
    custom implementation of this encoding step can also be used.
 3. **Construct the KB** by defining all entities with their pretrained vectors,
    and all aliases with their prior probabilities.
@@ -324,9 +324,9 @@ check if all of your models are up to date, you can run the
 
 - The lemmatization tables have been moved to their own package,
   [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data), which
-  is not installed by default. If you're using pre-trained models, **nothing
+  is not installed by default. If you're using pretrained models, **nothing
   changes**, because the tables are now included in the model packages. If you
-  want to use the lemmatizer for other languages that don't yet have pre-trained
+  want to use the lemmatizer for other languages that don't yet have pretrained
   models (e.g. Turkish or Croatian) or start off with a blank model that
   contains lookup data (e.g. `spacy.blank("en")`), you'll need to **explicitly
   install spaCy plus data** via `pip install spacy[lookups]`.
@@ -1677,7 +1677,7 @@
         {
             "id": "spacy-pytorch-transformers",
             "title": "spacy-pytorch-transformers",
-            "slogan": "spaCy pipelines for pre-trained BERT, XLNet and GPT-2",
+            "slogan": "spaCy pipelines for pretrained BERT, XLNet and GPT-2",
             "description": "This package provides spaCy model pipelines that wrap [Hugging Face's `pytorch-transformers`](https://github.com/huggingface/pytorch-transformers) package, so you can use them in spaCy. The result is convenient access to state-of-the-art transformer architectures, such as BERT, GPT-2, XLNet, etc.",
             "github": "explosion/spacy-pytorch-transformers",
             "url": "https://explosion.ai/blog/spacy-pytorch-transformers",
@@ -1855,7 +1855,7 @@
         {
             "id": "models",
             "title": "Models",
-            "description": "Third-party pre-trained models for different languages and domains"
+            "description": "Third-party pretrained models for different languages and domains"
         }
     ]
 },
@@ -345,7 +345,7 @@ const Models = ({ pageContext, repo, children }) => {
 
     return (
         <>
-            <Title title={title} teaser={`Available pre-trained statistical models for ${title}`} />
+            <Title title={title} teaser={`Available pretrained statistical models for ${title}`} />
             <StaticQuery
                 query={query}
                 render={({ site }) =>
@@ -126,7 +126,7 @@ const Landing = ({ data }) => {
                     {counts.modelLangs} languages
                 </Li>
                 <Li>
-                    Pre-trained <strong>word vectors</strong>
+                    pretrained <strong>word vectors</strong>
                 </Li>
                 <Li>State-of-the-art speed</Li>
                 <Li>