diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py index 0b2920802..1ece755b8 100644 --- a/spacy/cli/converters/conllu2json.py +++ b/spacy/cli/converters/conllu2json.py @@ -111,8 +111,8 @@ def get_entities(lines, tag_pattern, ner_map=None): final entity type with `ner_map` if mapping present. Entity tag is 'O' if the pattern is not matched. - lines (unicode): CONLL-U lines for one sentences - tag_pattern (unicode): Regex pattern for entity tag + lines (str): CoNLL-U lines for one sentence + tag_pattern (str): Regex pattern for entity tag ner_map (dict): Map old NER tag names to new ones, '' maps to O. RETURNS (list): List of BILUO entity tags """ @@ -187,8 +187,8 @@ def example_from_conllu_sentence( """Create an Example from the lines for one CoNLL-U sentence, merging subtokens and appending morphology to tags if required. - lines (unicode): The non-comment lines for a CoNLL-U sentence - ner_tag_pattern (unicode): The regex pattern for matching NER in MISC col + lines (str): The non-comment lines for a CoNLL-U sentence + ner_tag_pattern (str): The regex pattern for matching NER in MISC col RETURNS (Example): An example containing the annotation """ # create a Doc with each subtoken as its own token diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 3f84dabce..2c377a043 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -22,13 +22,13 @@ def render( """Render displaCy visualisation. docs (list or Doc): Document(s) to visualise. - style (unicode): Visualisation style, 'dep' or 'ent'. + style (str): Visualisation style, 'dep' or 'ent'. page (bool): Render markup as full HTML page. minify (bool): Minify HTML markup. jupyter (bool): Override Jupyter auto-detection. options (dict): Visualiser-specific options, e.g. colors. manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. - RETURNS (unicode): Rendered HTML markup. + RETURNS (str): Rendered HTML markup. DOCS: https://spacy.io/api/top-level#displacy.render USAGE: https://spacy.io/usage/visualizers @@ -73,13 +73,13 @@ def serve( """Serve displaCy visualisation. docs (list or Doc): Document(s) to visualise. - style (unicode): Visualisation style, 'dep' or 'ent'. + style (str): Visualisation style, 'dep' or 'ent'. page (bool): Render markup as full HTML page. minify (bool): Minify HTML markup. options (dict): Visualiser-specific options, e.g. colors. manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. port (int): Port to serve visualisation. - host (unicode): Host to serve visualisation. + host (str): Host to serve visualisation. DOCS: https://spacy.io/api/top-level#displacy.serve USAGE: https://spacy.io/usage/visualizers diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index 0d4cdb77f..d3572ce78 100644 --- a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -47,7 +47,7 @@ class DependencyRenderer(object): parsed (list): Dependency parses to render. page (bool): Render parses wrapped as full HTML page. minify (bool): Minify HTML markup. - RETURNS (unicode): Rendered SVG or HTML markup. + RETURNS (str): Rendered SVG or HTML markup. """ # Create a random ID prefix to make sure parses don't receive the # same ID, even if they're identical @@ -78,7 +78,7 @@ class DependencyRenderer(object): render_id (int): Unique ID, typically index of document. words (list): Individual words and their tags. arcs (list): Individual arcs and their start, end, direction and label.
- RETURNS (unicode): Rendered SVG markup. + RETURNS (str): Rendered SVG markup. """ self.levels = self.get_levels(arcs) self.highest_level = len(self.levels) @@ -112,10 +112,10 @@ class DependencyRenderer(object): ): """Render individual word. - text (unicode): Word text. - tag (unicode): Part-of-speech tag. + text (str): Word text. + tag (str): Part-of-speech tag. i (int): Unique ID, typically word index. - RETURNS (unicode): Rendered SVG markup. + RETURNS (str): Rendered SVG markup. """ y = self.offset_y + self.word_spacing x = self.offset_x + i * self.distance @@ -131,12 +131,12 @@ class DependencyRenderer(object): def render_arrow(self, label, start, end, direction, i): """Render individual arrow. - label (unicode): Dependency label. + label (str): Dependency label. start (int): Index of start word. end (int): Index of end word. - direction (unicode): Arrow direction, 'left' or 'right'. + direction (str): Arrow direction, 'left' or 'right'. i (int): Unique ID, typically arrow index. - RETURNS (unicode): Rendered SVG markup. + RETURNS (str): Rendered SVG markup. """ if start < 0 or end < 0: error_args = dict(start=start, end=end, label=label, dir=direction) @@ -179,7 +179,7 @@ class DependencyRenderer(object): y (int): Y-coordinate of arrow start and end point. y_curve (int): Y-corrdinate of Cubic Bézier y_curve point. x_end (int): X-coordinate of arrow end point. - RETURNS (unicode): Definition of the arc path ('d' attribute). + RETURNS (str): Definition of the arc path ('d' attribute). """ template = "M{x},{y} C{x},{c} {e},{c} {e},{y}" if self.compact: @@ -189,11 +189,11 @@ class DependencyRenderer(object): def get_arrowhead(self, direction, x, y, end): """Render individual arrow head. - direction (unicode): Arrow direction, 'left' or 'right'. + direction (str): Arrow direction, 'left' or 'right'. x (int): X-coordinate of arrow start point. y (int): Y-coordinate of arrow start and end point. end (int): X-coordinate of arrow end point. - RETURNS (unicode): Definition of the arrow head path ('d' attribute). + RETURNS (str): Definition of the arrow head path ('d' attribute). """ if direction == "left": pos1, pos2, pos3 = (x, x - self.arrow_width + 2, x + self.arrow_width - 2) @@ -279,7 +279,7 @@ class EntityRenderer(object): parsed (list): Dependency parses to render. page (bool): Render parses wrapped as full HTML page. minify (bool): Minify HTML markup. - RETURNS (unicode): Rendered HTML markup. + RETURNS (str): Rendered HTML markup. """ rendered = [] for i, p in enumerate(parsed): @@ -300,7 +300,7 @@ class EntityRenderer(object): def render_ents(self, text, spans, title): """Render entities in text. - text (unicode): Original text. + text (str): Original text. spans (list): Individual entity spans and their start, end and label. title (unicode or None): Document title set in Doc.user_data['title']. """ diff --git a/spacy/errors.py b/spacy/errors.py index 4d38ab586..932bb1eff 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -598,7 +598,7 @@ class MatchPatternError(ValueError): def __init__(self, key, errors): """Custom error for validating match patterns. - key (unicode): The name of the matcher rule. + key (str): The name of the matcher rule. errors (dict): Validation errors (sequence of strings) mapped to pattern ID, i.e. the index of the added pattern. 
""" diff --git a/spacy/glossary.py b/spacy/glossary.py index 938a575cd..c4a6a5c45 100644 --- a/spacy/glossary.py +++ b/spacy/glossary.py @@ -1,8 +1,8 @@ def explain(term): """Get a description for a given POS tag, dependency label or entity type. - term (unicode): The term to explain. - RETURNS (unicode): The explanation, or `None` if not found in the glossary. + term (str): The term to explain. + RETURNS (str): The explanation, or `None` if not found in the glossary. EXAMPLE: >>> spacy.explain(u'NORP') diff --git a/spacy/kb.pyx b/spacy/kb.pyx index 86a8d49b8..8d8464f3c 100644 --- a/spacy/kb.pyx +++ b/spacy/kb.pyx @@ -38,7 +38,7 @@ cdef class Candidate: @property def entity_(self): - """RETURNS (unicode): ID/name of this entity in the KB""" + """RETURNS (str): ID/name of this entity in the KB""" return self.kb.vocab.strings[self.entity_hash] @property @@ -48,7 +48,7 @@ cdef class Candidate: @property def alias_(self): - """RETURNS (unicode): ID of the original alias""" + """RETURNS (str): ID of the original alias""" return self.kb.vocab.strings[self.alias_hash] @property diff --git a/spacy/language.py b/spacy/language.py index 5286bd3b9..e3b770723 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -122,7 +122,7 @@ class Language(object): Defaults (class): Settings, data and factory methods for creating the `nlp` object and processing pipeline. - lang (unicode): Two-letter language ID, i.e. ISO code. + lang (str): Two-letter language ID, i.e. ISO code. DOCS: https://spacy.io/api/language """ @@ -287,7 +287,7 @@ class Language(object): def get_pipe(self, name): """Get a pipeline component for a given component name. - name (unicode): Name of pipeline component to get. + name (str): Name of pipeline component to get. RETURNS (callable): The pipeline component. DOCS: https://spacy.io/api/language#get_pipe @@ -300,7 +300,7 @@ class Language(object): def create_pipe(self, name, config=dict()): """Create a pipeline component from a factory. - name (unicode): Factory name to look up in `Language.factories`. + name (str): Factory name to look up in `Language.factories`. config (dict): Configuration parameters to initialise component. RETURNS (callable): Pipeline component. @@ -343,12 +343,12 @@ class Language(object): of before/after/first/last can be set. Default behaviour is "last". component (callable): The pipeline component. - name (unicode): Name of pipeline component. Overwrites existing + name (str): Name of pipeline component. Overwrites existing component.name attribute if available. If no name is set and the component exposes no name attribute, component.__name__ is used. An error is raised if a name already exists in the pipeline. - before (unicode): Component name to insert component directly before. - after (unicode): Component name to insert component directly after. + before (str): Component name to insert component directly before. + after (str): Component name to insert component directly after. first (bool): Insert component first / not first in the pipeline. last (bool): Insert component last / not last in the pipeline. @@ -389,7 +389,7 @@ class Language(object): """Check if a component name is present in the pipeline. Equivalent to `name in nlp.pipe_names`. - name (unicode): Name of the component. + name (str): Name of the component. RETURNS (bool): Whether a component of the name exists in the pipeline. DOCS: https://spacy.io/api/language#has_pipe @@ -399,7 +399,7 @@ class Language(object): def replace_pipe(self, name, component): """Replace a component in the pipeline. 
- name (unicode): Name of the component to replace. + name (str): Name of the component to replace. component (callable): Pipeline component. DOCS: https://spacy.io/api/language#replace_pipe @@ -418,8 +418,8 @@ class Language(object): def rename_pipe(self, old_name, new_name): """Rename a pipeline component. - old_name (unicode): Name of the component to rename. - new_name (unicode): New name of the component. + old_name (str): Name of the component to rename. + new_name (str): New name of the component. DOCS: https://spacy.io/api/language#rename_pipe """ @@ -433,7 +433,7 @@ class Language(object): def remove_pipe(self, name): """Remove a component from the pipeline. - name (unicode): Name of the component to remove. + name (str): Name of the component to remove. RETURNS (tuple): A `(name, component)` tuple of the removed component. DOCS: https://spacy.io/api/language#remove_pipe @@ -450,7 +450,7 @@ class Language(object): and can contain arbitrary whitespace. Alignment into the original string is preserved. - text (unicode): The text to be processed. + text (str): The text to be processed. disable (list): Names of the pipeline components to disable. component_cfg (dict): An optional dictionary with extra keyword arguments for specific components. @@ -1086,7 +1086,7 @@ class component(object): ): """Decorate a pipeline component. - name (unicode): Default component and factory name. + name (str): Default component and factory name. assigns (list): Attributes assigned by component, e.g. `["token.pos"]`. requires (list): Attributes required by component, e.g. `["token.dep"]`. retokenizes (bool): Whether the component changes the tokenization. diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index 3ba86c169..aeedbde84 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -29,8 +29,8 @@ class Lemmatizer(object): def __call__(self, string, univ_pos, morphology=None): """Lemmatize a string. - string (unicode): The string to lemmatize, e.g. the token text. - univ_pos (unicode / int): The token's universal part-of-speech tag. + string (str): The string to lemmatize, e.g. the token text. + univ_pos (str / int): The token's universal part-of-speech tag. morphology (dict): The token's morphological features following the Universal Dependencies scheme. RETURNS (list): The available lemmas for the string. @@ -69,7 +69,7 @@ class Lemmatizer(object): Check whether we're dealing with an uninflected paradigm, so we can avoid lemmatization entirely. - univ_pos (unicode / int): The token's universal part-of-speech tag. + univ_pos (str / int): The token's universal part-of-speech tag. morphology (dict): The token's morphological features following the Universal Dependencies scheme. """ @@ -128,10 +128,10 @@ class Lemmatizer(object): """Look up a lemma in the table, if available. If no lemma is found, the original string is returned. - string (unicode): The original string. + string (str): The original string. orth (int): Optional hash of the string to look up. If not set, the string will be used and hashed. - RETURNS (unicode): The lemma if the string was found, otherwise the + RETURNS (str): The lemma if the string was found, otherwise the original string. 
""" lookup_table = self.lookups.get_table("lemma_lookup", {}) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 20e175f03..911112d50 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -190,7 +190,7 @@ cdef class Lexeme: self.vocab.set_vector(self.c.orth, vector) property rank: - """RETURNS (unicode): Sequential ID of the lexemes's lexical type, used + """RETURNS (str): Sequential ID of the lexemes's lexical type, used to index into tables, e.g. for word vectors.""" def __get__(self): return self.c.id @@ -209,18 +209,18 @@ cdef class Lexeme: @property def orth_(self): - """RETURNS (unicode): The original verbatim text of the lexeme + """RETURNS (str): The original verbatim text of the lexeme (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes.""" return self.vocab.strings[self.c.orth] @property def text(self): - """RETURNS (unicode): The original verbatim text of the lexeme.""" + """RETURNS (str): The original verbatim text of the lexeme.""" return self.orth_ property lower: - """RETURNS (unicode): Lowercase form of the lexeme.""" + """RETURNS (str): Lowercase form of the lexeme.""" def __get__(self): return self.c.lower @@ -293,7 +293,7 @@ cdef class Lexeme: self.c.prob = x property lower_: - """RETURNS (unicode): Lowercase form of the word.""" + """RETURNS (str): Lowercase form of the word.""" def __get__(self): return self.vocab.strings[self.c.lower] @@ -301,7 +301,7 @@ cdef class Lexeme: self.c.lower = self.vocab.strings.add(x) property norm_: - """RETURNS (unicode): The lexemes's norm, i.e. a normalised form of the + """RETURNS (str): The lexemes's norm, i.e. a normalised form of the lexeme text. """ def __get__(self): @@ -311,7 +311,7 @@ cdef class Lexeme: self.c.norm = self.vocab.strings.add(x) property shape_: - """RETURNS (unicode): Transform of the word's string, to show + """RETURNS (str): Transform of the word's string, to show orthographic features. """ def __get__(self): @@ -321,7 +321,7 @@ cdef class Lexeme: self.c.shape = self.vocab.strings.add(x) property prefix_: - """RETURNS (unicode): Length-N substring from the start of the word. + """RETURNS (str): Length-N substring from the start of the word. Defaults to `N=1`. """ def __get__(self): @@ -331,7 +331,7 @@ cdef class Lexeme: self.c.prefix = self.vocab.strings.add(x) property suffix_: - """RETURNS (unicode): Length-N substring from the end of the word. + """RETURNS (str): Length-N substring from the end of the word. Defaults to `N=3`. """ def __get__(self): @@ -341,7 +341,7 @@ cdef class Lexeme: self.c.suffix = self.vocab.strings.add(x) property lang_: - """RETURNS (unicode): Language of the parent vocabulary.""" + """RETURNS (str): Language of the parent vocabulary.""" def __get__(self): return self.vocab.strings[self.c.lang] diff --git a/spacy/lookups.py b/spacy/lookups.py index a9d371b79..5661897e1 100644 --- a/spacy/lookups.py +++ b/spacy/lookups.py @@ -31,7 +31,7 @@ class Lookups(object): """Check if the lookups contain a table of a given name. Delegates to Lookups.has_table. - name (unicode): Name of the table. + name (str): Name of the table. RETURNS (bool): Whether a table of that name is in the lookups. """ return self.has_table(name) @@ -48,7 +48,7 @@ class Lookups(object): def add_table(self, name, data=SimpleFrozenDict()): """Add a new table to the lookups. Raises an error if the table exists. - name (unicode): Unique name of table. + name (str): Unique name of table. data (dict): Optional data to add to the table. RETURNS (Table): The newly added table. 
@@ -64,7 +64,7 @@ class Lookups(object): """Get a table. Raises an error if the table doesn't exist and no default value is provided. - name (unicode): Name of the table. + name (str): Name of the table. default: Optional default value to return if table doesn't exist. RETURNS (Table): The table. @@ -79,7 +79,7 @@ class Lookups(object): def remove_table(self, name): """Remove a table. Raises an error if the table doesn't exist. - name (unicode): Name of the table to remove. + name (str): Name of the table to remove. RETURNS (Table): The removed table. DOCS: https://spacy.io/api/lookups#remove_table @@ -91,7 +91,7 @@ class Lookups(object): def has_table(self, name): """Check if the lookups contain a table of a given name. - name (unicode): Name of the table. + name (str): Name of the table. RETURNS (bool): Whether a table of that name exists. DOCS: https://spacy.io/api/lookups#has_table @@ -125,7 +125,7 @@ class Lookups(object): """Save the lookups to a directory as lookups.bin. Expects a path to a directory, which will be created if it doesn't exist. - path (unicode / Path): The file path. + path (str / Path): The file path. DOCS: https://spacy.io/api/lookups#to_disk """ @@ -141,7 +141,7 @@ class Lookups(object): """Load lookups from a directory containing a lookups.bin. Will skip loading if the file doesn't exist. - path (unicode / Path): The directory path. + path (str / Path): The directory path. RETURNS (Lookups): The loaded lookups. DOCS: https://spacy.io/api/lookups#from_disk @@ -167,7 +167,7 @@ class Table(OrderedDict): """Initialize a new table from a dict. data (dict): The dictionary. - name (unicode): Optional table name for reference. + name (str): Optional table name for reference. RETURNS (Table): The newly created object. DOCS: https://spacy.io/api/lookups#table.from_dict @@ -179,7 +179,7 @@ class Table(OrderedDict): def __init__(self, name=None, data=None): """Initialize a new table. - name (unicode): Optional table name for reference. + name (str): Optional table name for reference. data (dict): Initial data, used to hint Bloom Filter. RETURNS (Table): The newly created object. @@ -197,7 +197,7 @@ class Table(OrderedDict): def __setitem__(self, key, value): """Set new key/value pair. String keys will be hashed. - key (unicode / int): The key to set. + key (str / int): The key to set. value: The value to set. """ key = get_string_id(key) @@ -208,7 +208,7 @@ class Table(OrderedDict): """Set new key/value pair. String keys will be hashed. Same as table[key] = value. - key (unicode / int): The key to set. + key (str / int): The key to set. value: The value to set. """ self[key] = value @@ -216,7 +216,7 @@ class Table(OrderedDict): def __getitem__(self, key): """Get the value for a given key. String keys will be hashed. - key (unicode / int): The key to get. + key (str / int): The key to get. RETURNS: The value. """ key = get_string_id(key) @@ -225,7 +225,7 @@ class Table(OrderedDict): def get(self, key, default=None): """Get the value for a given key. String keys will be hashed. - key (unicode / int): The key to get. + key (str / int): The key to get. default: The default value to return. RETURNS: The value. """ @@ -235,7 +235,7 @@ class Table(OrderedDict): def __contains__(self, key): """Check whether a key is in the table. String keys will be hashed. - key (unicode / int): The key to check. + key (str / int): The key to check. RETURNS (bool): Whether the key is in the table. 
""" key = get_string_id(key) diff --git a/spacy/matcher/dependencymatcher.pyx b/spacy/matcher/dependencymatcher.pyx index ff707a71c..732931380 100644 --- a/spacy/matcher/dependencymatcher.pyx +++ b/spacy/matcher/dependencymatcher.pyx @@ -66,7 +66,7 @@ cdef class DependencyMatcher: def __contains__(self, key): """Check whether the matcher contains rules for a match ID. - key (unicode): The match ID. + key (str): The match ID. RETURNS (bool): Whether the matcher contains rules for this match ID. """ return self._normalize_key(key) in self._patterns diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 2bcb82a2a..225eba9a9 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -63,7 +63,7 @@ cdef class Matcher: def __contains__(self, key): """Check whether the matcher contains rules for a match ID. - key (unicode): The match ID. + key (str): The match ID. RETURNS (bool): Whether the matcher contains rules for this match ID. """ return self._normalize_key(key) in self._patterns @@ -97,7 +97,7 @@ cdef class Matcher: number of arguments). The on_match callback becomes an optional keyword argument. - key (unicode): The match ID. + key (str): The match ID. patterns (list): The patterns to add for the given key. on_match (callable): Optional callback executed on match. *_patterns (list): For backwards compatibility: list of patterns to add @@ -138,7 +138,7 @@ cdef class Matcher: """Remove a rule from the matcher. A KeyError is raised if the key does not exist. - key (unicode): The ID of the match rule. + key (str): The ID of the match rule. """ norm_key = self._normalize_key(key) if not norm_key in self._patterns: diff --git a/spacy/matcher/phrasematcher.pyx b/spacy/matcher/phrasematcher.pyx index 14cc39787..f7ce44ece 100644 --- a/spacy/matcher/phrasematcher.pyx +++ b/spacy/matcher/phrasematcher.pyx @@ -70,7 +70,7 @@ cdef class PhraseMatcher: def __contains__(self, key): """Check whether the matcher contains rules for a match ID. - key (unicode): The match ID. + key (str): The match ID. RETURNS (bool): Whether the matcher contains rules for this match ID. DOCS: https://spacy.io/api/phrasematcher#contains @@ -85,7 +85,7 @@ cdef class PhraseMatcher: """Remove a rule from the matcher by match ID. A KeyError is raised if the key does not exist. - key (unicode): The match ID. + key (str): The match ID. DOCS: https://spacy.io/api/phrasematcher#remove """ @@ -159,7 +159,7 @@ cdef class PhraseMatcher: number of arguments). The on_match callback becomes an optional keyword argument. - key (unicode): The match ID. + key (str): The match ID. docs (list): List of `Doc` objects representing match patterns. on_match (callable): Callback executed on match. *_docs (Doc): For backwards compatibility: list of patterns to add diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 0b53b124c..5dcf81ea7 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -198,8 +198,8 @@ cdef class Morphology: """Add a special-case rule to the morphological analyser. Tokens whose tag and orth match the rule will receive the specified properties. - tag (unicode): The part-of-speech tag to key the exception. - orth (unicode): The word-form to key the exception. + tag (str): The part-of-speech tag to key the exception. + orth (str): The word-form to key the exception. 
""" attrs = dict(attrs) attrs = _normalize_props(attrs) diff --git a/spacy/pipe_analysis.py b/spacy/pipe_analysis.py index 4c0950453..971ebe518 100644 --- a/spacy/pipe_analysis.py +++ b/spacy/pipe_analysis.py @@ -11,7 +11,7 @@ def analyze_pipes(pipeline, name, pipe, index, warn=True): fulfilled (e.g. if previous components assign the attributes). pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline. - name (unicode): The name of the pipeline component to analyze. + name (str): The name of the pipeline component to analyze. pipe (callable): The pipeline component function to analyze. index (int): The index of the component in the pipeline. warn (bool): Show user warning if problem is found. @@ -125,7 +125,7 @@ def get_assigns_for_attr(pipeline, attr): """Get all pipeline components that assign an attr, e.g. "doc.tensor". pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline. - attr (unicode): The attribute to check. + attr (str): The attribute to check. RETURNS (list): (name, pipeline) tuples of components that assign the attr. """ return _get_feature_for_attr(pipeline, attr, "assigns") @@ -135,7 +135,7 @@ def get_requires_for_attr(pipeline, attr): """Get all pipeline components that require an attr, e.g. "doc.tensor". pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline. - attr (unicode): The attribute to check. + attr (str): The attribute to check. RETURNS (list): (name, pipeline) tuples of components that require the attr. """ return _get_feature_for_attr(pipeline, attr, "requires") diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 58160c2e9..cdacc82f6 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -315,7 +315,7 @@ class EntityRuler(object): """Load the entity ruler from a file. Expects a file containing newline-delimited JSON (JSONL) with one entry per line. - path (unicode / Path): The JSONL file to load. + path (str / Path): The JSONL file to load. **kwargs: Other config paramters, mostly for consistency. RETURNS (EntityRuler): The loaded entity ruler. @@ -351,7 +351,7 @@ class EntityRuler(object): """Save the entity ruler patterns to a directory. The patterns will be saved as newline-delimited JSON (JSONL). - path (unicode / Path): The JSONL file to save. + path (str / Path): The JSONL file to save. **kwargs: Other config paramters, mostly for consistency. DOCS: https://spacy.io/api/entityruler#to_disk diff --git a/spacy/pipeline/functions.py b/spacy/pipeline/functions.py index 6e9d4197c..622791512 100644 --- a/spacy/pipeline/functions.py +++ b/spacy/pipeline/functions.py @@ -50,7 +50,7 @@ def merge_subtokens(doc, label="subtok"): """Merge subtokens into a single token. doc (Doc): The Doc object. - label (unicode): The subtoken dependency label. + label (str): The subtoken dependency label. RETURNS (Doc): The Doc object with merged subtokens. DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens diff --git a/spacy/strings.pyx b/spacy/strings.pyx index a30f11729..9fe5af154 100644 --- a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -152,7 +152,7 @@ cdef class StringStore: def add(self, string): """Add a string to the StringStore. - string (unicode): The string to add. + string (str): The string to add. RETURNS (uint64): The string's hash value. """ if isinstance(string, unicode): @@ -179,7 +179,7 @@ cdef class StringStore: def __contains__(self, string not None): """Check whether a string is in the store. - string (unicode): The string to check. 
+ string (str): The string to check. RETURNS (bool): Whether the store contains the string. """ cdef hash_t key @@ -205,7 +205,7 @@ cdef class StringStore: def __iter__(self): """Iterate over the strings in the store, in order. - YIELDS (unicode): A string in the store. + YIELDS (str): A string in the store. """ cdef int i cdef hash_t key diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 7e75052f7..b628b1171 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -134,7 +134,7 @@ cdef class Tokenizer: def __call__(self, unicode string): """Tokenize a string. - string (unicode): The string to tokenize. + string (str): The string to tokenize. RETURNS (Doc): A container for linguistic annotations. DOCS: https://spacy.io/api/tokenizer#call @@ -147,7 +147,7 @@ cdef class Tokenizer: cdef Doc _tokenize_affixes(self, unicode string, bint with_special_cases): """Tokenize according to affix and token_match settings. - string (unicode): The string to tokenize. + string (str): The string to tokenize. RETURNS (Doc): A container for linguistic annotations. """ if len(string) >= (2 ** 30): @@ -527,7 +527,7 @@ cdef class Tokenizer: def find_infix(self, unicode string): """Find internal split points of the string, such as hyphens. - string (unicode): The string to segment. + string (str): The string to segment. RETURNS (list): A list of `re.MatchObject` objects that have `.start()` and `.end()` methods, denoting the placement of internal segment separators, e.g. hyphens. @@ -542,7 +542,7 @@ cdef class Tokenizer: """Find the length of a prefix that should be segmented from the string, or None if no prefix rules match. - string (unicode): The string to segment. + string (str): The string to segment. RETURNS (int): The length of the prefix if present, otherwise `None`. DOCS: https://spacy.io/api/tokenizer#find_prefix @@ -556,7 +556,7 @@ cdef class Tokenizer: """Find the length of a suffix that should be segmented from the string, or None if no suffix rules match. - string (unicode): The string to segment. + string (str): The string to segment. Returns (int): The length of the suffix if present, otherwise `None`. DOCS: https://spacy.io/api/tokenizer#find_suffix @@ -576,7 +576,7 @@ cdef class Tokenizer: def _validate_special_case(self, chunk, substrings): """Check whether the `ORTH` fields match the string. - string (unicode): The string to specially tokenize. + string (str): The string to specially tokenize. substrings (iterable): A sequence of dicts, where each dict describes a token and its attributes. """ @@ -588,7 +588,7 @@ cdef class Tokenizer: def add_special_case(self, unicode string, substrings): """Add a special-case tokenization rule. - string (unicode): The string to specially tokenize. + string (str): The string to specially tokenize. substrings (iterable): A sequence of dicts, where each dict describes a token and its attributes. The `ORTH` fields of the attributes must exactly match the string when they are concatenated. @@ -629,7 +629,7 @@ cdef class Tokenizer: produced are identical to `nlp.tokenizer()` except for whitespace tokens. - string (unicode): The string to tokenize. + string (str): The string to tokenize. 
RETURNS (list): A list of (pattern_string, token_string) tuples DOCS: https://spacy.io/api/tokenizer#explain diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 0716b2b3d..f6d0dbf4a 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -107,7 +107,7 @@ cdef class Doc: def set_extension(cls, name, **kwargs): """Define a custom attribute which becomes available as `Doc._`. - name (unicode): Name of the attribute to set. + name (str): Name of the attribute to set. default: Optional default value of the attribute. getter (callable): Optional getter function. setter (callable): Optional setter function. @@ -125,7 +125,7 @@ cdef class Doc: def get_extension(cls, name): """Look up a previously registered extension by name. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple. DOCS: https://spacy.io/api/doc#get_extension @@ -136,7 +136,7 @@ cdef class Doc: def has_extension(cls, name): """Check whether an extension has been registered. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (bool): Whether the extension has been registered. DOCS: https://spacy.io/api/doc#has_extension @@ -147,7 +147,7 @@ cdef class Doc: def remove_extension(cls, name): """Remove a previously registered extension. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple of the removed extension. @@ -473,7 +473,7 @@ cdef class Doc: def text(self): """A unicode representation of the document text. - RETURNS (unicode): The original verbatim text of the document. + RETURNS (str): The original verbatim text of the document. """ return "".join(t.text_with_ws for t in self) @@ -482,7 +482,7 @@ cdef class Doc: """An alias of `Doc.text`, provided for duck-type compatibility with `Span` and `Token`. - RETURNS (unicode): The original verbatim text of the document. + RETURNS (str): The original verbatim text of the document. """ return self.text @@ -628,7 +628,7 @@ cdef class Doc: @property def lang_(self): - """RETURNS (unicode): Language of the doc's vocabulary, e.g. 'en'.""" + """RETURNS (str): Language of the doc's vocabulary, e.g. 'en'.""" return self.vocab.lang cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1: diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 66e8d8c3e..59323c393 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -33,7 +33,7 @@ cdef class Span: def set_extension(cls, name, **kwargs): """Define a custom attribute which becomes available as `Span._`. - name (unicode): Name of the attribute to set. + name (str): Name of the attribute to set. default: Optional default value of the attribute. getter (callable): Optional getter function. setter (callable): Optional setter function. @@ -51,7 +51,7 @@ cdef class Span: def get_extension(cls, name): """Look up a previously registered extension by name. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple. DOCS: https://spacy.io/api/span#get_extension @@ -62,7 +62,7 @@ cdef class Span: def has_extension(cls, name): """Check whether an extension has been registered. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (bool): Whether the extension has been registered. 
DOCS: https://spacy.io/api/span#has_extension @@ -73,7 +73,7 @@ cdef class Span: def remove_extension(cls, name): """Remove a previously registered extension. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple of the removed extension. @@ -501,7 +501,7 @@ cdef class Span: @property def text(self): - """RETURNS (unicode): The original verbatim text of the span.""" + """RETURNS (str): The original verbatim text of the span.""" text = self.text_with_ws if self[-1].whitespace_: text = text[:-1] @@ -512,7 +512,7 @@ cdef class Span: """The text content of the span with a trailing whitespace character if the last token has one. - RETURNS (unicode): The text content of the span (with trailing + RETURNS (str): The text content of the span (with trailing whitespace). """ return "".join([t.text_with_ws for t in self]) @@ -688,7 +688,7 @@ cdef class Span: raise NotImplementedError(TempErrors.T007.format(attr="ent_id")) property ent_id_: - """RETURNS (unicode): The (string) entity ID.""" + """RETURNS (str): The (string) entity ID.""" def __get__(self): return self.root.ent_id_ @@ -700,12 +700,12 @@ cdef class Span: """Verbatim text content (identical to `Span.text`). Exists mostly for consistency with other attributes. - RETURNS (unicode): The span's text.""" + RETURNS (str): The span's text.""" return self.text @property def lemma_(self): - """RETURNS (unicode): The span's lemma.""" + """RETURNS (str): The span's lemma.""" return " ".join([t.lemma_ for t in self]).strip() @property @@ -724,7 +724,7 @@ cdef class Span: return "".join([t.text_with_ws for t in self]) property label_: - """RETURNS (unicode): The span's label.""" + """RETURNS (str): The span's label.""" def __get__(self): return self.doc.vocab.strings[self.label] @@ -734,7 +734,7 @@ cdef class Span: raise NotImplementedError(Errors.E129.format(start=self.start, end=self.end, label=label_)) property kb_id_: - """RETURNS (unicode): The named entity's KB ID.""" + """RETURNS (str): The named entity's KB ID.""" def __get__(self): return self.doc.vocab.strings[self.kb_id] diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 2486ed991..0d1e82322 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -36,7 +36,7 @@ cdef class Token: def set_extension(cls, name, **kwargs): """Define a custom attribute which becomes available as `Token._`. - name (unicode): Name of the attribute to set. + name (str): Name of the attribute to set. default: Optional default value of the attribute. getter (callable): Optional getter function. setter (callable): Optional setter function. @@ -54,7 +54,7 @@ cdef class Token: def get_extension(cls, name): """Look up a previously registered extension by name. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple. DOCS: https://spacy.io/api/token#get_extension @@ -65,7 +65,7 @@ cdef class Token: def has_extension(cls, name): """Check whether an extension has been registered. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (bool): Whether the extension has been registered. DOCS: https://spacy.io/api/token#has_extension @@ -76,7 +76,7 @@ cdef class Token: def remove_extension(cls, name): """Remove a previously registered extension. - name (unicode): Name of the extension. + name (str): Name of the extension. RETURNS (tuple): A `(default, method, getter, setter)` tuple of the removed extension. 
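The extension docstrings above follow the same pattern across `Doc`, `Span` and `Token`. A minimal sketch of how they compose (the extension names `source` and `is_long` are hypothetical, purely for illustration):

```python
import spacy
from spacy.tokens import Doc, Token

Doc.set_extension("source", default=None)  # default-based extension
Token.set_extension("is_long", getter=lambda token: len(token.text) > 8)

nlp = spacy.blank("en")
doc = nlp("Tokenization is deterministic here")
doc._.source = "example"
assert Doc.has_extension("source")
assert doc[0]._.is_long  # "Tokenization" is longer than 8 characters
default, method, getter, setter = Token.get_extension("is_long")
Doc.remove_extension("source")  # returns the removed (default, method, getter, setter) tuple
```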
@@ -244,12 +244,12 @@ cdef class Token: @property def text(self): - """RETURNS (unicode): The original verbatim text of the token.""" + """RETURNS (str): The original verbatim text of the token.""" return self.orth_ @property def text_with_ws(self): - """RETURNS (unicode): The text content of the span (with trailing + """RETURNS (str): The text content of the token (with trailing whitespace). """ cdef unicode orth = self.vocab.strings[self.c.lex.orth] @@ -740,7 +740,7 @@ cdef class Token: self.c.ent_type = ent_type property ent_type_: - """RETURNS (unicode): Named entity type.""" + """RETURNS (str): Named entity type.""" def __get__(self): return self.vocab.strings[self.c.ent_type] @@ -763,7 +763,7 @@ cdef class Token: and "" means no entity tag is set. "B" with an empty ent_type means that the token is blocked from further processing by NER. - RETURNS (unicode): IOB code of named entity tag. + RETURNS (str): IOB code of named entity tag. """ iob_strings = ("", "I", "O", "B") return iob_strings[self.c.ent_iob] @@ -779,7 +779,7 @@ cdef class Token: self.c.ent_id = key property ent_id_: - """RETURNS (unicode): ID of the entity the token is an instance of, + """RETURNS (str): ID of the entity the token is an instance of, if any. """ def __get__(self): @@ -797,7 +797,7 @@ cdef class Token: self.c.ent_kb_id = ent_kb_id property ent_kb_id_: - """RETURNS (unicode): Named entity KB ID.""" + """RETURNS (str): Named entity KB ID.""" def __get__(self): return self.vocab.strings[self.c.ent_kb_id] @@ -806,12 +806,12 @@ cdef class Token: @property def whitespace_(self): - """RETURNS (unicode): The trailing whitespace character, if present.""" + """RETURNS (str): The trailing whitespace character, if present.""" return " " if self.c.spacy else "" @property def orth_(self): - """RETURNS (unicode): Verbatim text content (identical to + """RETURNS (str): Verbatim text content (identical to `Token.text`). Exists mostly for consistency with the other attributes. """ @@ -819,13 +819,13 @@ cdef class Token: @property def lower_(self): - """RETURNS (unicode): The lowercase token text. Equivalent to + """RETURNS (str): The lowercase token text. Equivalent to `Token.text.lower()`. """ return self.vocab.strings[self.c.lex.lower] property norm_: - """RETURNS (unicode): The token's norm, i.e. a normalised form of the + """RETURNS (str): The token's norm, i.e. a normalised form of the token text. Usually set in the language's tokenizer exceptions or norm exceptions. """ @@ -837,34 +837,34 @@ cdef class Token: @property def shape_(self): - """RETURNS (unicode): Transform of the tokens's string, to show + """RETURNS (str): Transform of the token's string, to show orthographic features. For example, "Xxxx" or "dd". """ return self.vocab.strings[self.c.lex.shape] @property def prefix_(self): - """RETURNS (unicode): A length-N substring from the start of the token. + """RETURNS (str): A length-N substring from the start of the token. Defaults to `N=1`. """ return self.vocab.strings[self.c.lex.prefix] @property def suffix_(self): - """RETURNS (unicode): A length-N substring from the end of the token. + """RETURNS (str): A length-N substring from the end of the token. Defaults to `N=3`. """ return self.vocab.strings[self.c.lex.suffix] @property def lang_(self): - """RETURNS (unicode): Language of the parent document's vocabulary, + """RETURNS (str): Language of the parent document's vocabulary, e.g. 'en'. """ return self.vocab.strings[self.c.lex.lang] property lemma_: - """RETURNS (unicode): The token lemma, i.e.
the base form of the word, + """RETURNS (str): The token lemma, i.e. the base form of the word, with no inflectional suffixes. """ def __get__(self): @@ -877,7 +877,7 @@ cdef class Token: self.c.lemma = self.vocab.strings.add(lemma_) property pos_: - """RETURNS (unicode): Coarse-grained part-of-speech tag.""" + """RETURNS (str): Coarse-grained part-of-speech tag.""" def __get__(self): return parts_of_speech.NAMES[self.c.pos] @@ -885,7 +885,7 @@ cdef class Token: self.c.pos = parts_of_speech.IDS[pos_name] property tag_: - """RETURNS (unicode): Fine-grained part-of-speech tag.""" + """RETURNS (str): Fine-grained part-of-speech tag.""" def __get__(self): return self.vocab.strings[self.c.tag] @@ -893,7 +893,7 @@ cdef class Token: self.tag = self.vocab.strings.add(tag) property dep_: - """RETURNS (unicode): The syntactic dependency label.""" + """RETURNS (str): The syntactic dependency label.""" def __get__(self): return self.vocab.strings[self.c.dep] diff --git a/spacy/util.py b/spacy/util.py index 41af881c9..fc5837755 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -58,7 +58,7 @@ def lang_class_is_loaded(lang): loaded lazily, to avoid expensive setup code associated with the language data. - lang (unicode): Two-letter language code, e.g. 'en'. + lang (str): Two-letter language code, e.g. 'en'. RETURNS (bool): Whether a Language class has been loaded. """ return lang in registry.languages @@ -67,7 +67,7 @@ def lang_class_is_loaded(lang): def get_lang_class(lang): """Import and load a Language class. - lang (unicode): Two-letter language code, e.g. 'en'. + lang (str): Two-letter language code, e.g. 'en'. RETURNS (Language): Language class. """ # Check if language is registered / entry point is available @@ -85,7 +85,7 @@ def get_lang_class(lang): def set_lang_class(name, cls): """Set a custom Language class name that can be loaded via get_lang_class. - name (unicode): Name of Language class. + name (str): Name of Language class. cls (Language): Language class. """ registry.languages.register(name, func=cls) @@ -107,7 +107,7 @@ def load_language_data(path): """Load JSON language data using the given path as a base. If the provided path isn't present, will attempt to load a gzipped version before giving up. - path (unicode / Path): The data to load. + path (str / Path): The data to load. RETURNS: The loaded data. """ path = ensure_path(path) @@ -128,7 +128,7 @@ def get_module_path(module): def load_model(name, **overrides): """Load a model from a package or data path. - name (unicode): Package name or model path. + name (str): Package name or model path. **overrides: Specific overrides, like pipeline components to disable. RETURNS (Language): `Language` class with the loaded model. """ @@ -202,7 +202,7 @@ def load_model_from_init_py(init_file, **overrides): """Helper function to use in the `load()` method of a model package's __init__.py. - init_file (unicode): Path to model's __init__.py, i.e. `__file__`. + init_file (str): Path to model's __init__.py, i.e. `__file__`. **overrides: Specific overrides, like pipeline components to disable. RETURNS (Language): `Language` class with loaded model. """ @@ -227,8 +227,8 @@ def get_package_version(name): """Get the version of an installed package. Typically used to get model package versions. - name (unicode): The name of the installed Python package. - RETURNS (unicode / None): The version or None if package not installed. + name (str): The name of the installed Python package. + RETURNS (str / None): The version or None if package not installed. 
""" try: return importlib_metadata.version(name) @@ -338,7 +338,7 @@ def get_model_config(path): def is_package(name): """Check if string maps to a package installed via pip. - name (unicode): Name of package. + name (str): Name of package. RETURNS (bool): True if installed package, False if not. """ try: @@ -351,7 +351,7 @@ def is_package(name): def get_package_path(name): """Get the path to an installed package. - name (unicode): Package name. + name (str): Package name. RETURNS (Path): Path to installed package. """ name = name.lower() # use lowercase version to be safe @@ -526,8 +526,8 @@ def expand_exc(excs, search, replace): For example, to add additional versions with typographic apostrophes. excs (dict): Tokenizer exceptions. - search (unicode): String to find and replace. - replace (unicode): Replacement. + search (str): String to find and replace. + replace (str): Replacement. RETURNS (dict): Combined tokenizer exceptions. """ @@ -761,8 +761,8 @@ def from_disk(path, readers, exclude): def import_file(name, loc): """Import module from a file. Used to load models from a directory. - name (unicode): Name of module to load. - loc (unicode / Path): Path to the file. + name (str): Name of module to load. + loc (str / Path): Path to the file. RETURNS: The loaded module. """ loc = str(loc) @@ -777,8 +777,8 @@ def minify_html(html): Disclaimer: NOT a general-purpose solution, only removes indentation and newlines. - html (unicode): Markup to minify. - RETURNS (unicode): "Minified" HTML. + html (str): Markup to minify. + RETURNS (str): "Minified" HTML. """ return html.strip().replace(" ", "").replace("\n", "") @@ -787,8 +787,8 @@ def escape_html(text): """Replace <, >, &, " with their HTML encoded representation. Intended to prevent HTML errors in rendered displaCy markup. - text (unicode): The original text. - RETURNS (unicode): Equivalent text to be safely used within HTML. + text (str): The original text. + RETURNS (str): Equivalent text to be safely used within HTML. """ text = text.replace("&", "&") text = text.replace("<", "<") diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index e100ae915..0ed2462c6 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -57,7 +57,7 @@ cdef class Vectors: shape (tuple): Size of the table, as (# entries, # columns) data (numpy.ndarray): The vector data. keys (iterable): A sequence of keys, aligned with the data. - name (unicode): A name to identify the vectors table. + name (str): A name to identify the vectors table. RETURNS (Vectors): The newly created object. DOCS: https://spacy.io/api/vectors#init @@ -237,7 +237,7 @@ cdef class Vectors: def find(self, *, key=None, keys=None, row=None, rows=None): """Look up one or more keys by row, or vice versa. - key (unicode / int): Find the row that the given key points to. + key (str / int): Find the row that the given key points to. Returns int, -1 if missing. keys (iterable): Find rows that the keys point to. Returns ndarray. @@ -352,7 +352,7 @@ cdef class Vectors: def to_disk(self, path, **kwargs): """Save the current state to a directory. - path (unicode / Path): A path to a directory, which will be created if + path (str / Path): A path to a directory, which will be created if it doesn't exists. DOCS: https://spacy.io/api/vectors#to_disk @@ -372,7 +372,7 @@ cdef class Vectors: """Loads state from a directory. Modifies the object in place and returns it. - path (unicode / Path): Directory path, string or Path-like object. + path (str / Path): Directory path, string or Path-like object. 
RETURNS (Vectors): The modified object. DOCS: https://spacy.io/api/vectors#from_disk diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index a1929559f..ed37f6e98 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -41,7 +41,7 @@ cdef class Vocab: strings (StringStore): StringStore that maps strings to integers, and vice versa. lookups (Lookups): Container for large lookup tables and dictionaries. - name (unicode): Optional name to identify the vectors table. + name (str): Optional name to identify the vectors table. RETURNS (Vocab): The newly constructed object. """ lex_attr_getters = lex_attr_getters if lex_attr_getters is not None else {} @@ -97,7 +97,7 @@ cdef class Vocab: See also: `Lexeme.set_flag`, `Lexeme.check_flag`, `Token.set_flag`, `Token.check_flag`. - flag_getter (callable): A function `f(unicode) -> bool`, to get the + flag_getter (callable): A function `f(str) -> bool`, to get the flag value. flag_id (int): An integer between 1 and 63 (inclusive), specifying the bit at which the flag will be stored. If -1, the lowest @@ -187,7 +187,7 @@ cdef class Vocab: def __contains__(self, key): """Check whether the string or int key has an entry in the vocabulary. - string (unicode): The ID string. + string (str): The ID string. RETURNS (bool) Whether the string has an entry in the vocabulary. DOCS: https://spacy.io/api/vocab#contains diff --git a/website/docs/api/lexeme.md b/website/docs/api/lexeme.md index feb167a9d..39148e476 100644 --- a/website/docs/api/lexeme.md +++ b/website/docs/api/lexeme.md @@ -125,21 +125,21 @@ The L2 norm of the lexeme's vector representation. | Name | Type | Description | | -------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `vocab` | `Vocab` | The lexeme's vocabulary. | -| `text` | unicode | Verbatim text content. | +| `text` | str | Verbatim text content. | | `orth` | int | ID of the verbatim text content. | -| `orth_` | unicode | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. | +| `orth_` | str | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. | | `rank` | int | Sequential ID of the lexemes's lexical type, used to index into tables, e.g. for word vectors. | | `flags` | int | Container of the lexeme's binary flags. | | `norm` | int | The lexemes's norm, i.e. a normalized form of the lexeme text. | -| `norm_` | unicode | The lexemes's norm, i.e. a normalized form of the lexeme text. | +| `norm_` | str | The lexeme's norm, i.e. a normalized form of the lexeme text. | | `lower` | int | Lowercase form of the word. | -| `lower_` | unicode | Lowercase form of the word. | +| `lower_` | str | Lowercase form of the word. | | `shape` | int | Transform of the words's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`. | -| `shape_` | unicode | Transform of the word's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by d`, and sequences of the same character are truncated after length 4. For example,`"Xxxx"`or`"dd"`.
+| `shape_` | str | Transform of the word's string, to show orthographic features. Alphabetic characters are replaced by `x` or `X`, and numeric characters are replaced by `d`, and sequences of the same character are truncated after length 4. For example, `"Xxxx"` or `"dd"`. | | `prefix` | int | Length-N substring from the start of the word. Defaults to `N=1`. | -| `prefix_` | unicode | Length-N substring from the start of the word. Defaults to `N=1`. | +| `prefix_` | str | Length-N substring from the start of the word. Defaults to `N=1`. | | `suffix` | int | Length-N substring from the end of the word. Defaults to `N=3`. | -| `suffix_` | unicode | Length-N substring from the start of the word. Defaults to `N=3`. | +| `suffix_` | str | Length-N substring from the end of the word. Defaults to `N=3`. | | `is_alpha` | bool | Does the lexeme consist of alphabetic characters? Equivalent to `lexeme.text.isalpha()`. | | `is_ascii` | bool | Does the lexeme consist of ASCII characters? Equivalent to `[any(ord(c) >= 128 for c in lexeme.text)]`. | | `is_digit` | bool | Does the lexeme consist of digits? Equivalent to `lexeme.text.isdigit()`. | @@ -159,7 +159,7 @@ The L2 norm of the lexeme's vector representation. | `is_oov` | bool | Is the lexeme out-of-vocabulary? | | `is_stop` | bool | Is the lexeme part of a "stop list"? | | `lang` | int | Language of the parent vocabulary. | -| `lang_` | unicode | Language of the parent vocabulary. | +| `lang_` | str | Language of the parent vocabulary. | | `prob` | float | Smoothed log probability estimate of the lexeme's word type (context-independent entry in the vocabulary). | | `cluster` | int | Brown cluster ID. | | `sentiment` | float | A scalar value indicating the positivity or negativity of the lexeme. | diff --git a/website/docs/api/vocab.md b/website/docs/api/vocab.md index e024ab54a..b851f6882 100644 --- a/website/docs/api/vocab.md +++ b/website/docs/api/vocab.md @@ -27,7 +27,7 @@ Create the vocabulary. | `tag_map` | dict | A dictionary mapping fine-grained tags to coarse-grained parts-of-speech, and optionally morphological attributes. | | `lemmatizer` | object | A lemmatizer. Defaults to `None`. | | `strings` | `StringStore` / list | A [`StringStore`](/api/stringstore) that maps strings to hash values, and vice versa, or a list of strings. | -| `vectors_name` 2.2 | unicode | A name to identify the vectors table. | +| `vectors_name` 2.2 | str | A name to identify the vectors table. | | **RETURNS** | `Vocab` | The newly constructed object. | ## Vocab.\_\_len\_\_ {#len tag="method"} @@ -91,10 +91,10 @@ given string, you need to look it up in > assert oov not in nlp.vocab > ``` -| Name | Type | Description | -| ----------- | ------- | -------------------------------------------------- | -| `string` | unicode | The ID string. | -| **RETURNS** | bool | Whether the string has an entry in the vocabulary. | +| Name | Type | Description | +| ----------- | ---- | -------------------------------------------------- | +| `string` | str | The ID string. | +| **RETURNS** | bool | Whether the string has an entry in the vocabulary. | ## Vocab.add_flag {#add_flag tag="method"} @@ -117,7 +117,7 @@ using `token.check_flag(flag_id)`. | Name | Type | Description | | ------------- | ---- | ----------------------------------------------------------------------------------------------------------------------------------------------- | -| `flag_getter` | dict | A function `f(unicode) -> bool`, to get the flag value.
+| `flag_getter` | callable | A function `f(str) -> bool`, to get the flag value. | | `flag_id` | int | An integer between 1 and 63 (inclusive), specifying the bit at which the flag will be stored. If `-1`, the lowest available bit will be chosen. | | **RETURNS** | int | The integer ID by which the flag value can be checked. | @@ -227,10 +227,10 @@ Save the current state to a directory. > nlp.vocab.to_disk("/path/to/vocab") > ``` -| Name | Type | Description | -| --------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- | -| `path` | unicode / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. | -| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. | +| Name | Type | Description | +| --------- | ------------ | --------------------------------------------------------------------------------------------------------------------- | +| `path` | str / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. | +| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. | ## Vocab.from_disk {#from_disk tag="method" new="2"} Loads state from a directory. Modifies the object in place and returns it. > vocab = Vocab().from_disk("/path/to/vocab") > ``` -| Name | Type | Description | -| ----------- | ---------------- | -------------------------------------------------------------------------- | -| `path` | unicode / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. | -| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. | -| **RETURNS** | `Vocab` | The modified `Vocab` object. | +| Name | Type | Description | +| ----------- | ------------ | -------------------------------------------------------------------------- | +| `path` | str / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. | +| `exclude` | list | String names of [serialization fields](#serialization-fields) to exclude. | +| **RETURNS** | `Vocab` | The modified `Vocab` object. | ## Vocab.to_bytes {#to_bytes tag="method"} diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index 5f47bd2e3..a84399312 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -157,19 +157,19 @@ The available token pattern keys correspond to a number of [`Token` attributes](/api/token#attributes). The supported attributes for rule-based matching are: -| Attribute | Type |  Description | -| -------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------ | -| `ORTH` | unicode | The exact verbatim text of a token. | -| `TEXT` 2.1 | unicode | The exact verbatim text of a token. | -| `LOWER` | unicode | The lowercase form of the token text. | -|  `LENGTH` | int | The length of the token text. | -|  `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | bool | Token text consists of alphabetic characters, ASCII characters, digits. | -|  `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | bool | Token text is in lowercase, uppercase, titlecase. | -|  `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | bool | Token is punctuation, whitespace, stop word.
| -|  `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | bool | Token text resembles a number, URL, email. | -|  `POS`, `TAG`, `DEP`, `LEMMA`, `SHAPE` | unicode | The token's simple and extended part-of-speech tag, dependency label, lemma, shape. | -| `ENT_TYPE` | unicode | The token's entity label. | -| `_` 2.1 | dict | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). | +| Attribute | Type |  Description | +| -------------------------------------- | ---- | ------------------------------------------------------------------------------------------------------ | +| `ORTH` | str | The exact verbatim text of a token. | +| `TEXT` 2.1 | str | The exact verbatim text of a token. | +| `LOWER` | str | The lowercase form of the token text. | +|  `LENGTH` | int | The length of the token text. | +|  `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | bool | Token text consists of alphabetic characters, ASCII characters, digits. | +|  `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | bool | Token text is in lowercase, uppercase, titlecase. | +|  `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | bool | Token is punctuation, whitespace, stop word. | +|  `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | bool | Token text resembles a number, URL, email. | +|  `POS`, `TAG`, `DEP`, `LEMMA`, `SHAPE` | str | The token's simple and extended part-of-speech tag, dependency label, lemma, shape. | +| `ENT_TYPE` | str | The token's entity label. | +| `_` 2.1 | dict | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). | @@ -1101,21 +1101,28 @@ powerful model packages with binary weights _and_ rules included! ### Using a large number of phrase patterns {#entityruler-large-phrase-patterns new="2.2.4"} -When using a large amount of **phrase patterns** (roughly > 10000) it's useful to understand how the `add_patterns` function of the EntityRuler works. For each **phrase pattern**, -the EntityRuler calls the nlp object to construct a doc object. This happens in case you try -to add the EntityRuler at the end of an existing pipeline with, for example, a POS tagger and want to -extract matches based on the pattern's POS signature. +When using a large amount of **phrase patterns** (roughly > 10000) it's useful +to understand how the `add_patterns` function of the EntityRuler works. For each +**phrase pattern**, the EntityRuler calls the nlp object to construct a doc +object. This happens in case you try to add the EntityRuler at the end of an +existing pipeline with, for example, a POS tagger and want to extract matches +based on the pattern's POS signature. -In this case you would pass a config value of `phrase_matcher_attr="POS"` for the EntityRuler. +In this case you would pass a config value of `phrase_matcher_attr="POS"` for +the EntityRuler. -Running the full language pipeline across every pattern in a large list scales linearly and can therefore take a long time on large amounts of phrase patterns. +Running the full language pipeline across every pattern in a large list scales +linearly and can therefore take a long time on large amounts of phrase patterns. -As of spaCy 2.2.4 the `add_patterns` function has been refactored to use nlp.pipe on all phrase patterns resulting in about a 10x-20x speed up with 5,000-100,000 phrase patterns respectively. +As of spaCy 2.2.4 the `add_patterns` function has been refactored to use +nlp.pipe on all phrase patterns resulting in about a 10x-20x speed up with +5,000-100,000 phrase patterns respectively. 
-Even with this speedup (but especially if you're using an older version) the `add_patterns` function can still take a long time. +Even with this speedup (but especially if you're using an older version) the +`add_patterns` function can still take a long time. -An easy workaround to make this function run faster is disabling the other language pipes -while adding the phrase patterns. +An easy workaround to make this function run faster is disabling the other +language pipes while adding the phrase patterns. ```python entityruler = EntityRuler(nlp)
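# A sketch of the workaround described above; the pattern list and the kept
# pipe name are illustrative. The idea is to leave only the components the
# patterns depend on (e.g. the tagger when using phrase_matcher_attr="POS")
# enabled while the patterns are added.
patterns = [{"label": "TEST", "pattern": str(i)} for i in range(100000)]

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "tagger"]
with nlp.disable_pipes(*other_pipes):
    entityruler.add_patterns(patterns)
```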