unicode -> str consistency [ci skip]

2025-08-26 06:54:56 +03:00 · 2020-05-24 18:51:10 +02:00 · 2020-05-24 18:51:10 +02:00 · 1a15896ba9
commit 1a15896ba9
parent 262d306eaa
13 changed files with 26 additions and 26 deletions
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@ -65,7 +65,7 @@ def print_markdown(data, title=None):
    """Print data in GitHub-flavoured Markdown format for issues etc.
    data (dict or list of tuples): Label/value pairs.
-    title (unicode or None): Title, will be rendered as headline 2.
+    title (str / None): Title, will be rendered as headline 2.
    """
    markdown = []
    for key, value in data.items():
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@ -302,7 +302,7 @@ class EntityRenderer(object):
        text (str): Original text.
        spans (list): Individual entity spans and their start, end and label.
-        title (unicode or None): Document title set in Doc.user_data['title'].
+        title (str / None): Document title set in Doc.user_data['title'].
        """
        markup = ""
        offset = 0
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@ -140,8 +140,8 @@ class GoldCorpus(object):
    def __init__(self, train, dev, gold_preproc=False, limit=None):
        """Create a GoldCorpus.
-        train (unicode or Path): File or directory of training data.
+        train (str / Path): File or directory of training data.
-        dev (unicode or Path): File or directory of development data.
+        dev (str / Path): File or directory of development data.
        RETURNS (GoldCorpus): The newly created object.
        """
        self.limit = limit
--- a/spacy/language.py
+++ b/spacy/language.py
@ -934,7 +934,7 @@ class Language(object):
        """Save the current state to a directory.  If a model is loaded, this
        will include the model.
-        path (unicode or Path): Path to a directory, which will be created if
+        path (str / Path): Path to a directory, which will be created if
            it doesn't exist.
        exclude (list): Names of components or serialization fields to exclude.
@ -968,7 +968,7 @@ class Language(object):
        returns it. If the saved `Language` object contains a model, the
        model will be loaded.
-        path (unicode or Path): A path to a directory.
+        path (str / Path): A path to a directory.
        exclude (list): Names of components or serialization fields to exclude.
        RETURNS (Language): The modified `Language` object.
--- a/spacy/matcher/dependencymatcher.pyx
+++ b/spacy/matcher/dependencymatcher.pyx
@ -194,7 +194,7 @@ cdef class DependencyMatcher:
    def get(self, key, default=None):
        """Retrieve the pattern stored for a key.
-        key (unicode or int): The key to retrieve.
+        key (str / int): The key to retrieve.
        RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
        """
        key = self._normalize_key(key)
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@ -165,7 +165,7 @@ cdef class Matcher:
    def get(self, key, default=None):
        """Retrieve the pattern stored for a key.
-        key (unicode or int): The key to retrieve.
+        key (str / int): The key to retrieve.
        RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
        """
        key = self._normalize_key(key)
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@ -30,7 +30,7 @@ cdef class PhraseMatcher:
        """Initialize the PhraseMatcher.
        vocab (Vocab): The shared vocabulary.
-        attr (int / unicode): Token attribute to match on.
+        attr (int / str): Token attribute to match on.
        validate (bool): Perform additional validation when patterns are added.
        RETURNS (PhraseMatcher): The newly constructed object.
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@ -30,7 +30,7 @@ class EntityRuler(object):
        nlp (Language): The shared nlp object to pass the vocab to the matchers
            and process phrase patterns.
-        phrase_matcher_attr (int / unicode): Token attribute to match on, passed
+        phrase_matcher_attr (int / str): Token attribute to match on, passed
            to the internal PhraseMatcher as `attr`
        validate (bool): Whether patterns should be validated, passed to
            Matcher and PhraseMatcher as `validate`
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@ -109,7 +109,7 @@ cdef class StringStore:
        """Retrieve a string from a given hash, or vice versa.
        string_or_id (bytes, unicode or uint64): The value to encode.
-        Returns (unicode or uint64): The value to be retrieved.
+        RETURNS (str / uint64): The value to be retrieved.
        """
        if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
            return 0
@ -223,7 +223,7 @@ cdef class StringStore:
    def to_disk(self, path):
        """Save the current state to a directory.
-        path (unicode or Path): A path to a directory, which will be created if
+        path (str / Path): A path to a directory, which will be created if
            it doesn't exist. Paths may be either strings or Path-like objects.
        """
        path = util.ensure_path(path)
@ -234,7 +234,7 @@ cdef class StringStore:
        """Loads state from a directory. Modifies the object in place and
        returns it.
-        path (unicode or Path): A path to a directory. Paths may be either
+        path (str / Path): A path to a directory. Paths may be either
            strings or `Path`-like objects.
        RETURNS (StringStore): The modified `StringStore` object.
        """
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@ -693,7 +693,7 @@ cdef class Tokenizer:
    def to_disk(self, path, **kwargs):
        """Save the current state to a directory.
-        path (unicode or Path): A path to a directory, which will be created if
+        path (str / Path): A path to a directory, which will be created if
            it doesn't exist.
        exclude (list): String names of serialization fields to exclude.
@ -707,7 +707,7 @@ cdef class Tokenizer:
        """Loads state from a directory. Modifies the object in place and
        returns it.
-        path (unicode or Path): A path to a directory.
+        path (str / Path): A path to a directory.
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Tokenizer): The modified `Tokenizer` object.
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -843,7 +843,7 @@ cdef class Doc:
    def to_disk(self, path, **kwargs):
        """Save the current state to a directory.
-        path (unicode or Path): A path to a directory, which will be created if
+        path (str / Path): A path to a directory, which will be created if
            it doesn't exist. Paths may be either strings or Path-like objects.
        exclude (list): String names of serialization fields to exclude.
@ -857,7 +857,7 @@ cdef class Doc:
        """Loads state from a directory. Modifies the object in place and
        returns it.
-        path (unicode or Path): A path to a directory. Paths may be either
+        path (str / Path): A path to a directory. Paths may be either
            strings or `Path`-like objects.
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Doc): The modified `Doc` object.
--- a/spacy/util.py
+++ b/spacy/util.py
@ -269,7 +269,7 @@ def load_config(path, create_objects=False):
    """Load a Thinc-formatted config file, optionally filling in objects where
    the config references registry entries. See "Thinc config files" for details.
-    path (unicode or Path): Path to the config file
+    path (str / Path): Path to the config file.
    create_objects (bool): Whether to automatically create objects when the config
        references registry entries. Defaults to False.
@ -286,7 +286,7 @@ def load_config_from_str(string, create_objects=False):
    """Load a Thinc-formatted config, optionally filling in objects where
    the config references registry entries. See "Thinc config files" for details.
-    string (unicode or Path): Text contents of the config file.
+    string (str): Text contents of the config file.
    create_objects (bool): Whether to automatically create objects when the config
        references registry entries. Defaults to False.
@ -302,7 +302,7 @@ def load_config_from_str(string, create_objects=False):
 def get_model_meta(path):
    """Get model meta.json from a directory path and validate its contents.
-    path (unicode or Path): Path to model directory.
+    path (str / Path): Path to model directory.
    RETURNS (dict): The model's meta data.
    """
    model_path = ensure_path(path)
@ -321,7 +321,7 @@ def get_model_meta(path):
 def get_model_config(path):
    """Get the model's config from a directory path.
-    path (unicode or Path): Path to model directory.
+    path (str / Path): Path to model directory.
    RETURNS (Config): The model's config data.
    """
    model_path = ensure_path(path)
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -336,7 +336,7 @@ cdef class Vocab:
        If `minn` is defined, then the resulting vector uses Fasttext's
        subword features by average over ngrams of `orth`.
-        orth (int / unicode): The hash value of a word, or its unicode string.
+        orth (int / str): The hash value of a word, or its string value.
        minn (int): Minimum n-gram length used for Fasttext's ngram computation.
            Defaults to the length of `orth`.
        maxn (int): Maximum n-gram length used for Fasttext's ngram computation.
@ -389,7 +389,7 @@ cdef class Vocab:
        """Set a vector for a word in the vocabulary. Words can be referenced
        by string or int ID.
-        orth (int / unicode): The word.
+        orth (int / str): The word.
        vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
        DOCS: https://spacy.io/api/vocab#set_vector
@ -411,7 +411,7 @@ cdef class Vocab:
        """Check whether a word has a vector. Returns False if no vectors have
        been loaded. Words can be looked up by string or int ID.
-        orth (int / unicode): The word.
+        orth (int / str): The word.
        RETURNS (bool): Whether the word has a vector.
        DOCS: https://spacy.io/api/vocab#has_vector
@ -423,7 +423,7 @@ cdef class Vocab:
    def to_disk(self, path, exclude=tuple(), **kwargs):
        """Save the current state to a directory.
-        path (unicode or Path): A path to a directory, which will be created if
+        path (str / Path): A path to a directory, which will be created if
            it doesn't exist.
        exclude (list): String names of serialization fields to exclude.
@ -448,7 +448,7 @@ cdef class Vocab:
        """Loads state from a directory. Modifies the object in place and
        returns it.
-        path (unicode or Path): A path to a directory.
+        path (str / Path): A path to a directory.
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Vocab): The modified `Vocab` object.