mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 00:46:28 +03:00
unicode -> str consistency [ci skip]
This commit is contained in:
parent
262d306eaa
commit
1a15896ba9
|
@ -65,7 +65,7 @@ def print_markdown(data, title=None):
|
|||
"""Print data in GitHub-flavoured Markdown format for issues etc.
|
||||
|
||||
data (dict or list of tuples): Label/value pairs.
|
||||
title (unicode or None): Title, will be rendered as headline 2.
|
||||
title (str / None): Title, will be rendered as headline 2.
|
||||
"""
|
||||
markdown = []
|
||||
for key, value in data.items():
|
||||
|
|
|
@ -302,7 +302,7 @@ class EntityRenderer(object):
|
|||
|
||||
text (str): Original text.
|
||||
spans (list): Individual entity spans and their start, end and label.
|
||||
title (unicode or None): Document title set in Doc.user_data['title'].
|
||||
title (str / None): Document title set in Doc.user_data['title'].
|
||||
"""
|
||||
markup = ""
|
||||
offset = 0
|
||||
|
|
|
@ -140,8 +140,8 @@ class GoldCorpus(object):
|
|||
def __init__(self, train, dev, gold_preproc=False, limit=None):
|
||||
"""Create a GoldCorpus.
|
||||
|
||||
train (unicode or Path): File or directory of training data.
|
||||
dev (unicode or Path): File or directory of development data.
|
||||
train (str / Path): File or directory of training data.
|
||||
dev (str / Path): File or directory of development data.
|
||||
RETURNS (GoldCorpus): The newly created object.
|
||||
"""
|
||||
self.limit = limit
|
||||
|
|
|
@ -934,7 +934,7 @@ class Language(object):
|
|||
"""Save the current state to a directory. If a model is loaded, this
|
||||
will include the model.
|
||||
|
||||
path (unicode or Path): Path to a directory, which will be created if
|
||||
path (str / Path): Path to a directory, which will be created if
|
||||
it doesn't exist.
|
||||
exclude (list): Names of components or serialization fields to exclude.
|
||||
|
||||
|
@ -968,7 +968,7 @@ class Language(object):
|
|||
returns it. If the saved `Language` object contains a model, the
|
||||
model will be loaded.
|
||||
|
||||
path (unicode or Path): A path to a directory.
|
||||
path (str / Path): A path to a directory.
|
||||
exclude (list): Names of components or serialization fields to exclude.
|
||||
RETURNS (Language): The modified `Language` object.
|
||||
|
||||
|
|
|
@ -194,7 +194,7 @@ cdef class DependencyMatcher:
|
|||
def get(self, key, default=None):
|
||||
"""Retrieve the pattern stored for a key.
|
||||
|
||||
key (unicode or int): The key to retrieve.
|
||||
key (str / int): The key to retrieve.
|
||||
RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
|
||||
"""
|
||||
key = self._normalize_key(key)
|
||||
|
|
|
@ -165,7 +165,7 @@ cdef class Matcher:
|
|||
def get(self, key, default=None):
|
||||
"""Retrieve the pattern stored for a key.
|
||||
|
||||
key (unicode or int): The key to retrieve.
|
||||
key (str / int): The key to retrieve.
|
||||
RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
|
||||
"""
|
||||
key = self._normalize_key(key)
|
||||
|
|
|
@ -30,7 +30,7 @@ cdef class PhraseMatcher:
|
|||
"""Initialize the PhraseMatcher.
|
||||
|
||||
vocab (Vocab): The shared vocabulary.
|
||||
attr (int / unicode): Token attribute to match on.
|
||||
attr (int / str): Token attribute to match on.
|
||||
validate (bool): Perform additional validation when patterns are added.
|
||||
RETURNS (PhraseMatcher): The newly constructed object.
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ class EntityRuler(object):
|
|||
|
||||
nlp (Language): The shared nlp object to pass the vocab to the matchers
|
||||
and process phrase patterns.
|
||||
phrase_matcher_attr (int / unicode): Token attribute to match on, passed
|
||||
phrase_matcher_attr (int / str): Token attribute to match on, passed
|
||||
to the internal PhraseMatcher as `attr`
|
||||
validate (bool): Whether patterns should be validated, passed to
|
||||
Matcher and PhraseMatcher as `validate`
|
||||
|
|
|
@ -109,7 +109,7 @@ cdef class StringStore:
|
|||
"""Retrieve a string from a given hash, or vice versa.
|
||||
|
||||
string_or_id (bytes / str / uint64): The value to encode.
|
||||
Returns (unicode or uint64): The value to be retrieved.
|
||||
RETURNS (str / uint64): The value to be retrieved.
|
||||
"""
|
||||
if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
|
||||
return 0
|
||||
|
@ -223,7 +223,7 @@ cdef class StringStore:
|
|||
def to_disk(self, path):
|
||||
"""Save the current state to a directory.
|
||||
|
||||
path (unicode or Path): A path to a directory, which will be created if
|
||||
path (str / Path): A path to a directory, which will be created if
|
||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||
"""
|
||||
path = util.ensure_path(path)
|
||||
|
@ -234,7 +234,7 @@ cdef class StringStore:
|
|||
"""Loads state from a directory. Modifies the object in place and
|
||||
returns it.
|
||||
|
||||
path (unicode or Path): A path to a directory. Paths may be either
|
||||
path (str / Path): A path to a directory. Paths may be either
|
||||
strings or `Path`-like objects.
|
||||
RETURNS (StringStore): The modified `StringStore` object.
|
||||
"""
|
||||
|
|
|
@ -693,7 +693,7 @@ cdef class Tokenizer:
|
|||
def to_disk(self, path, **kwargs):
|
||||
"""Save the current state to a directory.
|
||||
|
||||
path (unicode or Path): A path to a directory, which will be created if
|
||||
path (str / Path): A path to a directory, which will be created if
|
||||
it doesn't exist.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
|
||||
|
@ -707,7 +707,7 @@ cdef class Tokenizer:
|
|||
"""Loads state from a directory. Modifies the object in place and
|
||||
returns it.
|
||||
|
||||
path (unicode or Path): A path to a directory.
|
||||
path (str / Path): A path to a directory.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Tokenizer): The modified `Tokenizer` object.
|
||||
|
||||
|
|
|
@ -843,7 +843,7 @@ cdef class Doc:
|
|||
def to_disk(self, path, **kwargs):
|
||||
"""Save the current state to a directory.
|
||||
|
||||
path (unicode or Path): A path to a directory, which will be created if
|
||||
path (str / Path): A path to a directory, which will be created if
|
||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
|
||||
|
@ -857,7 +857,7 @@ cdef class Doc:
|
|||
"""Loads state from a directory. Modifies the object in place and
|
||||
returns it.
|
||||
|
||||
path (unicode or Path): A path to a directory. Paths may be either
|
||||
path (str / Path): A path to a directory. Paths may be either
|
||||
strings or `Path`-like objects.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Doc): The modified `Doc` object.
|
||||
|
|
|
@ -269,7 +269,7 @@ def load_config(path, create_objects=False):
|
|||
"""Load a Thinc-formatted config file, optionally filling in objects where
|
||||
the config references registry entries. See "Thinc config files" for details.
|
||||
|
||||
path (unicode or Path): Path to the config file
|
||||
path (str / Path): Path to the config file
|
||||
create_objects (bool): Whether to automatically create objects when the config
|
||||
references registry entries. Defaults to False.
|
||||
|
||||
|
@ -286,7 +286,7 @@ def load_config_from_str(string, create_objects=False):
|
|||
"""Load a Thinc-formatted config, optionally filling in objects where
|
||||
the config references registry entries. See "Thinc config files" for details.
|
||||
|
||||
string (unicode or Path): Text contents of the config file.
|
||||
string (str): Text contents of the config file.
|
||||
create_objects (bool): Whether to automatically create objects when the config
|
||||
references registry entries. Defaults to False.
|
||||
|
||||
|
@ -302,7 +302,7 @@ def load_config_from_str(string, create_objects=False):
|
|||
def get_model_meta(path):
|
||||
"""Get model meta.json from a directory path and validate its contents.
|
||||
|
||||
path (unicode or Path): Path to model directory.
|
||||
path (str / Path): Path to model directory.
|
||||
RETURNS (dict): The model's meta data.
|
||||
"""
|
||||
model_path = ensure_path(path)
|
||||
|
@ -321,7 +321,7 @@ def get_model_meta(path):
|
|||
def get_model_config(path):
|
||||
"""Get the model's config from a directory path.
|
||||
|
||||
path (unicode or Path): Path to model directory.
|
||||
path (str / Path): Path to model directory.
|
||||
RETURNS (Config): The model's config data.
|
||||
"""
|
||||
model_path = ensure_path(path)
|
||||
|
|
|
@ -336,7 +336,7 @@ cdef class Vocab:
|
|||
If `minn` is defined, then the resulting vector uses Fasttext's
|
||||
subword features by averaging over ngrams of `orth`.
|
||||
|
||||
orth (int / unicode): The hash value of a word, or its unicode string.
|
||||
orth (int / str): The hash value of a word, or its unicode string.
|
||||
minn (int): Minimum n-gram length used for Fasttext's ngram computation.
|
||||
Defaults to the length of `orth`.
|
||||
maxn (int): Maximum n-gram length used for Fasttext's ngram computation.
|
||||
|
@ -389,7 +389,7 @@ cdef class Vocab:
|
|||
"""Set a vector for a word in the vocabulary. Words can be referenced
|
||||
by string or int ID.
|
||||
|
||||
orth (int / unicode): The word.
|
||||
orth (int / str): The word.
|
||||
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
|
||||
|
||||
DOCS: https://spacy.io/api/vocab#set_vector
|
||||
|
@ -411,7 +411,7 @@ cdef class Vocab:
|
|||
"""Check whether a word has a vector. Returns False if no vectors have
|
||||
been loaded. Words can be looked up by string or int ID.
|
||||
|
||||
orth (int / unicode): The word.
|
||||
orth (int / str): The word.
|
||||
RETURNS (bool): Whether the word has a vector.
|
||||
|
||||
DOCS: https://spacy.io/api/vocab#has_vector
|
||||
|
@ -423,7 +423,7 @@ cdef class Vocab:
|
|||
def to_disk(self, path, exclude=tuple(), **kwargs):
|
||||
"""Save the current state to a directory.
|
||||
|
||||
path (unicode or Path): A path to a directory, which will be created if
|
||||
path (str / Path): A path to a directory, which will be created if
|
||||
it doesn't exist.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
|
||||
|
@ -448,7 +448,7 @@ cdef class Vocab:
|
|||
"""Loads state from a directory. Modifies the object in place and
|
||||
returns it.
|
||||
|
||||
path (unicode or Path): A path to a directory.
|
||||
path (str / Path): A path to a directory.
|
||||
exclude (list): String names of serialization fields to exclude.
|
||||
RETURNS (Vocab): The modified `Vocab` object.
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user