mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
unicode -> str consistency [ci skip]
This commit is contained in:
parent
262d306eaa
commit
1a15896ba9
|
@ -65,7 +65,7 @@ def print_markdown(data, title=None):
|
||||||
"""Print data in GitHub-flavoured Markdown format for issues etc.
|
"""Print data in GitHub-flavoured Markdown format for issues etc.
|
||||||
|
|
||||||
data (dict or list of tuples): Label/value pairs.
|
data (dict or list of tuples): Label/value pairs.
|
||||||
title (unicode or None): Title, will be rendered as headline 2.
|
title (str / None): Title, will be rendered as headline 2.
|
||||||
"""
|
"""
|
||||||
markdown = []
|
markdown = []
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
|
|
|
@ -302,7 +302,7 @@ class EntityRenderer(object):
|
||||||
|
|
||||||
text (str): Original text.
|
text (str): Original text.
|
||||||
spans (list): Individual entity spans and their start, end and label.
|
spans (list): Individual entity spans and their start, end and label.
|
||||||
title (unicode or None): Document title set in Doc.user_data['title'].
|
title (str / None): Document title set in Doc.user_data['title'].
|
||||||
"""
|
"""
|
||||||
markup = ""
|
markup = ""
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|
|
@ -140,8 +140,8 @@ class GoldCorpus(object):
|
||||||
def __init__(self, train, dev, gold_preproc=False, limit=None):
|
def __init__(self, train, dev, gold_preproc=False, limit=None):
|
||||||
"""Create a GoldCorpus.
|
"""Create a GoldCorpus.
|
||||||
|
|
||||||
train (unicode or Path): File or directory of training data.
|
train (str / Path): File or directory of training data.
|
||||||
dev (unicode or Path): File or directory of development data.
|
dev (str / Path): File or directory of development data.
|
||||||
RETURNS (GoldCorpus): The newly created object.
|
RETURNS (GoldCorpus): The newly created object.
|
||||||
"""
|
"""
|
||||||
self.limit = limit
|
self.limit = limit
|
||||||
|
|
|
@ -934,7 +934,7 @@ class Language(object):
|
||||||
"""Save the current state to a directory. If a model is loaded, this
|
"""Save the current state to a directory. If a model is loaded, this
|
||||||
will include the model.
|
will include the model.
|
||||||
|
|
||||||
path (unicode or Path): Path to a directory, which will be created if
|
path (str / Path): Path to a directory, which will be created if
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): Names of components or serialization fields to exclude.
|
exclude (list): Names of components or serialization fields to exclude.
|
||||||
|
|
||||||
|
@ -968,7 +968,7 @@ class Language(object):
|
||||||
returns it. If the saved `Language` object contains a model, the
|
returns it. If the saved `Language` object contains a model, the
|
||||||
model will be loaded.
|
model will be loaded.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory.
|
path (str / Path): A path to a directory.
|
||||||
exclude (list): Names of components or serialization fields to exclude.
|
exclude (list): Names of components or serialization fields to exclude.
|
||||||
RETURNS (Language): The modified `Language` object.
|
RETURNS (Language): The modified `Language` object.
|
||||||
|
|
||||||
|
|
|
@ -194,7 +194,7 @@ cdef class DependencyMatcher:
|
||||||
def get(self, key, default=None):
|
def get(self, key, default=None):
|
||||||
"""Retrieve the pattern stored for a key.
|
"""Retrieve the pattern stored for a key.
|
||||||
|
|
||||||
key (unicode or int): The key to retrieve.
|
key (str / int): The key to retrieve.
|
||||||
RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
|
RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
|
||||||
"""
|
"""
|
||||||
key = self._normalize_key(key)
|
key = self._normalize_key(key)
|
||||||
|
|
|
@ -165,7 +165,7 @@ cdef class Matcher:
|
||||||
def get(self, key, default=None):
|
def get(self, key, default=None):
|
||||||
"""Retrieve the pattern stored for a key.
|
"""Retrieve the pattern stored for a key.
|
||||||
|
|
||||||
key (unicode or int): The key to retrieve.
|
key (str / int): The key to retrieve.
|
||||||
RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
|
RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
|
||||||
"""
|
"""
|
||||||
key = self._normalize_key(key)
|
key = self._normalize_key(key)
|
||||||
|
|
|
@ -30,7 +30,7 @@ cdef class PhraseMatcher:
|
||||||
"""Initialize the PhraseMatcher.
|
"""Initialize the PhraseMatcher.
|
||||||
|
|
||||||
vocab (Vocab): The shared vocabulary.
|
vocab (Vocab): The shared vocabulary.
|
||||||
attr (int / unicode): Token attribute to match on.
|
attr (int / str): Token attribute to match on.
|
||||||
validate (bool): Perform additional validation when patterns are added.
|
validate (bool): Perform additional validation when patterns are added.
|
||||||
RETURNS (PhraseMatcher): The newly constructed object.
|
RETURNS (PhraseMatcher): The newly constructed object.
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ class EntityRuler(object):
|
||||||
|
|
||||||
nlp (Language): The shared nlp object to pass the vocab to the matchers
|
nlp (Language): The shared nlp object to pass the vocab to the matchers
|
||||||
and process phrase patterns.
|
and process phrase patterns.
|
||||||
phrase_matcher_attr (int / unicode): Token attribute to match on, passed
|
phrase_matcher_attr (int / str): Token attribute to match on, passed
|
||||||
to the internal PhraseMatcher as `attr`
|
to the internal PhraseMatcher as `attr`
|
||||||
validate (bool): Whether patterns should be validated, passed to
|
validate (bool): Whether patterns should be validated, passed to
|
||||||
Matcher and PhraseMatcher as `validate`
|
Matcher and PhraseMatcher as `validate`
|
||||||
|
|
|
@ -109,7 +109,7 @@ cdef class StringStore:
|
||||||
"""Retrieve a string from a given hash, or vice versa.
|
"""Retrieve a string from a given hash, or vice versa.
|
||||||
|
|
||||||
string_or_id (bytes, unicode or uint64): The value to encode.
|
string_or_id (bytes / str / uint64): The value to encode.
|
||||||
Returns (unicode or uint64): The value to be retrieved.
|
RETURNS (str / uint64): The value to be retrieved.
|
||||||
"""
|
"""
|
||||||
if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
|
if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
|
||||||
return 0
|
return 0
|
||||||
|
@ -223,7 +223,7 @@ cdef class StringStore:
|
||||||
def to_disk(self, path):
|
def to_disk(self, path):
|
||||||
"""Save the current state to a directory.
|
"""Save the current state to a directory.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory, which will be created if
|
path (str / Path): A path to a directory, which will be created if
|
||||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||||
"""
|
"""
|
||||||
path = util.ensure_path(path)
|
path = util.ensure_path(path)
|
||||||
|
@ -234,7 +234,7 @@ cdef class StringStore:
|
||||||
"""Loads state from a directory. Modifies the object in place and
|
"""Loads state from a directory. Modifies the object in place and
|
||||||
returns it.
|
returns it.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory. Paths may be either
|
path (str / Path): A path to a directory. Paths may be either
|
||||||
strings or `Path`-like objects.
|
strings or `Path`-like objects.
|
||||||
RETURNS (StringStore): The modified `StringStore` object.
|
RETURNS (StringStore): The modified `StringStore` object.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -693,7 +693,7 @@ cdef class Tokenizer:
|
||||||
def to_disk(self, path, **kwargs):
|
def to_disk(self, path, **kwargs):
|
||||||
"""Save the current state to a directory.
|
"""Save the current state to a directory.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory, which will be created if
|
path (str / Path): A path to a directory, which will be created if
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
|
|
||||||
|
@ -707,7 +707,7 @@ cdef class Tokenizer:
|
||||||
"""Loads state from a directory. Modifies the object in place and
|
"""Loads state from a directory. Modifies the object in place and
|
||||||
returns it.
|
returns it.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory.
|
path (str / Path): A path to a directory.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Tokenizer): The modified `Tokenizer` object.
|
RETURNS (Tokenizer): The modified `Tokenizer` object.
|
||||||
|
|
||||||
|
|
|
@ -843,7 +843,7 @@ cdef class Doc:
|
||||||
def to_disk(self, path, **kwargs):
|
def to_disk(self, path, **kwargs):
|
||||||
"""Save the current state to a directory.
|
"""Save the current state to a directory.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory, which will be created if
|
path (str / Path): A path to a directory, which will be created if
|
||||||
it doesn't exist. Paths may be either strings or Path-like objects.
|
it doesn't exist. Paths may be either strings or Path-like objects.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
|
|
||||||
|
@ -857,7 +857,7 @@ cdef class Doc:
|
||||||
"""Loads state from a directory. Modifies the object in place and
|
"""Loads state from a directory. Modifies the object in place and
|
||||||
returns it.
|
returns it.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory. Paths may be either
|
path (str / Path): A path to a directory. Paths may be either
|
||||||
strings or `Path`-like objects.
|
strings or `Path`-like objects.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Doc): The modified `Doc` object.
|
RETURNS (Doc): The modified `Doc` object.
|
||||||
|
|
|
@ -269,7 +269,7 @@ def load_config(path, create_objects=False):
|
||||||
"""Load a Thinc-formatted config file, optionally filling in objects where
|
"""Load a Thinc-formatted config file, optionally filling in objects where
|
||||||
the config references registry entries. See "Thinc config files" for details.
|
the config references registry entries. See "Thinc config files" for details.
|
||||||
|
|
||||||
path (unicode or Path): Path to the config file
|
path (str / Path): Path to the config file.
|
||||||
create_objects (bool): Whether to automatically create objects when the config
|
create_objects (bool): Whether to automatically create objects when the config
|
||||||
references registry entries. Defaults to False.
|
references registry entries. Defaults to False.
|
||||||
|
|
||||||
|
@ -286,7 +286,7 @@ def load_config_from_str(string, create_objects=False):
|
||||||
"""Load a Thinc-formatted config, optionally filling in objects where
|
"""Load a Thinc-formatted config, optionally filling in objects where
|
||||||
the config references registry entries. See "Thinc config files" for details.
|
the config references registry entries. See "Thinc config files" for details.
|
||||||
|
|
||||||
string (unicode or Path): Text contents of the config file.
|
string (str): Text contents of the config file.
|
||||||
create_objects (bool): Whether to automatically create objects when the config
|
create_objects (bool): Whether to automatically create objects when the config
|
||||||
references registry entries. Defaults to False.
|
references registry entries. Defaults to False.
|
||||||
|
|
||||||
|
@ -302,7 +302,7 @@ def load_config_from_str(string, create_objects=False):
|
||||||
def get_model_meta(path):
|
def get_model_meta(path):
|
||||||
"""Get model meta.json from a directory path and validate its contents.
|
"""Get model meta.json from a directory path and validate its contents.
|
||||||
|
|
||||||
path (unicode or Path): Path to model directory.
|
path (str / Path): Path to model directory.
|
||||||
RETURNS (dict): The model's meta data.
|
RETURNS (dict): The model's meta data.
|
||||||
"""
|
"""
|
||||||
model_path = ensure_path(path)
|
model_path = ensure_path(path)
|
||||||
|
@ -321,7 +321,7 @@ def get_model_meta(path):
|
||||||
def get_model_config(path):
|
def get_model_config(path):
|
||||||
"""Get the model's config from a directory path.
|
"""Get the model's config from a directory path.
|
||||||
|
|
||||||
path (unicode or Path): Path to model directory.
|
path (str / Path): Path to model directory.
|
||||||
RETURNS (Config): The model's config data.
|
RETURNS (Config): The model's config data.
|
||||||
"""
|
"""
|
||||||
model_path = ensure_path(path)
|
model_path = ensure_path(path)
|
||||||
|
|
|
@ -336,7 +336,7 @@ cdef class Vocab:
|
||||||
If `minn` is defined, then the resulting vector uses Fasttext's
|
If `minn` is defined, then the resulting vector uses Fasttext's
|
||||||
subword features by average over ngrams of `orth`.
|
subword features by average over ngrams of `orth`.
|
||||||
|
|
||||||
orth (int / unicode): The hash value of a word, or its unicode string.
|
orth (int / str): The hash value of a word, or its string form.
|
||||||
minn (int): Minimum n-gram length used for Fasttext's ngram computation.
|
minn (int): Minimum n-gram length used for Fasttext's ngram computation.
|
||||||
Defaults to the length of `orth`.
|
Defaults to the length of `orth`.
|
||||||
maxn (int): Maximum n-gram length used for Fasttext's ngram computation.
|
maxn (int): Maximum n-gram length used for Fasttext's ngram computation.
|
||||||
|
@ -389,7 +389,7 @@ cdef class Vocab:
|
||||||
"""Set a vector for a word in the vocabulary. Words can be referenced
|
"""Set a vector for a word in the vocabulary. Words can be referenced
|
||||||
by string or int ID.
|
by string or int ID.
|
||||||
|
|
||||||
orth (int / unicode): The word.
|
orth (int / str): The word.
|
||||||
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
|
vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#set_vector
|
DOCS: https://spacy.io/api/vocab#set_vector
|
||||||
|
@ -411,7 +411,7 @@ cdef class Vocab:
|
||||||
"""Check whether a word has a vector. Returns False if no vectors have
|
"""Check whether a word has a vector. Returns False if no vectors have
|
||||||
been loaded. Words can be looked up by string or int ID.
|
been loaded. Words can be looked up by string or int ID.
|
||||||
|
|
||||||
orth (int / unicode): The word.
|
orth (int / str): The word.
|
||||||
RETURNS (bool): Whether the word has a vector.
|
RETURNS (bool): Whether the word has a vector.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/vocab#has_vector
|
DOCS: https://spacy.io/api/vocab#has_vector
|
||||||
|
@ -423,7 +423,7 @@ cdef class Vocab:
|
||||||
def to_disk(self, path, exclude=tuple(), **kwargs):
|
def to_disk(self, path, exclude=tuple(), **kwargs):
|
||||||
"""Save the current state to a directory.
|
"""Save the current state to a directory.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory, which will be created if
|
path (str / Path): A path to a directory, which will be created if
|
||||||
it doesn't exist.
|
it doesn't exist.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
|
|
||||||
|
@ -448,7 +448,7 @@ cdef class Vocab:
|
||||||
"""Loads state from a directory. Modifies the object in place and
|
"""Loads state from a directory. Modifies the object in place and
|
||||||
returns it.
|
returns it.
|
||||||
|
|
||||||
path (unicode or Path): A path to a directory.
|
path (str / Path): A path to a directory.
|
||||||
exclude (list): String names of serialization fields to exclude.
|
exclude (list): String names of serialization fields to exclude.
|
||||||
RETURNS (Vocab): The modified `Vocab` object.
|
RETURNS (Vocab): The modified `Vocab` object.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user