mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	unicode -> str consistency [ci skip]
This commit is contained in:
		
							parent
							
								
									262d306eaa
								
							
						
					
					
						commit
						1a15896ba9
					
				| 
						 | 
					@ -65,7 +65,7 @@ def print_markdown(data, title=None):
 | 
				
			||||||
    """Print data in GitHub-flavoured Markdown format for issues etc.
 | 
					    """Print data in GitHub-flavoured Markdown format for issues etc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    data (dict or list of tuples): Label/value pairs.
 | 
					    data (dict or list of tuples): Label/value pairs.
 | 
				
			||||||
    title (unicode or None): Title, will be rendered as headline 2.
 | 
					    title (str / None): Title, will be rendered as headline 2.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    markdown = []
 | 
					    markdown = []
 | 
				
			||||||
    for key, value in data.items():
 | 
					    for key, value in data.items():
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -302,7 +302,7 @@ class EntityRenderer(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        text (str): Original text.
 | 
					        text (str): Original text.
 | 
				
			||||||
        spans (list): Individual entity spans and their start, end and label.
 | 
					        spans (list): Individual entity spans and their start, end and label.
 | 
				
			||||||
        title (unicode or None): Document title set in Doc.user_data['title'].
 | 
					        title (str / None): Document title set in Doc.user_data['title'].
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        markup = ""
 | 
					        markup = ""
 | 
				
			||||||
        offset = 0
 | 
					        offset = 0
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -140,8 +140,8 @@ class GoldCorpus(object):
 | 
				
			||||||
    def __init__(self, train, dev, gold_preproc=False, limit=None):
 | 
					    def __init__(self, train, dev, gold_preproc=False, limit=None):
 | 
				
			||||||
        """Create a GoldCorpus.
 | 
					        """Create a GoldCorpus.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        train (unicode or Path): File or directory of training data.
 | 
					        train (str / Path): File or directory of training data.
 | 
				
			||||||
        dev (unicode or Path): File or directory of development data.
 | 
					        dev (str / Path): File or directory of development data.
 | 
				
			||||||
        RETURNS (GoldCorpus): The newly created object.
 | 
					        RETURNS (GoldCorpus): The newly created object.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        self.limit = limit
 | 
					        self.limit = limit
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -934,7 +934,7 @@ class Language(object):
 | 
				
			||||||
        """Save the current state to a directory.  If a model is loaded, this
 | 
					        """Save the current state to a directory.  If a model is loaded, this
 | 
				
			||||||
        will include the model.
 | 
					        will include the model.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): Path to a directory, which will be created if
 | 
					        path (str / Path): Path to a directory, which will be created if
 | 
				
			||||||
            it doesn't exist.
 | 
					            it doesn't exist.
 | 
				
			||||||
        exclude (list): Names of components or serialization fields to exclude.
 | 
					        exclude (list): Names of components or serialization fields to exclude.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -968,7 +968,7 @@ class Language(object):
 | 
				
			||||||
        returns it. If the saved `Language` object contains a model, the
 | 
					        returns it. If the saved `Language` object contains a model, the
 | 
				
			||||||
        model will be loaded.
 | 
					        model will be loaded.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory.
 | 
					        path (str / Path): A path to a directory.
 | 
				
			||||||
        exclude (list): Names of components or serialization fields to exclude.
 | 
					        exclude (list): Names of components or serialization fields to exclude.
 | 
				
			||||||
        RETURNS (Language): The modified `Language` object.
 | 
					        RETURNS (Language): The modified `Language` object.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -194,7 +194,7 @@ cdef class DependencyMatcher:
 | 
				
			||||||
    def get(self, key, default=None):
 | 
					    def get(self, key, default=None):
 | 
				
			||||||
        """Retrieve the pattern stored for a key.
 | 
					        """Retrieve the pattern stored for a key.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        key (unicode or int): The key to retrieve.
 | 
					        key (str / int): The key to retrieve.
 | 
				
			||||||
        RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
 | 
					        RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        key = self._normalize_key(key)
 | 
					        key = self._normalize_key(key)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -165,7 +165,7 @@ cdef class Matcher:
 | 
				
			||||||
    def get(self, key, default=None):
 | 
					    def get(self, key, default=None):
 | 
				
			||||||
        """Retrieve the pattern stored for a key.
 | 
					        """Retrieve the pattern stored for a key.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        key (unicode or int): The key to retrieve.
 | 
					        key (str / int): The key to retrieve.
 | 
				
			||||||
        RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
 | 
					        RETURNS (tuple): The rule, as an (on_match, patterns) tuple.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        key = self._normalize_key(key)
 | 
					        key = self._normalize_key(key)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,7 +30,7 @@ cdef class PhraseMatcher:
 | 
				
			||||||
        """Initialize the PhraseMatcher.
 | 
					        """Initialize the PhraseMatcher.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        vocab (Vocab): The shared vocabulary.
 | 
					        vocab (Vocab): The shared vocabulary.
 | 
				
			||||||
        attr (int / unicode): Token attribute to match on.
 | 
					        attr (int / str): Token attribute to match on.
 | 
				
			||||||
        validate (bool): Perform additional validation when patterns are added.
 | 
					        validate (bool): Perform additional validation when patterns are added.
 | 
				
			||||||
        RETURNS (PhraseMatcher): The newly constructed object.
 | 
					        RETURNS (PhraseMatcher): The newly constructed object.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,7 +30,7 @@ class EntityRuler(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        nlp (Language): The shared nlp object to pass the vocab to the matchers
 | 
					        nlp (Language): The shared nlp object to pass the vocab to the matchers
 | 
				
			||||||
            and process phrase patterns.
 | 
					            and process phrase patterns.
 | 
				
			||||||
        phrase_matcher_attr (int / unicode): Token attribute to match on, passed
 | 
					        phrase_matcher_attr (int / str): Token attribute to match on, passed
 | 
				
			||||||
            to the internal PhraseMatcher as `attr`
 | 
					            to the internal PhraseMatcher as `attr`
 | 
				
			||||||
        validate (bool): Whether patterns should be validated, passed to
 | 
					        validate (bool): Whether patterns should be validated, passed to
 | 
				
			||||||
            Matcher and PhraseMatcher as `validate`
 | 
					            Matcher and PhraseMatcher as `validate`
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -109,7 +109,7 @@ cdef class StringStore:
 | 
				
			||||||
        """Retrieve a string from a given hash, or vice versa.
 | 
					        """Retrieve a string from a given hash, or vice versa.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        string_or_id (bytes, unicode or uint64): The value to encode.
 | 
					        string_or_id (bytes / str / uint64): The value to encode.
 | 
				
			||||||
        Returns (unicode or uint64): The value to be retrieved.
 | 
					        RETURNS (str / uint64): The value to be retrieved.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
 | 
					        if isinstance(string_or_id, basestring) and len(string_or_id) == 0:
 | 
				
			||||||
            return 0
 | 
					            return 0
 | 
				
			||||||
| 
						 | 
					@ -223,7 +223,7 @@ cdef class StringStore:
 | 
				
			||||||
    def to_disk(self, path):
 | 
					    def to_disk(self, path):
 | 
				
			||||||
        """Save the current state to a directory.
 | 
					        """Save the current state to a directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory, which will be created if
 | 
					        path (str / Path): A path to a directory, which will be created if
 | 
				
			||||||
            it doesn't exist. Paths may be either strings or Path-like objects.
 | 
					            it doesn't exist. Paths may be either strings or Path-like objects.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        path = util.ensure_path(path)
 | 
					        path = util.ensure_path(path)
 | 
				
			||||||
| 
						 | 
					@ -234,7 +234,7 @@ cdef class StringStore:
 | 
				
			||||||
        """Loads state from a directory. Modifies the object in place and
 | 
					        """Loads state from a directory. Modifies the object in place and
 | 
				
			||||||
        returns it.
 | 
					        returns it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory. Paths may be either
 | 
					        path (str / Path): A path to a directory. Paths may be either
 | 
				
			||||||
            strings or `Path`-like objects.
 | 
					            strings or `Path`-like objects.
 | 
				
			||||||
        RETURNS (StringStore): The modified `StringStore` object.
 | 
					        RETURNS (StringStore): The modified `StringStore` object.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -693,7 +693,7 @@ cdef class Tokenizer:
 | 
				
			||||||
    def to_disk(self, path, **kwargs):
 | 
					    def to_disk(self, path, **kwargs):
 | 
				
			||||||
        """Save the current state to a directory.
 | 
					        """Save the current state to a directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory, which will be created if
 | 
					        path (str / Path): A path to a directory, which will be created if
 | 
				
			||||||
            it doesn't exist.
 | 
					            it doesn't exist.
 | 
				
			||||||
        exclude (list): String names of serialization fields to exclude.
 | 
					        exclude (list): String names of serialization fields to exclude.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -707,7 +707,7 @@ cdef class Tokenizer:
 | 
				
			||||||
        """Loads state from a directory. Modifies the object in place and
 | 
					        """Loads state from a directory. Modifies the object in place and
 | 
				
			||||||
        returns it.
 | 
					        returns it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory.
 | 
					        path (str / Path): A path to a directory.
 | 
				
			||||||
        exclude (list): String names of serialization fields to exclude.
 | 
					        exclude (list): String names of serialization fields to exclude.
 | 
				
			||||||
        RETURNS (Tokenizer): The modified `Tokenizer` object.
 | 
					        RETURNS (Tokenizer): The modified `Tokenizer` object.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -843,7 +843,7 @@ cdef class Doc:
 | 
				
			||||||
    def to_disk(self, path, **kwargs):
 | 
					    def to_disk(self, path, **kwargs):
 | 
				
			||||||
        """Save the current state to a directory.
 | 
					        """Save the current state to a directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory, which will be created if
 | 
					        path (str / Path): A path to a directory, which will be created if
 | 
				
			||||||
            it doesn't exist. Paths may be either strings or Path-like objects.
 | 
					            it doesn't exist. Paths may be either strings or Path-like objects.
 | 
				
			||||||
        exclude (list): String names of serialization fields to exclude.
 | 
					        exclude (list): String names of serialization fields to exclude.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -857,7 +857,7 @@ cdef class Doc:
 | 
				
			||||||
        """Loads state from a directory. Modifies the object in place and
 | 
					        """Loads state from a directory. Modifies the object in place and
 | 
				
			||||||
        returns it.
 | 
					        returns it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory. Paths may be either
 | 
					        path (str / Path): A path to a directory. Paths may be either
 | 
				
			||||||
            strings or `Path`-like objects.
 | 
					            strings or `Path`-like objects.
 | 
				
			||||||
        exclude (list): String names of serialization fields to exclude.
 | 
					        exclude (list): String names of serialization fields to exclude.
 | 
				
			||||||
        RETURNS (Doc): The modified `Doc` object.
 | 
					        RETURNS (Doc): The modified `Doc` object.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -269,7 +269,7 @@ def load_config(path, create_objects=False):
 | 
				
			||||||
    """Load a Thinc-formatted config file, optionally filling in objects where
 | 
					    """Load a Thinc-formatted config file, optionally filling in objects where
 | 
				
			||||||
    the config references registry entries. See "Thinc config files" for details.
 | 
					    the config references registry entries. See "Thinc config files" for details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    path (unicode or Path): Path to the config file
 | 
					    path (str / Path): Path to the config file
 | 
				
			||||||
    create_objects (bool): Whether to automatically create objects when the config
 | 
					    create_objects (bool): Whether to automatically create objects when the config
 | 
				
			||||||
        references registry entries. Defaults to False.
 | 
					        references registry entries. Defaults to False.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -286,7 +286,7 @@ def load_config_from_str(string, create_objects=False):
 | 
				
			||||||
    """Load a Thinc-formatted config, optionally filling in objects where
 | 
					    """Load a Thinc-formatted config, optionally filling in objects where
 | 
				
			||||||
    the config references registry entries. See "Thinc config files" for details.
 | 
					    the config references registry entries. See "Thinc config files" for details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    string (unicode or Path): Text contents of the config file.
 | 
					    string (str): Text contents of the config file.
 | 
				
			||||||
    create_objects (bool): Whether to automatically create objects when the config
 | 
					    create_objects (bool): Whether to automatically create objects when the config
 | 
				
			||||||
        references registry entries. Defaults to False.
 | 
					        references registry entries. Defaults to False.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -302,7 +302,7 @@ def load_config_from_str(string, create_objects=False):
 | 
				
			||||||
def get_model_meta(path):
 | 
					def get_model_meta(path):
 | 
				
			||||||
    """Get model meta.json from a directory path and validate its contents.
 | 
					    """Get model meta.json from a directory path and validate its contents.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    path (unicode or Path): Path to model directory.
 | 
					    path (str / Path): Path to model directory.
 | 
				
			||||||
    RETURNS (dict): The model's meta data.
 | 
					    RETURNS (dict): The model's meta data.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    model_path = ensure_path(path)
 | 
					    model_path = ensure_path(path)
 | 
				
			||||||
| 
						 | 
					@ -321,7 +321,7 @@ def get_model_meta(path):
 | 
				
			||||||
def get_model_config(path):
 | 
					def get_model_config(path):
 | 
				
			||||||
    """Get the model's config from a directory path.
 | 
					    """Get the model's config from a directory path.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    path (unicode or Path): Path to model directory.
 | 
					    path (str / Path): Path to model directory.
 | 
				
			||||||
    RETURNS (Config): The model's config data.
 | 
					    RETURNS (Config): The model's config data.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    model_path = ensure_path(path)
 | 
					    model_path = ensure_path(path)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -336,7 +336,7 @@ cdef class Vocab:
 | 
				
			||||||
        If `minn` is defined, then the resulting vector uses Fasttext's
 | 
					        If `minn` is defined, then the resulting vector uses Fasttext's
 | 
				
			||||||
        subword features by average over ngrams of `orth`.
 | 
					        subword features by average over ngrams of `orth`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        orth (int / unicode): The hash value of a word, or its unicode string.
 | 
					        orth (int / str): The hash value of a word, or its unicode string.
 | 
				
			||||||
        minn (int): Minimum n-gram length used for Fasttext's ngram computation.
 | 
					        minn (int): Minimum n-gram length used for Fasttext's ngram computation.
 | 
				
			||||||
            Defaults to the length of `orth`.
 | 
					            Defaults to the length of `orth`.
 | 
				
			||||||
        maxn (int): Maximum n-gram length used for Fasttext's ngram computation.
 | 
					        maxn (int): Maximum n-gram length used for Fasttext's ngram computation.
 | 
				
			||||||
| 
						 | 
					@ -389,7 +389,7 @@ cdef class Vocab:
 | 
				
			||||||
        """Set a vector for a word in the vocabulary. Words can be referenced
 | 
					        """Set a vector for a word in the vocabulary. Words can be referenced
 | 
				
			||||||
        by string or int ID.
 | 
					        by string or int ID.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        orth (int / unicode): The word.
 | 
					        orth (int / str): The word.
 | 
				
			||||||
        vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
 | 
					        vector (numpy.ndarray[ndim=1, dtype='float32']): The vector to set.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        DOCS: https://spacy.io/api/vocab#set_vector
 | 
					        DOCS: https://spacy.io/api/vocab#set_vector
 | 
				
			||||||
| 
						 | 
					@ -411,7 +411,7 @@ cdef class Vocab:
 | 
				
			||||||
        """Check whether a word has a vector. Returns False if no vectors have
 | 
					        """Check whether a word has a vector. Returns False if no vectors have
 | 
				
			||||||
        been loaded. Words can be looked up by string or int ID.
 | 
					        been loaded. Words can be looked up by string or int ID.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        orth (int / unicode): The word.
 | 
					        orth (int / str): The word.
 | 
				
			||||||
        RETURNS (bool): Whether the word has a vector.
 | 
					        RETURNS (bool): Whether the word has a vector.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        DOCS: https://spacy.io/api/vocab#has_vector
 | 
					        DOCS: https://spacy.io/api/vocab#has_vector
 | 
				
			||||||
| 
						 | 
					@ -423,7 +423,7 @@ cdef class Vocab:
 | 
				
			||||||
    def to_disk(self, path, exclude=tuple(), **kwargs):
 | 
					    def to_disk(self, path, exclude=tuple(), **kwargs):
 | 
				
			||||||
        """Save the current state to a directory.
 | 
					        """Save the current state to a directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory, which will be created if
 | 
					        path (str / Path): A path to a directory, which will be created if
 | 
				
			||||||
            it doesn't exist.
 | 
					            it doesn't exist.
 | 
				
			||||||
        exclude (list): String names of serialization fields to exclude.
 | 
					        exclude (list): String names of serialization fields to exclude.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -448,7 +448,7 @@ cdef class Vocab:
 | 
				
			||||||
        """Loads state from a directory. Modifies the object in place and
 | 
					        """Loads state from a directory. Modifies the object in place and
 | 
				
			||||||
        returns it.
 | 
					        returns it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        path (unicode or Path): A path to a directory.
 | 
					        path (str / Path): A path to a directory.
 | 
				
			||||||
        exclude (list): String names of serialization fields to exclude.
 | 
					        exclude (list): String names of serialization fields to exclude.
 | 
				
			||||||
        RETURNS (Vocab): The modified `Vocab` object.
 | 
					        RETURNS (Vocab): The modified `Vocab` object.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user