mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	initial proposal for separate vector package
This commit is contained in:
		
							parent
							
								
									7adbd7a785
								
							
						
					
					
						commit
						931c07a609
					
				| 
						 | 
				
			
			@ -2,6 +2,7 @@ from . import util
 | 
			
		|||
from .en import English
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def load(name, via=None):
 | 
			
		||||
def load(name, via=None, vectors_name=None):
 | 
			
		||||
    package = util.get_package_by_name(name, via=via)
 | 
			
		||||
    return English(package=package)
 | 
			
		||||
    vectors_package = util.get_package_by_name(vectors_name, via=via)
 | 
			
		||||
    return English(package=package, vectors_package=vectors_package)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -153,7 +153,7 @@ class Language(object):
 | 
			
		|||
        return {0: {'PER': True, 'LOC': True, 'ORG': True, 'MISC': True}}
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def default_vocab(cls, package, get_lex_attr=None):
 | 
			
		||||
    def default_vocab(cls, package, get_lex_attr=None, vectors_package=None):
 | 
			
		||||
        if get_lex_attr is None:
 | 
			
		||||
            if package.has_file('vocab', 'oov_prob'):
 | 
			
		||||
                with package.open(('vocab', 'oov_prob')) as file_:
 | 
			
		||||
| 
						 | 
				
			
			@ -162,7 +162,8 @@ class Language(object):
 | 
			
		|||
            else:
 | 
			
		||||
                get_lex_attr = cls.default_lex_attrs()
 | 
			
		||||
        if hasattr(package, 'dir_path'):
 | 
			
		||||
            return Vocab.from_package(package, get_lex_attr=get_lex_attr)
 | 
			
		||||
            return Vocab.from_package(package, get_lex_attr=get_lex_attr,
 | 
			
		||||
                vectors_package=vectors_package)
 | 
			
		||||
        else:
 | 
			
		||||
            return Vocab.load(package, get_lex_attr)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -198,7 +199,8 @@ class Language(object):
 | 
			
		|||
        matcher=None,
 | 
			
		||||
        serializer=None,
 | 
			
		||||
        load_vectors=True,
 | 
			
		||||
        package=None):
 | 
			
		||||
        package=None,
 | 
			
		||||
        vectors_package=None):
 | 
			
		||||
        """
 | 
			
		||||
           a model can be specified:
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -228,7 +230,7 @@ class Language(object):
 | 
			
		|||
            warn("load_vectors is deprecated", DeprecationWarning)
 | 
			
		||||
 | 
			
		||||
        if vocab in (None, True):
 | 
			
		||||
            vocab = self.default_vocab(package)
 | 
			
		||||
            vocab = self.default_vocab(package, vectors_package=vectors_package)
 | 
			
		||||
        self.vocab = vocab
 | 
			
		||||
        if tokenizer in (None, True):
 | 
			
		||||
            tokenizer = Tokenizer.from_package(package, self.vocab)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -52,7 +52,7 @@ cdef class Vocab:
 | 
			
		|||
        return cls.from_package(get_package(data_dir), get_lex_attr=get_lex_attr)
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def from_package(cls, package, get_lex_attr=None):
 | 
			
		||||
    def from_package(cls, package, get_lex_attr=None, vectors_package=None):
 | 
			
		||||
        tag_map = package.load_json(('vocab', 'tag_map.json'), default={})
 | 
			
		||||
 | 
			
		||||
        lemmatizer = Lemmatizer.from_package(package)
 | 
			
		||||
| 
						 | 
				
			
			@ -66,7 +66,10 @@ cdef class Vocab:
 | 
			
		|||
            self.strings.load(file_)
 | 
			
		||||
        self.load_lexemes(package.file_path('vocab', 'lexemes.bin'))
 | 
			
		||||
 | 
			
		||||
        if package.has_file('vocab', 'vec.bin'):
 | 
			
		||||
        if vectors_package and vectors_package.has_file('vocab', 'vec.bin'):
 | 
			
		||||
            self.vectors_length = self.load_vectors_from_bin_loc(
 | 
			
		||||
                vectors_package.file_path('vocab', 'vec.bin'))
 | 
			
		||||
        elif package.has_file('vocab', 'vec.bin'):
 | 
			
		||||
            self.vectors_length = self.load_vectors_from_bin_loc(
 | 
			
		||||
                package.file_path('vocab', 'vec.bin'))
 | 
			
		||||
        return self
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user