Mirror of https://github.com/explosion/spaCy.git
Remove POS cache for now

commit b066102d2d
parent ff252dd535
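This change backs out the POS cache for now: it removes the load_pos_cache loader and the bigram-keyed cache lookup from English's tagging loop in spacy/en.pyx, and drops the _pos_cache PreshMap field and its initialisation from the Language base class.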
							
								
								
									
 spacy/en.pyx | 22 ----------------------

spacy/en.pyx
@@ -139,20 +139,6 @@ cdef class English(Language):
         name (unicode): The two letter code used by Wikipedia for the language.
         lexicon (Lexicon): The lexicon. Exposes the lookup method.
     """
-    def load_pos_cache(self, loc):
-        cdef int i = 0
-        cdef hash_t key
-        cdef int pos
-        with open(loc) as file_:
-            for line in file_:
-                pieces = line.split()
-                if i >= 500000:
-                    break
-                i += 1
-                key = int(pieces[1])
-                pos = int(pieces[2])
-                self._pos_cache.set(key, <void*>pos)
-
     def get_props(self, unicode string):
         return {'flags': self.set_flags(string), 'dense': orth.word_shape(string)}
 
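For reference, a minimal pure-Python sketch of what the removed loader did, with a plain dict standing in for the PreshMap and the same whitespace-separated file layout assumed (column 1 ignored, column 2 a hash key, column 3 an integer POS id); the function and parameter names here are illustrative stand-ins, not spaCy API:

    # Sketch only: dict in place of PreshMap; file layout taken from the
    # removed Cython code above (ignore column 1, key in column 2, POS id
    # in column 3), capped at the same 500,000 entries.
    def load_pos_cache(loc, limit=500000):
        cache = {}
        with open(loc) as file_:
            for i, line in enumerate(file_):
                if i >= limit:
                    break
                pieces = line.split()
                cache[int(pieces[1])] = int(pieces[2])
        return cache
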
@@ -181,14 +167,6 @@ cdef class English(Language):
         assert self.morphologizer is not None
         cdef dict tagdict = self.pos_tagger.tagdict
         for i in range(tokens.length):
-            if USE_POS_CACHE:
-                bigram[0] = tokens.data[i].lex.sic
-                bigram[1] = tokens.data[i-1].lex.sic
-                cache_key = hash64(bigram, sizeof(id_t) * 2, 0)
-                cached = self._pos_cache.get(cache_key)
-            if cached != NULL:
-                t[i].pos = <int><size_t>cached
-            else:
             fill_pos_context(context, i, t)
             t[i].pos = self.pos_tagger.predict(context)
             self.morphologizer.set_morph(i, t)
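The lookup being removed here keyed the cache on a hash of the current and previous token ids (a bigram) and only fell back to the statistical tagger on a miss. A hedged pure-Python sketch of that idea, using hash() of a tuple in place of hash64 and a dict in place of the PreshMap; tag_with_cache, token_ids and predict are illustrative stand-ins, not spaCy's actual API:

    # Sketch only: consult a bigram-keyed cache before running the POS model.
    def tag_with_cache(token_ids, pos_cache, predict):
        tags = []
        for i, tok in enumerate(token_ids):
            prev_tok = token_ids[i - 1] if i > 0 else 0   # 0 as a start-of-doc sentinel
            key = hash((tok, prev_tok))                   # stands in for hash64(bigram, ...)
            cached = pos_cache.get(key)
            if cached is not None:
                tags.append(cached)          # cache hit: reuse the stored tag
            else:
                tags.append(predict(i))      # cache miss: run the tagger
        return tags
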
@@ -46,7 +46,6 @@ cdef class Language:
     cpdef readonly Morphologizer morphologizer
     cpdef readonly GreedyParser parser
 
-    cdef PreshMap _pos_cache
     cdef object _prefix_re
     cdef object _suffix_re
     cdef object _infix_re

@@ -34,7 +34,6 @@ cdef class Language:
         self.mem = Pool()
         self._cache = PreshMap(2 ** 25)
         self._specials = PreshMap(2 ** 16)
-        self._pos_cache = PreshMap(2 ** 16)
         rules, prefix, suffix, infix = util.read_lang_data(name)
         self._prefix_re = re.compile(prefix)
         self._suffix_re = re.compile(suffix)
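
Note: PreshMap comes from the preshed package and maps 64-bit hash keys to void* values; its constructor argument appears to be the initial number of slots, so the removed _pos_cache was preallocated with 2 ** 16 = 65,536 entries, against the 2 ** 25 (33,554,432) slots kept for the tokenizer cache.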