mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Remove UniStr struct
This commit is contained in:
		
							parent
							
								
									2fc66e3723
								
							
						
					
					
						commit
						815bda201d
					
				| 
						 | 
					@ -7,9 +7,6 @@ from libc.math cimport exp as c_exp
 | 
				
			||||||
from libcpp.queue cimport priority_queue
 | 
					from libcpp.queue cimport priority_queue
 | 
				
			||||||
from libcpp.pair cimport pair
 | 
					from libcpp.pair cimport pair
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..structs cimport UniStr
 | 
					 | 
				
			||||||
from ..strings cimport slice_unicode
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from cymem.cymem cimport Address, Pool
 | 
					from cymem.cymem cimport Address, Pool
 | 
				
			||||||
from preshed.maps cimport PreshMap
 | 
					from preshed.maps cimport PreshMap
 | 
				
			||||||
from preshed.counter cimport PreshCounter
 | 
					from preshed.counter cimport PreshCounter
 | 
				
			||||||
| 
						 | 
					@ -175,14 +172,13 @@ cdef class Packer:
 | 
				
			||||||
        cdef Doc tokens = Doc(self.vocab)
 | 
					        cdef Doc tokens = Doc(self.vocab)
 | 
				
			||||||
        cdef int start = 0
 | 
					        cdef int start = 0
 | 
				
			||||||
        cdef bint is_spacy
 | 
					        cdef bint is_spacy
 | 
				
			||||||
        cdef UniStr span
 | 
					 | 
				
			||||||
        cdef int length = len(string)
 | 
					        cdef int length = len(string)
 | 
				
			||||||
        cdef int i = 0
 | 
					        cdef int i = 0
 | 
				
			||||||
        cdef bint is_end_token
 | 
					        cdef bint is_end_token
 | 
				
			||||||
        for is_end_token in bits:
 | 
					        for is_end_token in bits:
 | 
				
			||||||
            if is_end_token:
 | 
					            if is_end_token:
 | 
				
			||||||
                slice_unicode(&span, string, start, i+1)
 | 
					                span = string[start:i+1]
 | 
				
			||||||
                lex = self.vocab.get(tokens.mem, &span)
 | 
					                lex = self.vocab.get(tokens.mem, span)
 | 
				
			||||||
                is_spacy = (i+1) < length and string[i+1] == u' '
 | 
					                is_spacy = (i+1) < length and string[i+1] == u' '
 | 
				
			||||||
                tokens.push_back(lex, is_spacy)
 | 
					                tokens.push_back(lex, is_spacy)
 | 
				
			||||||
                start = i + 1 + is_spacy
 | 
					                start = i + 1 + is_spacy
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -78,9 +78,3 @@ cdef struct TokenC:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int ent_iob
 | 
					    int ent_iob
 | 
				
			||||||
    int ent_type
 | 
					    int ent_type
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef struct UniStr:
 | 
					 | 
				
			||||||
    Py_UNICODE* chars
 | 
					 | 
				
			||||||
    size_t n
 | 
					 | 
				
			||||||
    hash_t key
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user