mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Move Span class to own file
This commit is contained in:
		
							parent
							
								
									f02c39dfaf
								
							
						
					
					
						commit
						6f47a667cf
					
				| 
						 | 
				
			
			@ -10,6 +10,7 @@ from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLU
 | 
			
		|||
from .typedefs cimport POS, LEMMA
 | 
			
		||||
from .parts_of_speech import UNIV_POS_NAMES
 | 
			
		||||
from .lexeme cimport check_flag
 | 
			
		||||
from .spans import Span
 | 
			
		||||
 | 
			
		||||
from unidecode import unidecode
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -132,7 +133,7 @@ cdef class Tokens:
 | 
			
		|||
            cdef int i
 | 
			
		||||
            cdef const TokenC* token
 | 
			
		||||
            cdef int start = -1
 | 
			
		||||
            cdef object label = None
 | 
			
		||||
            cdef int label = 0
 | 
			
		||||
            for i in range(self.length):
 | 
			
		||||
                token = &self.data[i]
 | 
			
		||||
                if token.ent_iob == 1:
 | 
			
		||||
| 
						 | 
				
			
			@ -140,14 +141,14 @@ cdef class Tokens:
 | 
			
		|||
                    pass
 | 
			
		||||
                elif token.ent_iob == 2:
 | 
			
		||||
                    if start != -1:
 | 
			
		||||
                        yield (start, i, label)
 | 
			
		||||
                         yield Span(self, start, i, label=label)
 | 
			
		||||
                    start = -1
 | 
			
		||||
                    label = None
 | 
			
		||||
                    label = 0
 | 
			
		||||
                elif token.ent_iob == 3:
 | 
			
		||||
                    start = i
 | 
			
		||||
                    label = self.vocab.strings[token.ent_type]
 | 
			
		||||
                    label = token.ent_type
 | 
			
		||||
            if start != -1:
 | 
			
		||||
                yield (start, self.length, label)
 | 
			
		||||
                yield Span(self, start, self.length, label=label)
 | 
			
		||||
 | 
			
		||||
    cdef int push_back(self, int idx, LexemeOrToken lex_or_tok) except -1:
 | 
			
		||||
        if self.length == self.max_length:
 | 
			
		||||
| 
						 | 
				
			
			@ -253,35 +254,6 @@ cdef class Tokens:
 | 
			
		|||
            self.data[i] = parsed[i]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class Span:
 | 
			
		||||
    """A slice from a Tokens object."""
 | 
			
		||||
    def __cinit__(self, Tokens tokens, int start, int end):
 | 
			
		||||
        self._seq = tokens
 | 
			
		||||
        self.start = start
 | 
			
		||||
        self.end = end
 | 
			
		||||
 | 
			
		||||
    def __richcmp__(self, Span other, int op):
 | 
			
		||||
        # Eq
 | 
			
		||||
        if op in (1, 2, 5):
 | 
			
		||||
            if self._seq is other._seq and \
 | 
			
		||||
               self.start == other.start and \
 | 
			
		||||
               self.end == other.end:
 | 
			
		||||
                return True
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        if self.end < self.start:
 | 
			
		||||
            return 0
 | 
			
		||||
        return self.end - self.start
 | 
			
		||||
 | 
			
		||||
    def __getitem__(self, int i):
 | 
			
		||||
        return self._seq[self.start + i]
 | 
			
		||||
 | 
			
		||||
    def __iter__(self):
 | 
			
		||||
        for i in range(self.start, self.end):
 | 
			
		||||
            yield self._seq[i]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cdef class Token:
 | 
			
		||||
    """An individual token --- i.e. a word, a punctuation symbol, etc.  Created
 | 
			
		||||
    via Tokens.__getitem__ and Tokens.__iter__.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user