mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-05 21:53:05 +03:00
* Remove counts stuff from Language class
This commit is contained in:
parent
71ee921055
commit
02e948e7d5
|
@ -41,7 +41,6 @@ cdef class Lexicon:
|
||||||
cdef class Language:
|
cdef class Language:
|
||||||
cdef Pool _mem
|
cdef Pool _mem
|
||||||
cdef unicode name
|
cdef unicode name
|
||||||
cdef vector[size_t] counts
|
|
||||||
cdef PreshMap cache
|
cdef PreshMap cache
|
||||||
cdef PreshMap specials
|
cdef PreshMap specials
|
||||||
cpdef readonly Lexicon lexicon
|
cpdef readonly Lexicon lexicon
|
||||||
|
|
|
@ -46,19 +46,6 @@ cdef class Language:
|
||||||
self.suffix_re = re.compile(suffix)
|
self.suffix_re = re.compile(suffix)
|
||||||
self.lexicon = Lexicon(lexemes)
|
self.lexicon = Lexicon(lexemes)
|
||||||
self._load_special_tokenization(rules)
|
self._load_special_tokenization(rules)
|
||||||
self.counts = vector[size_t]()
|
|
||||||
|
|
||||||
property nr_types:
|
|
||||||
def __get__(self):
|
|
||||||
"""Return the number of lexical types in the vocabulary"""
|
|
||||||
return self.lexicon.size
|
|
||||||
|
|
||||||
property counts:
|
|
||||||
def __get__(self):
|
|
||||||
cdef size_t i
|
|
||||||
for i in range(self.lexicon.size):
|
|
||||||
count = self.counts[i] if i < self.counts.size() else 0
|
|
||||||
yield count, self.lexicon.lexemes[i].strings[<int>LexStr_orig].decode('utf8')
|
|
||||||
|
|
||||||
cpdef Lexeme lookup(self, unicode string):
|
cpdef Lexeme lookup(self, unicode string):
|
||||||
"""Retrieve (or create, if not found) a Lexeme for a string, and return it.
|
"""Retrieve (or create, if not found) a Lexeme for a string, and return it.
|
||||||
|
@ -105,12 +92,6 @@ cdef class Language:
|
||||||
if start < i:
|
if start < i:
|
||||||
string_from_slice(&span, chars, start, i)
|
string_from_slice(&span, chars, start, i)
|
||||||
self._tokenize(tokens.v, &span)
|
self._tokenize(tokens.v, &span)
|
||||||
cdef int id_
|
|
||||||
for i in range(tokens.v.size()):
|
|
||||||
id_ = tokens.id(i)
|
|
||||||
while id_ >= self.counts.size():
|
|
||||||
self.counts.push_back(0)
|
|
||||||
self.counts[id_] += 1
|
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
cdef int _tokenize(self, vector[LexemeC*] *tokens_v, String* string) except -1:
|
cdef int _tokenize(self, vector[LexemeC*] *tokens_v, String* string) except -1:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user