* Remove counts stuff from Language class

2025-11-22 10:45:45 +03:00 · 2014-10-10 19:25:01 +11:00 · 2014-10-10 19:25:01 +11:00 · 02e948e7d5
commit 02e948e7d5
parent 71ee921055
2 changed files with 0 additions and 20 deletions
--- a/spacy/lang.pxd
+++ b/spacy/lang.pxd
@@ -41,7 +41,6 @@ cdef class Lexicon:
 cdef class Language:
    cdef Pool _mem
    cdef unicode name
    cdef vector[size_t] counts
    cdef PreshMap cache
    cdef PreshMap specials
    cpdef readonly Lexicon lexicon
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -46,19 +46,6 @@ cdef class Language:
        self.suffix_re = re.compile(suffix)
        self.lexicon = Lexicon(lexemes)
        self._load_special_tokenization(rules)
        self.counts = vector[size_t]()
    property nr_types:
        def __get__(self):
            """Return the number of lexical types in the vocabulary"""
            return self.lexicon.size
    property counts:
        def __get__(self):
            cdef size_t i
            for i in range(self.lexicon.size):
                count = self.counts[i] if i < self.counts.size() else 0
                yield count, self.lexicon.lexemes[i].strings[<int>LexStr_orig].decode('utf8')
    cpdef Lexeme lookup(self, unicode string):
        """Retrieve (or create, if not found) a Lexeme for a string, and return it.
@@ -105,12 +92,6 @@ cdef class Language:
        if start < i:
            string_from_slice(&span, chars, start, i)
            self._tokenize(tokens.v, &span)
        cdef int id_
        for i in range(tokens.v.size()):
            id_ = tokens.id(i)
            while id_ >= self.counts.size():
                self.counts.push_back(0)
            self.counts[id_] += 1
        return tokens
    cdef int _tokenize(self, vector[LexemeC*] *tokens_v, String* string) except -1: