Rename 'tokens' variable to 'doc' in tokenizer

Matthew Honnibal 2017-10-17 18:21:41 +02:00
parent 839de87ca9
commit f45973848c


@@ -79,9 +79,9 @@ cdef class Tokenizer:
                 "String is too long: %d characters. Max is 2**30." % len(string)
             )
         cdef int length = len(string)
-        cdef Doc tokens = Doc(self.vocab)
+        cdef Doc doc = Doc(self.vocab)
         if length == 0:
-            return tokens
+            return doc
         cdef int i = 0
         cdef int start = 0
         cdef bint cache_hit
@@ -100,11 +100,11 @@ cdef class Tokenizer:
                 # we don't have to create the slice when we hit the cache.
                 span = string[start:i]
                 key = hash_string(span)
-                cache_hit = self._try_cache(key, tokens)
+                cache_hit = self._try_cache(key, doc)
                 if not cache_hit:
-                    self._tokenize(tokens, span, key)
+                    self._tokenize(doc, span, key)
                 if uc == ' ':
-                    tokens.c[tokens.length - 1].spacy = True
+                    doc.c[doc.length - 1].spacy = True
                     start = i + 1
                 else:
                     start = i
@@ -113,11 +113,11 @@ cdef class Tokenizer:
         if start < i:
             span = string[start:]
             key = hash_string(span)
-            cache_hit = self._try_cache(key, tokens)
+            cache_hit = self._try_cache(key, doc)
             if not cache_hit:
-                self._tokenize(tokens, span, key)
-            tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
-        return tokens
+                self._tokenize(doc, span, key)
+            doc.c[doc.length - 1].spacy = string[-1] == ' ' and not in_ws
+        return doc
 
     def pipe(self, texts, batch_size=1000, n_threads=2):
         """Tokenize a stream of texts.