diff --git a/spacy/strings.pxd b/spacy/strings.pxd index 77b2c1619..bd5e0f135 100644 --- a/spacy/strings.pxd +++ b/spacy/strings.pxd @@ -26,9 +26,7 @@ cdef class StringStore: cdef public PreshMap _map cdef const Utf8Str* intern_unicode(self, str py_string, bint allow_transient) - cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash, bint allow_transient) - + cdef const Utf8Str* _intern_utf8(self, char* utf8_string, int length, hash_t* precalculated_hash, bint allow_transient) cdef vector[hash_t] _transient_keys cdef PreshMap _transient_map cdef Pool _non_temp_mem - diff --git a/spacy/strings.pyx b/spacy/strings.pyx index df8755308..5e0bd90c6 100644 --- a/spacy/strings.pyx +++ b/spacy/strings.pyx @@ -3,7 +3,7 @@ cimport cython from contextlib import contextmanager -from typing import Iterable, Iterator, List, Optional, Tuple, Union +from typing import Iterator, List, Optional from libc.stdint cimport uint32_t from libc.string cimport memcpy @@ -35,7 +35,7 @@ def get_string_id(key): This function optimises for convenience over performance, so shouldn't be used in tight loops. """ - cdef hash_t str_hash + cdef hash_t str_hash if isinstance(key, str): if len(key) == 0: return 0 @@ -49,8 +49,8 @@ def get_string_id(key): elif _try_coerce_to_hash(key, &str_hash): # Coerce the integral key to the expected primitive hash type. # This ensures that custom/overloaded "primitive" data types - # such as those implemented by numpy are not inadvertently used - # downsteam (as these are internally implemented as custom PyObjects + # such as those implemented by numpy are not inadvertently used + # downstream (as these are internally implemented as custom PyObjects # whose comparison operators can incur a significant overhead). 
return str_hash else: @@ -196,7 +196,7 @@ cdef class StringStore: return self._keys.size() + self._transient_keys.size() @contextmanager - def memory_zone(self, mem: Optional[Pool]=None) -> Pool: + def memory_zone(self, mem: Optional[Pool] = None) -> Pool: """Begin a block where all resources allocated during the block will be freed at the end of it. If a resources was created within the memory zone block, accessing it outside the block is invalid. diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 6e0f6d917..aa6728586 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -401,7 +401,7 @@ cdef class Tokenizer: with_special_cases) if len(self._cache) < self.max_cache_size: self._save_cached(&tokens.c[orig_size], orig_key, has_special, - tokens.length - orig_size) + tokens.length - orig_size) cdef str _split_affixes( self,