Cleanup Cython structs (#11337)

* cleanup Tokenizer fields

* remove unused object from vocab

* remove IS_OOV_DEPRECATED

* add back in as FLAG13

* FLAG18 instead

* import fix

* fix clumsy fingers

* revert symbol changes in favor of #11352

* bint instead of bool
This commit is contained in:
Sofie Van Landeghem 2022-08-22 15:52:24 +02:00 committed by GitHub
parent d757dec5c4
commit 1a5be63715
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 7 additions and 17 deletions

View File

@ -23,11 +23,7 @@ cdef class Tokenizer:
cdef object _infix_finditer
cdef object _rules
cdef PhraseMatcher _special_matcher
# TODO convert to bool in v4
cdef int _faster_heuristics
# TODO next one is unused and should be removed in v4
# https://github.com/explosion/spaCy/pull/9150
cdef int _unused_int2
cdef bint _faster_heuristics
cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
cdef int _apply_special_cases(self, Doc doc) except -1

View File

@ -8,7 +8,6 @@ from preshed.maps cimport PreshMap
cimport cython
import re
import warnings
from .tokens.doc cimport Doc
from .strings cimport hash_string
@ -16,9 +15,9 @@ from .lexeme cimport EMPTY_LEXEME
from .attrs import intify_attrs
from .symbols import ORTH, NORM
from .errors import Errors, Warnings
from .errors import Errors
from . import util
from .util import registry, get_words_and_spaces
from .util import get_words_and_spaces
from .attrs import intify_attrs
from .symbols import ORTH
from .scorer import Scorer
@ -128,10 +127,10 @@ cdef class Tokenizer:
property faster_heuristics:
def __get__(self):
return bool(self._faster_heuristics)
return self._faster_heuristics
def __set__(self, faster_heuristics):
self._faster_heuristics = bool(faster_heuristics)
self._faster_heuristics = faster_heuristics
self._reload_special_cases()
def __reduce__(self):

View File

@ -32,7 +32,6 @@ cdef class Vocab:
cdef public object writing_system
cdef public object get_noun_chunks
cdef readonly int length
cdef public object _unused_object # TODO remove in v4, see #9150
cdef public object lex_attr_getters
cdef public object cfg

View File

@ -72,7 +72,6 @@ def unpickle_vocab(
sstore: StringStore,
vectors: Any,
morphology: Any,
_unused_object: Any,
lex_attr_getters: Any,
lookups: Any,
get_noun_chunks: Any,

View File

@ -558,21 +558,18 @@ def pickle_vocab(vocab):
sstore = vocab.strings
vectors = vocab.vectors
morph = vocab.morphology
_unused_object = vocab._unused_object
lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
lookups = vocab.lookups
get_noun_chunks = vocab.get_noun_chunks
return (unpickle_vocab,
(sstore, vectors, morph, _unused_object, lex_attr_getters, lookups, get_noun_chunks))
(sstore, vectors, morph, lex_attr_getters, lookups, get_noun_chunks))
def unpickle_vocab(sstore, vectors, morphology, _unused_object,
lex_attr_getters, lookups, get_noun_chunks):
def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lookups, get_noun_chunks):
cdef Vocab vocab = Vocab()
vocab.vectors = vectors
vocab.strings = sstore
vocab.morphology = morphology
vocab._unused_object = _unused_object
vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
vocab.lookups = lookups
vocab.get_noun_chunks = get_noun_chunks