mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 00:04:15 +03:00
Cleanup Cython structs (#11337)
* cleanup Tokenizer fields
* remove unused object from vocab
* remove IS_OOV_DEPRECATED
* add back in as FLAG13
* FLAG 18 instead
* import fix
* fix clumsy fingers
* revert symbol changes in favor of #11352
* bint instead of bool
This commit is contained in:
parent
d757dec5c4
commit
1a5be63715
|
@@ -23,11 +23,7 @@ cdef class Tokenizer:
|
|||
cdef object _infix_finditer
|
||||
cdef object _rules
|
||||
cdef PhraseMatcher _special_matcher
|
||||
# TODO convert to bool in v4
|
||||
cdef int _faster_heuristics
|
||||
# TODO next one is unused and should be removed in v4
|
||||
# https://github.com/explosion/spaCy/pull/9150
|
||||
cdef int _unused_int2
|
||||
cdef bint _faster_heuristics
|
||||
|
||||
cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
|
||||
cdef int _apply_special_cases(self, Doc doc) except -1
|
||||
|
|
|
@@ -8,7 +8,6 @@ from preshed.maps cimport PreshMap
|
|||
cimport cython
|
||||
|
||||
import re
|
||||
import warnings
|
||||
|
||||
from .tokens.doc cimport Doc
|
||||
from .strings cimport hash_string
|
||||
|
@@ -16,9 +15,9 @@ from .lexeme cimport EMPTY_LEXEME
|
|||
|
||||
from .attrs import intify_attrs
|
||||
from .symbols import ORTH, NORM
|
||||
from .errors import Errors, Warnings
|
||||
from .errors import Errors
|
||||
from . import util
|
||||
from .util import registry, get_words_and_spaces
|
||||
from .util import get_words_and_spaces
|
||||
from .attrs import intify_attrs
|
||||
from .symbols import ORTH
|
||||
from .scorer import Scorer
|
||||
|
@@ -128,10 +127,10 @@ cdef class Tokenizer:
|
|||
|
||||
property faster_heuristics:
|
||||
def __get__(self):
|
||||
return bool(self._faster_heuristics)
|
||||
return self._faster_heuristics
|
||||
|
||||
def __set__(self, faster_heuristics):
|
||||
self._faster_heuristics = bool(faster_heuristics)
|
||||
self._faster_heuristics = faster_heuristics
|
||||
self._reload_special_cases()
|
||||
|
||||
def __reduce__(self):
|
||||
|
|
|
@@ -32,7 +32,6 @@ cdef class Vocab:
|
|||
cdef public object writing_system
|
||||
cdef public object get_noun_chunks
|
||||
cdef readonly int length
|
||||
cdef public object _unused_object # TODO remove in v4, see #9150
|
||||
cdef public object lex_attr_getters
|
||||
cdef public object cfg
|
||||
|
||||
|
|
|
@@ -72,7 +72,6 @@ def unpickle_vocab(
|
|||
sstore: StringStore,
|
||||
vectors: Any,
|
||||
morphology: Any,
|
||||
_unused_object: Any,
|
||||
lex_attr_getters: Any,
|
||||
lookups: Any,
|
||||
get_noun_chunks: Any,
|
||||
|
|
|
@@ -558,21 +558,18 @@ def pickle_vocab(vocab):
|
|||
sstore = vocab.strings
|
||||
vectors = vocab.vectors
|
||||
morph = vocab.morphology
|
||||
_unused_object = vocab._unused_object
|
||||
lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
|
||||
lookups = vocab.lookups
|
||||
get_noun_chunks = vocab.get_noun_chunks
|
||||
return (unpickle_vocab,
|
||||
(sstore, vectors, morph, _unused_object, lex_attr_getters, lookups, get_noun_chunks))
|
||||
(sstore, vectors, morph, lex_attr_getters, lookups, get_noun_chunks))
|
||||
|
||||
|
||||
def unpickle_vocab(sstore, vectors, morphology, _unused_object,
|
||||
lex_attr_getters, lookups, get_noun_chunks):
|
||||
def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lookups, get_noun_chunks):
|
||||
cdef Vocab vocab = Vocab()
|
||||
vocab.vectors = vectors
|
||||
vocab.strings = sstore
|
||||
vocab.morphology = morphology
|
||||
vocab._unused_object = _unused_object
|
||||
vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
|
||||
vocab.lookups = lookups
|
||||
vocab.get_noun_chunks = get_noun_chunks
|
||||
|
|
Loading…
Reference in New Issue
Block a user