mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Cleanup Cython structs (#11337)
* cleanup Tokenizer fields
* remove unused object from vocab
* remove IS_OOV_DEPRECATED
* add back in as FLAG13
* FLAG 18 instead
* import fix
* fix clumsy fingers
* revert symbol changes in favor of #11352
* bint instead of bool
This commit is contained in:
parent
d757dec5c4
commit
1a5be63715
|
@ -23,11 +23,7 @@ cdef class Tokenizer:
|
||||||
cdef object _infix_finditer
|
cdef object _infix_finditer
|
||||||
cdef object _rules
|
cdef object _rules
|
||||||
cdef PhraseMatcher _special_matcher
|
cdef PhraseMatcher _special_matcher
|
||||||
# TODO convert to bool in v4
|
cdef bint _faster_heuristics
|
||||||
cdef int _faster_heuristics
|
|
||||||
# TODO next one is unused and should be removed in v4
|
|
||||||
# https://github.com/explosion/spaCy/pull/9150
|
|
||||||
cdef int _unused_int2
|
|
||||||
|
|
||||||
cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
|
cdef Doc _tokenize_affixes(self, str string, bint with_special_cases)
|
||||||
cdef int _apply_special_cases(self, Doc doc) except -1
|
cdef int _apply_special_cases(self, Doc doc) except -1
|
||||||
|
|
|
@ -8,7 +8,6 @@ from preshed.maps cimport PreshMap
|
||||||
cimport cython
|
cimport cython
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import warnings
|
|
||||||
|
|
||||||
from .tokens.doc cimport Doc
|
from .tokens.doc cimport Doc
|
||||||
from .strings cimport hash_string
|
from .strings cimport hash_string
|
||||||
|
@ -16,9 +15,9 @@ from .lexeme cimport EMPTY_LEXEME
|
||||||
|
|
||||||
from .attrs import intify_attrs
|
from .attrs import intify_attrs
|
||||||
from .symbols import ORTH, NORM
|
from .symbols import ORTH, NORM
|
||||||
from .errors import Errors, Warnings
|
from .errors import Errors
|
||||||
from . import util
|
from . import util
|
||||||
from .util import registry, get_words_and_spaces
|
from .util import get_words_and_spaces
|
||||||
from .attrs import intify_attrs
|
from .attrs import intify_attrs
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
from .scorer import Scorer
|
from .scorer import Scorer
|
||||||
|
@ -128,10 +127,10 @@ cdef class Tokenizer:
|
||||||
|
|
||||||
property faster_heuristics:
|
property faster_heuristics:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return bool(self._faster_heuristics)
|
return self._faster_heuristics
|
||||||
|
|
||||||
def __set__(self, faster_heuristics):
|
def __set__(self, faster_heuristics):
|
||||||
self._faster_heuristics = bool(faster_heuristics)
|
self._faster_heuristics = faster_heuristics
|
||||||
self._reload_special_cases()
|
self._reload_special_cases()
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
|
|
|
@ -32,7 +32,6 @@ cdef class Vocab:
|
||||||
cdef public object writing_system
|
cdef public object writing_system
|
||||||
cdef public object get_noun_chunks
|
cdef public object get_noun_chunks
|
||||||
cdef readonly int length
|
cdef readonly int length
|
||||||
cdef public object _unused_object # TODO remove in v4, see #9150
|
|
||||||
cdef public object lex_attr_getters
|
cdef public object lex_attr_getters
|
||||||
cdef public object cfg
|
cdef public object cfg
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,6 @@ def unpickle_vocab(
|
||||||
sstore: StringStore,
|
sstore: StringStore,
|
||||||
vectors: Any,
|
vectors: Any,
|
||||||
morphology: Any,
|
morphology: Any,
|
||||||
_unused_object: Any,
|
|
||||||
lex_attr_getters: Any,
|
lex_attr_getters: Any,
|
||||||
lookups: Any,
|
lookups: Any,
|
||||||
get_noun_chunks: Any,
|
get_noun_chunks: Any,
|
||||||
|
|
|
@ -558,21 +558,18 @@ def pickle_vocab(vocab):
|
||||||
sstore = vocab.strings
|
sstore = vocab.strings
|
||||||
vectors = vocab.vectors
|
vectors = vocab.vectors
|
||||||
morph = vocab.morphology
|
morph = vocab.morphology
|
||||||
_unused_object = vocab._unused_object
|
|
||||||
lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
|
lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
|
||||||
lookups = vocab.lookups
|
lookups = vocab.lookups
|
||||||
get_noun_chunks = vocab.get_noun_chunks
|
get_noun_chunks = vocab.get_noun_chunks
|
||||||
return (unpickle_vocab,
|
return (unpickle_vocab,
|
||||||
(sstore, vectors, morph, _unused_object, lex_attr_getters, lookups, get_noun_chunks))
|
(sstore, vectors, morph, lex_attr_getters, lookups, get_noun_chunks))
|
||||||
|
|
||||||
|
|
||||||
def unpickle_vocab(sstore, vectors, morphology, _unused_object,
|
def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lookups, get_noun_chunks):
|
||||||
lex_attr_getters, lookups, get_noun_chunks):
|
|
||||||
cdef Vocab vocab = Vocab()
|
cdef Vocab vocab = Vocab()
|
||||||
vocab.vectors = vectors
|
vocab.vectors = vectors
|
||||||
vocab.strings = sstore
|
vocab.strings = sstore
|
||||||
vocab.morphology = morphology
|
vocab.morphology = morphology
|
||||||
vocab._unused_object = _unused_object
|
|
||||||
vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
|
vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
|
||||||
vocab.lookups = lookups
|
vocab.lookups = lookups
|
||||||
vocab.get_noun_chunks = get_noun_chunks
|
vocab.get_noun_chunks = get_noun_chunks
|
||||||
|
|
Loading…
Reference in New Issue
Block a user