Fix build errors. Fix vocab.pyx.

This commit is contained in:
Raphael Mitsch 2023-07-04 08:51:27 +02:00
parent 9f62a49ebb
commit a9a66aa2f8
3 changed files with 14 additions and 12 deletions

View File

@@ -157,7 +157,7 @@ cdef void sum_state_features(
const int* token_ids,
int B,
int F,
intO
int O
) nogil:
cdef int idx, b, f
cdef const float* feature

View File

@@ -8,6 +8,7 @@ from thinc.api import Config, Model, SequenceCategoricalCrossentropy, set_dropou
from ..tokens.doc cimport Doc
from .. import util
from ..errors import Errors
from ..language import Language
from ..scorer import Scorer
from ..training import validate_examples, validate_get_examples

View File

@@ -1,6 +1,4 @@
# cython: profile=True
from libc.string cimport memcpy
import functools
import numpy
@@ -19,7 +17,6 @@ from .errors import Errors
from .lang.lex_attrs import LEX_ATTRS, get_lang, is_stop
from .lang.norm_exceptions import BASE_NORMS
from .lookups import Lookups
from .util import registry
from .vectors import Mode as VectorsMode
from .vectors import Vectors
@@ -51,9 +48,17 @@ cdef class Vocab:
DOCS: https://spacy.io/api/vocab
"""
def __init__(self, lex_attr_getters=None, strings=tuple(), lookups=None,
oov_prob=-20., vectors_name=None, writing_system={},
get_noun_chunks=None, **deprecated_kwargs):
def __init__(
self,
lex_attr_getters=None,
strings=tuple(),
lookups=None,
oov_prob=-20.,
vectors_name=None,
writing_system={}, # no-cython-lint
get_noun_chunks=None,
**deprecated_kwargs
):
"""Create the vocabulary.
lex_attr_getters (dict): A dictionary mapping attribute IDs to
@@ -150,7 +155,6 @@ cdef class Vocab:
cdef LexemeC* lex
cdef hash_t key = self.strings[string]
lex = <LexemeC*>self._by_orth.get(key)
cdef size_t addr
if lex != NULL:
assert lex.orth in self.strings
if lex.orth != key:
@@ -183,7 +187,7 @@ cdef class Vocab:
# of the doc ownership).
# TODO: Change the C API so that the mem isn't passed in here.
mem = self.mem
#if len(string) < 3 or self.length < 10000:
# if len(string) < 3 or self.length < 10000:
# mem = self.mem
cdef bint is_oov = mem is not self.mem
lex = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
@@ -463,7 +467,6 @@ cdef class Vocab:
self.lookups.get_table("lexeme_norm"),
)
def to_disk(self, path, *, exclude=tuple()):
"""Save the current state to a directory.
@@ -476,7 +479,6 @@ cdef class Vocab:
path = util.ensure_path(path)
if not path.exists():
path.mkdir()
setters = ["strings", "vectors"]
if "strings" not in exclude:
self.strings.to_disk(path / "strings.json")
if "vectors" not in exclude:
@@ -495,7 +497,6 @@ cdef class Vocab:
DOCS: https://spacy.io/api/vocab#to_disk
"""
path = util.ensure_path(path)
getters = ["strings", "vectors"]
if "strings" not in exclude:
self.strings.from_disk(path / "strings.json") # TODO: add exclude?
if "vectors" not in exclude: