mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 05:07:03 +03:00
cython fixes and cleanup
This commit is contained in:
parent
846472129c
commit
96f2e30c4b
|
@ -158,7 +158,6 @@ cdef class PhraseMatcher:
|
||||||
del self._callbacks[key]
|
del self._callbacks[key]
|
||||||
del self._docs[key]
|
del self._docs[key]
|
||||||
|
|
||||||
|
|
||||||
def _add_from_arrays(self, key, specs, *, on_match=None):
|
def _add_from_arrays(self, key, specs, *, on_match=None):
|
||||||
"""Add a preprocessed list of specs, with an optional callback.
|
"""Add a preprocessed list of specs, with an optional callback.
|
||||||
|
|
||||||
|
@ -194,7 +193,6 @@ cdef class PhraseMatcher:
|
||||||
result = internal_node
|
result = internal_node
|
||||||
map_set(self.mem, <MapStruct*>result, self.vocab.strings[key], NULL)
|
map_set(self.mem, <MapStruct*>result, self.vocab.strings[key], NULL)
|
||||||
|
|
||||||
|
|
||||||
def add(self, key, docs, *, on_match=None):
|
def add(self, key, docs, *, on_match=None):
|
||||||
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
|
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
|
||||||
key, a list of one or more patterns, and (optionally) an on_match callback.
|
key, a list of one or more patterns, and (optionally) an on_match callback.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# cython: infer_types=True, cdivision=True, boundscheck=False
|
# cython: infer_types=True, cdivision=True, boundscheck=False
|
||||||
from typing import Any, List, Optional, Tuple, TypeVar, cast
|
from typing import Any, List, Optional, Tuple, cast
|
||||||
|
|
||||||
from libc.stdlib cimport calloc, free, realloc
|
from libc.stdlib cimport calloc, free, realloc
|
||||||
from libc.string cimport memcpy, memset
|
from libc.string cimport memcpy, memset
|
||||||
|
@ -23,7 +23,7 @@ from thinc.api import (
|
||||||
|
|
||||||
from thinc.backends.cblas cimport CBlas, saxpy, sgemm
|
from thinc.backends.cblas cimport CBlas, saxpy, sgemm
|
||||||
|
|
||||||
from thinc.types import Floats1d, Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
|
from thinc.types import Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
|
||||||
|
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
from ..pipeline._parser_internals import _beam_utils
|
from ..pipeline._parser_internals import _beam_utils
|
||||||
|
@ -136,7 +136,7 @@ def init(
|
||||||
Y: Optional[Tuple[List[State], List[Floats2d]]] = None,
|
Y: Optional[Tuple[List[State], List[Floats2d]]] = None,
|
||||||
):
|
):
|
||||||
if X is not None:
|
if X is not None:
|
||||||
docs, moves = X
|
docs, _ = X
|
||||||
model.get_ref("tok2vec").initialize(X=docs)
|
model.get_ref("tok2vec").initialize(X=docs)
|
||||||
else:
|
else:
|
||||||
model.get_ref("tok2vec").initialize()
|
model.get_ref("tok2vec").initialize()
|
||||||
|
@ -145,7 +145,7 @@ def init(
|
||||||
current_nO = model.maybe_get_dim("nO")
|
current_nO = model.maybe_get_dim("nO")
|
||||||
if current_nO is None or current_nO != inferred_nO:
|
if current_nO is None or current_nO != inferred_nO:
|
||||||
model.attrs["resize_output"](model, inferred_nO)
|
model.attrs["resize_output"](model, inferred_nO)
|
||||||
nO = model.get_dim("nO")
|
# nO = model.get_dim("nO")
|
||||||
nP = model.get_dim("nP")
|
nP = model.get_dim("nP")
|
||||||
nH = model.get_dim("nH")
|
nH = model.get_dim("nH")
|
||||||
nI = model.get_dim("nI")
|
nI = model.get_dim("nI")
|
||||||
|
@ -194,7 +194,8 @@ class TransitionModelInputs:
|
||||||
moves: TransitionSystem,
|
moves: TransitionSystem,
|
||||||
actions: Optional[List[Ints1d]] = None,
|
actions: Optional[List[Ints1d]] = None,
|
||||||
max_moves: int = 0,
|
max_moves: int = 0,
|
||||||
states: Optional[List[State]]=None):
|
states: Optional[List[State]] = None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
actions (Optional[List[Ints1d]]): actions to apply for each Doc.
|
actions (Optional[List[Ints1d]]): actions to apply for each Doc.
|
||||||
docs (List[Doc]): Docs to predict transition sequences for.
|
docs (List[Doc]): Docs to predict transition sequences for.
|
||||||
|
@ -257,9 +258,10 @@ def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[State
|
||||||
|
|
||||||
return (states, scores), backprop
|
return (states, scores), backprop
|
||||||
|
|
||||||
|
|
||||||
cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
|
cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
|
||||||
WeightsC weights, SizesC sizes, actions: Optional[List[Ints1d]]=None):
|
WeightsC weights, SizesC sizes, actions: Optional[List[Ints1d]]=None):
|
||||||
cdef int i, j
|
cdef int i
|
||||||
cdef vector[StateC *] unfinished
|
cdef vector[StateC *] unfinished
|
||||||
cdef ActivationsC activations = _alloc_activations(sizes)
|
cdef ActivationsC activations = _alloc_activations(sizes)
|
||||||
cdef np.ndarray step_scores
|
cdef np.ndarray step_scores
|
||||||
|
@ -371,7 +373,7 @@ def _forward_fallback(
|
||||||
for clas in set(model.attrs["unseen_classes"]):
|
for clas in set(model.attrs["unseen_classes"]):
|
||||||
if (d_scores[:, clas] < 0).any():
|
if (d_scores[:, clas] < 0).any():
|
||||||
model.attrs["unseen_classes"].remove(clas)
|
model.attrs["unseen_classes"].remove(clas)
|
||||||
d_scores *= seen_mask == False
|
d_scores *= seen_mask == False # no-cython-lint
|
||||||
# Calculate the gradients for the parameters of the output layer.
|
# Calculate the gradients for the parameters of the output layer.
|
||||||
# The weight gemm is (nS, nO) @ (nS, nH).T
|
# The weight gemm is (nS, nO) @ (nS, nH).T
|
||||||
output.inc_grad("b", d_scores.sum(axis=0))
|
output.inc_grad("b", d_scores.sum(axis=0))
|
||||||
|
@ -617,12 +619,12 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
|
||||||
scores[i*n.classes+j] = min_
|
scores[i*n.classes+j] = min_
|
||||||
|
|
||||||
|
|
||||||
cdef void _sum_state_features(CBlas cblas, float* output,
|
cdef void _sum_state_features(CBlas cblas, float* output, const float* cached,
|
||||||
const float* cached, const int* token_ids, SizesC n) nogil:
|
const int* token_ids, SizesC n) nogil:
|
||||||
cdef int idx, b, f, i
|
cdef int idx, b, f
|
||||||
cdef const float* feature
|
cdef const float* feature
|
||||||
cdef int B = n.states
|
cdef int B = n.states
|
||||||
cdef int O = n.hiddens * n.pieces
|
cdef int O = n.hiddens * n.pieces # no-cython-lint
|
||||||
cdef int F = n.feats
|
cdef int F = n.feats
|
||||||
cdef int T = n.tokens
|
cdef int T = n.tokens
|
||||||
padding = cached + (T * F * O)
|
padding = cached + (T * F * O)
|
||||||
|
@ -637,4 +639,3 @@ cdef void _sum_state_features(CBlas cblas, float* output,
|
||||||
feature = &cached[idx]
|
feature = &cached[idx]
|
||||||
saxpy(cblas)(O, one, <const float*>feature, 1, &output[b*O], 1)
|
saxpy(cblas)(O, one, <const float*>feature, 1, &output[b*O], 1)
|
||||||
token_ids += F
|
token_ids += F
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,6 @@ cdef class Morphology:
|
||||||
out.sort(key=lambda x: x[0])
|
out.sort(key=lambda x: x[0])
|
||||||
return dict(out)
|
return dict(out)
|
||||||
|
|
||||||
|
|
||||||
def _normalized_feat_dict_to_str(self, feats: Dict[str, str]) -> str:
|
def _normalized_feat_dict_to_str(self, feats: Dict[str, str]) -> str:
|
||||||
norm_feats_string = self.FEATURE_SEP.join([
|
norm_feats_string = self.FEATURE_SEP.join([
|
||||||
self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
|
self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
|
||||||
|
@ -88,7 +87,6 @@ cdef class Morphology:
|
||||||
])
|
])
|
||||||
return norm_feats_string or self.EMPTY_MORPH
|
return norm_feats_string or self.EMPTY_MORPH
|
||||||
|
|
||||||
|
|
||||||
cdef hash_t _add(self, features):
|
cdef hash_t _add(self, features):
|
||||||
"""Insert a morphological analysis in the morphology table, if not
|
"""Insert a morphological analysis in the morphology table, if not
|
||||||
already present. The morphological analysis may be provided in the UD
|
already present. The morphological analysis may be provided in the UD
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from libc.stdint cimport int32_t
|
|
||||||
from libcpp.memory cimport shared_ptr
|
from libcpp.memory cimport shared_ptr
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,6 @@ cdef class Beam:
|
||||||
void* extra_args) except -1
|
void* extra_args) except -1
|
||||||
cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1
|
cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1
|
||||||
|
|
||||||
|
|
||||||
cdef inline void set_cell(self, int i, int j, weight_t score, int is_valid, weight_t cost) nogil:
|
cdef inline void set_cell(self, int i, int j, weight_t score, int is_valid, weight_t cost) nogil:
|
||||||
self.scores[i][j] = score
|
self.scores[i][j] = score
|
||||||
self.is_valid[i][j] = is_valid
|
self.is_valid[i][j] = is_valid
|
||||||
|
|
|
@ -1,11 +1,8 @@
|
||||||
# cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
|
# cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
|
||||||
cimport cython
|
cimport cython
|
||||||
from libc.math cimport exp, log
|
|
||||||
from libc.string cimport memcpy, memset
|
|
||||||
|
|
||||||
import math
|
|
||||||
|
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
from libc.math cimport exp
|
||||||
|
from libc.string cimport memcpy, memset
|
||||||
from preshed.maps cimport PreshMap
|
from preshed.maps cimport PreshMap
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,7 +67,7 @@ cdef class Beam:
|
||||||
self.costs[i][j] = costs[j]
|
self.costs[i][j] = costs[j]
|
||||||
|
|
||||||
cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
|
cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
|
||||||
cdef int i, j
|
cdef int i
|
||||||
for i in range(self.width):
|
for i in range(self.width):
|
||||||
memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
|
memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
|
||||||
memcpy(self.is_valid[i], is_valid[i], sizeof(bint) * self.nr_class)
|
memcpy(self.is_valid[i], is_valid[i], sizeof(bint) * self.nr_class)
|
||||||
|
@ -176,7 +173,6 @@ cdef class Beam:
|
||||||
beam-width, and n is the number of classes.
|
beam-width, and n is the number of classes.
|
||||||
"""
|
"""
|
||||||
cdef Entry entry
|
cdef Entry entry
|
||||||
cdef weight_t score
|
|
||||||
cdef _State* s
|
cdef _State* s
|
||||||
cdef int i, j, move_id
|
cdef int i, j, move_id
|
||||||
assert self.size >= 1
|
assert self.size >= 1
|
||||||
|
|
|
@ -319,4 +319,3 @@ cdef void c_transition_batch(TransitionSystem moves, StateC** states, const floa
|
||||||
action.do(states[i], action.label)
|
action.do(states[i], action.label)
|
||||||
states[i].history.push_back(guess)
|
states[i].history.push_back(guess)
|
||||||
free(is_valid)
|
free(is_valid)
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, profile=True, binding=True
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from typing import Callable, Dict, Iterable, List, Optional, Union
|
from typing import Callable, Dict, Iterable, Optional, Union
|
||||||
|
|
||||||
import srsly
|
|
||||||
from thinc.api import Config, Model
|
from thinc.api import Config, Model
|
||||||
from thinc.legacy import LegacySequenceCategoricalCrossentropy
|
from thinc.legacy import LegacySequenceCategoricalCrossentropy
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, profile=True, binding=True
|
||||||
import warnings
|
|
||||||
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
|
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
|
|
||||||
from ..tokens.doc cimport Doc
|
from ..tokens.doc cimport Doc
|
||||||
|
|
||||||
from ..errors import Errors, Warnings
|
from ..errors import Errors
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
from ..training import Example
|
from ..training import Example
|
||||||
from ..util import raise_error
|
from ..util import raise_error
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, profile=True, binding=True
|
||||||
import warnings
|
|
||||||
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
|
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
|
@ -8,7 +7,7 @@ from thinc.api import Model, Optimizer, set_dropout_rate
|
||||||
from ..tokens.doc cimport Doc
|
from ..tokens.doc cimport Doc
|
||||||
|
|
||||||
from .. import util
|
from .. import util
|
||||||
from ..errors import Errors, Warnings
|
from ..errors import Errors
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
from ..training import Example, validate_distillation_examples, validate_examples
|
from ..training import Example, validate_distillation_examples, validate_examples
|
||||||
from ..vocab import Vocab
|
from ..vocab import Vocab
|
||||||
|
@ -56,14 +55,14 @@ cdef class TrainablePipe(Pipe):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_handler(self.name, self, [doc], e)
|
error_handler(self.name, self, [doc], e)
|
||||||
|
|
||||||
|
|
||||||
def distill(self,
|
def distill(self,
|
||||||
teacher_pipe: Optional["TrainablePipe"],
|
teacher_pipe: Optional["TrainablePipe"],
|
||||||
examples: Iterable["Example"],
|
examples: Iterable["Example"],
|
||||||
*,
|
*,
|
||||||
drop: float = 0.0,
|
drop: float = 0.0,
|
||||||
sgd: Optional[Optimizer] = None,
|
sgd: Optional[Optimizer] = None,
|
||||||
losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
|
losses: Optional[Dict[str, float]] = None
|
||||||
|
) -> Dict[str, float]:
|
||||||
"""Train a pipe (the student) on the predictions of another pipe
|
"""Train a pipe (the student) on the predictions of another pipe
|
||||||
(the teacher). The student is typically trained on the probability
|
(the teacher). The student is typically trained on the probability
|
||||||
distribution of the teacher, but details may differ per pipe.
|
distribution of the teacher, but details may differ per pipe.
|
||||||
|
|
|
@ -227,7 +227,8 @@ class Parser(TrainablePipe):
|
||||||
*,
|
*,
|
||||||
drop: float = 0.0,
|
drop: float = 0.0,
|
||||||
sgd: Optional[Optimizer] = None,
|
sgd: Optional[Optimizer] = None,
|
||||||
losses: Optional[Dict[str, float]]=None):
|
losses: Optional[Dict[str, float]] = None
|
||||||
|
):
|
||||||
"""Train a pipe (the student) on the predictions of another pipe
|
"""Train a pipe (the student) on the predictions of another pipe
|
||||||
(the teacher). The student is trained on the transition probabilities
|
(the teacher). The student is trained on the transition probabilities
|
||||||
of the teacher.
|
of the teacher.
|
||||||
|
@ -277,7 +278,9 @@ class Parser(TrainablePipe):
|
||||||
# teacher's distributions.
|
# teacher's distributions.
|
||||||
|
|
||||||
student_inputs = TransitionModelInputs(docs=student_docs,
|
student_inputs = TransitionModelInputs(docs=student_docs,
|
||||||
states=[state.copy() for state in states], moves=self.moves, max_moves=max_moves)
|
states=[state.copy() for state in states],
|
||||||
|
moves=self.moves,
|
||||||
|
max_moves=max_moves)
|
||||||
(student_states, student_scores), backprop_scores = self.model.begin_update(student_inputs)
|
(student_states, student_scores), backprop_scores = self.model.begin_update(student_inputs)
|
||||||
actions = _states_diff_to_actions(states, student_states)
|
actions = _states_diff_to_actions(states, student_states)
|
||||||
teacher_inputs = TransitionModelInputs(docs=[eg.reference for eg in examples],
|
teacher_inputs = TransitionModelInputs(docs=[eg.reference for eg in examples],
|
||||||
|
@ -294,7 +297,6 @@ class Parser(TrainablePipe):
|
||||||
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
|
|
||||||
def get_teacher_student_loss(
|
def get_teacher_student_loss(
|
||||||
self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
|
self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
|
||||||
normalize: bool = False,
|
normalize: bool = False,
|
||||||
|
@ -436,8 +438,10 @@ class Parser(TrainablePipe):
|
||||||
else:
|
else:
|
||||||
init_states, gold_states, _ = self.moves.init_gold_batch(examples)
|
init_states, gold_states, _ = self.moves.init_gold_batch(examples)
|
||||||
|
|
||||||
inputs = TransitionModelInputs(docs=docs, moves=self.moves,
|
inputs = TransitionModelInputs(docs=docs,
|
||||||
max_moves=max_moves, states=[state.copy() for state in init_states])
|
moves=self.moves,
|
||||||
|
max_moves=max_moves,
|
||||||
|
states=[state.copy() for state in init_states])
|
||||||
(pred_states, scores), backprop_scores = self.model.begin_update(inputs)
|
(pred_states, scores), backprop_scores = self.model.begin_update(inputs)
|
||||||
if sum(s.shape[0] for s in scores) == 0:
|
if sum(s.shape[0] for s in scores) == 0:
|
||||||
return losses
|
return losses
|
||||||
|
@ -483,9 +487,7 @@ class Parser(TrainablePipe):
|
||||||
cdef TransitionSystem moves = self.moves
|
cdef TransitionSystem moves = self.moves
|
||||||
cdef StateClass state
|
cdef StateClass state
|
||||||
cdef int clas
|
cdef int clas
|
||||||
cdef int nF = self.model.get_dim("nF")
|
|
||||||
cdef int nO = moves.n_moves
|
cdef int nO = moves.n_moves
|
||||||
cdef int nS = sum([len(history) for history in histories])
|
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
cdef np.ndarray costs_i
|
cdef np.ndarray costs_i
|
||||||
is_valid = <int*>mem.alloc(nO, sizeof(int))
|
is_valid = <int*>mem.alloc(nO, sizeof(int))
|
||||||
|
@ -552,8 +554,8 @@ class Parser(TrainablePipe):
|
||||||
|
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def update_beam(self, examples, *, beam_width,
|
def update_beam(self, examples, *, beam_width, drop=0.,
|
||||||
drop=0., sgd=None, losses=None, beam_density=0.0):
|
sgd=None, losses=None, beam_density=0.0):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def set_output(self, nO):
|
def set_output(self, nO):
|
||||||
|
@ -678,7 +680,8 @@ class Parser(TrainablePipe):
|
||||||
return states
|
return states
|
||||||
|
|
||||||
# Parse the states that are too long with the teacher's parsing model.
|
# Parse the states that are too long with the teacher's parsing model.
|
||||||
teacher_inputs = TransitionModelInputs(docs=docs, moves=moves,
|
teacher_inputs = TransitionModelInputs(docs=docs,
|
||||||
|
moves=moves,
|
||||||
states=[state.copy() for state in to_cut])
|
states=[state.copy() for state in to_cut])
|
||||||
(teacher_states, _) = teacher_pipe.model.predict(teacher_inputs)
|
(teacher_states, _) = teacher_pipe.model.predict(teacher_inputs)
|
||||||
|
|
||||||
|
@ -778,6 +781,7 @@ def _states_to_actions(states: List[StateClass]) -> List[Ints1d]:
|
||||||
|
|
||||||
return actions
|
return actions
|
||||||
|
|
||||||
|
|
||||||
def _states_diff_to_actions(
|
def _states_diff_to_actions(
|
||||||
before_states: List[StateClass],
|
before_states: List[StateClass],
|
||||||
after_states: List[StateClass]
|
after_states: List[StateClass]
|
||||||
|
@ -798,7 +802,8 @@ def _states_diff_to_actions(
|
||||||
c_state_before = before_state.c
|
c_state_before = before_state.c
|
||||||
c_state_after = after_state.c
|
c_state_after = after_state.c
|
||||||
|
|
||||||
assert equal(c_state_before.history.begin(), c_state_before.history.end(),
|
assert equal(c_state_before.history.begin(),
|
||||||
|
c_state_before.history.end(),
|
||||||
c_state_after.history.begin())
|
c_state_after.history.begin())
|
||||||
|
|
||||||
actions = []
|
actions = []
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple, Union
|
from typing import Iterable, Iterator, List, Optional, Tuple, Union
|
||||||
|
|
||||||
cimport cython
|
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t
|
||||||
from libc.string cimport memcpy
|
from libc.string cimport memcpy
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
|
@ -243,7 +242,6 @@ cdef class StringStore:
|
||||||
cdef int n_length_bytes
|
cdef int n_length_bytes
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef Utf8Str* string = <Utf8Str*>self.mem.alloc(1, sizeof(Utf8Str))
|
cdef Utf8Str* string = <Utf8Str*>self.mem.alloc(1, sizeof(Utf8Str))
|
||||||
cdef uint32_t ulength = length
|
|
||||||
if length < sizeof(string.s):
|
if length < sizeof(string.s):
|
||||||
string.s[0] = <unsigned char>length
|
string.s[0] = <unsigned char>length
|
||||||
memcpy(&string.s[1], chars, length)
|
memcpy(&string.s[1], chars, length)
|
||||||
|
@ -301,7 +299,7 @@ cpdef hash_t get_string_id(object string_or_hash) except -1:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return hash_string(string_or_hash)
|
return hash_string(string_or_hash)
|
||||||
except:
|
except: # no-cython-lint
|
||||||
if _try_coerce_to_hash(string_or_hash, &str_hash):
|
if _try_coerce_to_hash(string_or_hash, &str_hash):
|
||||||
# Coerce the integral key to the expected primitive hash type.
|
# Coerce the integral key to the expected primitive hash type.
|
||||||
# This ensures that custom/overloaded "primitive" data types
|
# This ensures that custom/overloaded "primitive" data types
|
||||||
|
@ -318,6 +316,5 @@ cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
|
||||||
try:
|
try:
|
||||||
out_hash[0] = key
|
out_hash[0] = key
|
||||||
return True
|
return True
|
||||||
except:
|
except: # no-cython-lint
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
||||||
from spacy.pipeline._parser_internals.search cimport Beam, MaxViolation
|
from spacy.pipeline._parser_internals.search cimport Beam, MaxViolation
|
||||||
from spacy.typedefs cimport class_t, weight_t
|
from spacy.typedefs cimport class_t
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
@ -42,6 +42,7 @@ cdef int destroy(Pool mem, void* state, void* extra_args) except -1:
|
||||||
state = <TestState*>state
|
state = <TestState*>state
|
||||||
mem.free(state)
|
mem.free(state)
|
||||||
|
|
||||||
|
|
||||||
@cytest
|
@cytest
|
||||||
@pytest.mark.parametrize("nr_class,beam_width",
|
@pytest.mark.parametrize("nr_class,beam_width",
|
||||||
[
|
[
|
||||||
|
@ -56,10 +57,12 @@ def test_init(nr_class, beam_width):
|
||||||
assert b.width == beam_width
|
assert b.width == beam_width
|
||||||
assert b.nr_class == nr_class
|
assert b.nr_class == nr_class
|
||||||
|
|
||||||
|
|
||||||
@cytest
|
@cytest
|
||||||
def test_init_violn():
|
def test_init_violn():
|
||||||
MaxViolation()
|
MaxViolation()
|
||||||
|
|
||||||
|
|
||||||
@cytest
|
@cytest
|
||||||
@pytest.mark.parametrize("nr_class,beam_width,length",
|
@pytest.mark.parametrize("nr_class,beam_width,length",
|
||||||
[
|
[
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from libc.string cimport memset
|
|
||||||
|
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
from ..morphology import Morphology
|
from ..morphology import Morphology
|
||||||
|
|
|
@ -225,8 +225,8 @@ cdef class Span:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _(self):
|
def _(self):
|
||||||
cdef SpanC* span_c = self.span_c()
|
|
||||||
"""Custom extension attributes registered via `set_extension`."""
|
"""Custom extension attributes registered via `set_extension`."""
|
||||||
|
cdef SpanC* span_c = self.span_c()
|
||||||
return Underscore(Underscore.span_extensions, self,
|
return Underscore(Underscore.span_extensions, self,
|
||||||
start=span_c.start_char, end=span_c.end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
|
start=span_c.start_char, end=span_c.end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
|
||||||
|
|
||||||
|
@ -933,7 +933,6 @@ cdef class Span:
|
||||||
self.id_ = ent_id_
|
self.id_ = ent_id_
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
|
||||||
# Don't allow spaces to be the root, if there are
|
# Don't allow spaces to be the root, if there are
|
||||||
# better candidates
|
# better candidates
|
||||||
|
|
Loading…
Reference in New Issue
Block a user