Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-25 17:36:30 +03:00)

cython fixes and cleanup

This commit is contained in:
commit 96f2e30c4b
parent 846472129c
@@ -158,7 +158,6 @@ cdef class PhraseMatcher:
         del self._callbacks[key]
         del self._docs[key]
 
-
     def _add_from_arrays(self, key, specs, *, on_match=None):
         """Add a preprocessed list of specs, with an optional callback.
 
@@ -194,7 +193,6 @@ cdef class PhraseMatcher:
         result = internal_node
         map_set(self.mem, <MapStruct*>result, self.vocab.strings[key], NULL)
 
-
     def add(self, key, docs, *, on_match=None):
         """Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
         key, a list of one or more patterns, and (optionally) an on_match callback.
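For context outside this commit: a minimal sketch of how the PhraseMatcher API described in the docstring above is used (the key, pattern, and callback names are illustrative):

    import spacy
    from spacy.matcher import PhraseMatcher

    nlp = spacy.blank("en")
    matcher = PhraseMatcher(nlp.vocab)

    def on_match(matcher, doc, i, matches):
        # matches[i] is a (match_id, start, end) triple.
        match_id, start, end = matches[i]
        print("Matched:", doc[start:end].text)

    # An ID key, a list of patterns (pre-tokenized Docs), and an optional callback.
    matcher.add("OBAMA", [nlp("Barack Obama")], on_match=on_match)
    matches = matcher(nlp("Barack Obama visited the city"))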
@@ -1,5 +1,5 @@
 # cython: infer_types=True, cdivision=True, boundscheck=False
-from typing import Any, List, Optional, Tuple, TypeVar, cast
+from typing import Any, List, Optional, Tuple, cast
 
 from libc.stdlib cimport calloc, free, realloc
 from libc.string cimport memcpy, memset

@@ -23,7 +23,7 @@ from thinc.api import (
 
 from thinc.backends.cblas cimport CBlas, saxpy, sgemm
 
-from thinc.types import Floats1d, Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
+from thinc.types import Floats2d, Floats3d, Floats4d, Ints1d, Ints2d
 
 from ..errors import Errors
 from ..pipeline._parser_internals import _beam_utils

@@ -136,7 +136,7 @@ def init(
     Y: Optional[Tuple[List[State], List[Floats2d]]] = None,
 ):
     if X is not None:
-        docs, moves = X
+        docs, _ = X
         model.get_ref("tok2vec").initialize(X=docs)
     else:
         model.get_ref("tok2vec").initialize()

@@ -145,7 +145,7 @@ def init(
     current_nO = model.maybe_get_dim("nO")
     if current_nO is None or current_nO != inferred_nO:
         model.attrs["resize_output"](model, inferred_nO)
-    nO = model.get_dim("nO")
+    # nO = model.get_dim("nO")
     nP = model.get_dim("nP")
     nH = model.get_dim("nH")
     nI = model.get_dim("nI")

@@ -192,9 +192,10 @@ class TransitionModelInputs:
         self,
         docs: List[Doc],
         moves: TransitionSystem,
-        actions: Optional[List[Ints1d]]=None,
-        max_moves: int=0,
-        states: Optional[List[State]]=None):
+        actions: Optional[List[Ints1d]] = None,
+        max_moves: int = 0,
+        states: Optional[List[State]] = None,
+    ):
         """
         actions (Optional[List[Ints1d]]): actions to apply for each Doc.
         docs (List[Doc]): Docs to predict transition sequences for.
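Many of the signature changes in this commit only adjust spacing around "=" in defaults. PEP 8 asks for spaces when the parameter carries a type annotation and none when it does not; a small illustration with hypothetical function names:

    from typing import List, Optional

    def annotated(actions: Optional[List[int]] = None, max_moves: int = 0) -> int:
        # Annotated defaults take spaces around "=" (pycodestyle E252 otherwise).
        return max_moves

    def bare(max_moves=0):
        # Unannotated keyword defaults stay unspaced (E251 if spaces are added).
        return max_moves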
@@ -234,12 +235,12 @@ def forward(model, inputs: TransitionModelInputs, is_train: bool):
         return _forward_greedy_cpu(model, moves, states, feats, seen_mask, actions=actions)
     else:
         return _forward_fallback(model, moves, states, tokvecs, backprop_tok2vec,
-            feats, backprop_feats, seen_mask, is_train, actions=actions,
-            max_moves=inputs.max_moves)
+                                 feats, backprop_feats, seen_mask, is_train, actions=actions,
+                                 max_moves=inputs.max_moves)
 
 
 def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[StateClass], np.ndarray feats,
-                        np.ndarray[np.npy_bool, ndim=1] seen_mask, actions: Optional[List[Ints1d]]=None):
+                        np.ndarray[np.npy_bool, ndim = 1] seen_mask, actions: Optional[List[Ints1d]] = None):
     cdef vector[StateC*] c_states
     cdef StateClass state
     for state in states:

@@ -257,9 +258,10 @@ def _forward_greedy_cpu(model: Model, TransitionSystem moves, states: List[StateClass], np.ndarray feats,
 
     return (states, scores), backprop
 
+
 cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
                        WeightsC weights, SizesC sizes, actions: Optional[List[Ints1d]]=None):
-    cdef int i, j
+    cdef int i
     cdef vector[StateC *] unfinished
     cdef ActivationsC activations = _alloc_activations(sizes)
     cdef np.ndarray step_scores

@@ -276,7 +278,7 @@ cdef list _parse_batch(CBlas cblas, TransitionSystem moves, StateC** states,
         if actions is None:
             # Validate actions, argmax, take action.
             c_transition_batch(moves, states, <const float*>step_scores.data, sizes.classes,
-                sizes.states)
+                               sizes.states)
         else:
             c_apply_actions(moves, states, <const int*>step_actions.data, sizes.states)
         for i in range(sizes.states):

@@ -302,8 +304,8 @@ def _forward_fallback(
     backprop_feats,
     seen_mask,
     is_train: bool,
-    actions: Optional[List[Ints1d]]=None,
-    max_moves: int=0):
+    actions: Optional[List[Ints1d]] = None,
+    max_moves: int = 0):
     nF = model.get_dim("nF")
     output = model.get_ref("output")
     hidden_b = model.get_param("hidden_b")

@@ -371,7 +373,7 @@ def _forward_fallback(
     for clas in set(model.attrs["unseen_classes"]):
         if (d_scores[:, clas] < 0).any():
             model.attrs["unseen_classes"].remove(clas)
-    d_scores *= seen_mask == False
+    d_scores *= seen_mask == False  # no-cython-lint
     # Calculate the gradients for the parameters of the output layer.
     # The weight gemm is (nS, nO) @ (nS, nH).T
     output.inc_grad("b", d_scores.sum(axis=0))
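A note on the `# no-cython-lint` marker added above: `seen_mask == False` looks like the lint error E712 (comparison to False), but on a NumPy array it is an elementwise comparison, not an identity test, so the expression is intentional and only the linter is silenced. A standalone sketch of the same idiom:

    import numpy as np

    d_scores = np.array([[0.5, -0.2], [0.1, 0.3]], dtype="f")
    seen_mask = np.array([True, False])

    # Elementwise: keeps only the columns where the mask is False.
    masked = d_scores * (seen_mask == False)  # noqa: E712
    assert np.array_equal(masked, d_scores * ~seen_mask)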
@@ -571,13 +573,13 @@ cdef void _resize_activations(ActivationsC* A, SizesC n) nogil:
         A._max_size = n.states
     else:
         A.token_ids = <int*>realloc(A.token_ids,
-            n.states * n.feats * sizeof(A.token_ids[0]))
+                                    n.states * n.feats * sizeof(A.token_ids[0]))
         A.unmaxed = <float*>realloc(A.unmaxed,
-            n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
+                                    n.states * n.hiddens * n.pieces * sizeof(A.unmaxed[0]))
         A.hiddens = <float*>realloc(A.hiddens,
-            n.states * n.hiddens * sizeof(A.hiddens[0]))
+                                    n.states * n.hiddens * sizeof(A.hiddens[0]))
         A.is_valid = <int*>realloc(A.is_valid,
-            n.states * n.classes * sizeof(A.is_valid[0]))
+                                   n.states * n.classes * sizeof(A.is_valid[0]))
         A._max_size = n.states
     A._curr_size = n.states
 

@@ -599,9 +601,9 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
     else:
         # Compute hidden-to-output
         sgemm(cblas)(False, True, n.states, n.classes, n.hiddens,
-            1.0, <const float *>A.hiddens, n.hiddens,
-            <const float *>W.hidden_weights, n.hiddens,
-            0.0, scores, n.classes)
+                     1.0, <const float *>A.hiddens, n.hiddens,
+                     <const float *>W.hidden_weights, n.hiddens,
+                     0.0, scores, n.classes)
         # Add bias
         for i in range(n.states):
             saxpy(cblas)(n.classes, 1., W.hidden_bias, 1, &scores[i*n.classes], 1)

@@ -617,12 +619,12 @@ cdef void _predict_states(CBlas cblas, ActivationsC* A, float* scores, StateC**
             scores[i*n.classes+j] = min_
 
 
-cdef void _sum_state_features(CBlas cblas, float* output,
-                              const float* cached, const int* token_ids, SizesC n) nogil:
-    cdef int idx, b, f, i
+cdef void _sum_state_features(CBlas cblas, float* output, const float* cached,
+                              const int* token_ids, SizesC n) nogil:
+    cdef int idx, b, f
     cdef const float* feature
     cdef int B = n.states
-    cdef int O = n.hiddens * n.pieces
+    cdef int O = n.hiddens * n.pieces  # no-cython-lint
     cdef int F = n.feats
     cdef int T = n.tokens
     padding = cached + (T * F * O)

@@ -637,4 +639,3 @@ cdef void _sum_state_features(CBlas cblas, float* output, const float* cached,
             feature = &cached[idx]
         saxpy(cblas)(O, one, <const float*>feature, 1, &output[b*O], 1)
         token_ids += F
-
@@ -80,15 +80,13 @@ cdef class Morphology:
         out.sort(key=lambda x: x[0])
         return dict(out)
 
-
     def _normalized_feat_dict_to_str(self, feats: Dict[str, str]) -> str:
         norm_feats_string = self.FEATURE_SEP.join([
-                self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
+            self.FIELD_SEP.join([field, self.VALUE_SEP.join(values) if isinstance(values, list) else values])
             for field, values in feats.items()
-            ])
+        ])
         return norm_feats_string or self.EMPTY_MORPH
 
-
     cdef hash_t _add(self, features):
         """Insert a morphological analysis in the morphology table, if not
         already present. The morphological analysis may be provided in the UD
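The `_normalized_feat_dict_to_str` change above is indentation-only; for readers unfamiliar with the format, the method builds a Universal Dependencies FEATS string. A plain-Python sketch, assuming the conventional separators ("|" between features, "=" between field and value, "," between values):

    FEATURE_SEP = "|"
    FIELD_SEP = "="
    VALUE_SEP = ","

    def feat_dict_to_str(feats):
        # {"Case": "Nom", "PronType": ["Dem", "Prs"]} -> "Case=Nom|PronType=Dem,Prs"
        return FEATURE_SEP.join(
            FIELD_SEP.join([field, VALUE_SEP.join(values) if isinstance(values, list) else values])
            for field, values in feats.items()
        )

    assert feat_dict_to_str({"Case": "Nom", "PronType": ["Dem", "Prs"]}) == "Case=Nom|PronType=Dem,Prs"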
@@ -8,7 +8,7 @@ cpdef enum univ_pos_t:
     ADV = symbols.ADV
     AUX = symbols.AUX
     CONJ = symbols.CONJ
-    CCONJ = symbols.CCONJ # U20
+    CCONJ = symbols.CCONJ  # U20
     DET = symbols.DET
     INTJ = symbols.INTJ
     NOUN = symbols.NOUN
@@ -1,5 +1,4 @@
-from cymem.cymem cimport Pool
 from libc.stdint cimport int32_t
 from libcpp.memory cimport shared_ptr
 from libcpp.vector cimport vector
 

@@ -57,7 +57,6 @@ cdef class Beam:
     cdef int advance(self, trans_func_t transition_func, hash_func_t hash_func,
                      void* extra_args) except -1
     cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1
-
 
     cdef inline void set_cell(self, int i, int j, weight_t score, int is_valid, weight_t cost) nogil:
         self.scores[i][j] = score
@@ -1,11 +1,8 @@
 # cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
-cimport cython
-from libc.math cimport exp, log
-from libc.string cimport memcpy, memset
-
-import math
-
 from cymem.cymem cimport Pool
+from libc.math cimport exp
+from libc.string cimport memcpy, memset
 from preshed.maps cimport PreshMap
 
 

@@ -70,7 +67,7 @@ cdef class Beam:
             self.costs[i][j] = costs[j]
 
     cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
-        cdef int i, j
+        cdef int i
         for i in range(self.width):
             memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
             memcpy(self.is_valid[i], is_valid[i], sizeof(bint) * self.nr_class)

@@ -176,7 +173,6 @@ cdef class Beam:
         beam-width, and n is the number of classes.
         """
         cdef Entry entry
         cdef weight_t score
        cdef _State* s
-        cdef int i, j, move_id
         assert self.size >= 1

@@ -269,7 +265,7 @@ cdef class MaxViolation:
             # This can happen from non-monotonic actions
             # If we find a better gold analysis this way, be sure to keep it.
             elif pred._states[i].loss <= 0 \
-                and tuple(pred.histories[i]) not in seen_golds:
+                    and tuple(pred.histories[i]) not in seen_golds:
                 g_scores.append(pred._states[i].score)
                 g_hist.append(list(pred.histories[i]))
         for i in range(gold.size):
@@ -60,7 +60,7 @@ cdef class TransitionSystem:
 
 
 cdef void c_apply_actions(TransitionSystem moves, StateC** states, const int* actions,
-    int batch_size) nogil
+                          int batch_size) nogil
 
 cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
-    int nr_class, int batch_size) nogil
+                             int nr_class, int batch_size) nogil

@@ -291,19 +291,19 @@ cdef class TransitionSystem:
 
 
 cdef void c_apply_actions(TransitionSystem moves, StateC** states, const int* actions,
-        int batch_size) nogil:
-    cdef int i
-    cdef Transition action
-    cdef StateC* state
-    for i in range(batch_size):
-        state = states[i]
-        action = moves.c[actions[i]]
-        action.do(state, action.label)
-        state.history.push_back(action.clas)
+                          int batch_size) nogil:
+    cdef int i
+    cdef Transition action
+    cdef StateC* state
+    for i in range(batch_size):
+        state = states[i]
+        action = moves.c[actions[i]]
+        action.do(state, action.label)
+        state.history.push_back(action.clas)
 
 
 cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
-        int nr_class, int batch_size) nogil:
+                             int nr_class, int batch_size) nogil:
     is_valid = <int*>calloc(moves.n_moves, sizeof(int))
     cdef int i, guess
     cdef Transition action

@@ -319,4 +319,3 @@ cdef void c_transition_batch(TransitionSystem moves, StateC** states, const float* scores,
         action.do(states[i], action.label)
         states[i].history.push_back(guess)
     free(is_valid)
-
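For intuition, `c_transition_batch` picks the best-scoring *valid* transition for each state before applying it. A NumPy paraphrase of that selection step (the real code works on raw C arrays, not arrays like these):

    import numpy as np

    def transition_batch(scores, is_valid):
        # Mask out invalid transitions, then argmax per state.
        masked = np.where(is_valid, scores, -np.inf)
        return masked.argmax(axis=-1)

    scores = np.array([[0.2, 0.9, 0.1]])
    is_valid = np.array([[True, False, True]])
    assert transition_batch(scores, is_valid).tolist() == [0]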
@@ -1,8 +1,7 @@
 # cython: infer_types=True, profile=True, binding=True
 from itertools import islice
-from typing import Callable, Dict, Iterable, List, Optional, Union
+from typing import Callable, Dict, Iterable, Optional, Union
 
 import srsly
 from thinc.api import Config, Model
 from thinc.legacy import LegacySequenceCategoricalCrossentropy
-
@@ -1,12 +1,11 @@
 # cython: infer_types=True, profile=True, binding=True
-import warnings
 from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
 
 import srsly
 
 from ..tokens.doc cimport Doc
 
-from ..errors import Errors, Warnings
+from ..errors import Errors
 from ..language import Language
 from ..training import Example
 from ..util import raise_error
@@ -1,5 +1,4 @@
 # cython: infer_types=True, profile=True, binding=True
-import warnings
 from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
 
 import srsly

@@ -8,7 +7,7 @@ from thinc.api import Model, Optimizer, set_dropout_rate
 from ..tokens.doc cimport Doc
 
 from .. import util
-from ..errors import Errors, Warnings
+from ..errors import Errors
 from ..language import Language
 from ..training import Example, validate_distillation_examples, validate_examples
 from ..vocab import Vocab

@@ -56,14 +55,14 @@ cdef class TrainablePipe(Pipe):
         except Exception as e:
             error_handler(self.name, self, [doc], e)
 
-
     def distill(self,
-            teacher_pipe: Optional["TrainablePipe"],
-            examples: Iterable["Example"],
-            *,
-            drop: float=0.0,
-            sgd: Optional[Optimizer]=None,
-            losses: Optional[Dict[str, float]]=None) -> Dict[str, float]:
+                teacher_pipe: Optional["TrainablePipe"],
+                examples: Iterable["Example"],
+                *,
+                drop: float = 0.0,
+                sgd: Optional[Optimizer] = None,
+                losses: Optional[Dict[str, float]] = None
+                ) -> Dict[str, float]:
         """Train a pipe (the student) on the predictions of another pipe
         (the teacher). The student is typically trained on the probability
         distribution of the teacher, but details may differ per pipe.
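A sketch of the call pattern this docstring describes; `student_pipe`, `teacher_pipe`, `examples`, and `optimizer` are assumed to be set up elsewhere and are not part of this commit:

    losses = {}
    student_pipe.distill(
        teacher_pipe,      # trained pipe whose predictions are imitated
        examples,          # Iterable[Example]
        drop=0.1,
        sgd=optimizer,
        losses=losses,
    )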
@@ -222,12 +222,13 @@ class Parser(TrainablePipe):
         raise NotImplementedError
 
     def distill(self,
-            teacher_pipe: Optional[TrainablePipe],
-            examples: Iterable["Example"],
-            *,
-            drop: float=0.0,
-            sgd: Optional[Optimizer]=None,
-            losses: Optional[Dict[str, float]]=None):
+                teacher_pipe: Optional[TrainablePipe],
+                examples: Iterable["Example"],
+                *,
+                drop: float = 0.0,
+                sgd: Optional[Optimizer] = None,
+                losses: Optional[Dict[str, float]] = None
+                ):
         """Train a pipe (the student) on the predictions of another pipe
         (the teacher). The student is trained on the transition probabilities
         of the teacher.

@@ -277,11 +278,13 @@ class Parser(TrainablePipe):
         # teacher's distributions.
 
         student_inputs = TransitionModelInputs(docs=student_docs,
-            states=[state.copy() for state in states], moves=self.moves, max_moves=max_moves)
+                                               states=[state.copy() for state in states],
+                                               moves=self.moves,
+                                               max_moves=max_moves)
         (student_states, student_scores), backprop_scores = self.model.begin_update(student_inputs)
         actions = _states_diff_to_actions(states, student_states)
         teacher_inputs = TransitionModelInputs(docs=[eg.reference for eg in examples],
-            states=states, moves=teacher_pipe.moves, actions=actions)
+                                               states=states, moves=teacher_pipe.moves, actions=actions)
         (_, teacher_scores) = teacher_pipe.model.predict(teacher_inputs)
 
         loss, d_scores = self.get_teacher_student_loss(teacher_scores, student_scores)

@@ -294,10 +297,9 @@ class Parser(TrainablePipe):
 
         return losses
 
-
     def get_teacher_student_loss(
-        self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
-        normalize: bool=False,
+            self, teacher_scores: List[Floats2d], student_scores: List[Floats2d],
+            normalize: bool = False,
     ) -> Tuple[float, List[Floats2d]]:
         """Calculate the loss and its gradient for a batch of student
         scores, relative to teacher scores.

@@ -320,9 +322,9 @@ class Parser(TrainablePipe):
         # ourselves.
 
         teacher_scores = self.model.ops.softmax(self.model.ops.xp.vstack(teacher_scores),
-            axis=-1, inplace=True)
+                                                axis=-1, inplace=True)
         student_scores = self.model.ops.softmax(self.model.ops.xp.vstack(student_scores),
-            axis=-1, inplace=True)
+                                                axis=-1, inplace=True)
 
         assert teacher_scores.shape == student_scores.shape
 
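The softmax lines above feed `get_teacher_student_loss`. A self-contained NumPy sketch of one standard formulation — cross-entropy of the student under the teacher's distribution, whose gradient with respect to the student logits is simply `student_probs - teacher_probs` (the exact loss used here may differ in detail):

    import numpy as np

    def teacher_student_loss(teacher_scores, student_scores, eps=1e-10):
        def softmax(x):
            e = np.exp(x - x.max(axis=-1, keepdims=True))
            return e / e.sum(axis=-1, keepdims=True)

        t = softmax(np.vstack(teacher_scores))
        s = softmax(np.vstack(student_scores))
        assert t.shape == s.shape
        # Mean cross-entropy over states; gradient w.r.t. student logits is (s - t).
        loss = -(t * np.log(s + eps)).sum() / t.shape[0]
        d_logits = (s - t) / t.shape[0]
        return loss, d_logits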
@@ -436,13 +438,15 @@ class Parser(TrainablePipe):
         else:
             init_states, gold_states, _ = self.moves.init_gold_batch(examples)
 
-        inputs = TransitionModelInputs(docs=docs, moves=self.moves,
-            max_moves=max_moves, states=[state.copy() for state in init_states])
+        inputs = TransitionModelInputs(docs=docs,
+                                       moves=self.moves,
+                                       max_moves=max_moves,
+                                       states=[state.copy() for state in init_states])
         (pred_states, scores), backprop_scores = self.model.begin_update(inputs)
         if sum(s.shape[0] for s in scores) == 0:
             return losses
         d_scores = self.get_loss((gold_states, init_states, pred_states, scores),
-            examples, max_moves)
+                                 examples, max_moves)
         backprop_scores((pred_states, d_scores))
         if sgd not in (None, False):
             self.finish_update(sgd)

@@ -483,9 +487,7 @@ class Parser(TrainablePipe):
         cdef TransitionSystem moves = self.moves
         cdef StateClass state
-        cdef int clas
         cdef int nF = self.model.get_dim("nF")
         cdef int nO = moves.n_moves
         cdef int nS = sum([len(history) for history in histories])
         cdef Pool mem = Pool()
-        cdef np.ndarray costs_i
         is_valid = <int*>mem.alloc(nO, sizeof(int))

@@ -552,8 +554,8 @@ class Parser(TrainablePipe):
 
         return losses
 
-    def update_beam(self, examples, *, beam_width,
-                    drop=0., sgd=None, losses=None, beam_density=0.0):
+    def update_beam(self, examples, *, beam_width, drop=0.,
+                    sgd=None, losses=None, beam_density=0.0):
         raise NotImplementedError
 
     def set_output(self, nO):

@@ -678,9 +680,10 @@ class Parser(TrainablePipe):
             return states
 
         # Parse the states that are too long with the teacher's parsing model.
-        teacher_inputs = TransitionModelInputs(docs=docs, moves=moves,
-            states=[state.copy() for state in to_cut])
-        (teacher_states, _ ) = teacher_pipe.model.predict(teacher_inputs)
+        teacher_inputs = TransitionModelInputs(docs=docs,
+                                               moves=moves,
+                                               states=[state.copy() for state in to_cut])
+        (teacher_states, _) = teacher_pipe.model.predict(teacher_inputs)
 
         # Step through the teacher's actions and store every state after
         # each multiple of max_length.

@@ -778,6 +781,7 @@ def _states_to_actions(states: List[StateClass]) -> List[Ints1d]:
 
     return actions
 
+
 def _states_diff_to_actions(
     before_states: List[StateClass],
     after_states: List[StateClass]

@@ -798,8 +802,9 @@ def _states_diff_to_actions(
         c_state_before = before_state.c
         c_state_after = after_state.c
 
-        assert equal(c_state_before.history.begin(), c_state_before.history.end(),
-            c_state_after.history.begin())
+        assert equal(c_state_before.history.begin(),
+                     c_state_before.history.end(),
+                     c_state_after.history.begin())
 
     actions = []
     while True:
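`_states_diff_to_actions` relies on the invariant checked by the assert above: a state's history (the transition IDs applied so far) only grows, so the old history must be a prefix of the new one, and the actions taken in between are the suffix. A simplified pure-Python sketch over per-state histories (the real function regroups the result per step):

    def states_diff_to_actions(before_histories, after_histories):
        actions = []
        for before, after in zip(before_histories, after_histories):
            assert after[:len(before)] == before, "old history must be a prefix"
            actions.append(after[len(before):])
        return actions

    assert states_diff_to_actions([[0, 1]], [[0, 1, 2, 3]]) == [[2, 3]]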
@@ -1,7 +1,6 @@
 # cython: infer_types=True
-from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple, Union
+from typing import Iterable, Iterator, List, Optional, Tuple, Union
 
-cimport cython
 from libc.stdint cimport uint32_t
 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash64

@@ -243,7 +242,6 @@ cdef class StringStore:
         cdef int n_length_bytes
-        cdef int i
         cdef Utf8Str* string = <Utf8Str*>self.mem.alloc(1, sizeof(Utf8Str))
         cdef uint32_t ulength = length
         if length < sizeof(string.s):
             string.s[0] = <unsigned char>length
             memcpy(&string.s[1], chars, length)

@@ -301,7 +299,7 @@ cpdef hash_t get_string_id(object string_or_hash) except -1:
 
     try:
         return hash_string(string_or_hash)
-    except:
+    except:  # no-cython-lint
         if _try_coerce_to_hash(string_or_hash, &str_hash):
             # Coerce the integral key to the expected primitive hash type.
             # This ensures that custom/overloaded "primitive" data types

@@ -318,6 +316,5 @@ cdef inline bint _try_coerce_to_hash(object key, hash_t* out_hash):
     try:
         out_hash[0] = key
         return True
-    except:
+    except:  # no-cython-lint
         return False
-
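`get_string_id` above accepts either a string, which is hashed, or a value that can be coerced to a hash; the bare `except:` (silenced for the linter) is the coercion fallback. The same string-to-hash mapping is exposed through `StringStore`, which is usable on its own:

    from spacy.strings import StringStore

    ss = StringStore(["coffee"])
    h = ss["coffee"]           # string -> 64-bit hash
    assert ss[h] == "coffee"   # hash -> original string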
@@ -2,7 +2,7 @@
 from cymem.cymem cimport Pool
 
 from spacy.pipeline._parser_internals.search cimport Beam, MaxViolation
-from spacy.typedefs cimport class_t, weight_t
+from spacy.typedefs cimport class_t
 
 import pytest
 

@@ -42,32 +42,35 @@ cdef int destroy(Pool mem, void* state, void* extra_args) except -1:
     state = <TestState*>state
     mem.free(state)
 
+
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width",
-    [
-        (2, 3),
-        (3, 6),
-        (4, 20),
-    ]
-)
+                         [
+                             (2, 3),
+                             (3, 6),
+                             (4, 20),
+                         ]
+                         )
 def test_init(nr_class, beam_width):
     b = Beam(nr_class, beam_width)
     assert b.size == 1
     assert b.width == beam_width
     assert b.nr_class == nr_class
 
+
 @cytest
 def test_init_violn():
     MaxViolation()
 
+
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width,length",
-    [
-        (2, 3, 3),
-        (3, 6, 15),
-        (4, 20, 32),
-    ]
-)
+                         [
+                             (2, 3, 3),
+                             (3, 6, 15),
+                             (4, 20, 32),
+                         ]
+                         )
 def test_initialize(nr_class, beam_width, length):
     b = Beam(nr_class, beam_width)
     b.initialize(initialize, destroy, length, NULL)

@@ -79,11 +82,11 @@ def test_initialize(nr_class, beam_width, length):
 
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width,length,extra",
-    [
-        (2, 3, 4, None),
-        (3, 6, 15, u"test beam 1"),
-    ]
-)
+                         [
+                             (2, 3, 4, None),
+                             (3, 6, 15, u"test beam 1"),
+                         ]
+                         )
 def test_initialize_extra(nr_class, beam_width, length, extra):
     b = Beam(nr_class, beam_width)
     if extra is None:

@@ -97,11 +100,11 @@ def test_initialize_extra(nr_class, beam_width, length, extra):
 
 @cytest
 @pytest.mark.parametrize("nr_class,beam_width,length",
-    [
-        (3, 6, 15),
-        (4, 20, 32),
-    ]
-)
+                         [
+                             (3, 6, 15),
+                             (4, 20, 32),
+                         ]
+                         )
 def test_transition(nr_class, beam_width, length):
     b = Beam(nr_class, beam_width)
     b.initialize(initialize, destroy, length, NULL)
@@ -1759,7 +1759,7 @@ cdef class Doc:
                 data["underscore_span"] = {}
             if attr not in data["underscore_span"]:
                 data["underscore_span"][attr] = []
-            data["underscore_span"][attr].append({"start": start, "end": end, "value": value, "label": _label, "kb_id": _kb_id, "id":_span_id})
+            data["underscore_span"][attr].append({"start": start, "end": end, "value": value, "label": _label, "kb_id": _kb_id, "id": _span_id})
 
         for attr in underscore:
             if attr not in user_keys:
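The `Doc` change above only adds a space after "id":; the surrounding code serializes custom Span extension attributes. A hypothetical usage sketch (the attribute name is illustrative, and the output shape follows the append call shown in the hunk):

    import spacy
    from spacy.tokens import Span

    nlp = spacy.blank("en")
    Span.set_extension("rating", default=None)

    doc = nlp("the coffee was great")
    doc[1:4]._.rating = 5

    data = doc.to_json(underscore=["rating"])
    # Each entry under data["underscore_span"]["rating"] carries
    # start/end/value/label/kb_id/id, per the append above.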
@@ -1,5 +1,4 @@
-cimport numpy as np
 from libc.string cimport memset
 
 from ..errors import Errors
 from ..morphology import Morphology
@@ -225,8 +225,8 @@ cdef class Span:
 
     @property
     def _(self):
-        cdef SpanC* span_c = self.span_c()
         """Custom extension attributes registered via `set_extension`."""
+        cdef SpanC* span_c = self.span_c()
         return Underscore(Underscore.span_extensions, self,
                           start=span_c.start_char, end=span_c.end_char, label=self.label, kb_id=self.kb_id, span_id=self.id)
 

@@ -933,7 +933,6 @@ cdef class Span:
         self.id_ = ent_id_
 
 
-
 cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
     # Don't allow spaces to be the root, if there are
     # better candidates