Mirror of https://github.com/explosion/spaCy.git
Fix Cython lints

This commit is contained in:
parent 42fe4edfd7
commit 9b36729cbd
@@ -45,6 +45,5 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,

 cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) nogil

-cdef void cpu_log_loss(float* d_scores,
-        const float* costs, const int* is_valid, const float* scores, int O) nogil
+cdef void cpu_log_loss(float* d_scores, const float* costs,
+                       const int* is_valid, const float* scores, int O) nogil
@@ -13,7 +13,7 @@ from .. import util
 from ..errors import Errors

 from ..pipeline._parser_internals.stateclass cimport StateClass
-from ..typedefs cimport class_t, hash_t, weight_t
+from ..typedefs cimport weight_t


 cdef WeightsC get_c_weights(model) except *:
@@ -93,16 +93,16 @@ cdef void resize_activations(ActivationsC* A, SizesC n) nogil:

 cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
                          const WeightsC* W, SizesC n) nogil:
-    cdef double one = 1.0
     resize_activations(A, n)
     for i in range(n.states):
         states[i].set_context_tokens(&A.token_ids[i*n.feats], n.feats)
     memset(A.unmaxed, 0, n.states * n.hiddens * n.pieces * sizeof(float))
     memset(A.hiddens, 0, n.states * n.hiddens * sizeof(float))
-    sum_state_features(cblas, A.unmaxed,
-        W.feat_weights, A.token_ids, n.states, n.feats, n.hiddens * n.pieces)
+    sum_state_features(cblas, A.unmaxed, W.feat_weights, A.token_ids, n.states,
+                       n.feats, n.hiddens * n.pieces)
     for i in range(n.states):
-        saxpy(cblas)(n.hiddens * n.pieces, 1., W.feat_bias, 1, &A.unmaxed[i*n.hiddens*n.pieces], 1)
+        saxpy(cblas)(n.hiddens * n.pieces, 1., W.feat_bias, 1,
+                     &A.unmaxed[i*n.hiddens*n.pieces], 1)
         for j in range(n.hiddens):
             index = i * n.hiddens * n.pieces + j * n.pieces
             which = _arg_max(&A.unmaxed[index], n.pieces)
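For orientation: the calls re-wrapped above implement the input side of the maxout hidden layer — sum the precomputed feature rows, add the feature bias (the saxpy), then keep the best piece per hidden unit (the _arg_max over n.pieces). A minimal NumPy sketch of the same computation, with shapes inferred from the n.* sizes rather than taken from this file:

    import numpy as np

    def maxout_hiddens_sketch(unmaxed, feat_bias, n_states, n_hiddens, n_pieces):
        # unmaxed: per-state sums of feature weights, laid out as
        # (states, hiddens * pieces), matching A.unmaxed above.
        unmaxed = unmaxed.reshape(n_states, n_hiddens, n_pieces)
        unmaxed = unmaxed + feat_bias.reshape(n_hiddens, n_pieces)
        # Maxout: each hidden unit keeps its best-scoring piece.
        return unmaxed.max(axis=-1)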
@@ -112,10 +112,10 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
         memcpy(A.scores, A.hiddens, n.states * n.classes * sizeof(float))
     else:
         # Compute hidden-to-output
-        sgemm(cblas)(False, True, n.states, n.classes, n.hiddens,
-            1.0, <const float *>A.hiddens, n.hiddens,
-            <const float *>W.hidden_weights, n.hiddens,
-            0.0, A.scores, n.classes)
+        sgemm(cblas)(False, True, n.states, n.classes, n.hiddens, 1.0,
+                     <const float *>A.hiddens, n.hiddens,
+                     <const float *>W.hidden_weights, n.hiddens, 0.0,
+                     A.scores, n.classes)
         # Add bias
         for i in range(n.states):
             saxpy(cblas)(n.classes, 1., W.hidden_bias, 1, &A.scores[i*n.classes], 1)
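The sgemm re-wrap only moves the alpha/beta scalars between lines; the call itself is an ordinary BLAS GEMM with the weight matrix transposed (the False, True flags). In NumPy terms, a paraphrase of the call above rather than the real API:

    # C = 1.0 * A @ B.T + 0.0 * C, with A = hiddens (states x hiddens)
    # and B = hidden_weights (classes x hiddens):
    scores = hiddens @ hidden_weights.T  # beta=0.0 discards the old scores

The per-state saxpy that follows then adds W.hidden_bias to each row, i.e. y += 1.0 * x in BLAS terms.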
@@ -131,9 +131,9 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states,
                 A.scores[i*n.classes+j] = min_


-cdef void sum_state_features(CBlas cblas, float* output,
-        const float* cached, const int* token_ids, int B, int F, int O) nogil:
-    cdef int idx, b, f, i
+cdef void sum_state_features(CBlas cblas, float* output, const float* cached,
+                             const int* token_ids, int B, int F, int O) nogil:
+    cdef int idx, b, f
     cdef const float* feature
     padding = cached
     cached += F * O
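sum_state_features is a gather-and-sum over the precomputed feature weights; the padding = cached / cached += F * O bookkeeping suggests the first block of cached serves as a padding row for missing tokens. A hedged NumPy sketch under that assumption (negative token ids taken to mean "missing"):

    import numpy as np

    def sum_state_features_sketch(padding, cached, token_ids, B, F, O):
        # cached: (n_tokens, F, O) precomputed rows; padding: (F, O).
        output = np.zeros((B, O), dtype=cached.dtype)
        for b in range(B):          # for each state in the batch
            for f in range(F):      # for each context-token feature
                idx = token_ids[b, f]
                row = padding[f] if idx < 0 else cached[idx, f]
                output[b] += row
        return output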
@@ -150,9 +150,8 @@ cdef void sum_state_features(CBlas cblas, float* output,
         token_ids += F


-cdef void cpu_log_loss(float* d_scores,
-        const float* costs, const int* is_valid, const float* scores,
-        int O) nogil:
+cdef void cpu_log_loss(float* d_scores, const float* costs, const int* is_valid,
+                       const float* scores, int O) nogil:
     """Do multi-label log loss"""
     cdef double max_, gmax, Z, gZ
     best = arg_max_if_gold(scores, costs, is_valid, O)
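The diff only shows the docstring, but the declared temporaries (max_, gmax, Z, gZ) and the arg_max_if_gold call point at the usual cost-sensitive log-loss gradient: a softmax over valid actions minus a renormalized softmax over gold (zero-cost) actions. A NumPy sketch under that assumption, not a transcription of the body:

    import numpy as np

    def cpu_log_loss_sketch(costs, is_valid, scores):
        valid = is_valid.astype(bool)
        gold = valid & (costs <= 0)   # gold = valid actions with zero cost
        # Max-shifted softmax over valid actions (the max_/Z pair).
        exp = np.where(valid, np.exp(scores - scores[valid].max()), 0.0)
        # Renormalized distribution over gold actions (the gmax/gZ pair).
        gexp = np.where(gold, np.exp(scores - scores[gold].max()), 0.0)
        # d_scores = P(action | model) - P(action | gold).
        return exp / exp.sum() - gexp / gexp.sum()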
@@ -202,7 +201,6 @@ cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) nogil
     return best


-
 class ParserStepModel(Model):
     def __init__(self, docs, layers, *, has_upper, unseen_classes=None, train=True,
                  dropout=0.1):
@@ -277,7 +275,6 @@ class ParserStepModel(Model):
         else:
             self.backprops.append((token_ids, d_vector, get_d_tokvecs))

-
     def finish_steps(self, golds):
         # Add a padding vector to the d_tokvecs gradient, so that missing
         # values don't affect the real gradient.
@@ -290,14 +287,15 @@ class ParserStepModel(Model):
             ids = ids.flatten()
             d_state_features = d_state_features.reshape(
                 (ids.size, d_state_features.shape[2]))
-            self.ops.scatter_add(d_tokvecs, ids,
-                d_state_features)
+            self.ops.scatter_add(d_tokvecs, ids, d_state_features)
         # Padded -- see update()
         self.bp_tokvecs(d_tokvecs[:-1])
         return d_tokvecs


 NUMPY_OPS = NumpyOps()


 def step_forward(model: ParserStepModel, states, is_train):
     token_ids = model.get_token_ids(states)
     vector, get_d_tokvecs = model.state2vec(token_ids, is_train)
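The scatter_add join is purely cosmetic, but the call deserves a gloss: ids can contain duplicates (several states can point at the same token), so the gradient rows must accumulate rather than overwrite. NumPy's unbuffered add is the equivalent operation:

    import numpy as np
    np.add.at(d_tokvecs, ids, d_state_features)  # duplicate ids accumulate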
@@ -310,7 +308,7 @@ def step_forward(model: ParserStepModel, states, is_train):
         scores, get_d_vector = model.vec2scores(vector, is_train)
     else:
         scores = NumpyOps().asarray(vector)
-        get_d_vector = lambda d_scores: d_scores
+        def get_d_vector(d_scores): return d_scores
     # If the class is unseen, make sure its score is minimum
     scores[:, model._class_mask == 0] = numpy.nanmin(scores)

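Rewriting the lambda assignment as a one-line def is the stock fix for flake8's E731 ("do not assign a lambda expression, use a def"); the two forms behave identically here.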
@@ -445,8 +443,8 @@ cdef class precompute_hiddens:
         feat_weights = self.get_feat_weights()
         cdef int[:, ::1] ids = token_ids
         sum_state_features(cblas, <float*>state_vector.data,
-            feat_weights, &ids[0,0],
-            token_ids.shape[0], self.nF, self.nO*self.nP)
+                           feat_weights, &ids[0, 0], token_ids.shape[0],
+                           self.nF, self.nO*self.nP)
         state_vector += self.bias
         state_vector, bp_nonlinearity = self._nonlinearity(state_vector)
@@ -127,6 +127,7 @@ def make_parser(
         scorer=scorer,
     )

+
 @Language.factory(
     "beam_parser",
     assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"],
@@ -15,7 +15,7 @@ from ._parser_internals.ner cimport BiluoPushDown
 from .transition_parser cimport Parser

 from ..language import Language
-from ..scorer import PRFScore, get_ner_prf
+from ..scorer import get_ner_prf
 from ..training import remove_bilu_prefix
 from ..util import registry
@@ -105,6 +105,7 @@ def make_ner(
         scorer=scorer,
     )

+
 @Language.factory(
     "beam_ner",
     assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
@@ -9,7 +9,7 @@ from cymem.cymem cimport Pool
 from itertools import islice

 from libc.stdlib cimport calloc, free
-from libc.string cimport memcpy, memset
+from libc.string cimport memset
 from libcpp.vector cimport vector

 import random
@@ -22,14 +22,13 @@ from thinc.api import (
     NumpyOps,
     Optimizer,
     chain,
-    get_array_module,
     get_ops,
     set_dropout_rate,
     softmax_activation,
     use_ops,
 )
 from thinc.legacy import LegacySequenceCategoricalCrossentropy
-from thinc.types import Floats2d, Ints1d
+from thinc.types import Floats2d

 from ..ml.parser_model cimport (
     ActivationsC,
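The import edits in this hunk and its neighbours are all the same kind of fix: names the linter reports as unused (presumably flake8's F401 or its cython-lint equivalent) are dropped — get_array_module and Ints1d here; class_t, hash_t, PRFScore, memcpy, Beam, weight_t, TransitionSystem, and warnings elsewhere in the commit.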
@@ -44,7 +43,6 @@ from ..ml.parser_model cimport (
     predict_states,
 )
 from ..tokens.doc cimport Doc
-from ._parser_internals.search cimport Beam
 from ._parser_internals.stateclass cimport StateClass

 from .trainable_pipe import TrainablePipe
@@ -54,11 +52,10 @@ from ._parser_internals cimport _beam_utils
 from ._parser_internals import _beam_utils

 from ..tokens.doc cimport Doc
-from ..typedefs cimport weight_t
 from ..vocab cimport Vocab
 from ._parser_internals cimport _beam_utils
 from ._parser_internals.stateclass cimport StateC, StateClass
-from ._parser_internals.transition_system cimport Transition, TransitionSystem
+from ._parser_internals.transition_system cimport Transition
 from .trainable_pipe cimport TrainablePipe

 from .. import util
@@ -434,8 +431,6 @@ cdef class Parser(TrainablePipe):
         return batch

     def beam_parse(self, docs, int beam_width, float drop=0., beam_density=0.):
-        cdef Beam beam
-        cdef Doc doc
         self._ensure_labels_are_added(docs)
         batch = _beam_utils.BeamBatch(
             self.moves,
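cdef Beam beam and cdef Doc doc were declared but never used in beam_parse, so they go the same way as the unused imports; the j dropped from cdef int i, j in the next hunk is the same class of fix.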
@@ -457,14 +452,14 @@ cdef class Parser(TrainablePipe):

     cdef void _parseC(self, CBlas cblas, StateC** states,
                       WeightsC weights, SizesC sizes) nogil:
-        cdef int i, j
+        cdef int i
         cdef vector[StateC*] unfinished
         cdef ActivationsC activations = alloc_activations(sizes)
         while sizes.states >= 1:
             predict_states(cblas, &activations, states, &weights, sizes)
             # Validate actions, argmax, take action.
-            self.c_transition_batch(states,
-                activations.scores, sizes.classes, sizes.states)
+            self.c_transition_batch(states, activations.scores,
+                                    sizes.classes, sizes.states)
             for i in range(sizes.states):
                 if not states[i].is_final():
                     unfinished.push_back(states[i])
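For readers skimming past the re-wraps: this is the core greedy transition loop. A Python-level paraphrase with hypothetical helper names — the real code works on raw StateC pointers under nogil:

    states = list(initial_states)
    while states:
        scores = predict_states(states)    # batched forward pass, one step
        transition_batch(states, scores)   # argmax over valid actions, apply
        states = [s for s in states if not s.is_final()]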
@@ -552,7 +547,6 @@ cdef class Parser(TrainablePipe):
             return losses
         model, backprop_tok2vec = self.model.begin_update([eg.x for eg in examples])

-        all_states = list(states)
         states_golds = list(zip(states, golds))
         n_moves = 0
         while states_golds:
@@ -632,8 +626,8 @@ cdef class Parser(TrainablePipe):
             del tutor
         return losses

-    def update_beam(self, examples, *, beam_width,
-            drop=0., sgd=None, losses=None, beam_density=0.0):
+    def update_beam(self, examples, *, beam_width, drop=0., sgd=None,
+                    losses=None, beam_density=0.0):
         states, golds, _ = self.moves.init_gold_batch(examples)
         if not states:
             return losses
@@ -674,8 +668,8 @@ cdef class Parser(TrainablePipe):
             for j in range(self.moves.n_moves):
                 if costs[j] <= 0.0 and j in unseen_classes:
                     unseen_classes.remove(j)
-            cpu_log_loss(c_d_scores,
-                costs, is_valid, &scores[i, 0], d_scores.shape[1])
+            cpu_log_loss(c_d_scores, costs, is_valid, &scores[i, 0],
+                         d_scores.shape[1])
             c_d_scores += d_scores.shape[1]
         # Note that we don't normalize this. See comment in update() for why.
         if losses is not None:
@@ -785,10 +779,7 @@ cdef class Parser(TrainablePipe):
         long_doc[:N], and another representing long_doc[N:]. In contrast to
         _init_gold_batch, this version uses a teacher model to generate the
         cut sequences."""
-        cdef:
-            StateClass start_state
-            StateClass state
-            Transition action
+        cdef StateClass state
         all_states = self.moves.init_batch(docs)
         states = []
         to_cut = []
|
||||||
length += 1
|
length += 1
|
||||||
return states
|
return states
|
||||||
|
|
||||||
|
|
||||||
def _init_gold_batch(self, examples, max_length):
|
def _init_gold_batch(self, examples, max_length):
|
||||||
"""Make a square batch, of length equal to the shortest transition
|
"""Make a square batch, of length equal to the shortest transition
|
||||||
sequence or a cap. A long
|
sequence or a cap. A long
|
||||||
|
|
|
@@ -1,4 +1,3 @@
-import warnings
 from collections.abc import Iterable as IterableInstance

 import numpy