diff --git a/pyproject.toml b/pyproject.toml index 7abd7a96f..4b0da39b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "cymem>=2.0.2,<2.1.0", "preshed>=3.0.2,<3.1.0", "murmurhash>=0.28.0,<1.1.0", - "thinc>=8.1.0,<8.2.0", + "thinc>=9.0.0.dev0,<9.1.0", "numpy>=1.15.0", ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index 778c05e21..5bd04aa9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ spacy-legacy>=3.0.10,<3.1.0 spacy-loggers>=1.0.0,<2.0.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 -thinc>=8.1.0,<8.2.0 +thinc>=9.0.0.dev0,<9.1.0 ml_datasets>=0.2.0,<0.3.0 murmurhash>=0.28.0,<1.1.0 wasabi>=0.9.1,<1.1.0 diff --git a/setup.cfg b/setup.cfg index 3c1bf5b0b..0870e032e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,7 +38,7 @@ install_requires = murmurhash>=0.28.0,<1.1.0 cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 - thinc>=8.1.0,<8.2.0 + thinc>=9.0.0.dev0,<9.1.0 wasabi>=0.9.1,<1.1.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 diff --git a/setup.py b/setup.py index 55494344b..77a4cf283 100755 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ MOD_NAMES = [ "spacy.pipeline._parser_internals.arc_eager", "spacy.pipeline._parser_internals.ner", "spacy.pipeline._parser_internals.nonproj", + "spacy.pipeline._parser_internals.search", "spacy.pipeline._parser_internals._state", "spacy.pipeline._parser_internals.stateclass", "spacy.pipeline._parser_internals.transition_system", @@ -67,6 +68,7 @@ MOD_NAMES = [ "spacy.matcher.dependencymatcher", "spacy.symbols", "spacy.vectors", + "spacy.tests.parser._search", ] COMPILE_OPTIONS = { "msvc": ["/Ox", "/EHsc"], diff --git a/spacy/ml/parser_model.pyx b/spacy/ml/parser_model.pyx index 055fa0bad..91558683b 100644 --- a/spacy/ml/parser_model.pyx +++ b/spacy/ml/parser_model.pyx @@ -3,7 +3,6 @@ cimport numpy as np from libc.math cimport exp from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free, realloc -from thinc.backends.linalg cimport Vec, VecVec from thinc.backends.cblas cimport saxpy, sgemm import numpy @@ -102,11 +101,10 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states, sum_state_features(cblas, A.unmaxed, W.feat_weights, A.token_ids, n.states, n.feats, n.hiddens * n.pieces) for i in range(n.states): - VecVec.add_i(&A.unmaxed[i*n.hiddens*n.pieces], - W.feat_bias, 1., n.hiddens * n.pieces) + saxpy(cblas)(n.hiddens * n.pieces, 1., W.feat_bias, 1, &A.unmaxed[i*n.hiddens*n.pieces], 1) for j in range(n.hiddens): index = i * n.hiddens * n.pieces + j * n.pieces - which = Vec.arg_max(&A.unmaxed[index], n.pieces) + which = _arg_max(&A.unmaxed[index], n.pieces) A.hiddens[i*n.hiddens + j] = A.unmaxed[index + which] memset(A.scores, 0, n.states * n.classes * sizeof(float)) if W.hidden_weights == NULL: @@ -119,8 +117,7 @@ cdef void predict_states(CBlas cblas, ActivationsC* A, StateC** states, 0.0, A.scores, n.classes) # Add bias for i in range(n.states): - VecVec.add_i(&A.scores[i*n.classes], - W.hidden_bias, 1., n.classes) + saxpy(cblas)(n.classes, 1., W.hidden_bias, 1, &A.scores[i*n.classes], 1) # Set unseen classes to minimum value i = 0 min_ = A.scores[0] @@ -158,7 +155,8 @@ cdef void cpu_log_loss(float* d_scores, """Do multi-label log loss""" cdef double max_, gmax, Z, gZ best = arg_max_if_gold(scores, costs, is_valid, O) - guess = Vec.arg_max(scores, O) + guess = _arg_max(scores, O) + if best == -1 or guess == -1: # These shouldn't happen, but if they do, we want to make sure we don't # cause an OOB access. 
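Reviewer note, not part of the patch: thinc v9 removes the `Vec`/`VecVec` helpers from `thinc.backends.linalg`, so the hunks above rewrite the bias additions as direct CBLAS `saxpy` calls and replace `Vec.arg_max` with a local `_arg_max`. For readers without the BLAS signature in their head, here is a minimal NumPy sketch of the semantics the new calls rely on; the shapes and values are illustrative only.

```python
import numpy as np

def saxpy(n, alpha, x, inc_x, y, inc_y):
    """Model of BLAS saxpy: y[i*inc_y] += alpha * x[i*inc_x] for i < n."""
    y[: n * inc_y : inc_y] += alpha * x[: n * inc_x : inc_x]

# The bias-add pattern from predict_states: add W.hidden_bias to each
# state's score row -- the same operation VecVec.add_i(row, bias, 1., n)
# performed before.
n_states, n_classes = 3, 5
scores = np.zeros((n_states, n_classes), dtype="f")
bias = np.arange(n_classes, dtype="f")
for i in range(n_states):
    saxpy(n_classes, 1.0, bias, 1, scores[i], 1)
assert np.allclose(scores, np.tile(bias, (n_states, 1)))
```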
@@ -488,3 +486,15 @@ cdef class precompute_hiddens:
             return d_best.reshape((d_best.shape + (1,)))
 
         return state_vector, backprop_relu
+
+cdef inline int _arg_max(const float* scores, const int n_classes) nogil:
+    if n_classes == 2:
+        return 0 if scores[0] > scores[1] else 1
+    cdef int i
+    cdef int best = 0
+    cdef float mode = scores[0]
+    for i in range(1, n_classes):
+        if scores[i] > mode:
+            mode = scores[i]
+            best = i
+    return best
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pxd b/spacy/pipeline/_parser_internals/_beam_utils.pxd
index de3573fbc..571f246b1 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pxd
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pxd
@@ -1,6 +1,6 @@
 from ...typedefs cimport class_t, hash_t
 
-# These are passed as callbacks to thinc.search.Beam
+# These are passed as callbacks to .search.Beam
 cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1
 
 cdef int check_final_state(void* _state, void* extra_args) except -1
diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx
index fa7df2056..610c8ddee 100644
--- a/spacy/pipeline/_parser_internals/_beam_utils.pyx
+++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx
@@ -3,17 +3,16 @@ cimport numpy as np
 import numpy
 from cpython.ref cimport PyObject, Py_XDECREF
-from thinc.extra.search cimport Beam
-from thinc.extra.search import MaxViolation
-from thinc.extra.search cimport MaxViolation
 from ...typedefs cimport hash_t, class_t
 from .transition_system cimport TransitionSystem, Transition
 from ...errors import Errors
+from .search cimport Beam, MaxViolation
+from .search import MaxViolation
 from .stateclass cimport StateC, StateClass
 
-# These are passed as callbacks to thinc.search.Beam
+# These are passed as callbacks to .search.Beam
 cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
     dest = <StateC*>_dest
     src = <StateC*>_src
diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx
index 257b5ef8a..a79aef64a 100644
--- a/spacy/pipeline/_parser_internals/arc_eager.pyx
+++ b/spacy/pipeline/_parser_internals/arc_eager.pyx
@@ -15,7 +15,7 @@ from ...training.example cimport Example
 from .stateclass cimport StateClass
 from ._state cimport StateC, ArcC
 from ...errors import Errors
-from thinc.extra.search cimport Beam
+from .search cimport Beam
 
 cdef weight_t MIN_SCORE = -90000
 cdef attr_t SUBTOK_LABEL = hash_string('subtok')
diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx
index cc196d85a..53ed03523 100644
--- a/spacy/pipeline/_parser_internals/ner.pyx
+++ b/spacy/pipeline/_parser_internals/ner.pyx
@@ -6,7 +6,6 @@ from libcpp.vector cimport vector
 from cymem.cymem cimport Pool
 from collections import Counter
-from thinc.extra.search cimport Beam
 
 from ...tokens.doc cimport Doc
 from ...tokens.span import Span
@@ -17,6 +16,7 @@ from ...attrs cimport IS_SPACE
 from ...structs cimport TokenC, SpanC
 from ...training import split_bilu_label
 from ...training.example cimport Example
+from .search cimport Beam
 from .stateclass cimport StateClass
 from ._state cimport StateC
 from .transition_system cimport Transition, do_func_t
diff --git a/spacy/pipeline/_parser_internals/search.pxd b/spacy/pipeline/_parser_internals/search.pxd
new file mode 100644
index 000000000..dfe30e1c1
--- /dev/null
+++ b/spacy/pipeline/_parser_internals/search.pxd
@@ -0,0 +1,89 @@
+from cymem.cymem cimport Pool
+
+from libc.stdint cimport uint32_t
+from libc.stdint cimport uint64_t
+from libcpp.pair cimport pair
+from libcpp.queue cimport priority_queue
+from libcpp.vector cimport vector
+
+from ...typedefs cimport class_t, weight_t, hash_t
+
+ctypedef pair[weight_t, size_t] Entry
+ctypedef priority_queue[Entry] Queue
+
+
+ctypedef int (*trans_func_t)(void* dest, void* src, class_t clas, void* x) except -1
+
+ctypedef void* (*init_func_t)(Pool mem, int n, void* extra_args) except NULL
+
+ctypedef int (*del_func_t)(Pool mem, void* state, void* extra_args) except -1
+
+ctypedef int (*finish_func_t)(void* state, void* extra_args) except -1
+
+ctypedef hash_t (*hash_func_t)(void* state, void* x) except 0
+
+
+cdef struct _State:
+    void* content
+    class_t* hist
+    weight_t score
+    weight_t loss
+    int i
+    int t
+    bint is_done
+
+
+cdef class Beam:
+    cdef Pool mem
+    cdef class_t nr_class
+    cdef class_t width
+    cdef class_t size
+    cdef public weight_t min_density
+    cdef int t
+    cdef readonly bint is_done
+    cdef list histories
+    cdef list _parent_histories
+    cdef weight_t** scores
+    cdef int** is_valid
+    cdef weight_t** costs
+    cdef _State* _parents
+    cdef _State* _states
+    cdef del_func_t del_func
+
+    cdef int _fill(self, Queue* q, weight_t** scores, int** is_valid) except -1
+
+    cdef inline void* at(self, int i) nogil:
+        return self._states[i].content
+
+    cdef int initialize(self, init_func_t init_func, del_func_t del_func, int n, void* extra_args) except -1
+    cdef int advance(self, trans_func_t transition_func, hash_func_t hash_func,
+                     void* extra_args) except -1
+    cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1
+
+
+    cdef inline void set_cell(self, int i, int j, weight_t score, int is_valid, weight_t cost) nogil:
+        self.scores[i][j] = score
+        self.is_valid[i][j] = is_valid
+        self.costs[i][j] = cost
+
+    cdef int set_row(self, int i, const weight_t* scores, const int* is_valid,
+                     const weight_t* costs) except -1
+    cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1
+
+
+cdef class MaxViolation:
+    cdef Pool mem
+    cdef weight_t cost
+    cdef weight_t delta
+    cdef readonly weight_t p_score
+    cdef readonly weight_t g_score
+    cdef readonly double Z
+    cdef readonly double gZ
+    cdef class_t n
+    cdef readonly list p_hist
+    cdef readonly list g_hist
+    cdef readonly list p_probs
+    cdef readonly list g_probs
+
+    cpdef int check(self, Beam pred, Beam gold) except -1
+    cpdef int check_crf(self, Beam pred, Beam gold) except -1
diff --git a/spacy/pipeline/_parser_internals/search.pyx b/spacy/pipeline/_parser_internals/search.pyx
new file mode 100644
index 000000000..1d9b6dd7a
--- /dev/null
+++ b/spacy/pipeline/_parser_internals/search.pyx
@@ -0,0 +1,306 @@
+# cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
+cimport cython
+from libc.string cimport memset, memcpy
+from libc.math cimport log, exp
+import math
+
+from cymem.cymem cimport Pool
+from preshed.maps cimport PreshMap
+
+
+cdef class Beam:
+    def __init__(self, class_t nr_class, class_t width, weight_t min_density=0.0):
+        assert nr_class != 0
+        assert width != 0
+        self.nr_class = nr_class
+        self.width = width
+        self.min_density = min_density
+        self.size = 1
+        self.t = 0
+        self.mem = Pool()
+        self.del_func = NULL
+        self._parents = <_State*>self.mem.alloc(self.width, sizeof(_State))
+        self._states = <_State*>self.mem.alloc(self.width, sizeof(_State))
+        cdef int i
+        self.histories = [[] for i in range(self.width)]
+        self._parent_histories = [[] for i in range(self.width)]
+
+        self.scores = <weight_t**>self.mem.alloc(self.width, sizeof(weight_t*))
+        self.is_valid = <int**>self.mem.alloc(self.width, sizeof(int*))
+        self.costs = <weight_t**>self.mem.alloc(self.width, sizeof(weight_t*))
+        for i in range(self.width):
+            self.scores[i] = <weight_t*>self.mem.alloc(self.nr_class, sizeof(weight_t))
+            self.is_valid[i] = <int*>self.mem.alloc(self.nr_class, sizeof(int))
+            self.costs[i] = <weight_t*>self.mem.alloc(self.nr_class, sizeof(weight_t))
+
+    def __len__(self):
+        return self.size
+
+    property score:
+        def __get__(self):
+            return self._states[0].score
+
+    property min_score:
+        def __get__(self):
+            return self._states[self.size-1].score
+
+    property loss:
+        def __get__(self):
+            return self._states[0].loss
+
+    property probs:
+        def __get__(self):
+            return _softmax([self._states[i].score for i in range(self.size)])
+
+    property scores:
+        def __get__(self):
+            return [self._states[i].score for i in range(self.size)]
+
+    property histories:
+        def __get__(self):
+            return self.histories
+
+    cdef int set_row(self, int i, const weight_t* scores, const int* is_valid,
+                     const weight_t* costs) except -1:
+        cdef int j
+        for j in range(self.nr_class):
+            self.scores[i][j] = scores[j]
+            self.is_valid[i][j] = is_valid[j]
+            self.costs[i][j] = costs[j]
+
+    cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
+        cdef int i, j
+        for i in range(self.width):
+            memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
+            memcpy(self.is_valid[i], is_valid[i], sizeof(int) * self.nr_class)
+            memcpy(self.costs[i], costs[i], sizeof(weight_t) * self.nr_class)
+
+    cdef int initialize(self, init_func_t init_func, del_func_t del_func, int n, void* extra_args) except -1:
+        for i in range(self.width):
+            self._states[i].content = init_func(self.mem, n, extra_args)
+            self._parents[i].content = init_func(self.mem, n, extra_args)
+        self.del_func = del_func
+
+    def __dealloc__(self):
+        if self.del_func == NULL:
+            return
+
+        for i in range(self.width):
+            self.del_func(self.mem, self._states[i].content, NULL)
+            self.del_func(self.mem, self._parents[i].content, NULL)
+
+    @cython.cdivision(True)
+    cdef int advance(self, trans_func_t transition_func, hash_func_t hash_func,
+                     void* extra_args) except -1:
+        cdef weight_t** scores = self.scores
+        cdef int** is_valid = self.is_valid
+        cdef weight_t** costs = self.costs
+
+        cdef Queue* q = new Queue()
+        self._fill(q, scores, is_valid)
+        # For a beam of width k, we only ever need 2k state objects. How?
+        # Each transition takes a parent and a class and produces a new state.
+        # So, we don't need the whole history --- just the parent. So at
+        # each step, we take a parent, and apply one or more extensions to
+        # it.
+        self._parents, self._states = self._states, self._parents
+        self._parent_histories, self.histories = self.histories, self._parent_histories
+        cdef weight_t score
+        cdef int p_i
+        cdef int i = 0
+        cdef class_t clas
+        cdef _State* parent
+        cdef _State* state
+        cdef hash_t key
+        cdef PreshMap seen_states = PreshMap(self.width)
+        cdef uint64_t is_seen
+        cdef uint64_t one = 1
+        while i < self.width and not q.empty():
+            data = q.top()
+            p_i = data.second / self.nr_class
+            clas = data.second % self.nr_class
+            score = data.first
+            q.pop()
+            parent = &self._parents[p_i]
+            # Indicates terminal state reached; i.e. state is done
+            if parent.is_done:
+                # Now parent will not be changed, so we don't have to copy.
+                # Once finished, should also be unbranching.
+                self._states[i], parent[0] = parent[0], self._states[i]
+                parent.i = self._states[i].i
+                parent.t = self._states[i].t
+                parent.is_done = self._states[i].is_done
+                self._states[i].score = score
+                self.histories[i] = list(self._parent_histories[p_i])
+                i += 1
+            else:
+                state = &self._states[i]
+                # The supplied transition function should adjust the destination
+                # state to be the result of applying the class to the source state
+                transition_func(state.content, parent.content, clas, extra_args)
+                key = hash_func(state.content, extra_args) if hash_func is not NULL else 0
+                is_seen = <uint64_t>seen_states.get(key)
+                if key == 0 or key == 1 or not is_seen:
+                    if key != 0 and key != 1:
+                        seen_states.set(key, <void*>one)
+                    state.score = score
+                    state.loss = parent.loss + costs[p_i][clas]
+                    self.histories[i] = list(self._parent_histories[p_i])
+                    self.histories[i].append(clas)
+                    i += 1
+        del q
+        self.size = i
+        assert self.size >= 1
+        for i in range(self.width):
+            memset(self.scores[i], 0, sizeof(weight_t) * self.nr_class)
+            memset(self.costs[i], 0, sizeof(weight_t) * self.nr_class)
+            memset(self.is_valid[i], 0, sizeof(int) * self.nr_class)
+        self.t += 1
+
+    cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1:
+        cdef int i
+        for i in range(self.size):
+            if not self._states[i].is_done:
+                self._states[i].is_done = finish_func(self._states[i].content, extra_args)
+        for i in range(self.size):
+            if not self._states[i].is_done:
+                self.is_done = False
+                break
+        else:
+            self.is_done = True
+
+    @cython.cdivision(True)
+    cdef int _fill(self, Queue* q, weight_t** scores, int** is_valid) except -1:
+        """Populate the queue from a k * n matrix of scores, where k is the
+        beam-width, and n is the number of classes.
+        """
+        cdef Entry entry
+        cdef weight_t score
+        cdef _State* s
+        cdef int i, j, move_id
+        assert self.size >= 1
+        cdef vector[Entry] entries
+        for i in range(self.size):
+            s = &self._states[i]
+            move_id = i * self.nr_class
+            if s.is_done:
+                # Update score by path average, following TACL '13 paper.
+                if self.histories[i]:
+                    entry.first = s.score + (s.score / self.t)
+                else:
+                    entry.first = s.score
+                entry.second = move_id
+                entries.push_back(entry)
+            else:
+                for j in range(self.nr_class):
+                    if is_valid[i][j]:
+                        entry.first = s.score + scores[i][j]
+                        entry.second = move_id + j
+                        entries.push_back(entry)
+        cdef double max_, Z, cutoff
+        if self.min_density == 0.0:
+            for i in range(entries.size()):
+                q.push(entries[i])
+        elif not entries.empty():
+            max_ = entries[0].first
+            Z = 0.
+            cutoff = 0.
+            # Softmax into probabilities, so we can prune
+            for i in range(entries.size()):
+                if entries[i].first > max_:
+                    max_ = entries[i].first
+            for i in range(entries.size()):
+                Z += exp(entries[i].first-max_)
+            cutoff = (1. / Z) * self.min_density
+            for i in range(entries.size()):
+                prob = exp(entries[i].first-max_) / Z
+                if prob >= cutoff:
+                    q.push(entries[i])
+
+
+cdef class MaxViolation:
+    def __init__(self):
+        self.p_score = 0.0
+        self.g_score = 0.0
+        self.Z = 0.0
+        self.gZ = 0.0
+        self.delta = -1
+        self.cost = 0
+        self.p_hist = []
+        self.g_hist = []
+        self.p_probs = []
+        self.g_probs = []
+
+    cpdef int check(self, Beam pred, Beam gold) except -1:
+        cdef _State* p = &pred._states[0]
+        cdef _State* g = &gold._states[0]
+        cdef weight_t d = p.score - g.score
+        if p.loss >= 1 and (self.cost == 0 or d > self.delta):
+            self.cost = p.loss
+            self.delta = d
+            self.p_hist = list(pred.histories[0])
+            self.g_hist = list(gold.histories[0])
+            self.p_score = p.score
+            self.g_score = g.score
+            self.Z = 1e-10
+            self.gZ = 1e-10
+            for i in range(pred.size):
+                if pred._states[i].loss > 0:
+                    self.Z += exp(pred._states[i].score)
+            for i in range(gold.size):
+                if gold._states[i].loss == 0:
+                    prob = exp(gold._states[i].score)
+                    self.Z += prob
+                    self.gZ += prob
+
+    cpdef int check_crf(self, Beam pred, Beam gold) except -1:
+        d = pred.score - gold.score
+        seen_golds = set([tuple(gold.histories[i]) for i in range(gold.size)])
+        if pred.loss > 0 and (self.cost == 0 or d > self.delta):
+            p_hist = []
+            p_scores = []
+            g_hist = []
+            g_scores = []
+            for i in range(pred.size):
+                if pred._states[i].loss > 0:
+                    p_scores.append(pred._states[i].score)
+                    p_hist.append(list(pred.histories[i]))
+                # This can happen from non-monotonic actions
+                # If we find a better gold analysis this way, be sure to keep it.
+                elif pred._states[i].loss <= 0 \
+                and tuple(pred.histories[i]) not in seen_golds:
+                    g_scores.append(pred._states[i].score)
+                    g_hist.append(list(pred.histories[i]))
+            for i in range(gold.size):
+                if gold._states[i].loss == 0:
+                    g_scores.append(gold._states[i].score)
+                    g_hist.append(list(gold.histories[i]))
+
+            all_probs = _softmax(p_scores + g_scores)
+            p_probs = all_probs[:len(p_scores)]
+            g_probs_all = all_probs[len(p_scores):]
+            g_probs = _softmax(g_scores)
+
+            self.cost = pred.loss
+            self.delta = d
+            self.p_hist = p_hist
+            self.g_hist = g_hist
+            # TODO: These variables are misnamed! These are the gradients of the loss.
+            self.p_probs = p_probs
+            # Intuition here:
+            # The gradient of the loss is:
+            # P(model) - P(truth)
+            # Normally, P(truth) is 1 for the gold
+            # But, if we want to do the "partial credit" scheme, we want
+            # to create a distribution over the gold, proportional to the scores
+            # awarded.
+ self.g_probs = [x-y for x, y in zip(g_probs_all, g_probs)] + + +def _softmax(nums): + if not nums: + return [] + max_ = max(nums) + nums = [(exp(n-max_) if n is not None else None) for n in nums] + Z = sum(n for n in nums if n is not None) + return [(n/Z if n is not None else None) for n in nums] diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index 9676e2194..0531d4ba5 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -5,8 +5,9 @@ from itertools import islice import numpy as np import srsly -from thinc.api import Config, Model, SequenceCategoricalCrossentropy +from thinc.api import Config, Model from thinc.types import ArrayXd, Floats2d, Ints1d +from thinc.legacy import LegacySequenceCategoricalCrossentropy from ._edit_tree_internals.edit_trees import EditTrees from ._edit_tree_internals.schemas import validate_edit_tree @@ -129,7 +130,9 @@ class EditTreeLemmatizer(TrainablePipe): self, examples: Iterable[Example], scores: List[Floats2d] ) -> Tuple[float, List[Floats2d]]: validate_examples(examples, "EditTreeLemmatizer.get_loss") - loss_func = SequenceCategoricalCrossentropy(normalize=False, missing_value=-1) + loss_func = LegacySequenceCategoricalCrossentropy( + normalize=False, missing_value=-1 + ) truths = [] for eg in examples: diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 782a1dabe..293add9e1 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -1,7 +1,8 @@ # cython: infer_types=True, profile=True, binding=True from typing import Callable, Dict, Iterable, List, Optional, Union import srsly -from thinc.api import SequenceCategoricalCrossentropy, Model, Config +from thinc.api import Model, Config +from thinc.legacy import LegacySequenceCategoricalCrossentropy from thinc.types import Floats2d, Ints1d from itertools import islice @@ -290,7 +291,7 @@ class Morphologizer(Tagger): DOCS: https://spacy.io/api/morphologizer#get_loss """ validate_examples(examples, "Morphologizer.get_loss") - loss_func = SequenceCategoricalCrossentropy(names=tuple(self.labels), normalize=False) + loss_func = LegacySequenceCategoricalCrossentropy(names=tuple(self.labels), normalize=False) truths = [] for eg in examples: eg_truths = [] diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 93a7ee796..42feeb277 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -3,7 +3,9 @@ from typing import Dict, Iterable, Optional, Callable, List, Union from itertools import islice import srsly -from thinc.api import Model, SequenceCategoricalCrossentropy, Config +from thinc.api import Model, Config +from thinc.legacy import LegacySequenceCategoricalCrossentropy + from thinc.types import Floats2d, Ints1d from ..tokens.doc cimport Doc @@ -161,7 +163,7 @@ class SentenceRecognizer(Tagger): """ validate_examples(examples, "SentenceRecognizer.get_loss") labels = self.labels - loss_func = SequenceCategoricalCrossentropy(names=labels, normalize=False) + loss_func = LegacySequenceCategoricalCrossentropy(names=labels, normalize=False) truths = [] for eg in examples: eg_truth = [] diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 3b4715ce5..e12f116af 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -2,7 +2,8 @@ from typing import Callable, Dict, Iterable, List, Optional, Union import numpy import srsly -from thinc.api import Model, set_dropout_rate, 
SequenceCategoricalCrossentropy, Config +from thinc.api import Model, set_dropout_rate, Config +from thinc.legacy import LegacySequenceCategoricalCrossentropy from thinc.types import Floats2d, Ints1d import warnings from itertools import islice @@ -244,7 +245,7 @@ class Tagger(TrainablePipe): DOCS: https://spacy.io/api/tagger#rehearse """ - loss_func = SequenceCategoricalCrossentropy() + loss_func = LegacySequenceCategoricalCrossentropy() if losses is None: losses = {} losses.setdefault(self.name, 0.0) @@ -275,7 +276,7 @@ class Tagger(TrainablePipe): DOCS: https://spacy.io/api/tagger#get_loss """ validate_examples(examples, "Tagger.get_loss") - loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"]) + loss_func = LegacySequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"]) # Convert empty tag "" to missing value None so that both misaligned # tokens and tokens with missing annotation have the default missing # value None. diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 340334b1a..9d7b258c6 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -10,12 +10,12 @@ import random import srsly from thinc.api import get_ops, set_dropout_rate, CupyOps, NumpyOps -from thinc.extra.search cimport Beam import numpy.random import numpy import warnings from ._parser_internals.stateclass cimport StateClass +from ._parser_internals.search cimport Beam from ..ml.parser_model cimport alloc_activations, free_activations from ..ml.parser_model cimport predict_states, arg_max_if_valid from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index 2be286a57..b9c4ef715 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -1,6 +1,10 @@ import pytest from spacy.util import get_lang_class +import functools from hypothesis import settings +import inspect +import importlib +import sys # Functionally disable deadline settings for tests # to prevent spurious test failures in CI builds. @@ -47,6 +51,33 @@ def pytest_runtest_setup(item): pytest.skip("not referencing any issues") +# Decorator for Cython-built tests +# https://shwina.github.io/cython-testing/ +def cytest(func): + """ + Wraps `func` in a plain Python function. + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + bound = inspect.signature(func).bind(*args, **kwargs) + return func(*bound.args, **bound.kwargs) + + return wrapped + + +def register_cython_tests(cython_mod_name: str, test_mod_name: str): + """ + Registers all callables with name `test_*` in Cython module `cython_mod_name` + as attributes in module `test_mod_name`, making them discoverable by pytest. 
+ """ + cython_mod = importlib.import_module(cython_mod_name) + for name in dir(cython_mod): + item = getattr(cython_mod, name) + if callable(item) and name.startswith("test_"): + setattr(sys.modules[test_mod_name], name, item) + + # Fixtures for language tokenizers (languages sorted alphabetically) diff --git a/spacy/tests/parser/_search.pyx b/spacy/tests/parser/_search.pyx new file mode 100644 index 000000000..23fc81644 --- /dev/null +++ b/spacy/tests/parser/_search.pyx @@ -0,0 +1,119 @@ +# cython: infer_types=True, binding=True +from spacy.pipeline._parser_internals.search cimport Beam, MaxViolation +from spacy.typedefs cimport class_t, weight_t +from cymem.cymem cimport Pool + +from ..conftest import cytest +import pytest + +cdef struct TestState: + int length + int x + Py_UNICODE* string + + +cdef int transition(void* dest, void* src, class_t clas, void* extra_args) except -1: + dest_state = dest + src_state = src + dest_state.length = src_state.length + dest_state.x = src_state.x + dest_state.x += clas + if extra_args != NULL: + dest_state.string = extra_args + else: + dest_state.string = src_state.string + + +cdef void* initialize(Pool mem, int n, void* extra_args) except NULL: + state = mem.alloc(1, sizeof(TestState)) + state.length = n + state.x = 1 + if extra_args == NULL: + state.string = u'default' + else: + state.string = extra_args + return state + + +cdef int destroy(Pool mem, void* state, void* extra_args) except -1: + state = state + mem.free(state) + +@cytest +@pytest.mark.parametrize("nr_class,beam_width", + [ + (2, 3), + (3, 6), + (4, 20), + ] +) +def test_init(nr_class, beam_width): + b = Beam(nr_class, beam_width) + assert b.size == 1 + assert b.width == beam_width + assert b.nr_class == nr_class + +@cytest +def test_init_violn(): + MaxViolation() + +@cytest +@pytest.mark.parametrize("nr_class,beam_width,length", + [ + (2, 3, 3), + (3, 6, 15), + (4, 20, 32), + ] +) +def test_initialize(nr_class, beam_width, length): + b = Beam(nr_class, beam_width) + b.initialize(initialize, destroy, length, NULL) + for i in range(b.width): + s = b.at(i) + assert s.length == length, s.length + assert s.string == 'default' + + +@cytest +@pytest.mark.parametrize("nr_class,beam_width,length,extra", + [ + (2, 3, 4, None), + (3, 6, 15, u"test beam 1"), + ] +) +def test_initialize_extra(nr_class, beam_width, length, extra): + b = Beam(nr_class, beam_width) + if extra is None: + b.initialize(initialize, destroy, length, NULL) + else: + b.initialize(initialize, destroy, length, extra) + for i in range(b.width): + s = b.at(i) + assert s.length == length + + +@cytest +@pytest.mark.parametrize("nr_class,beam_width,length", + [ + (3, 6, 15), + (4, 20, 32), + ] +) +def test_transition(nr_class, beam_width, length): + b = Beam(nr_class, beam_width) + b.initialize(initialize, destroy, length, NULL) + b.set_cell(0, 2, 30, True, 0) + b.set_cell(0, 1, 42, False, 0) + b.advance(transition, NULL, NULL) + assert b.size == 1, b.size + assert b.score == 30, b.score + s = b.at(0) + assert s.x == 3 + assert b._states[0].score == 30, b._states[0].score + b.set_cell(0, 1, 10, True, 0) + b.set_cell(0, 2, 20, True, 0) + b.advance(transition, NULL, NULL) + assert b._states[0].score == 50, b._states[0].score + assert b._states[1].score == 40 + s = b.at(0) + assert s.x == 5 diff --git a/spacy/tests/parser/test_search.py b/spacy/tests/parser/test_search.py new file mode 100644 index 000000000..136c3a11b --- /dev/null +++ b/spacy/tests/parser/test_search.py @@ -0,0 +1,3 @@ +from ..conftest import register_cython_tests 
+ +register_cython_tests("spacy.tests.parser._search", __name__)
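Reviewer note on the test wiring, which spans three files: `_search.pyx` is compiled like any other extension (it was added to `MOD_NAMES` in `setup.py` above), `@cytest` wraps each compiled test in a plain Python function whose signature `pytest.mark.parametrize` can bind, and `register_cython_tests` copies the compiled `test_*` callables into an ordinary module where pytest's collector can find them. A sketch of how a further Cython test module would hook in, using the hypothetical name `_mymod`:

```python
# spacy/tests/parser/test_mymod.py -- hypothetical companion stub following
# the test_search.py pattern above; a _mymod.pyx module would also need an
# entry in MOD_NAMES in setup.py so it gets compiled.
from ..conftest import register_cython_tests

# Imports spacy.tests.parser._mymod and re-exports every callable whose name
# starts with "test_" as an attribute of this module, so pytest collects
# them as if they were defined here.
register_cython_tests("spacy.tests.parser._mymod", __name__)
```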
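One more note, on `Beam._fill`: the `min_density` pruning reads like a softmax cutoff, but since `cutoff = min_density / Z` and each candidate's probability is `exp(score - max) / Z`, the normalizer cancels and the test reduces to keeping candidates within `-log(min_density)` of the best expansion. A plain-Python model of that logic, with queue entries simplified to bare scores (the real entries also carry a parent-and-class index):

```python
import math

def prune(scores, min_density):
    """Mirror Beam._fill's density pruning over candidate expansion scores."""
    if min_density == 0.0 or not scores:
        return list(scores)
    max_ = max(scores)
    Z = sum(math.exp(s - max_) for s in scores)
    cutoff = (1.0 / Z) * min_density
    # exp(s - max_) / Z >= min_density / Z  <=>  s >= max_ + log(min_density)
    return [s for s in scores if math.exp(s - max_) / Z >= cutoff]

# With min_density=0.1, anything more than log(10) ~ 2.3 below the best goes.
assert prune([0.0, -0.5, -3.0], 0.1) == [0.0, -0.5]
```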