Convert parser from cdef class

This commit is contained in:
Matthew Honnibal 2021-10-25 12:28:13 +02:00
parent 34aab9899f
commit 7b9c282469
5 changed files with 18 additions and 39 deletions

View File

@@ -30,7 +30,6 @@ MOD_NAMES = [
"spacy.vocab",
"spacy.attrs",
"spacy.kb",
"spacy.ml.parser_model",
"spacy.morphology",
"spacy.pipeline.dep_parser",
"spacy.pipeline.morphologizer",
@@ -203,7 +202,9 @@ def setup_package():
ext_modules = []
for name in MOD_NAMES:
mod_path = name.replace(".", "/") + ".pyx"
ext = Extension(name, [mod_path], language="c++", extra_compile_args=["-std=c++11"])
ext = Extension(
name, [mod_path], language="c++", extra_compile_args=["-std=c++11"]
)
ext_modules.append(ext)
print("Cythonizing sources")
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)

View File

@@ -3,8 +3,8 @@ from collections import defaultdict
from typing import Optional, Iterable
from thinc.api import Model, Config
from .transition_parser cimport Parser
from ._parser_internals.arc_eager cimport ArcEager
from .transition_parser import Parser
from ._parser_internals.arc_eager import ArcEager
from .functions import merge_subtokens
from ..language import Language
@@ -199,7 +199,7 @@ def make_beam_parser(
)
cdef class DependencyParser(Parser):
class DependencyParser(Parser):
"""Pipeline component for dependency parsing.
DOCS: https://nightly.spacy.io/api/dependencyparser

View File

@@ -3,8 +3,8 @@ from collections import defaultdict
from typing import Optional, Iterable
from thinc.api import Model, Config
from .transition_parser cimport Parser
from ._parser_internals.ner cimport BiluoPushDown
from .transition_parser import Parser
from ._parser_internals.ner import BiluoPushDown
from ..language import Language
from ..scorer import get_ner_prf, PRFScore
@@ -160,7 +160,7 @@ def make_beam_ner(
)
cdef class EntityRecognizer(Parser):
class EntityRecognizer(Parser):
"""Pipeline component for named entity recognition.
DOCS: https://nightly.spacy.io/api/entityrecognizer

View File

@@ -1,19 +0,0 @@
from cymem.cymem cimport Pool
from ..vocab cimport Vocab
from .trainable_pipe cimport TrainablePipe
from ._parser_internals.transition_system cimport Transition, TransitionSystem
from ._parser_internals._state cimport StateC
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC
cdef class Parser(TrainablePipe):
cdef public object _rehearsal_model
cdef readonly TransitionSystem moves
cdef public object _multitasks
cdef void _parseC(self, StateC** states,
WeightsC weights, SizesC sizes) nogil
cdef void c_transition_batch(self, StateC** states, const float* scores,
int nr_class, int batch_size) nogil

View File

@@ -17,21 +17,19 @@ import numpy
import warnings
from ._parser_internals.stateclass cimport StateClass
from ..ml.parser_model cimport alloc_activations, free_activations
from ..ml.parser_model cimport predict_states, arg_max_if_valid
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
from ..ml.parser_model cimport get_c_weights, get_c_sizes
from ..tokens.doc cimport Doc
from .trainable_pipe import TrainablePipe
from ._parser_internals cimport _beam_utils
from ._parser_internals import _beam_utils
from ..vocab cimport Vocab
from ._parser_internals.transition_system cimport TransitionSystem
from ..training import validate_examples, validate_get_examples
from ..errors import Errors, Warnings
from .. import util
cdef class Parser(TrainablePipe):
class Parser(TrainablePipe):
"""
Base class of the DependencyParser and EntityRecognizer.
"""
@@ -272,24 +270,23 @@ cdef class Parser(TrainablePipe):
return d_scores
def _get_costs_from_histories(self, examples, histories):
cdef TransitionSystem moves = self.moves
cdef StateClass state
cdef int clas
cdef int nF = self.model.state2vec.nF
cdef int nO = self.moves.n_moves
cdef int nO = moves.n_moves
cdef int nS = sum([len(history) for history in histories])
cdef np.ndarray costs = numpy.zeros((nS, nO), dtype="f")
cdef Pool mem = Pool()
is_valid = <int*>mem.alloc(nO, sizeof(int))
c_costs = <float*>costs.data
states = self.moves.init_states([eg.x for eg in examples])
states = moves.init_states([eg.x for eg in examples])
cdef int i = 0
for eg, state, history in zip(examples, states, histories):
gold = self.moves.init_gold(state, eg)
gold = moves.init_gold(state, eg)
for clas in history:
# Set a row into the C data of the arrays (which we return)
state.c.set_context_tokens(&c_ids[i*nF], nF)
self.moves.set_costs(is_valid, &c_costs[i*nO], state.c, gold)
action = self.moves.c[clas]
moves.set_costs(is_valid, &c_costs[i*nO], state.c, gold)
action = moves.c[clas]
action.do(state.c, action.label)
state.c.history.push_back(clas)
i += 1