mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-10 08:30:34 +03:00
Convert parser from cdef class
This commit is contained in:
parent
34aab9899f
commit
7b9c282469
5
setup.py
5
setup.py
|
@ -30,7 +30,6 @@ MOD_NAMES = [
|
||||||
"spacy.vocab",
|
"spacy.vocab",
|
||||||
"spacy.attrs",
|
"spacy.attrs",
|
||||||
"spacy.kb",
|
"spacy.kb",
|
||||||
"spacy.ml.parser_model",
|
|
||||||
"spacy.morphology",
|
"spacy.morphology",
|
||||||
"spacy.pipeline.dep_parser",
|
"spacy.pipeline.dep_parser",
|
||||||
"spacy.pipeline.morphologizer",
|
"spacy.pipeline.morphologizer",
|
||||||
|
@ -203,7 +202,9 @@ def setup_package():
|
||||||
ext_modules = []
|
ext_modules = []
|
||||||
for name in MOD_NAMES:
|
for name in MOD_NAMES:
|
||||||
mod_path = name.replace(".", "/") + ".pyx"
|
mod_path = name.replace(".", "/") + ".pyx"
|
||||||
ext = Extension(name, [mod_path], language="c++", extra_compile_args=["-std=c++11"])
|
ext = Extension(
|
||||||
|
name, [mod_path], language="c++", extra_compile_args=["-std=c++11"]
|
||||||
|
)
|
||||||
ext_modules.append(ext)
|
ext_modules.append(ext)
|
||||||
print("Cythonizing sources")
|
print("Cythonizing sources")
|
||||||
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
|
ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
|
||||||
|
|
|
@ -3,8 +3,8 @@ from collections import defaultdict
|
||||||
from typing import Optional, Iterable
|
from typing import Optional, Iterable
|
||||||
from thinc.api import Model, Config
|
from thinc.api import Model, Config
|
||||||
|
|
||||||
from .transition_parser cimport Parser
|
from .transition_parser import Parser
|
||||||
from ._parser_internals.arc_eager cimport ArcEager
|
from ._parser_internals.arc_eager import ArcEager
|
||||||
|
|
||||||
from .functions import merge_subtokens
|
from .functions import merge_subtokens
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
|
@ -199,7 +199,7 @@ def make_beam_parser(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
cdef class DependencyParser(Parser):
|
class DependencyParser(Parser):
|
||||||
"""Pipeline component for dependency parsing.
|
"""Pipeline component for dependency parsing.
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/dependencyparser
|
DOCS: https://nightly.spacy.io/api/dependencyparser
|
||||||
|
|
|
@ -3,8 +3,8 @@ from collections import defaultdict
|
||||||
from typing import Optional, Iterable
|
from typing import Optional, Iterable
|
||||||
from thinc.api import Model, Config
|
from thinc.api import Model, Config
|
||||||
|
|
||||||
from .transition_parser cimport Parser
|
from .transition_parser import Parser
|
||||||
from ._parser_internals.ner cimport BiluoPushDown
|
from ._parser_internals.ner import BiluoPushDown
|
||||||
|
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
from ..scorer import get_ner_prf, PRFScore
|
from ..scorer import get_ner_prf, PRFScore
|
||||||
|
@ -160,7 +160,7 @@ def make_beam_ner(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
cdef class EntityRecognizer(Parser):
|
class EntityRecognizer(Parser):
|
||||||
"""Pipeline component for named entity recognition.
|
"""Pipeline component for named entity recognition.
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/entityrecognizer
|
DOCS: https://nightly.spacy.io/api/entityrecognizer
|
||||||
|
|
|
@ -1,19 +0,0 @@
|
||||||
from cymem.cymem cimport Pool
|
|
||||||
|
|
||||||
from ..vocab cimport Vocab
|
|
||||||
from .trainable_pipe cimport TrainablePipe
|
|
||||||
from ._parser_internals.transition_system cimport Transition, TransitionSystem
|
|
||||||
from ._parser_internals._state cimport StateC
|
|
||||||
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Parser(TrainablePipe):
|
|
||||||
cdef public object _rehearsal_model
|
|
||||||
cdef readonly TransitionSystem moves
|
|
||||||
cdef public object _multitasks
|
|
||||||
|
|
||||||
cdef void _parseC(self, StateC** states,
|
|
||||||
WeightsC weights, SizesC sizes) nogil
|
|
||||||
|
|
||||||
cdef void c_transition_batch(self, StateC** states, const float* scores,
|
|
||||||
int nr_class, int batch_size) nogil
|
|
|
@ -17,21 +17,19 @@ import numpy
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from ._parser_internals.stateclass cimport StateClass
|
from ._parser_internals.stateclass cimport StateClass
|
||||||
from ..ml.parser_model cimport alloc_activations, free_activations
|
|
||||||
from ..ml.parser_model cimport predict_states, arg_max_if_valid
|
|
||||||
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
|
|
||||||
from ..ml.parser_model cimport get_c_weights, get_c_sizes
|
|
||||||
from ..tokens.doc cimport Doc
|
from ..tokens.doc cimport Doc
|
||||||
from .trainable_pipe import TrainablePipe
|
from .trainable_pipe import TrainablePipe
|
||||||
from ._parser_internals cimport _beam_utils
|
from ._parser_internals cimport _beam_utils
|
||||||
from ._parser_internals import _beam_utils
|
from ._parser_internals import _beam_utils
|
||||||
|
from ..vocab cimport Vocab
|
||||||
|
from ._parser_internals.transition_system cimport TransitionSystem
|
||||||
|
|
||||||
from ..training import validate_examples, validate_get_examples
|
from ..training import validate_examples, validate_get_examples
|
||||||
from ..errors import Errors, Warnings
|
from ..errors import Errors, Warnings
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
||||||
|
|
||||||
cdef class Parser(TrainablePipe):
|
class Parser(TrainablePipe):
|
||||||
"""
|
"""
|
||||||
Base class of the DependencyParser and EntityRecognizer.
|
Base class of the DependencyParser and EntityRecognizer.
|
||||||
"""
|
"""
|
||||||
|
@ -272,24 +270,23 @@ cdef class Parser(TrainablePipe):
|
||||||
return d_scores
|
return d_scores
|
||||||
|
|
||||||
def _get_costs_from_histories(self, examples, histories):
|
def _get_costs_from_histories(self, examples, histories):
|
||||||
|
cdef TransitionSystem moves = self.moves
|
||||||
cdef StateClass state
|
cdef StateClass state
|
||||||
cdef int clas
|
cdef int clas
|
||||||
cdef int nF = self.model.state2vec.nF
|
cdef int nF = self.model.state2vec.nF
|
||||||
cdef int nO = self.moves.n_moves
|
cdef int nO = moves.n_moves
|
||||||
cdef int nS = sum([len(history) for history in histories])
|
cdef int nS = sum([len(history) for history in histories])
|
||||||
cdef np.ndarray costs = numpy.zeros((nS, nO), dtype="f")
|
cdef np.ndarray costs = numpy.zeros((nS, nO), dtype="f")
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
is_valid = <int*>mem.alloc(nO, sizeof(int))
|
is_valid = <int*>mem.alloc(nO, sizeof(int))
|
||||||
c_costs = <float*>costs.data
|
c_costs = <float*>costs.data
|
||||||
states = self.moves.init_states([eg.x for eg in examples])
|
states = moves.init_states([eg.x for eg in examples])
|
||||||
cdef int i = 0
|
cdef int i = 0
|
||||||
for eg, state, history in zip(examples, states, histories):
|
for eg, state, history in zip(examples, states, histories):
|
||||||
gold = self.moves.init_gold(state, eg)
|
gold = moves.init_gold(state, eg)
|
||||||
for clas in history:
|
for clas in history:
|
||||||
# Set a row into the C data of the arrays (which we return)
|
moves.set_costs(is_valid, &c_costs[i*nO], state.c, gold)
|
||||||
state.c.set_context_tokens(&c_ids[i*nF], nF)
|
action = moves.c[clas]
|
||||||
self.moves.set_costs(is_valid, &c_costs[i*nO], state.c, gold)
|
|
||||||
action = self.moves.c[clas]
|
|
||||||
action.do(state.c, action.label)
|
action.do(state.c, action.label)
|
||||||
state.c.history.push_back(clas)
|
state.c.history.push_back(clas)
|
||||||
i += 1
|
i += 1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user