mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Fix issue #514 -- serializer fails when new entity type has been added. The fix here is quite ugly. It's best to add the entities ASAP after loading the NLP pipeline, to mitigate the brittleness.
This commit is contained in:
parent
79aa03fe98
commit
3e688e6d4b
|
@ -7,6 +7,7 @@ from .tagger import Tagger
|
|||
# TODO: The disorganization here is pretty embarrassing. At least it's only
|
||||
# internals.
|
||||
from .syntax.parser import get_templates as get_feature_templates
|
||||
from .attrs import DEP, ENT_TYPE
|
||||
|
||||
|
||||
cdef class EntityRecognizer(Parser):
|
||||
|
@ -14,11 +15,33 @@ cdef class EntityRecognizer(Parser):
|
|||
|
||||
feature_templates = get_feature_templates('ner')
|
||||
|
||||
def add_label(self, label):
    """Register a new entity label with the moves and the serializer.

    Adds a move for `label` under every action type (delegated to
    `Parser.add_label`), then makes sure the label is listed in the
    ENT_TYPE entry of `self.vocab.serializer_freqs`.

    label: The label to add. A string is mapped through
        `self.vocab.strings` before it is recorded in the frequencies.
    """
    Parser.add_label(self, label)
    if isinstance(label, basestring):
        label = self.vocab.strings[label]
    for attr, freqs in self.vocab.serializer_freqs:
        if attr != ENT_TYPE:
            continue
        # Entries appended here are [label, freq] pairs, so the membership
        # test has to look at the first item of each pair. Testing
        # `label not in freqs` compares the label against whole pairs, never
        # matches, and appends a duplicate on every call.
        # NOTE(review): assumes pre-existing entries are also indexable
        # (label, freq) pairs -- confirm against the serializer data.
        if not any(entry[0] == label for entry in freqs):
            freqs.append([label, 1])
    # Super hacky :( -- drop the cached serializer so it is not reused with
    # the old frequencies.
    self.vocab._serializer = None
|
||||
|
||||
|
||||
cdef class DependencyParser(Parser):
    """Parser configured with the ArcEager transition system and the
    'basic' feature templates."""
    TransitionSystem = ArcEager

    feature_templates = get_feature_templates('basic')

    def add_label(self, label):
        """Register a new dependency label with the moves and the serializer.

        Adds a move for `label` under every action type (delegated to
        `Parser.add_label`), then makes sure the label is listed in the
        DEP entry of `self.vocab.serializer_freqs`.

        label: The label to add. A string is mapped through
            `self.vocab.strings` before it is recorded in the frequencies.
        """
        Parser.add_label(self, label)
        if isinstance(label, basestring):
            label = self.vocab.strings[label]
        for attr, freqs in self.vocab.serializer_freqs:
            if attr != DEP:
                continue
            # Entries appended here are [label, freq] pairs, so the
            # membership test has to look at the first item of each pair.
            # Testing `label not in freqs` compares the label against whole
            # pairs, never matches, and appends a duplicate on every call.
            # NOTE(review): assumes pre-existing entries are also indexable
            # (label, freq) pairs -- confirm against the serializer data.
            if not any(entry[0] == label for entry in freqs):
                freqs.append([label, 1])
        # Super hacky :( -- drop the cached serializer so it is not reused
        # with the old frequencies.
        self.vocab._serializer = None
|
||||
|
||||
|
||||
# Public names of this module. `__all__` must contain name strings, not the
# objects themselves, otherwise `from <module> import *` raises TypeError.
__all__ = ['Tagger', 'DependencyParser', 'EntityRecognizer']
|
||||
|
|
|
@ -92,6 +92,7 @@ cdef class Parser:
|
|||
def __init__(self, Vocab vocab, TransitionSystem=None, ParserModel model=None, **cfg):
|
||||
if TransitionSystem is None:
|
||||
TransitionSystem = self.TransitionSystem
|
||||
self.vocab = vocab
|
||||
actions = TransitionSystem.get_actions(**cfg)
|
||||
self.moves = TransitionSystem(vocab.strings, actions)
|
||||
# TODO: Remove this when we no longer need to support old-style models
|
||||
|
@ -226,8 +227,10 @@ cdef class Parser:
|
|||
stepwise.transition(transition)
|
||||
|
||||
def add_label(self, label):
    """Add a move for `label` under every action type of `self.moves`.

    The label is not registered with the serializer here -- subclasses
    override this method to do that.
    """
    for action_type in self.moves.action_types:
        self.moves.add_action(action_type, label)
|
||||
|
||||
|
||||
|
||||
cdef class StepwiseState:
|
||||
|
|
|
@ -113,9 +113,9 @@ cdef class Vocab:
|
|||
self._serializer = None
|
||||
|
||||
property serializer:
    # Having the serializer live here is super messy :(
    def __get__(self):
        """Return the Packer for this vocab, building it on first access.

        The Packer is constructed from `self.serializer_freqs` and cached in
        `self._serializer`; it is rebuilt whenever that cache is None.
        """
        if self._serializer is None:
            self._serializer = Packer(self, self.serializer_freqs)
        return self._serializer
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user