From 7769bc31e383f3571f83152a191a7174692edd78 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 15 Mar 2017 09:27:41 -0500
Subject: [PATCH] Add beam-search classes

---
Review notes (this section is not part of the commit message):
  * __all__ now lists the exported names as strings instead of class objects.
  * The new classes and their add_label methods carry docstrings.
  * The "index" line still names the blobs of the originally posted patch.

 spacy/pipeline.pyx | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 45e443519..59e1994a9 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -29,6 +29,38 @@ cdef class EntityRecognizer(Parser):
         self.vocab._serializer = None
 
 
+cdef class BeamEntityRecognizer(BeamParser):
+    """Annotate named entities on Doc objects."""
+    TransitionSystem = BiluoPushDown
+
+    feature_templates = get_feature_templates('ner')
+
+    def add_label(self, label):
+        """Add an entity label to the transition system and the vocab.
+
+        Calls `self.moves.add_action` once per action type with `label`,
+        appends `[label, 1]` to every ENT_TYPE entry of
+        `self.vocab.serializer_freqs` for which `label not in freqs`
+        holds, and resets `self.vocab._serializer` to None.
+
+        label: The label to add. A string is replaced by
+            `self.vocab.strings[label]` before it is recorded in the
+            serializer frequencies.
+        """
+        for action in self.moves.action_types:
+            self.moves.add_action(action, label)
+        if isinstance(label, basestring):
+            label = self.vocab.strings[label]
+        for attr, freqs in self.vocab.serializer_freqs:
+            # NOTE(review): this appends [label, 1] lists, so the bare
+            # `label not in freqs` test looks unable to match an entry added
+            # here -- confirm whether duplicates are intended.
+            if attr == ENT_TYPE and label not in freqs:
+                freqs.append([label, 1])
+        # Super hacky :(
+        self.vocab._serializer = None
+
+
 cdef class DependencyParser(Parser):
     TransitionSystem = ArcEager
 
@@ -46,4 +78,43 @@ cdef class DependencyParser(Parser):
         self.vocab._serializer = None
 
 
-__all__ = [Tagger, DependencyParser, EntityRecognizer]
+cdef class BeamDependencyParser(BeamParser):
+    """Beam-search parser configured with the ArcEager transition system."""
+    TransitionSystem = ArcEager
+
+    feature_templates = get_feature_templates('basic')
+
+    def add_label(self, label):
+        """Add a dependency label to the transition system and the vocab.
+
+        Calls `self.moves.add_action` once per action type with `label`,
+        appends `[label, 1]` to every DEP entry of
+        `self.vocab.serializer_freqs` for which `label not in freqs`
+        holds, and resets `self.vocab._serializer` to None.
+
+        label: The label to add. A string is replaced by
+            `self.vocab.strings[label]` before it is recorded in the
+            serializer frequencies.
+        """
+        for action in self.moves.action_types:
+            self.moves.add_action(action, label)
+        if isinstance(label, basestring):
+            label = self.vocab.strings[label]
+        for attr, freqs in self.vocab.serializer_freqs:
+            # NOTE(review): this appends [label, 1] lists, so the bare
+            # `label not in freqs` test looks unable to match an entry added
+            # here -- confirm whether duplicates are intended.
+            if attr == DEP and label not in freqs:
+                freqs.append([label, 1])
+        # Super hacky :(
+        self.vocab._serializer = None
+
+
+# `from module import *` requires the entries of __all__ to be strings.
+__all__ = [
+    'Tagger',
+    'DependencyParser',
+    'EntityRecognizer',
+    'BeamDependencyParser',
+    'BeamEntityRecognizer',
+]