Add .blank() method to Parser. Start housing default dep labels and entity types within the Defaults class.

This commit is contained in:
Matthew Honnibal 2016-09-26 11:57:54 +02:00
parent 722199acb8
commit 2debc4e0a2
3 changed files with 18 additions and 5 deletions

View File

@ -29,6 +29,7 @@ from . import util
from .lemmatizer import Lemmatizer from .lemmatizer import Lemmatizer
from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
from .syntax.parser import get_templates
class BaseDefaults(object): class BaseDefaults(object):
@ -98,14 +99,14 @@ class BaseDefaults(object):
return Parser.load(self.path / 'deps', vocab, ArcEager) return Parser.load(self.path / 'deps', vocab, ArcEager)
else: else:
return Parser.blank(vocab, ArcEager, return Parser.blank(vocab, ArcEager,
Parser.default_templates('%s-parser' % self.lang)) features=self.parser_features, labels=self.parser_labels)
def Entity(self, vocab):
    """Return the entity-recognition Parser for this language.

    When ``self.path`` is set and ``self.path / 'ner'`` exists, the parser
    is loaded from that directory. Otherwise a blank parser is created
    from ``self.entity_features`` and ``self.entity_labels``.

    vocab: passed straight through to ``Parser.load`` / ``Parser.blank``.
    """
    if self.path and (self.path / 'ner').exists():
        return Parser.load(self.path / 'ner', vocab, BiluoPushDown)
    else:
        # Use the same transition-system name as the load branch above;
        # the earlier spelling ``BiluoPushdown`` did not match it.
        return Parser.blank(vocab, BiluoPushDown,
            features=self.entity_features, labels=self.entity_labels)
def Matcher(self, vocab): def Matcher(self, vocab):
if self.path: if self.path:
@ -120,9 +121,13 @@ class BaseDefaults(object):
nlp.parser, nlp.parser,
nlp.entity] nlp.entity]
dep_labels = {0: {'ROOT': True}} parser_labels = {0: {'ROOT': True}}
ner_labels = {0: {'PER': True, 'LOC': True, 'ORG': True, 'MISC': True}} entity_labels = {0: {'PER': True, 'LOC': True, 'ORG': True, 'MISC': True}}
parser_features = get_templates('parser')
entity_features = get_templates('ner')
stop_words = set() stop_words = set()

View File

@ -89,6 +89,14 @@ cdef class Parser:
model.load(str(path / 'model')) model.load(str(path / 'model'))
return cls(vocab, moves, model, **cfg) return cls(vocab, moves, model, **cfg)
@classmethod
def blank(cls, Vocab vocab, moves_class, **cfg):
    """Construct a Parser around a new ParserModel instead of loading one.

    vocab: Vocab whose ``strings`` are handed to the transition system.
    moves_class: transition-system class, called as
        ``moves_class(vocab.strings, labels)``.
    **cfg: ``labels`` (default ``{}``) is passed to ``moves_class``.
        ``features`` (default ``()``) supplies the model templates, either
        as ready-made templates or as a name for ``get_templates``.
        The whole of ``cfg`` is also forwarded to the constructor.
    """
    moves = moves_class(vocab.strings, cfg.get('labels', {}))
    features = cfg.get('features', tuple())
    # The Defaults class passes templates that get_templates() has already
    # produced (e.g. get_templates('parser')), so resolve only when a
    # template-set name is given rather than resolving a second time.
    if isinstance(features, basestring):
        templates = get_templates(features)
    else:
        templates = features
    model = ParserModel(templates)
    return cls(vocab, moves, model, **cfg)
def __init__(self, Vocab vocab, transition_system, ParserModel model, **cfg): def __init__(self, Vocab vocab, transition_system, ParserModel model, **cfg):
self.moves = transition_system self.moves = transition_system
self.model = model self.model = model

View File

@ -3,6 +3,7 @@ import os
import spacy import spacy
@pytest.fixture(scope="session")
def EN():
    """Session-scoped fixture returning the result of ``spacy.load("en")``."""
    english = spacy.load("en")
    return english
@ -21,7 +22,6 @@ def pytest_addoption(parser):
help="include slow tests") help="include slow tests")
def pytest_runtest_setup(item): def pytest_runtest_setup(item):
for opt in ['models', 'vectors', 'slow']: for opt in ['models', 'vectors', 'slow']:
if opt in item.keywords and not item.config.getoption("--%s" % opt): if opt in item.keywords and not item.config.getoption("--%s" % opt):