Define a dummy German lemmatizer until the morphology is done

This commit is contained in:
Wolfgang Seeker 2016-05-02 16:04:53 +02:00
parent 6e1f1c4b9e
commit dae6bc05eb
2 changed files with 11 additions and 2 deletions

View File

@@ -111,8 +111,6 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
gold_tuples = gold_tuples[:n_sents] gold_tuples = gold_tuples[:n_sents]
nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False) nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False)
if nlp.lang == 'de':
nlp.vocab.morphology.lemmatizer = lambda string,pos: set([string])
nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates()) nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates())
nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager) nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager)
nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown) nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown)

View File

@@ -3,7 +3,18 @@ from __future__ import unicode_literals, print_function
from os import path from os import path
from ..language import Language from ..language import Language
from ..vocab import Vocab
from .. import attrs
from .. import util
from .. import about
class German(Language):
    """Language subclass for German, identified by the code ``'de'``."""

    lang = 'de'

    @classmethod
    def default_vocab(cls, package, get_lex_attr=None, vectors_package=None):
        """Return the parent's default vocab with a placeholder lemmatizer.

        The three arguments are forwarded unchanged, positionally, to the
        parent class's ``default_vocab``. The returned vocab then has its
        ``morphology.lemmatizer`` replaced by a callable that performs no
        lemmatization: it returns a one-element set holding the input
        string. This is a stop-gap until the morphology is done for German.
        """
        def passthrough_lemmatizer(string, pos):
            # `pos` is accepted but ignored; the string is its own lemma.
            return {string}

        parent = super(German, cls)
        vocab = parent.default_vocab(package, get_lex_attr, vectors_package)
        vocab.morphology.lemmatizer = passthrough_lemmatizer
        return vocab