mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
define German dummy lemmatizer until morphology is done
This commit is contained in:
parent
6e1f1c4b9e
commit
dae6bc05eb
|
@ -111,8 +111,6 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
|
||||||
gold_tuples = gold_tuples[:n_sents]
|
gold_tuples = gold_tuples[:n_sents]
|
||||||
|
|
||||||
nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False)
|
nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False)
|
||||||
if nlp.lang == 'de':
|
|
||||||
nlp.vocab.morphology.lemmatizer = lambda string,pos: set([string])
|
|
||||||
nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates())
|
nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates())
|
||||||
nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager)
|
nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager)
|
||||||
nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown)
|
nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown)
|
||||||
|
|
|
@ -3,7 +3,18 @@ from __future__ import unicode_literals, print_function
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
|
from ..vocab import Vocab
|
||||||
|
from .. import attrs
|
||||||
|
from .. import util
|
||||||
|
from .. import about
|
||||||
|
|
||||||
|
|
||||||
class German(Language):
    """Language subclass for German (language code ``'de'``)."""

    lang = 'de'

    @classmethod
    def default_vocab(cls, package, get_lex_attr=None, vectors_package=None):
        """Build the default vocab and install a placeholder lemmatizer.

        The vocab is created by the parent class's ``default_vocab`` with the
        same arguments; afterwards ``vocab.morphology.lemmatizer`` is replaced
        by a callable that returns a one-element set holding the input string
        unchanged, ignoring the part-of-speech argument.
        """

        def passthrough_lemmatizer(string, pos):
            # Placeholder: the only "lemma" of a string is the string itself.
            return {string}

        vocab = super(German, cls).default_vocab(
            package, get_lex_attr, vectors_package
        )
        # For now, until the morphology is done for German.
        vocab.morphology.lemmatizer = passthrough_lemmatizer
        return vocab
|
||||||
|
|
Loading…
Reference in New Issue
Block a user