Use lemmatizer in code, not from downloaded model.

This commit is contained in:
Matthew Honnibal 2017-03-15 04:52:50 -05:00
parent 42ba740dde
commit f70be44746
3 changed files with 9 additions and 5 deletions

View File

@ -31,6 +31,10 @@ class English(Language):
tag_map = TAG_MAP
stop_words = STOP_WORDS
lemma_rules = dict(LEMMA_RULES)
lemma_index = dict(LEMMA_INDEX)
lemma_exc = dict(LEMMA_EXC)
def __init__(self, **overrides):
# Make a special-case hack for loading the GloVe vectors, to support

View File

@ -21,7 +21,7 @@ EXC = {
"adj": ADJECTIVES_IRREG,
"adv": ADVERBS_IRREG,
"noun": NOUNS_IRREG,
- "verbs": VERBS_IRREG
+ "verb": VERBS_IRREG
}

View File

@ -33,6 +33,7 @@ from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
from .syntax.parser import get_templates
from .syntax.nonproj import PseudoProjectivity
from .pipeline import DependencyParser, EntityRecognizer
from .pipeline import BeamDependencyParser, BeamEntityRecognizer
from .syntax.arc_eager import ArcEager
from .syntax.ner import BiluoPushDown
@ -40,10 +41,7 @@ from .syntax.ner import BiluoPushDown
class BaseDefaults(object):
@classmethod
def create_lemmatizer(cls, nlp=None):
- if nlp is None or nlp.path is None:
- return Lemmatizer({}, {}, {})
- else:
- return Lemmatizer.load(nlp.path, rules=cls.lemma_rules)
+ return Lemmatizer(cls.lemma_index, cls.lemma_exc, cls.lemma_rules)
@classmethod
def create_vocab(cls, nlp=None):
@ -169,6 +167,8 @@ class BaseDefaults(object):
stop_words = set()
lemma_rules = {}
lemma_exc = {}
lemma_index = {}
lex_attr_getters = {
attrs.LOWER: lambda string: string.lower(),