Use lemmatizer in code, not from downloaded model.

2025-08-02 03:10:22 +03:00 · 2017-03-15 04:52:50 -05:00 · 2017-03-15 04:52:50 -05:00 · f70be44746
commit f70be44746
parent 42ba740dde
3 changed files with 9 additions and 5 deletions
--- a/spacy/en/__init__.py
+++ b/spacy/en/__init__.py
@@ -31,6 +31,10 @@ class English(Language):
        tag_map = TAG_MAP
        stop_words = STOP_WORDS

+        lemma_rules = dict(LEMMA_RULES)
+        lemma_index = dict(LEMMA_INDEX)
+        lemma_exc = dict(LEMMA_EXC)
+

    def __init__(self, **overrides):
        # Make a special-case hack for loading the GloVe vectors, to support
--- a/spacy/en/lemmatizer/__init__.py
+++ b/spacy/en/lemmatizer/__init__.py
@@ -21,7 +21,7 @@ EXC = {
    "adj": ADJECTIVES_IRREG,
    "adv": ADVERBS_IRREG,
    "noun": NOUNS_IRREG,
-    "verbs": VERBS_IRREG
+    "verb": VERBS_IRREG
 }


--- a/spacy/language.py
+++ b/spacy/language.py
@@ -33,6 +33,7 @@ from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
 from .syntax.parser import get_templates
 from .syntax.nonproj import PseudoProjectivity
 from .pipeline import DependencyParser, EntityRecognizer
+from .pipeline import BeamDependencyParser, BeamEntityRecognizer
 from .syntax.arc_eager import ArcEager
 from .syntax.ner import BiluoPushDown

@@ -40,10 +41,7 @@ from .syntax.ner import BiluoPushDown
 class BaseDefaults(object):
    @classmethod
    def create_lemmatizer(cls, nlp=None):
-        if nlp is None or nlp.path is None:
-            return Lemmatizer({}, {}, {})
-        else:
-            return Lemmatizer.load(nlp.path, rules=cls.lemma_rules)
+        return Lemmatizer(cls.lemma_index, cls.lemma_exc, cls.lemma_rules)

    @classmethod
    def create_vocab(cls, nlp=None):
@@ -169,6 +167,8 @@ class BaseDefaults(object):
    stop_words = set()

    lemma_rules = {}
+    lemma_exc = {}
+    lemma_index = {}

    lex_attr_getters = {
        attrs.LOWER: lambda string: string.lower(),