mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Build the lemmatizer from the language data defined in code (lemma index, exceptions and rules) instead of loading it from the downloaded model.
This commit is contained in:
parent
42ba740dde
commit
f70be44746
|
@ -31,6 +31,10 @@ class English(Language):
|
|||
tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
lemma_rules = dict(LEMMA_RULES)
|
||||
lemma_index = dict(LEMMA_INDEX)
|
||||
lemma_exc = dict(LEMMA_EXC)
|
||||
|
||||
|
||||
def __init__(self, **overrides):
|
||||
# Make a special-case hack for loading the GloVe vectors, to support
|
||||
|
|
|
@ -21,7 +21,7 @@ EXC = {
|
|||
"adj": ADJECTIVES_IRREG,
|
||||
"adv": ADVERBS_IRREG,
|
||||
"noun": NOUNS_IRREG,
|
||||
"verbs": VERBS_IRREG
|
||||
"verb": VERBS_IRREG
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
|
|||
from .syntax.parser import get_templates
|
||||
from .syntax.nonproj import PseudoProjectivity
|
||||
from .pipeline import DependencyParser, EntityRecognizer
|
||||
from .pipeline import BeamDependencyParser, BeamEntityRecognizer
|
||||
from .syntax.arc_eager import ArcEager
|
||||
from .syntax.ner import BiluoPushDown
|
||||
|
||||
|
@ -40,10 +41,7 @@ from .syntax.ner import BiluoPushDown
|
|||
class BaseDefaults(object):
|
||||
@classmethod
|
||||
def create_lemmatizer(cls, nlp=None):
|
||||
if nlp is None or nlp.path is None:
|
||||
return Lemmatizer({}, {}, {})
|
||||
else:
|
||||
return Lemmatizer.load(nlp.path, rules=cls.lemma_rules)
|
||||
return Lemmatizer(cls.lemma_index, cls.lemma_exc, cls.lemma_rules)
|
||||
|
||||
@classmethod
|
||||
def create_vocab(cls, nlp=None):
|
||||
|
@ -169,6 +167,8 @@ class BaseDefaults(object):
|
|||
stop_words = set()
|
||||
|
||||
lemma_rules = {}
|
||||
lemma_exc = {}
|
||||
lemma_index = {}
|
||||
|
||||
lex_attr_getters = {
|
||||
attrs.LOWER: lambda string: string.lower(),
|
||||
|
|
Loading…
Reference in New Issue
Block a user