mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-11 00:32:40 +03:00
Use lemmatizer in code, not from downloaded model.
This commit is contained in:
parent
42ba740dde
commit
f70be44746
|
@@ -31,6 +31,10 @@ class English(Language):
|
||||||
tag_map = TAG_MAP
|
tag_map = TAG_MAP
|
||||||
stop_words = STOP_WORDS
|
stop_words = STOP_WORDS
|
||||||
|
|
||||||
|
lemma_rules = dict(LEMMA_RULES)
|
||||||
|
lemma_index = dict(LEMMA_INDEX)
|
||||||
|
lemma_exc = dict(LEMMA_EXC)
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, **overrides):
|
def __init__(self, **overrides):
|
||||||
# Make a special-case hack for loading the GloVe vectors, to support
|
# Make a special-case hack for loading the GloVe vectors, to support
|
||||||
|
|
|
@@ -21,7 +21,7 @@ EXC = {
|
||||||
"adj": ADJECTIVES_IRREG,
|
"adj": ADJECTIVES_IRREG,
|
||||||
"adv": ADVERBS_IRREG,
|
"adv": ADVERBS_IRREG,
|
||||||
"noun": NOUNS_IRREG,
|
"noun": NOUNS_IRREG,
|
||||||
"verbs": VERBS_IRREG
|
"verb": VERBS_IRREG
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -33,6 +33,7 @@ from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
|
||||||
from .syntax.parser import get_templates
|
from .syntax.parser import get_templates
|
||||||
from .syntax.nonproj import PseudoProjectivity
|
from .syntax.nonproj import PseudoProjectivity
|
||||||
from .pipeline import DependencyParser, EntityRecognizer
|
from .pipeline import DependencyParser, EntityRecognizer
|
||||||
|
from .pipeline import BeamDependencyParser, BeamEntityRecognizer
|
||||||
from .syntax.arc_eager import ArcEager
|
from .syntax.arc_eager import ArcEager
|
||||||
from .syntax.ner import BiluoPushDown
|
from .syntax.ner import BiluoPushDown
|
||||||
|
|
||||||
|
@@ -40,10 +41,7 @@ from .syntax.ner import BiluoPushDown
|
||||||
class BaseDefaults(object):
|
class BaseDefaults(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_lemmatizer(cls, nlp=None):
|
def create_lemmatizer(cls, nlp=None):
|
||||||
if nlp is None or nlp.path is None:
|
return Lemmatizer(cls.lemma_index, cls.lemma_exc, cls.lemma_rules)
|
||||||
return Lemmatizer({}, {}, {})
|
|
||||||
else:
|
|
||||||
return Lemmatizer.load(nlp.path, rules=cls.lemma_rules)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_vocab(cls, nlp=None):
|
def create_vocab(cls, nlp=None):
|
||||||
|
@@ -169,6 +167,8 @@ class BaseDefaults(object):
|
||||||
stop_words = set()
|
stop_words = set()
|
||||||
|
|
||||||
lemma_rules = {}
|
lemma_rules = {}
|
||||||
|
lemma_exc = {}
|
||||||
|
lemma_index = {}
|
||||||
|
|
||||||
lex_attr_getters = {
|
lex_attr_getters = {
|
||||||
attrs.LOWER: lambda string: string.lower(),
|
attrs.LOWER: lambda string: string.lower(),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user