Add lemmatizer data as variable on language data

Don't create lookup lemmatizer within Language class and just pass in
the data so it can be set on Token creation
This commit is contained in:
ines 2017-10-11 02:24:58 +02:00
parent 0c2343d73a
commit 417d45f5d0
9 changed files with 11 additions and 41 deletions

View File

@ -12,7 +12,6 @@ from .syntax_iterators import SYNTAX_ITERATORS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -27,10 +26,7 @@ class GermanDefaults(Language.Defaults):
tag_map = dict(TAG_MAP)
stop_words = set(STOP_WORDS)
syntax_iterators = dict(SYNTAX_ITERATORS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class German(Language):

View File

@ -7,7 +7,7 @@ from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .morph_rules import MORPH_RULES
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC, LOOKUP
from .syntax_iterators import SYNTAX_ITERATORS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
@ -30,6 +30,7 @@ class EnglishDefaults(Language.Defaults):
lemma_rules = dict(LEMMA_RULES)
lemma_index = dict(LEMMA_INDEX)
lemma_exc = dict(LEMMA_EXC)
lemma_lookup = dict(LOOKUP)
syntax_iterators = dict(SYNTAX_ITERATORS)

View File

@ -10,7 +10,6 @@ from .syntax_iterators import SYNTAX_ITERATORS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -23,10 +22,7 @@ class SpanishDefaults(Language.Defaults):
tag_map = dict(TAG_MAP)
stop_words = set(STOP_WORDS)
sytax_iterators = dict(SYNTAX_ITERATORS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class Spanish(Language):

View File

@ -11,7 +11,6 @@ from .syntax_iterators import SYNTAX_ITERATORS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -27,10 +26,7 @@ class FrenchDefaults(Language.Defaults):
suffixes = tuple(TOKENIZER_SUFFIXES)
token_match = TOKEN_MATCH
syntax_iterators = dict(SYNTAX_ITERATORS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class French(Language):

View File

@ -9,7 +9,6 @@ from .lemmatizer import LOOKUP
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -24,10 +23,7 @@ class HungarianDefaults(Language.Defaults):
suffixes = tuple(TOKENIZER_SUFFIXES)
infixes = tuple(TOKENIZER_INFIXES)
token_match = TOKEN_MATCH
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class Hungarian(Language):

View File

@ -11,7 +11,6 @@ from .syntax_iterators import SYNTAX_ITERATORS
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG
from ...util import update_exc
@ -26,10 +25,7 @@ class IndonesianDefaults(Language.Defaults):
suffixes = tuple(TOKENIZER_SUFFIXES)
infixes = tuple(TOKENIZER_INFIXES)
syntax_iterators = dict(SYNTAX_ITERATORS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class Indonesian(Language):

View File

@ -7,7 +7,6 @@ from .lemmatizer import LOOKUP
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -18,10 +17,7 @@ class ItalianDefaults(Language.Defaults):
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
stop_words = set(STOP_WORDS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class Italian(Language):

View File

@ -9,7 +9,6 @@ from .lemmatizer import LOOKUP
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -21,10 +20,7 @@ class PortugueseDefaults(Language.Defaults):
lex_attr_getters.update(LEX_ATTRS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = set(STOP_WORDS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_lookup = dict(LOOKUP)
class Portuguese(Language):

View File

@ -9,7 +9,6 @@ from .lemmatizer import LEMMA_RULES, LOOKUP
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language
from ...lemmatizerlookup import Lemmatizer
from ...attrs import LANG, NORM
from ...util import update_exc, add_lookups
@ -20,10 +19,8 @@ class SwedishDefaults(Language.Defaults):
lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = set(STOP_WORDS)
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOKUP)
lemma_rules = dict(LEMMA_RULES)
lemma_lookup = dict(LOOKUP)
class Swedish(Language):