Move Defaults subclass to module scope (necessary for pickling)

This commit is contained in:
ines 2017-05-20 19:02:27 +02:00
parent 27de0834b2
commit 924e8506de
16 changed files with 187 additions and 158 deletions

View File

@ -13,10 +13,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Bengali(Language): class BengaliDefaults(Language.Defaults):
lang = 'bn'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'bn' lex_attr_getters[LANG] = lambda text: 'bn'
@ -30,4 +27,9 @@ class Bengali(Language):
infixes = tuple(TOKENIZER_INFIXES) infixes = tuple(TOKENIZER_INFIXES)
class Bengali(Language):
lang = 'bn'
Defaults = BengaliDefaults
__all__ = ['Bengali'] __all__ = ['Bengali']

View File

@ -10,10 +10,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Danish(Language): class DanishDefaults(Language.Defaults):
lang = 'da'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'da' lex_attr_getters[LANG] = lambda text: 'da'
@ -21,4 +18,9 @@ class Danish(Language):
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
class Danish(Language):
lang = 'da'
Defaults = DanishDefaults
__all__ = ['Danish'] __all__ = ['Danish']

View File

@ -14,10 +14,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class German(Language): class GermanDefaults(Language.Defaults):
lang = 'de'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'de' lex_attr_getters[LANG] = lambda text: 'de'
@ -31,4 +28,9 @@ class German(Language):
return Lemmatizer(LOOKUP) return Lemmatizer(LOOKUP)
class German(Language):
lang = 'de'
Defaults = GermanDefaults
__all__ = ['German'] __all__ = ['German']

View File

@ -15,10 +15,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class English(Language): class EnglishDefaults(Language.Defaults):
lang = 'en'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'en' lex_attr_getters[LANG] = lambda text: 'en'
lex_attr_getters.update(LEX_ATTRS) lex_attr_getters.update(LEX_ATTRS)
@ -33,4 +30,9 @@ class English(Language):
sytax_iterators = dict(SYNTAX_ITERATORS) sytax_iterators = dict(SYNTAX_ITERATORS)
class English(Language):
lang = 'en'
Defaults = EnglishDefaults
__all__ = ['English'] __all__ = ['English']

View File

@ -28,7 +28,7 @@ class SpanishDefaults(Language.Defaults):
class Spanish(Language): class Spanish(Language):
lang = 'es' lang = 'es'
Defaults = SpanishDefaults Defaults = SpanishDefaults
__all__ = ['Spanish'] __all__ = ['Spanish']

View File

@ -10,10 +10,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Finnish(Language): class FinnishDefaults(Language.Defaults):
lang = 'fi'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'fi' lex_attr_getters[LANG] = lambda text: 'fi'
@ -21,4 +18,9 @@ class Finnish(Language):
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
class Finnish(Language):
lang = 'fi'
Defaults = FinnishDefaults
__all__ = ['Finnish'] __all__ = ['Finnish']

View File

@ -13,10 +13,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class French(Language): class FrenchDefaults(Language.Defaults):
lang = 'fr'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'fr' lex_attr_getters[LANG] = lambda text: 'fr'
@ -31,4 +28,9 @@ class French(Language):
return Lemmatizer(LOOKUP) return Lemmatizer(LOOKUP)
class French(Language):
lang = 'fr'
Defaults = FrenchDefaults
__all__ = ['French'] __all__ = ['French']

View File

@ -9,10 +9,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Hebrew(Language): class HebrewDefaults(Language.Defaults):
lang = 'he'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'he' lex_attr_getters[LANG] = lambda text: 'he'
@ -20,4 +17,9 @@ class Hebrew(Language):
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
class Hebrew(Language):
lang = 'he'
Defaults = HebrewDefaults
__all__ = ['Hebrew'] __all__ = ['Hebrew']

View File

@ -13,10 +13,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Hungarian(Language): class HungarianDefaults(Language.Defaults):
lang = 'hu'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'hu' lex_attr_getters[LANG] = lambda text: 'hu'
@ -32,4 +29,9 @@ class Hungarian(Language):
return Lemmatizer(LOOKUP) return Lemmatizer(LOOKUP)
class Hungarian(Language):
lang = 'hu'
Defaults = HungarianDefaults
__all__ = ['Hungarian'] __all__ = ['Hungarian']

View File

@ -11,10 +11,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Italian(Language): class ItalianDefaults(Language.Defaults):
lang = 'it'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'it' lex_attr_getters[LANG] = lambda text: 'it'
@ -26,4 +23,9 @@ class Italian(Language):
return Lemmatizer(LOOKUP) return Lemmatizer(LOOKUP)
class Italian(Language):
lang = 'it'
Defaults = ItalianDefaults
__all__ = ['Italian'] __all__ = ['Italian']

View File

@ -11,10 +11,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Norwegian(Language): class NorwegianDefaults(Language.Defaults):
lang = 'nb'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'nb' lex_attr_getters[LANG] = lambda text: 'nb'
@ -22,4 +19,9 @@ class Norwegian(Language):
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
class Norwegian(Language):
lang = 'nb'
Defaults = NorwegianDefaults
__all__ = ['Norwegian'] __all__ = ['Norwegian']

View File

@ -9,11 +9,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class DutchDefaults(Language.Defaults):
class Dutch(Language):
lang = 'nl'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'nl' lex_attr_getters[LANG] = lambda text: 'nl'
@ -21,4 +17,9 @@ class Dutch(Language):
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
class Dutch(Language):
lang = 'nl'
Defaults = DutchDefaults
__all__ = ['Dutch'] __all__ = ['Dutch']

View File

@ -9,10 +9,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Polish(Language): class PolishDefaults(Language.Defaults):
lang = 'pl'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'pl' lex_attr_getters[LANG] = lambda text: 'pl'
@ -20,4 +17,9 @@ class Polish(Language):
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
class Polish(Language):
lang = 'pl'
Defaults = PolishDefaults
__all__ = ['Polish'] __all__ = ['Polish']

View File

@ -13,10 +13,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Portuguese(Language): class PortugueseDefaults(Language.Defaults):
lang = 'pt'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'pt' lex_attr_getters[LANG] = lambda text: 'pt'
lex_attr_getters.update(LEX_ATTRS) lex_attr_getters.update(LEX_ATTRS)
@ -29,4 +26,9 @@ class Portuguese(Language):
return Lemmatizer(LOOKUP) return Lemmatizer(LOOKUP)
class Portuguese(Language):
lang = 'pt'
Defaults = PortugueseDefaults
__all__ = ['Portuguese'] __all__ = ['Portuguese']

View File

@ -13,10 +13,7 @@ from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Swedish(Language): class SwedishDefaults(Language.Defaults):
lang = 'sv'
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'sv' lex_attr_getters[LANG] = lambda text: 'sv'
@ -28,4 +25,9 @@ class Swedish(Language):
return Lemmatizer(LOOKUP) return Lemmatizer(LOOKUP)
class Swedish(Language):
lang = 'sv'
Defaults = SwedishDefaults
__all__ = ['Swedish'] __all__ = ['Swedish']

View File

@ -56,11 +56,8 @@ p
from ...attrs import LANG from ...attrs import LANG
from ...util import update_exc from ...util import update_exc
class Xxxxx(Language): # create Defaults class in the module scope (necessary for pickling!)
lang = 'xx' # language ISO code class XxxxxDefaults(Language.Defaults):
# override defaults
class Defaults(Language.Defaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
lex_attr_getters[LANG] = lambda text: 'xx' # language ISO code lex_attr_getters[LANG] = lambda text: 'xx' # language ISO code
@ -71,6 +68,11 @@ p
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = set(STOP_WORDS) stop_words = set(STOP_WORDS)
# create actual Language class
class Xxxxx(Language):
lang = 'xx' # language ISO code
Defaults = XxxxxDefaults # override defaults
# set default export – this allows the language class to be lazy-loaded # set default export – this allows the language class to be lazy-loaded
__all__ = ['Xxxxx'] __all__ = ['Xxxxx']