Don't make copies of language data components

ines 2017-10-11 15:34:55 +02:00
parent eac9e99086
commit 8ce6f96180
17 changed files with 54 additions and 54 deletions
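
Every hunk below applies the same one-line pattern: the Language.Defaults attributes used to wrap the shared language data in dict(), set() or tuple() calls, which built a fresh copy of each component for every language class; after this commit they reference the module-level objects directly. A minimal, self-contained sketch of the before/after (illustrative names only, not spaCy's actual modules):

    STOP_WORDS = {'a', 'an', 'the'}     # stands in for a language data module

    class OldDefaults:
        stop_words = set(STOP_WORDS)    # before: a fresh copy per class

    class NewDefaults:
        stop_words = STOP_WORDS         # after: the shared object, no copy

    assert OldDefaults.stop_words is not STOP_WORDS
    assert NewDefaults.stop_words is STOP_WORDS

Note that lex_attr_getters is still copied with dict() in the hunks below: it is mutated per language, so each class needs its own copy.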

@@ -17,12 +17,12 @@ class BengaliDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'bn'
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
-    stop_words = set(STOP_WORDS)
-    lemma_rules = dict(LEMMA_RULES)
-    prefixes = tuple(TOKENIZER_PREFIXES)
-    suffixes = tuple(TOKENIZER_SUFFIXES)
-    infixes = tuple(TOKENIZER_INFIXES)
+    tag_map = TAG_MAP
+    stop_words = STOP_WORDS
+    lemma_rules = LEMMA_RULES
+    prefixes = TOKENIZER_PREFIXES
+    suffixes = TOKENIZER_SUFFIXES
+    infixes = TOKENIZER_INFIXES


 class Bengali(Language):

@@ -16,7 +16,7 @@ class DanishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'da'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS


 class Danish(Language):

@@ -22,11 +22,11 @@ class GermanDefaults(Language.Defaults):
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM],
                                          NORM_EXCEPTIONS, BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    infixes = tuple(TOKENIZER_INFIXES)
-    tag_map = dict(TAG_MAP)
-    stop_words = set(STOP_WORDS)
-    syntax_iterators = dict(SYNTAX_ITERATORS)
-    lemma_lookup = dict(LOOKUP)
+    infixes = TOKENIZER_INFIXES
+    tag_map = TAG_MAP
+    stop_words = STOP_WORDS
+    syntax_iterators = SYNTAX_ITERATORS
+    lemma_lookup = LOOKUP


 class German(Language):

@@ -24,14 +24,14 @@ class EnglishDefaults(Language.Defaults):
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM],
                                          BASE_NORMS, NORM_EXCEPTIONS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
-    stop_words = set(STOP_WORDS)
-    morph_rules = dict(MORPH_RULES)
-    lemma_rules = dict(LEMMA_RULES)
-    lemma_index = dict(LEMMA_INDEX)
-    lemma_exc = dict(LEMMA_EXC)
-    lemma_lookup = dict(LOOKUP)
-    syntax_iterators = dict(SYNTAX_ITERATORS)
+    tag_map = TAG_MAP
+    stop_words = STOP_WORDS
+    morph_rules = MORPH_RULES
+    lemma_rules = LEMMA_RULES
+    lemma_index = LEMMA_INDEX
+    lemma_exc = LEMMA_EXC
+    lemma_lookup = LOOKUP
+    syntax_iterators = SYNTAX_ITERATORS


 class English(Language):

@@ -19,10 +19,10 @@ class SpanishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'es'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
-    stop_words = set(STOP_WORDS)
-    sytax_iterators = dict(SYNTAX_ITERATORS)
-    lemma_lookup = dict(LOOKUP)
+    tag_map = TAG_MAP
+    stop_words = STOP_WORDS
+    sytax_iterators = SYNTAX_ITERATORS
+    lemma_lookup = LOOKUP


 class Spanish(Language):

@@ -16,7 +16,7 @@ class FinnishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'fi'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS


 class Finnish(Language):

@@ -21,12 +21,12 @@ class FrenchDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'fr'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
-    infixes = tuple(TOKENIZER_INFIXES)
-    suffixes = tuple(TOKENIZER_SUFFIXES)
+    stop_words = STOP_WORDS
+    infixes = TOKENIZER_INFIXES
+    suffixes = TOKENIZER_SUFFIXES
     token_match = TOKEN_MATCH
-    syntax_iterators = dict(SYNTAX_ITERATORS)
-    lemma_lookup = dict(LOOKUP)
+    syntax_iterators = SYNTAX_ITERATORS
+    lemma_lookup = LOOKUP


 class French(Language):

@@ -13,7 +13,7 @@ class HebrewDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'he'
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS


 class Hebrew(Language):

@@ -18,12 +18,12 @@ class HungarianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'hu'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
-    prefixes = tuple(TOKENIZER_PREFIXES)
-    suffixes = tuple(TOKENIZER_SUFFIXES)
-    infixes = tuple(TOKENIZER_INFIXES)
+    stop_words = STOP_WORDS
+    prefixes = TOKENIZER_PREFIXES
+    suffixes = TOKENIZER_SUFFIXES
+    infixes = TOKENIZER_INFIXES
     token_match = TOKEN_MATCH
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP


 class Hungarian(Language):

@@ -20,12 +20,12 @@ class IndonesianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'id'
     lex_attr_getters.update(LEX_ATTRS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
-    prefixes = tuple(TOKENIZER_PREFIXES)
-    suffixes = tuple(TOKENIZER_SUFFIXES)
-    infixes = tuple(TOKENIZER_INFIXES)
-    syntax_iterators = dict(SYNTAX_ITERATORS)
-    lemma_lookup = dict(LOOKUP)
+    stop_words = STOP_WORDS
+    prefixes = TOKENIZER_PREFIXES
+    suffixes = TOKENIZER_SUFFIXES
+    infixes = TOKENIZER_INFIXES
+    syntax_iterators = SYNTAX_ITERATORS
+    lemma_lookup = LOOKUP


 class Indonesian(Language):

@@ -16,8 +16,8 @@ class ItalianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'it'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
-    lemma_lookup = dict(LOOKUP)
+    stop_words = STOP_WORDS
+    lemma_lookup = LOOKUP


 class Italian(Language):

@@ -17,7 +17,7 @@ class NorwegianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'nb'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS


 class Norwegian(Language):

@@ -17,7 +17,7 @@ class DutchDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'nl'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS


 class Dutch(Language):

@@ -16,7 +16,7 @@ class PolishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'pl'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS


 class Polish(Language):

@@ -19,8 +19,8 @@ class PortugueseDefaults(Language.Defaults):
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     lex_attr_getters.update(LEX_ATTRS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
-    lemma_lookup = dict(LOOKUP)
+    stop_words = STOP_WORDS
+    lemma_lookup = LOOKUP


 class Portuguese(Language):

@@ -18,9 +18,9 @@ class SwedishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'sv'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
-    lemma_rules = dict(LEMMA_RULES)
-    lemma_lookup = dict(LOOKUP)
+    stop_words = STOP_WORDS
+    lemma_rules = LEMMA_RULES
+    lemma_lookup = LOOKUP


 class Swedish(Language):

@@ -17,8 +17,8 @@ class ThaiDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'th'
     tokenizer_exceptions = dict(TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
-    stop_words = set(STOP_WORDS)
+    tag_map = TAG_MAP
+    stop_words = STOP_WORDS


 class Thai(Language):