Mirror of https://github.com/explosion/spaCy.git
Don't make copies of language data components

parent eac9e99086
commit 8ce6f96180
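The change is the same across every language module: class-level attributes on the `Language.Defaults` subclasses that previously rebuilt shared data with `dict()`, `set()`, or `tuple()` now reference the module-level objects directly, so importing a language no longer duplicates its data. A minimal sketch of the before/after pattern, using placeholder data and class names (`OldDefaults`, `NewDefaults`) rather than the real spaCy components:

# Placeholder stand-ins for a language's shared data; the real names
# (STOP_WORDS, TAG_MAP, ...) live in per-language submodules.
STOP_WORDS = {"a", "an", "the"}
TAG_MAP = {"NN": {"pos": "NOUN"}}


class OldDefaults:
    stop_words = set(STOP_WORDS)   # pattern before: a fresh copy per class
    tag_map = dict(TAG_MAP)


class NewDefaults:
    stop_words = STOP_WORDS        # pattern after: a shared reference
    tag_map = TAG_MAP


assert OldDefaults.stop_words is not STOP_WORDS  # copy: distinct object
assert NewDefaults.stop_words is STOP_WORDS      # no copy: same object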
@@ -17,12 +17,12 @@ class BengaliDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'bn'
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
+    tag_map = TAG_MAP
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    lemma_rules = dict(LEMMA_RULES)
+    lemma_rules = LEMMA_RULES
-    prefixes = tuple(TOKENIZER_PREFIXES)
+    prefixes = TOKENIZER_PREFIXES
-    suffixes = tuple(TOKENIZER_SUFFIXES)
+    suffixes = TOKENIZER_SUFFIXES
-    infixes = tuple(TOKENIZER_INFIXES)
+    infixes = TOKENIZER_INFIXES
 
 
 class Bengali(Language):

@@ -16,7 +16,7 @@ class DanishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'da'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Danish(Language):

@@ -22,11 +22,11 @@ class GermanDefaults(Language.Defaults):
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM],
                                          NORM_EXCEPTIONS, BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    infixes = tuple(TOKENIZER_INFIXES)
+    infixes = TOKENIZER_INFIXES
-    tag_map = dict(TAG_MAP)
+    tag_map = TAG_MAP
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    syntax_iterators = dict(SYNTAX_ITERATORS)
+    syntax_iterators = SYNTAX_ITERATORS
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class German(Language):

@@ -24,14 +24,14 @@ class EnglishDefaults(Language.Defaults):
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM],
                                          BASE_NORMS, NORM_EXCEPTIONS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
+    tag_map = TAG_MAP
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    morph_rules = dict(MORPH_RULES)
+    morph_rules = MORPH_RULES
-    lemma_rules = dict(LEMMA_RULES)
+    lemma_rules = LEMMA_RULES
-    lemma_index = dict(LEMMA_INDEX)
+    lemma_index = LEMMA_INDEX
-    lemma_exc = dict(LEMMA_EXC)
+    lemma_exc = LEMMA_EXC
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
-    syntax_iterators = dict(SYNTAX_ITERATORS)
+    syntax_iterators = SYNTAX_ITERATORS
 
 
 class English(Language):

@@ -19,10 +19,10 @@ class SpanishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'es'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
+    tag_map = TAG_MAP
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    syntax_iterators = dict(SYNTAX_ITERATORS)
+    syntax_iterators = SYNTAX_ITERATORS
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class Spanish(Language):

@@ -16,7 +16,7 @@ class FinnishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'fi'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Finnish(Language):

@@ -21,12 +21,12 @@ class FrenchDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'fr'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    infixes = tuple(TOKENIZER_INFIXES)
+    infixes = TOKENIZER_INFIXES
-    suffixes = tuple(TOKENIZER_SUFFIXES)
+    suffixes = TOKENIZER_SUFFIXES
     token_match = TOKEN_MATCH
-    syntax_iterators = dict(SYNTAX_ITERATORS)
+    syntax_iterators = SYNTAX_ITERATORS
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class French(Language):

@@ -13,7 +13,7 @@ class HebrewDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'he'
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Hebrew(Language):

@@ -18,12 +18,12 @@ class HungarianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'hu'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    prefixes = tuple(TOKENIZER_PREFIXES)
+    prefixes = TOKENIZER_PREFIXES
-    suffixes = tuple(TOKENIZER_SUFFIXES)
+    suffixes = TOKENIZER_SUFFIXES
-    infixes = tuple(TOKENIZER_INFIXES)
+    infixes = TOKENIZER_INFIXES
     token_match = TOKEN_MATCH
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class Hungarian(Language):

@@ -20,12 +20,12 @@ class IndonesianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'id'
     lex_attr_getters.update(LEX_ATTRS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    prefixes = tuple(TOKENIZER_PREFIXES)
+    prefixes = TOKENIZER_PREFIXES
-    suffixes = tuple(TOKENIZER_SUFFIXES)
+    suffixes = TOKENIZER_SUFFIXES
-    infixes = tuple(TOKENIZER_INFIXES)
+    infixes = TOKENIZER_INFIXES
-    syntax_iterators = dict(SYNTAX_ITERATORS)
+    syntax_iterators = SYNTAX_ITERATORS
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class Indonesian(Language):

@@ -16,8 +16,8 @@ class ItalianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'it'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class Italian(Language):

@@ -17,7 +17,7 @@ class NorwegianDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'nb'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Norwegian(Language):

@@ -17,7 +17,7 @@ class DutchDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'nl'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Dutch(Language):

@@ -16,7 +16,7 @@ class PolishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'pl'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Polish(Language):

@@ -19,8 +19,8 @@ class PortugueseDefaults(Language.Defaults):
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     lex_attr_getters.update(LEX_ATTRS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class Portuguese(Language):

@@ -18,9 +18,9 @@ class SwedishDefaults(Language.Defaults):
     lex_attr_getters[LANG] = lambda text: 'sv'
     lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
-    lemma_rules = dict(LEMMA_RULES)
+    lemma_rules = LEMMA_RULES
-    lemma_lookup = dict(LOOKUP)
+    lemma_lookup = LOOKUP
 
 
 class Swedish(Language):

@@ -17,8 +17,8 @@ class ThaiDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'th'
     tokenizer_exceptions = dict(TOKENIZER_EXCEPTIONS)
-    tag_map = dict(TAG_MAP)
+    tag_map = TAG_MAP
-    stop_words = set(STOP_WORDS)
+    stop_words = STOP_WORDS
 
 
 class Thai(Language):
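As a spot check on the intended effect, the sketch below (hypothetical, assuming a spaCy checkout that includes this commit) confirms that a language class now references the shared module-level data instead of a private copy, taking Danish as the example:

# Hypothetical verification against a checkout containing this commit.
from spacy.lang.da import Danish
from spacy.lang.da.stop_words import STOP_WORDS

# Before this commit, Defaults.stop_words was set(STOP_WORDS) -- a copy.
# After it, the class attribute is the shared module-level set itself.
assert Danish.Defaults.stop_words is STOP_WORDS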