From 8ce6f96180ab37f7f4ec0676868b0d8b3ae18787 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 11 Oct 2017 15:34:55 +0200 Subject: [PATCH] Don't make copies of language data components --- spacy/lang/bn/__init__.py | 12 ++++++------ spacy/lang/da/__init__.py | 2 +- spacy/lang/de/__init__.py | 10 +++++----- spacy/lang/en/__init__.py | 16 ++++++++-------- spacy/lang/es/__init__.py | 8 ++++---- spacy/lang/fi/__init__.py | 2 +- spacy/lang/fr/__init__.py | 10 +++++----- spacy/lang/he/__init__.py | 2 +- spacy/lang/hu/__init__.py | 10 +++++----- spacy/lang/id/__init__.py | 12 ++++++------ spacy/lang/it/__init__.py | 4 ++-- spacy/lang/nb/__init__.py | 2 +- spacy/lang/nl/__init__.py | 2 +- spacy/lang/pl/__init__.py | 2 +- spacy/lang/pt/__init__.py | 4 ++-- spacy/lang/sv/__init__.py | 6 +++--- spacy/lang/th/__init__.py | 4 ++-- 17 files changed, 54 insertions(+), 54 deletions(-) diff --git a/spacy/lang/bn/__init__.py b/spacy/lang/bn/__init__.py index 1a76123ea..ff560afae 100644 --- a/spacy/lang/bn/__init__.py +++ b/spacy/lang/bn/__init__.py @@ -17,12 +17,12 @@ class BengaliDefaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters[LANG] = lambda text: 'bn' tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - tag_map = dict(TAG_MAP) - stop_words = set(STOP_WORDS) - lemma_rules = dict(LEMMA_RULES) - prefixes = tuple(TOKENIZER_PREFIXES) - suffixes = tuple(TOKENIZER_SUFFIXES) - infixes = tuple(TOKENIZER_INFIXES) + tag_map = TAG_MAP + stop_words = STOP_WORDS + lemma_rules = LEMMA_RULES + prefixes = TOKENIZER_PREFIXES + suffixes = TOKENIZER_SUFFIXES + infixes = TOKENIZER_INFIXES class Bengali(Language): diff --git a/spacy/lang/da/__init__.py b/spacy/lang/da/__init__.py index b255a04b9..86e47c00d 100644 --- a/spacy/lang/da/__init__.py +++ b/spacy/lang/da/__init__.py @@ -16,7 +16,7 @@ class DanishDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'da' lex_attr_getters[NORM] = 
add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) + stop_words = STOP_WORDS class Danish(Language): diff --git a/spacy/lang/de/__init__.py b/spacy/lang/de/__init__.py index e56bab844..e8e7a12db 100644 --- a/spacy/lang/de/__init__.py +++ b/spacy/lang/de/__init__.py @@ -22,11 +22,11 @@ class GermanDefaults(Language.Defaults): lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], NORM_EXCEPTIONS, BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - infixes = tuple(TOKENIZER_INFIXES) - tag_map = dict(TAG_MAP) - stop_words = set(STOP_WORDS) - syntax_iterators = dict(SYNTAX_ITERATORS) - lemma_lookup = dict(LOOKUP) + infixes = TOKENIZER_INFIXES + tag_map = TAG_MAP + stop_words = STOP_WORDS + syntax_iterators = SYNTAX_ITERATORS + lemma_lookup = LOOKUP class German(Language): diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py index fffac6467..63fd9c2b4 100644 --- a/spacy/lang/en/__init__.py +++ b/spacy/lang/en/__init__.py @@ -24,14 +24,14 @@ class EnglishDefaults(Language.Defaults): lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS, NORM_EXCEPTIONS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - tag_map = dict(TAG_MAP) - stop_words = set(STOP_WORDS) - morph_rules = dict(MORPH_RULES) - lemma_rules = dict(LEMMA_RULES) - lemma_index = dict(LEMMA_INDEX) - lemma_exc = dict(LEMMA_EXC) - lemma_lookup = dict(LOOKUP) - syntax_iterators = dict(SYNTAX_ITERATORS) + tag_map = TAG_MAP + stop_words = STOP_WORDS + morph_rules = MORPH_RULES + lemma_rules = LEMMA_RULES + lemma_index = LEMMA_INDEX + lemma_exc = LEMMA_EXC + lemma_lookup = LOOKUP + syntax_iterators = SYNTAX_ITERATORS class English(Language): diff --git a/spacy/lang/es/__init__.py b/spacy/lang/es/__init__.py index 4246a0703..661f0bbec 100644 --- 
a/spacy/lang/es/__init__.py +++ b/spacy/lang/es/__init__.py @@ -19,10 +19,10 @@ class SpanishDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'es' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - tag_map = dict(TAG_MAP) - stop_words = set(STOP_WORDS) - sytax_iterators = dict(SYNTAX_ITERATORS) - lemma_lookup = dict(LOOKUP) + tag_map = TAG_MAP + stop_words = STOP_WORDS + syntax_iterators = SYNTAX_ITERATORS + lemma_lookup = LOOKUP class Spanish(Language): diff --git a/spacy/lang/fi/__init__.py b/spacy/lang/fi/__init__.py index 2eb40851b..7f74495c5 100644 --- a/spacy/lang/fi/__init__.py +++ b/spacy/lang/fi/__init__.py @@ -16,7 +16,7 @@ class FinnishDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'fi' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) + stop_words = STOP_WORDS class Finnish(Language): diff --git a/spacy/lang/fr/__init__.py b/spacy/lang/fr/__init__.py index 0f2a60e3e..42acd0736 100644 --- a/spacy/lang/fr/__init__.py +++ b/spacy/lang/fr/__init__.py @@ -21,12 +21,12 @@ class FrenchDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'fr' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) - infixes = tuple(TOKENIZER_INFIXES) - suffixes = tuple(TOKENIZER_SUFFIXES) + stop_words = STOP_WORDS + infixes = TOKENIZER_INFIXES + suffixes = TOKENIZER_SUFFIXES token_match = TOKEN_MATCH - syntax_iterators = dict(SYNTAX_ITERATORS) - lemma_lookup = dict(LOOKUP) + syntax_iterators = SYNTAX_ITERATORS + lemma_lookup = LOOKUP class French(Language): diff --git a/spacy/lang/he/__init__.py b/spacy/lang/he/__init__.py index 
b815b3273..807794fee 100644 --- a/spacy/lang/he/__init__.py +++ b/spacy/lang/he/__init__.py @@ -13,7 +13,7 @@ class HebrewDefaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters[LANG] = lambda text: 'he' tokenizer_exceptions = update_exc(BASE_EXCEPTIONS) - stop_words = set(STOP_WORDS) + stop_words = STOP_WORDS class Hebrew(Language): diff --git a/spacy/lang/hu/__init__.py b/spacy/lang/hu/__init__.py index fd039a8eb..35b047900 100644 --- a/spacy/lang/hu/__init__.py +++ b/spacy/lang/hu/__init__.py @@ -18,12 +18,12 @@ class HungarianDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'hu' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) - prefixes = tuple(TOKENIZER_PREFIXES) - suffixes = tuple(TOKENIZER_SUFFIXES) - infixes = tuple(TOKENIZER_INFIXES) + stop_words = STOP_WORDS + prefixes = TOKENIZER_PREFIXES + suffixes = TOKENIZER_SUFFIXES + infixes = TOKENIZER_INFIXES token_match = TOKEN_MATCH - lemma_lookup = dict(LOOKUP) + lemma_lookup = LOOKUP class Hungarian(Language): diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py index 29fe86a01..2f21e73cf 100644 --- a/spacy/lang/id/__init__.py +++ b/spacy/lang/id/__init__.py @@ -20,12 +20,12 @@ class IndonesianDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'id' lex_attr_getters.update(LEX_ATTRS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) - prefixes = tuple(TOKENIZER_PREFIXES) - suffixes = tuple(TOKENIZER_SUFFIXES) - infixes = tuple(TOKENIZER_INFIXES) - syntax_iterators = dict(SYNTAX_ITERATORS) - lemma_lookup = dict(LOOKUP) + stop_words = STOP_WORDS + prefixes = TOKENIZER_PREFIXES + suffixes = TOKENIZER_SUFFIXES + infixes = TOKENIZER_INFIXES + syntax_iterators = SYNTAX_ITERATORS + lemma_lookup = LOOKUP class 
Indonesian(Language): diff --git a/spacy/lang/it/__init__.py b/spacy/lang/it/__init__.py index c19cb6d39..6bc47ce92 100644 --- a/spacy/lang/it/__init__.py +++ b/spacy/lang/it/__init__.py @@ -16,8 +16,8 @@ class ItalianDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'it' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS) - stop_words = set(STOP_WORDS) - lemma_lookup = dict(LOOKUP) + stop_words = STOP_WORDS + lemma_lookup = LOOKUP class Italian(Language): diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py index 8804f7424..4250e6809 100644 --- a/spacy/lang/nb/__init__.py +++ b/spacy/lang/nb/__init__.py @@ -17,7 +17,7 @@ class NorwegianDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'nb' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) + stop_words = STOP_WORDS class Norwegian(Language): diff --git a/spacy/lang/nl/__init__.py b/spacy/lang/nl/__init__.py index 29cbb4617..13786a7bc 100644 --- a/spacy/lang/nl/__init__.py +++ b/spacy/lang/nl/__init__.py @@ -17,7 +17,7 @@ class DutchDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'nl' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS) - stop_words = set(STOP_WORDS) + stop_words = STOP_WORDS class Dutch(Language): diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py index 22e103246..80011f9d8 100644 --- a/spacy/lang/pl/__init__.py +++ b/spacy/lang/pl/__init__.py @@ -16,7 +16,7 @@ class PolishDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'pl' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, 
TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) + stop_words = STOP_WORDS class Polish(Language): diff --git a/spacy/lang/pt/__init__.py b/spacy/lang/pt/__init__.py index 6366a25c1..2a8323597 100644 --- a/spacy/lang/pt/__init__.py +++ b/spacy/lang/pt/__init__.py @@ -19,8 +19,8 @@ class PortugueseDefaults(Language.Defaults): lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) lex_attr_getters.update(LEX_ATTRS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) - lemma_lookup = dict(LOOKUP) + stop_words = STOP_WORDS + lemma_lookup = LOOKUP class Portuguese(Language): diff --git a/spacy/lang/sv/__init__.py b/spacy/lang/sv/__init__.py index 27da9024e..224c105d7 100644 --- a/spacy/lang/sv/__init__.py +++ b/spacy/lang/sv/__init__.py @@ -18,9 +18,9 @@ class SwedishDefaults(Language.Defaults): lex_attr_getters[LANG] = lambda text: 'sv' lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS) tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - stop_words = set(STOP_WORDS) - lemma_rules = dict(LEMMA_RULES) - lemma_lookup = dict(LOOKUP) + stop_words = STOP_WORDS + lemma_rules = LEMMA_RULES + lemma_lookup = LOOKUP class Swedish(Language): diff --git a/spacy/lang/th/__init__.py b/spacy/lang/th/__init__.py index e640fc4ef..bedec46c8 100644 --- a/spacy/lang/th/__init__.py +++ b/spacy/lang/th/__init__.py @@ -17,8 +17,8 @@ class ThaiDefaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) lex_attr_getters[LANG] = lambda text: 'th' tokenizer_exceptions = dict(TOKENIZER_EXCEPTIONS) - tag_map = dict(TAG_MAP) - stop_words = set(STOP_WORDS) + tag_map = TAG_MAP + stop_words = STOP_WORDS class Thai(Language):