From d48ddd6c9aa983f922d3f310eeba6a272d4c8cbd Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 1 Oct 2020 21:54:33 +0200 Subject: [PATCH] Remove default initialize lookups --- spacy/lang/da/__init__.py | 12 ------------ spacy/lang/de/__init__.py | 12 ------------ spacy/lang/el/__init__.py | 12 ------------ spacy/lang/id/__init__.py | 12 ------------ spacy/lang/lb/__init__.py | 12 ------------ spacy/lang/pt/__init__.py | 12 ------------ spacy/lang/ru/__init__.py | 12 ------------ spacy/lang/sr/__init__.py | 12 ------------ spacy/lang/ta/__init__.py | 12 ------------ spacy/lang/th/__init__.py | 7 ------- spacy/tests/parser/test_ner.py | 1 - 11 files changed, 116 deletions(-) diff --git a/spacy/lang/da/__init__.py b/spacy/lang/da/__init__.py index 7128338af..8cac30b26 100644 --- a/spacy/lang/da/__init__.py +++ b/spacy/lang/da/__init__.py @@ -3,21 +3,9 @@ from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class DanishDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS infixes = TOKENIZER_INFIXES suffixes = TOKENIZER_SUFFIXES diff --git a/spacy/lang/de/__init__.py b/spacy/lang/de/__init__.py index 99c161961..b645d3480 100644 --- a/spacy/lang/de/__init__.py +++ b/spacy/lang/de/__init__.py @@ -3,21 +3,9 @@ from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIX from .stop_words import STOP_WORDS from .syntax_iterators import SYNTAX_ITERATORS from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class GermanDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS prefixes = TOKENIZER_PREFIXES suffixes = TOKENIZER_SUFFIXES diff --git a/spacy/lang/el/__init__.py b/spacy/lang/el/__init__.py index 818405842..1a7b19914 100644 --- a/spacy/lang/el/__init__.py +++ b/spacy/lang/el/__init__.py @@ -9,21 +9,9 @@ from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIX from .lemmatizer import GreekLemmatizer from ...lookups import Lookups from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class GreekDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS prefixes = TOKENIZER_PREFIXES suffixes = TOKENIZER_SUFFIXES diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py index 46bef57ca..87373551c 100644 --- a/spacy/lang/id/__init__.py +++ b/spacy/lang/id/__init__.py @@ -4,21 +4,9 @@ from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .lex_attrs import LEX_ATTRS from .syntax_iterators import SYNTAX_ITERATORS from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class IndonesianDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS prefixes = TOKENIZER_PREFIXES suffixes = TOKENIZER_SUFFIXES diff --git a/spacy/lang/lb/__init__.py b/spacy/lang/lb/__init__.py index ead5f5d10..da6fe55d7 100644 --- a/spacy/lang/lb/__init__.py +++ b/spacy/lang/lb/__init__.py @@ -3,21 +3,9 @@ from .punctuation import TOKENIZER_INFIXES from .lex_attrs import LEX_ATTRS from .stop_words import STOP_WORDS from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class LuxembourgishDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS infixes = TOKENIZER_INFIXES lex_attr_getters = LEX_ATTRS diff --git a/spacy/lang/pt/__init__.py b/spacy/lang/pt/__init__.py index 1c95c11d9..0447099f0 100644 --- a/spacy/lang/pt/__init__.py +++ b/spacy/lang/pt/__init__.py @@ -3,21 +3,9 @@ from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class PortugueseDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS infixes = TOKENIZER_INFIXES prefixes = TOKENIZER_PREFIXES diff --git a/spacy/lang/ru/__init__.py b/spacy/lang/ru/__init__.py index 857e197e9..6436ae0c7 100644 --- a/spacy/lang/ru/__init__.py +++ b/spacy/lang/ru/__init__.py @@ -7,21 +7,9 @@ from .lex_attrs import LEX_ATTRS from .lemmatizer import RussianLemmatizer from ...language import Language from ...lookups import Lookups -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class RussianDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS lex_attr_getters = LEX_ATTRS stop_words = STOP_WORDS diff --git a/spacy/lang/sr/__init__.py b/spacy/lang/sr/__init__.py index 5da19c6f3..165e54975 100644 --- a/spacy/lang/sr/__init__.py +++ b/spacy/lang/sr/__init__.py @@ -2,21 +2,9 @@ from .stop_words import STOP_WORDS from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .lex_attrs import LEX_ATTRS from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class SerbianDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) tokenizer_exceptions = TOKENIZER_EXCEPTIONS lex_attr_getters = LEX_ATTRS stop_words = STOP_WORDS diff --git a/spacy/lang/ta/__init__.py b/spacy/lang/ta/__init__.py index 7a5a3ac8f..ac5fc7124 100644 --- a/spacy/lang/ta/__init__.py +++ b/spacy/lang/ta/__init__.py @@ -1,21 +1,9 @@ from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS from ...language import Language -from ...util import load_config_from_str - - -DEFAULT_CONFIG = """ -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] -""" class TamilDefaults(Language.Defaults): - config = load_config_from_str(DEFAULT_CONFIG) lex_attr_getters = LEX_ATTRS stop_words = STOP_WORDS diff --git a/spacy/lang/th/__init__.py b/spacy/lang/th/__init__.py index 834fe1871..219c50c1a 100644 --- a/spacy/lang/th/__init__.py +++ b/spacy/lang/th/__init__.py @@ -10,13 +10,6 @@ DEFAULT_CONFIG = """ [nlp.tokenizer] @tokenizers = "spacy.th.ThaiTokenizer" - -[initialize] - -[initialize.lookups] -@misc = "spacy.LookupsDataLoader.v1" -lang = ${nlp.lang} -tables = ["lexeme_norm"] """ diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index 78a20c1e8..b657ae2e8 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -339,7 +339,6 @@ def test_ner_warns_no_lookups(caplog): nlp.vocab.lookups = Lookups() assert not len(nlp.vocab.lookups) nlp.add_pipe("ner") - nlp.config["initialize"]["lookups"] = None with caplog.at_level(logging.DEBUG): nlp.initialize() assert "W033" in caplog.text