diff --git a/spacy/fr/__init__.py b/spacy/fr/__init__.py
index 974f882ca..9e4735a1f 100644
--- a/spacy/fr/__init__.py
+++ b/spacy/fr/__init__.py
@@ -6,7 +6,6 @@ from ..attrs import LANG
 
 from .language_data import *
 from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
-from .tokenizer_exceptions import get_tokenizer_exceptions, TOKEN_MATCH
 
 
 class FrenchDefaults(BaseDefaults):
@@ -20,7 +19,7 @@ class FrenchDefaults(BaseDefaults):
 
     @classmethod
     def create_tokenizer(cls, nlp=None):
-        cls.tokenizer_exceptions = get_tokenizer_exceptions()
+        cls.tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         return super(FrenchDefaults, cls).create_tokenizer(nlp)
 
 
diff --git a/spacy/fr/language_data.py b/spacy/fr/language_data.py
index 7dbe4a02b..9d25644b7 100644
--- a/spacy/fr/language_data.py
+++ b/spacy/fr/language_data.py
@@ -2,9 +2,10 @@ from __future__ import unicode_literals
 
 from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
 
 
 STOP_WORDS = set(STOP_WORDS)
 
 
-__all__ = ["STOP_WORDS"]
+__all__ = ["STOP_WORDS", "TOKENIZER_EXCEPTIONS", "TOKEN_MATCH"]
 
diff --git a/spacy/fr/tokenizer_exceptions.py b/spacy/fr/tokenizer_exceptions.py
index 81a752755..342cb0e41 100644
--- a/spacy/fr/tokenizer_exceptions.py
+++ b/spacy/fr/tokenizer_exceptions.py
@@ -214,4 +214,6 @@ REGULAR_EXP.append(_URL_PATTERN)
 
 TOKEN_MATCH = re.compile('|'.join('({})'.format(m) for m in REGULAR_EXP), re.IGNORECASE).match
 
-__all__ = ("get_tokenizer_exceptions", "TOKEN_MATCH")
+TOKENIZER_EXCEPTIONS = get_tokenizer_exceptions()
+
+__all__ = ["TOKENIZER_EXCEPTIONS", "TOKEN_MATCH"]
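
A minimal usage sketch of what this refactor enables (a hedged illustration, not part of the commit): the French tokenizer exceptions are now built once at import time in tokenizer_exceptions.py and re-exported through language_data.py, so downstream code can import the TOKENIZER_EXCEPTIONS constant directly instead of calling get_tokenizer_exceptions() itself. The module path assumes the spaCy 1.x layout shown in the diff; the example inputs and printed values are illustrative assumptions.

    # Hedged sketch, assuming the spaCy 1.x layout from the diff above.
    # TOKENIZER_EXCEPTIONS is the dict produced by get_tokenizer_exceptions()
    # at import time; TOKEN_MATCH is the bound .match of the regex compiled
    # from REGULAR_EXP (which includes _URL_PATTERN).
    from spacy.fr.language_data import TOKENIZER_EXCEPTIONS, TOKEN_MATCH

    # The exceptions mapping is now plain module state and can be inspected.
    print(len(TOKENIZER_EXCEPTIONS))  # number of exception strings (illustrative)

    # TOKEN_MATCH returns a match object or None, so URL-like strings should
    # match as a single token via _URL_PATTERN.
    print(bool(TOKEN_MATCH("http://example.com")))  # expected: True
    print(bool(TOKEN_MATCH("bonjour")))  # assumption: a plain word matches no pattern, so False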