diff --git a/spacy/fr/tokenizer_exceptions.py b/spacy/fr/tokenizer_exceptions.py index 8f8dcf0b0..84a75f7fb 100644 --- a/spacy/fr/tokenizer_exceptions.py +++ b/spacy/fr/tokenizer_exceptions.py @@ -8,7 +8,6 @@ from ..language_data.tokenizer_exceptions import _URL_PATTERN from ..language_data.punctuation import ALPHA_LOWER from .punctuation import ELISION, HYPHENS -from ._tokenizer_exceptions_list import BASE_EXCEPTIONS from ..symbols import * @@ -18,13 +17,9 @@ import re def get_exceptions(): + from ._tokenizer_exceptions_list import BASE_EXCEPTIONS return BASE_EXCEPTIONS - # with io.open(os.path.join(os.path.dirname(__file__), 'resources/tokenizer_exceptions'), - # 'rt', encoding='utf8') as f: - # for line in f: - # yield line.strip('\n') - def upper_first_letter(text): if len(text) == 0: @@ -217,6 +212,4 @@ REGULAR_EXP.append(_URL_PATTERN) TOKEN_MATCH = re.compile('|'.join('(?:{})'.format(m) for m in REGULAR_EXP), re.IGNORECASE).match -#TOKENIZER_EXCEPTIONS = get_tokenizer_exceptions() - __all__ = ["get_tokenizer_exceptions", "TOKEN_MATCH"]