From e7e22d8be65265573edad3b8fcaca80e77c9038d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 27 Feb 2017 11:34:48 +0100 Subject: [PATCH 1/2] Move import within get_exceptions() function, to speed import --- spacy/fr/tokenizer_exceptions.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/spacy/fr/tokenizer_exceptions.py b/spacy/fr/tokenizer_exceptions.py index 8f8dcf0b0..e959d05f4 100644 --- a/spacy/fr/tokenizer_exceptions.py +++ b/spacy/fr/tokenizer_exceptions.py @@ -8,7 +8,6 @@ from ..language_data.tokenizer_exceptions import _URL_PATTERN from ..language_data.punctuation import ALPHA_LOWER from .punctuation import ELISION, HYPHENS -from ._tokenizer_exceptions_list import BASE_EXCEPTIONS from ..symbols import * @@ -18,13 +17,9 @@ import re def get_exceptions(): + from ._tokenizer_exceptions_list import BASE_EXCEPTIONS return BASE_EXCEPTIONS - # with io.open(os.path.join(os.path.dirname(__file__), 'resources/tokenizer_exceptions'), - # 'rt', encoding='utf8') as f: - # for line in f: - # yield line.strip('\n') - def upper_first_letter(text): if len(text) == 0: From bd4375a2e64522444806e4194b04b04932347d97 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 27 Feb 2017 11:44:26 +0100 Subject: [PATCH 2/2] Remove comment --- spacy/fr/tokenizer_exceptions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/spacy/fr/tokenizer_exceptions.py b/spacy/fr/tokenizer_exceptions.py index e959d05f4..84a75f7fb 100644 --- a/spacy/fr/tokenizer_exceptions.py +++ b/spacy/fr/tokenizer_exceptions.py @@ -212,6 +212,4 @@ REGULAR_EXP.append(_URL_PATTERN) TOKEN_MATCH = re.compile('|'.join('(?:{})'.format(m) for m in REGULAR_EXP), re.IGNORECASE).match -#TOKENIZER_EXCEPTIONS = get_tokenizer_exceptions() - __all__ = ["get_tokenizer_exceptions", "TOKEN_MATCH"]