Merge branch 'french-tokenizer-exceptions'

This commit is contained in:
Matthew Honnibal 2017-02-27 11:44:39 +01:00
commit cc9b2b74e3

View File

@@ -8,7 +8,6 @@ from ..language_data.tokenizer_exceptions import _URL_PATTERN
from ..language_data.punctuation import ALPHA_LOWER
from .punctuation import ELISION, HYPHENS
from ._tokenizer_exceptions_list import BASE_EXCEPTIONS
from ..symbols import *
@@ -18,13 +17,9 @@ import re
def get_exceptions():
    """Return the base list of French tokenizer exception strings.

    The exceptions previously lived in an on-disk
    ``resources/tokenizer_exceptions`` file; they are now pre-built in the
    ``_tokenizer_exceptions_list`` module, so this simply returns that list.
    The redundant function-local import was removed — ``BASE_EXCEPTIONS`` is
    already imported at module level — and the dead commented-out
    file-reading code was deleted.
    """
    return BASE_EXCEPTIONS
def upper_first_letter(text):
if len(text) == 0:
@@ -217,6 +212,4 @@ REGULAR_EXP.append(_URL_PATTERN)
# Fold every exception pattern into one non-capturing alternation; matching
# is case-insensitive so exceptions apply regardless of capitalisation.
_combined_pattern = '|'.join('(?:{})'.format(pattern) for pattern in REGULAR_EXP)
TOKEN_MATCH = re.compile(_combined_pattern, re.IGNORECASE).match

#TOKENIZER_EXCEPTIONS = get_tokenizer_exceptions()
__all__ = ["get_tokenizer_exceptions", "TOKEN_MATCH"]