Mirror of https://github.com/explosion/spaCy.git
Synced 2025-01-24 16:24:16 +03:00
Merge branch 'french-tokenizer-exceptions'
This commit is contained in: commit cc9b2b74e3
@@ -8,7 +8,6 @@ from ..language_data.tokenizer_exceptions import _URL_PATTERN
 from ..language_data.punctuation import ALPHA_LOWER
 
 from .punctuation import ELISION, HYPHENS
-from ._tokenizer_exceptions_list import BASE_EXCEPTIONS
 
 from ..symbols import *
 
@@ -18,13 +17,9 @@ import re
 
 
 def get_exceptions():
+    from ._tokenizer_exceptions_list import BASE_EXCEPTIONS
     return BASE_EXCEPTIONS
 
-# with io.open(os.path.join(os.path.dirname(__file__), 'resources/tokenizer_exceptions'),
-#     'rt', encoding='utf8') as f:
-#     for line in f:
-#         yield line.strip('\n')
-
 
 def upper_first_letter(text):
     if len(text) == 0:
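
Note: the hunk above defers loading of the large BASE_EXCEPTIONS list by moving its import from module scope into get_exceptions(), so the exceptions module is only read when the function is called. A minimal standalone sketch of that deferred-import pattern, using the standard library's string module as a stand-in for spaCy's _tokenizer_exceptions_list (illustrative only, not the actual package layout):

# lazy_exceptions.py -- sketch of the deferral shown in the diff above.
def get_exceptions():
    # Deferred import: the stand-in module is only loaded when this function
    # is actually called, not when this file is imported.
    from string import ascii_lowercase
    return list(ascii_lowercase)

if __name__ == '__main__':
    print(get_exceptions()[:5])  # ['a', 'b', 'c', 'd', 'e']
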
@@ -217,6 +212,4 @@ REGULAR_EXP.append(_URL_PATTERN)
 
 TOKEN_MATCH = re.compile('|'.join('(?:{})'.format(m) for m in REGULAR_EXP), re.IGNORECASE).match
 
-#TOKENIZER_EXCEPTIONS = get_tokenizer_exceptions()
-
 __all__ = ["get_tokenizer_exceptions", "TOKEN_MATCH"]
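
Note: TOKEN_MATCH above is the bound match method of one regex built by wrapping every entry of REGULAR_EXP in a non-capturing group and joining them with '|'. A small self-contained sketch of that construction, with made-up patterns standing in for the French-specific ones defined earlier in the file:

import re

# Made-up stand-ins; the real file fills REGULAR_EXP with French-specific
# patterns and then appends _URL_PATTERN.
REGULAR_EXP = [r"\d{1,2}h\d{2}", r"https?://\S+"]

# Same construction as in the diff: each pattern becomes a non-capturing
# alternative, and .match anchors the test at the start of the candidate token.
TOKEN_MATCH = re.compile('|'.join('(?:{})'.format(m) for m in REGULAR_EXP),
                         re.IGNORECASE).match

print(bool(TOKEN_MATCH("18h30")))             # True
print(bool(TOKEN_MATCH("HTTPS://spacy.io")))  # True (re.IGNORECASE)
print(bool(TOKEN_MATCH("bonjour")))           # False
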