Tidy up [ci skip]

This commit is contained in:
Ines Montani 2020-07-25 13:00:49 +02:00
parent 4a0a692875
commit 49f27a2a7b
2 changed files with 2 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
_infixes = ( _infixes = (
LIST_ELLIPSES LIST_ELLIPSES

View File

@@ -151,6 +151,6 @@ for orth in ABBREVIATIONS:
# Sentences ending in "i." (as in "... peka i."), "m." (as in "...än 2000 m."), # Sentences ending in "i." (as in "... peka i."), "m." (as in "...än 2000 m."),
# should be tokenized as two separate tokens. # should be tokenized as two separate tokens.
for orth in ["i", "m"]: for orth in ["i", "m"]:
_exc[orth + "."] = [{ORTH: orth, NORM: orth, NORM: orth}, {ORTH: "."}] _exc[orth + "."] = [{ORTH: orth, NORM: orth}, {ORTH: "."}]
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)