diff --git a/spacy/lang/en/punctuation.py b/spacy/lang/en/punctuation.py index 67e3e80e5..5d3eb792e 100644 --- a/spacy/lang/en/punctuation.py +++ b/spacy/lang/en/punctuation.py @@ -1,5 +1,5 @@ from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS -from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT +from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA _infixes = ( LIST_ELLIPSES diff --git a/spacy/lang/sv/tokenizer_exceptions.py b/spacy/lang/sv/tokenizer_exceptions.py index 64206f2f2..ce7db895a 100644 --- a/spacy/lang/sv/tokenizer_exceptions.py +++ b/spacy/lang/sv/tokenizer_exceptions.py @@ -151,6 +151,6 @@ for orth in ABBREVIATIONS: # Sentences ending in "i." (as in "... peka i."), "m." (as in "...än 2000 m."), # should be tokenized as two separate tokens. for orth in ["i", "m"]: - _exc[orth + "."] = [{ORTH: orth, NORM: orth, NORM: orth}, {ORTH: "."}] + _exc[orth + "."] = [{ORTH: orth, NORM: orth}, {ORTH: "."}] TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)