[issue 805] Fix issue

This commit is contained in:
Michael Wallin 2017-02-04 16:22:21 +02:00
parent 35100c8bdd
commit d25556bf80
2 changed files with 4 additions and 3 deletions

View File

@ -5,12 +5,14 @@ from .. import language_data as base
from ..language_data import update_exc, strings_to_exc
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY
# Rebind STOP_WORDS to a set built from the value imported from .stop_words.
STOP_WORDS = set(STOP_WORDS)
# NOTE(review): this text comes from a diff view whose +/- markers were
# stripped. The two assignments below are presumably the old and the new
# version of a single line, not two consecutive statements. Read literally,
# the first would discard the TOKENIZER_EXCEPTIONS imported from
# .tokenizer_exceptions before it is ever used -- confirm against the file.
TOKENIZER_EXCEPTIONS = strings_to_exc(base.EMOTICONS)
# Rebind the name to a new dict built from its current value; presumably so
# that the update_exc calls below work on a copy -- TODO confirm that
# update_exc modifies its first argument in place.
TOKENIZER_EXCEPTIONS = dict(TOKENIZER_EXCEPTIONS)
# Pass three further sources through strings_to_exc and hand each result to
# update_exc, in this order: ORTH_ONLY, base.EMOTICONS, base.ABBREVIATIONS.
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS))
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))

View File

@ -107,7 +107,6 @@ ORTH_ONLY = [
"p.g.a.",
"ref.",
"resp.",
"s.",
"s.a.s.",
"s.k.",
"st.",