# encoding: utf8
"""Tokenizer exception data for Swedish (sv).

``TOKENIZER_EXCEPTIONS`` maps exceptional orthographic strings to their
token analyses (none defined for Swedish yet); ``ORTH_ONLY`` lists
abbreviations that the tokenizer should keep as single tokens without
any special analysis.
"""
from __future__ import unicode_literals

from ..symbols import *
from ..language_data import PRON_LEMMA


# No full token-analysis exceptions for Swedish yet; placeholder kept so
# the language module exposes the same names as the other languages.
TOKENIZER_EXCEPTIONS = {

}


# Common Swedish abbreviations, one token each.  Declared as a single
# whitespace-separated string and split, which keeps the list easy to
# scan and extend.  NOTE(review): "fid." is not an obvious Swedish
# abbreviation — possibly intended as "fig." (figur); confirm against
# the source word list before changing it.
ORTH_ONLY = """
ang. anm. bil. bl.a. dvs. e.Kr. el. e.d. eng. etc. exkl. f.d. fid.
f.Kr. forts. fr.o.m. f.ö. förf. inkl. jur. kl. kr. lat. m.a.o. max.
m.fl. min. m.m. obs. o.d. osv. p.g.a. ref. resp. s. s.a.s. s.k. st.
s:t t.ex. t.o.m. ung. äv. övers.
""".split()