Add an ABBREVIATIONS list to spacy.language_data.

The first hunk imports the new module into the package __init__ with a star
import, matching the neighbouring emoticons/punctuation/tag_map imports.
The second hunk creates abbreviations.py, which defines ABBREVIATIONS (a list
of string literals: a few quote/punctuation sequences, "C++", and the single
letters a-z plus ä/ö/ü each followed by a period) and limits the module's
star-export to that one name through __all__.

NOTE(review): the third list entry is the empty string "". Confirm that an
empty entry is intended and is not a placeholder that was lost in transit.
NOTE(review): the indentation of the list entries could not be recovered from
the collapsed original; four spaces are assumed here.

diff --git a/spacy/language_data/__init__.py b/spacy/language_data/__init__.py
index f6aa4317c..43a4ef0be 100644
--- a/spacy/language_data/__init__.py
+++ b/spacy/language_data/__init__.py
@@ -1,3 +1,4 @@
+from .abbreviations import *
 from .emoticons import *
 from .punctuation import *
 from .tag_map import *
diff --git a/spacy/language_data/abbreviations.py b/spacy/language_data/abbreviations.py
new file mode 100644
index 000000000..b49daa0ad
--- /dev/null
+++ b/spacy/language_data/abbreviations.py
@@ -0,0 +1,43 @@
+# encoding: utf8
+from __future__ import unicode_literals
+
+
+ABBREVIATIONS = [
+    "'",
+    "\\\")",
+    "",
+    "''",
+    "C++",
+    "a.",
+    "b.",
+    "c.",
+    "d.",
+    "e.",
+    "f.",
+    "g.",
+    "h.",
+    "i.",
+    "j.",
+    "k.",
+    "l.",
+    "m.",
+    "n.",
+    "o.",
+    "p.",
+    "q.",
+    "r.",
+    "s.",
+    "t.",
+    "u.",
+    "v.",
+    "w.",
+    "x.",
+    "y.",
+    "z.",
+    "ä.",
+    "ö.",
+    "ü."
+]
+
+
+__all__ = [ "ABBREVIATIONS" ]