From 7c3cb2a6521d19cba965d8cb86fcf3c70dced720 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sun, 8 Jan 2017 20:34:03 +0100
Subject: [PATCH] Add global abbreviations data

---
NOTE(review): this patch was reflowed from a single collapsed line. The
four-space indentation of the list entries is reconstructed, not recovered.
The From: header carries no e-mail address and the list contains an
empty-string entry (""), so angle-bracketed text may have been stripped
before this file was produced -- confirm both against the original commit
before applying.

 spacy/language_data/__init__.py      |  1 +
 spacy/language_data/abbreviations.py | 43 ++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 spacy/language_data/abbreviations.py

diff --git a/spacy/language_data/__init__.py b/spacy/language_data/__init__.py
index f6aa4317c..43a4ef0be 100644
--- a/spacy/language_data/__init__.py
+++ b/spacy/language_data/__init__.py
@@ -1,3 +1,4 @@
+from .abbreviations import *
 from .emoticons import *
 from .punctuation import *
 from .tag_map import *
diff --git a/spacy/language_data/abbreviations.py b/spacy/language_data/abbreviations.py
new file mode 100644
index 000000000..b49daa0ad
--- /dev/null
+++ b/spacy/language_data/abbreviations.py
@@ -0,0 +1,43 @@
+# encoding: utf8
+from __future__ import unicode_literals
+
+
+ABBREVIATIONS = [
+    "'",
+    "\\\")",
+    "",
+    "''",
+    "C++",
+    "a.",
+    "b.",
+    "c.",
+    "d.",
+    "e.",
+    "f.",
+    "g.",
+    "h.",
+    "i.",
+    "j.",
+    "k.",
+    "l.",
+    "m.",
+    "n.",
+    "o.",
+    "p.",
+    "q.",
+    "r.",
+    "s.",
+    "t.",
+    "u.",
+    "v.",
+    "w.",
+    "x.",
+    "y.",
+    "z.",
+    "ä.",
+    "ö.",
+    "ü."
+]
+
+
+__all__ = [ "ABBREVIATIONS" ]