From 4a1e206064d18180ce1b1fa045f5a7dbae3b001c Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Wed, 7 Dec 2016 20:29:35 +0100
Subject: [PATCH] Remove old lang_data directory

---
 lang_data/de/abbrev.de.tab        |  319 --
 lang_data/de/gazetteer.json       |  194 --
 lang_data/de/generate_specials.py |  334 --
 lang_data/de/infix.txt            |    6 -
 lang_data/de/lemma_rules.json     |    1 -
 lang_data/de/morphs.json          |   71 -
 lang_data/de/prefix.txt           |   27 -
 lang_data/de/sample.txt           |    3 -
 lang_data/de/specials.json        | 1483 ---------
 lang_data/de/suffix.txt           |   73 -
 lang_data/de/tag_map.json         |   59 -
 lang_data/en/LICENSE              |   20 -
 lang_data/en/gazetteer.json       |  194 --
 lang_data/en/generate_specials.py |  422 ---
 lang_data/en/infix.txt            |    6 -
 lang_data/en/lemma_rules.json     |   38 -
 lang_data/en/morphs.json          |   59 -
 lang_data/en/prefix.txt           |   21 -
 lang_data/en/specials.json        | 4924 -----------------------------
 lang_data/en/suffix.txt           |   26 -
 lang_data/en/tag_map.json         |   60 -
 lang_data/fi/infix.txt            |    3 -
 lang_data/fi/lemma_rules.json     |    1 -
 lang_data/fi/morphs.json          |    0
 lang_data/fi/prefix.txt           |   21 -
 lang_data/fi/sample.txt           |    3 -
 lang_data/fi/specials.json        |  149 -
 lang_data/fi/suffix.txt           |   26 -
 lang_data/fi/tag_map.json         |   19 -
 lang_data/it/infix.txt            |    3 -
 lang_data/it/morphs.json          |    0
 lang_data/it/prefix.txt           |   21 -
 lang_data/it/specials.json        |  149 -
 lang_data/it/suffix.txt           |   26 -
 lang_data/it/tag_map.json         |   44 -
 lang_data/zh/gazetteer.json       |  194 --
 lang_data/zh/infix.txt            |    6 -
 lang_data/zh/morphs.json          |    1 -
 lang_data/zh/prefix.txt           |   21 -
 lang_data/zh/specials.json        |    1 -
 lang_data/zh/suffix.txt           |   26 -
 lang_data/zh/tag_map.json         |   43 -
 42 files changed, 9097 deletions(-)
 delete mode 100644 lang_data/de/abbrev.de.tab
 delete mode 100644 lang_data/de/gazetteer.json
 delete mode 100644 lang_data/de/generate_specials.py
 delete mode 100644 lang_data/de/infix.txt
 delete mode 100644 lang_data/de/lemma_rules.json
 delete mode 100644 lang_data/de/morphs.json
 delete mode 100644 lang_data/de/prefix.txt
 delete mode 100644 lang_data/de/sample.txt
 delete mode 100644 lang_data/de/specials.json
 delete mode 100644 lang_data/de/suffix.txt
 delete mode 100644 lang_data/de/tag_map.json
 delete mode 100644 lang_data/en/LICENSE
 delete mode 100644 lang_data/en/gazetteer.json
 delete mode 100644 lang_data/en/generate_specials.py
 delete mode 100644 lang_data/en/infix.txt
 delete mode 100644 lang_data/en/lemma_rules.json
 delete mode 100644 lang_data/en/morphs.json
 delete mode 100644 lang_data/en/prefix.txt
 delete mode 100644 lang_data/en/specials.json
 delete mode 100644 lang_data/en/suffix.txt
 delete mode 100644 lang_data/en/tag_map.json
 delete mode 100644 lang_data/fi/infix.txt
 delete mode 100644 lang_data/fi/lemma_rules.json
 delete mode 100644 lang_data/fi/morphs.json
 delete mode 100644 lang_data/fi/prefix.txt
 delete mode 100644 lang_data/fi/sample.txt
 delete mode 100644 lang_data/fi/specials.json
 delete mode 100644 lang_data/fi/suffix.txt
 delete mode 100644 lang_data/fi/tag_map.json
 delete mode 100644 lang_data/it/infix.txt
 delete mode 100644 lang_data/it/morphs.json
 delete mode 100644 lang_data/it/prefix.txt
 delete mode 100644 lang_data/it/specials.json
 delete mode 100644 lang_data/it/suffix.txt
 delete mode 100644 lang_data/it/tag_map.json
 delete mode 100644 lang_data/zh/gazetteer.json
 delete mode 100644 lang_data/zh/infix.txt
 delete mode 100644 lang_data/zh/morphs.json
 delete mode 100644 lang_data/zh/prefix.txt
 delete mode 100644 lang_data/zh/specials.json
 delete mode 100644 lang_data/zh/suffix.txt
 delete mode 100644 lang_data/zh/tag_map.json

diff --git a/lang_data/de/abbrev.de.tab b/lang_data/de/abbrev.de.tab
deleted file mode 100644
index 97374c83d..000000000
--- a/lang_data/de/abbrev.de.tab
+++ /dev/null
@@ -1,319 +0,0 @@
-# surface form lemma pos 
-# multiple values are separated by |
-# empty lines and lines starting with # are being ignored 
-
-''	''
-\")	\")
-\n	\n	<nl>	SP
-\t	\t	<tab>	SP
- 	 	<space>	SP
-
-# example: Wie geht's?
-'s	's	es
-'S	'S	es
-
-# example: Haste mal 'nen Euro?
-'n	'n	ein
-'ne	'ne	eine
-'nen	'nen	einen
-
-# example: Kommen S’ nur herein!
-s'	s'	sie
-S'	S'	sie
-
-# example: Da haben wir's!
-ich's	ich|'s	ich|es
-du's	du|'s	du|es
-er's	er|'s	er|es
-sie's	sie|'s	sie|es
-wir's	wir|'s	wir|es
-ihr's	ihr|'s	ihr|es
-
-# example: Die katze auf'm dach.
-auf'm	auf|'m	auf|dem
-unter'm	unter|'m	unter|dem
-über'm	über|'m	über|dem
-vor'm	vor|'m	vor|dem
-hinter'm	hinter|'m	hinter|dem
-
-# persons
-B.A.	B.A.
-B.Sc.	B.Sc.
-Dipl.	Dipl.
-Dipl.-Ing.	Dipl.-Ing.
-Dr.	Dr.
-Fr.	Fr.
-Frl.	Frl.
-Hr.	Hr.
-Hrn.	Hrn.
-Frl.	Frl.
-Prof.	Prof.
-St.	St.
-Hrgs.	Hrgs.
-Hg.	Hg.
-a.Z.	a.Z.
-a.D.	a.D.
-h.c.	h.c.
-Jr.	Jr.
-jr.	jr.
-jun.	jun.
-sen.	sen.
-rer.	rer.
-Ing.	Ing.
-M.A.	M.A.
-Mr.	Mr.
-M.Sc.	M.Sc.
-nat.	nat.
-phil.	phil.
-
-# companies
-Co.	Co.
-co.	co.
-Cie.	Cie.
-A.G.	A.G.
-G.m.b.H.	G.m.b.H.
-i.G.	i.G.
-e.V.	e.V.
-
-# popular german abbreviations
-Abb.	Abb.
-Abk.	Abk.
-Abs.	Abs.
-Abt.	Abt.
-abzgl.	abzgl.
-allg.	allg.
-a.M.	a.M.
-Bd.	Bd.
-betr.	betr.
-Betr.	Betr.
-Biol.	Biol.
-biol.	biol.
-Bf.	Bf.
-Bhf.	Bhf.
-Bsp.	Bsp.
-bspw.	bspw.
-bzgl.	bzgl.
-bzw.	bzw.
-d.h.	d.h.
-dgl.	dgl.
-ebd.	ebd.
-ehem.	ehem.
-eigtl.	eigtl.
-entspr.	entspr.
-erm.	erm.
-ev.	ev.
-evtl.	evtl.
-Fa.	Fa.
-Fam.	Fam.
-geb.	geb.
-Gebr.	Gebr.
-gem.	gem.
-ggf.	ggf.
-ggü.	ggü.
-ggfs.	ggfs.
-gegr.	gegr.
-Hbf.	Hbf.
-Hrsg.	Hrsg.
-hrsg.	hrsg.
-i.A.	i.A.
-i.d.R.	i.d.R.
-inkl.	inkl.
-insb.	insb.
-i.O.	i.O.
-i.Tr.	i.Tr.
-i.V.	i.V.
-jur.	jur.
-kath.	kath.
-K.O.	K.O.
-lt.	lt.
-max.	max.
-m.E.	m.E.
-m.M.	m.M.
-mtl.	mtl.
-min.	min.
-mind.	mind.
-MwSt.	MwSt.
-Nr.	Nr.
-o.a.	o.a.
-o.ä.	o.ä.
-o.Ä.	o.Ä.
-o.g.	o.g.
-o.k.	o.k.
-O.K.	O.K.
-Orig.	Orig.
-orig.	orig.
-pers.	pers.
-Pkt.	Pkt.
-Red.	Red.
-röm.	röm.
-s.o.	s.o.
-sog.	sog.
-std.	std.
-stellv.	stellv.
-Str.	Str.
-tägl.	tägl.
-Tel.	Tel.
-u.a.	u.a.
-usf.	usf.
-u.s.w.	u.s.w.
-usw.	usw.
-u.U.	u.U.
-u.v.m.	u.v.m.
-uvm.	uvm.
-v.a.	v.a.
-vgl.	vgl.
-vllt.	vllt.
-v.l.n.r.	v.l.n.r.
-vlt.	vlt.
-Vol.	Vol.
-wiss.	wiss.
-Univ.	Univ.
-z.B.	z.B.
-z.b.	z.b.
-z.Bsp.	z.Bsp.
-z.T.	z.T.
-z.Z.	z.Z.
-zzgl.	zzgl.
-z.Zt.	z.Zt.
-
-# popular latin abbreviations
-vs.	vs.
-adv.	adv.
-Chr.	Chr.
-A.C.	A.C.
-A.D.	A.D.
-e.g.	e.g.
-i.e.	i.e.
-al.	al.
-p.a.	p.a.
-P.S.	P.S.
-q.e.d.	q.e.d.
-R.I.P.	R.I.P.
-etc.	etc.
-incl.	incl.
-ca.	ca.
-n.Chr.	n.Chr.
-p.s.	p.s.
-v.Chr.	v.Chr.
-
-# popular english abbreviations
-D.C.	D.C.
-N.Y.	N.Y.
-N.Y.C.	N.Y.C.
-U.S.	U.S.
-U.S.A.	U.S.A.
-L.A.	L.A.
-U.S.S.	U.S.S.
-
-# dates & time
-Jan.	Jan.
-Feb.	Feb.
-Mrz.	Mrz.
-Mär.	Mär.
-Apr.	Apr.
-Jun.	Jun.
-Jul.	Jul.
-Aug.	Aug.
-Sep.	Sep.
-Sept.	Sept.
-Okt.	Okt.
-Nov.	Nov.
-Dez.	Dez.
-Mo.	Mo.
-Di.	Di.
-Mi.	Mi.
-Do.	Do.
-Fr.	Fr.
-Sa.	Sa.
-So.	So.
-Std.	Std.
-Jh.	Jh.
-Jhd.	Jhd.
-
-# numbers
-Tsd.	Tsd.
-Mio.	Mio.
-Mrd.	Mrd.
-
-# countries & languages
-engl.	engl.
-frz.	frz.
-lat.	lat.
-österr.	österr.
-
-# smileys
-:)	:)
-<3	<3
-;)	;)
-(:	(:
-:(	:(
--_-	-_-
-=)	=)
-:/	:/
-:>	:>
-;-)	;-)
-:Y	:Y
-:P	:P
-:-P	:-P
-:3	:3
-=3	=3
-xD	xD
-^_^	^_^
-=]	=]
-=D	=D
-<333	<333
-:))	:))
-:0	:0
--__-	-__-
-xDD	xDD
-o_o	o_o
-o_O	o_O
-V_V	V_V
-=[[	=[[
-<33	<33
-;p	;p
-;D	;D
-;-p	;-p
-;(	;(
-:p	:p
-:]	:]
-:O	:O
-:-/	:-/
-:-)	:-)
-:(((	:(((
-:((	:((
-:')	:')
-(^_^)	(^_^)
-(=	(=
-o.O	o.O
-
-# single letters
-a.	a.
-b.	b.
-c.	c.
-d.	d.
-e.	e.
-f.	f.
-g.	g.
-h.	h.
-i.	i.
-j.	j.
-k.	k.
-l.	l.
-m.	m.
-n.	n.
-o.	o.
-p.	p.
-q.	q.
-r.	r.
-s.	s.
-t.	t.
-u.	u.
-v.	v.
-w.	w.
-x.	x.
-y.	y.
-z.	z.
-ä.	ä.
-ö.	ö.
-ü.	ü.
diff --git a/lang_data/de/gazetteer.json b/lang_data/de/gazetteer.json
deleted file mode 100644
index d52fed839..000000000
--- a/lang_data/de/gazetteer.json
+++ /dev/null
@@ -1,194 +0,0 @@
-{
-	"Reddit": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "reddit"}]
-		]
-	],
-	"SeptemberElevenAttacks": [
-		"EVENT",
-		{},
-		[
-			[
-				{"orth": "9/11"}
-			],
-			[
-				{"lower": "september"},
-				{"orth": "11"}
-			]
-		]
-	],
-	"Linux": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "linux"}]
-		]
-	],
-	"Haskell": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "haskell"}]
-		]
-	],
-	"HaskellCurry": [
-		"PERSON",
-		{},
-		[
-			[
-				{"lower": "haskell"},
-				{"lower": "curry"}
-			]
-		]
-	],
-	"Javascript": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "javascript"}]
-		]
-	],
-	"CSS": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "css"}],
-			[{"lower": "css3"}]
-		]
-	],
-	"displaCy": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "displacy"}]
-		]
-	],
-	"spaCy": [
-		"PRODUCT",
-		{},
-		[
-			[{"orth": "spaCy"}]
-		]
-	],
-
-    "HTML": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "html"}],
-			[{"lower": "html5"}]
-		]
-	],
-    "Python": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Python"}]
-        ]
-    ],
-    "Ruby": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Ruby"}]
-        ]
-    ],
-    "Digg": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "digg"}]
-        ]
-    ],
-     "FoxNews": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Fox"}],
-            [{"orth": "News"}]
-        ]
-    ],
-    "Google": [
-        "ORG",
-        {},
-        [
-            [{"lower": "google"}]
-        ]
-    ],
-    "Mac": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "mac"}]
-        ]
-    ],
-    "Wikipedia": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "wikipedia"}]
-        ]
-    ],
-    "Windows": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Windows"}]
-        ]
-    ],
-     "Dell": [
-        "ORG",
-        {},
-        [
-            [{"lower": "dell"}]
-        ]
-    ],
-    "Facebook": [
-        "ORG",
-        {},
-        [
-            [{"lower": "facebook"}]
-        ]
-    ],
-     "Blizzard": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Blizzard"}]
-        ]
-    ],
-    "Ubuntu": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Ubuntu"}]
-        ]
-    ],
-    "Youtube": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "youtube"}]
-        ]
-    ],
-    "false_positives": [
-        null,
-        {},
-        [
-            [{"orth": "Shit"}],
-            [{"orth": "Weed"}],
-            [{"orth": "Cool"}],
-            [{"orth": "Btw"}],
-            [{"orth": "Bah"}],
-            [{"orth": "Bullshit"}],
-            [{"orth": "Lol"}],
-            [{"orth": "Yo"}, {"lower": "dawg"}],
-            [{"orth": "Yay"}],
-            [{"orth": "Ahh"}],
-            [{"orth": "Yea"}],
-            [{"orth": "Bah"}]
-        ]
-    ]
-}
diff --git a/lang_data/de/generate_specials.py b/lang_data/de/generate_specials.py
deleted file mode 100644
index b3dc52e4f..000000000
--- a/lang_data/de/generate_specials.py
+++ /dev/null
@@ -1,334 +0,0 @@
-# coding=utf8
-import json
-import io
-import itertools
-
-contractions = {}
-
-# contains the lemmas, parts of speech, number, and tenspect of
-# potential tokens generated after splitting contractions off
-token_properties = {}
-
-# contains starting tokens with their potential contractions
-# each potential contraction has a list of exceptions
-    # lower - don't generate the lowercase version
-    # upper - don't generate the uppercase version
-    # contrLower - don't generate the lowercase version with apostrophe (') removed
-    # contrUpper - dont' generate the uppercase version with apostrophe (') removed
-# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so 
-# we add "contrLower" and "contrUpper" to the exceptions list
-starting_tokens = {}
-
-# other specials that don't really have contractions
-# so they are hardcoded
-hardcoded_specials = {
-                "''": [{"F": "''"}],
-                "\")": [{"F": "\")"}],
-                "\n": [{"F": "\n", "pos": "SP"}],
-                "\t": [{"F": "\t", "pos": "SP"}],
-                " ": [{"F": " ", "pos": "SP"}],
-
-                # example: Wie geht's?
-                "'s":  [{"F": "'s", "L": "es"}],
-                "'S":  [{"F": "'S", "L": "es"}],
-
-                # example: Haste mal 'nen Euro?
-                "'n":  [{"F": "'n", "L": "ein"}],
-                "'ne":  [{"F": "'ne", "L": "eine"}],
-                "'nen":  [{"F": "'nen", "L": "einen"}],
-
-                # example: Kommen S’ nur herein!
-                "s'":  [{"F": "s'", "L": "sie"}],
-                "S'":  [{"F": "S'", "L": "sie"}],
-
-                # example: Da haben wir's!
-                "ich's":  [{"F": "ich"}, {"F": "'s", "L": "es"}],
-                "du's":  [{"F": "du"}, {"F": "'s", "L": "es"}],
-                "er's":  [{"F": "er"}, {"F": "'s", "L": "es"}],
-                "sie's":  [{"F": "sie"}, {"F": "'s", "L": "es"}],
-                "wir's":  [{"F": "wir"}, {"F": "'s", "L": "es"}],
-                "ihr's":  [{"F": "ihr"}, {"F": "'s", "L": "es"}],
-
-                # example: Die katze auf'm dach.
-                "auf'm":  [{"F": "auf"}, {"F": "'m", "L": "dem"}],
-                "unter'm":  [{"F": "unter"}, {"F": "'m", "L": "dem"}],
-                "über'm":  [{"F": "über"}, {"F": "'m", "L": "dem"}],
-                "vor'm":  [{"F": "vor"}, {"F": "'m", "L": "dem"}],
-                "hinter'm":  [{"F": "hinter"}, {"F": "'m", "L": "dem"}],
-
-                # persons
-                "Fr.": [{"F": "Fr."}],
-                "Hr.": [{"F": "Hr."}],
-                "Frl.": [{"F": "Frl."}],
-                "Prof.": [{"F": "Prof."}],
-                "Dr.": [{"F": "Dr."}],
-                "St.": [{"F": "St."}],
-                "Hrgs.": [{"F": "Hrgs."}],
-                "Hg.": [{"F": "Hg."}],
-                "a.Z.": [{"F": "a.Z."}],
-                "a.D.": [{"F": "a.D."}],
-                "A.D.": [{"F": "A.D."}],
-                "h.c.": [{"F": "h.c."}],
-                "jun.": [{"F": "jun."}],
-                "sen.": [{"F": "sen."}],
-                "rer.": [{"F": "rer."}],
-                "Dipl.": [{"F": "Dipl."}],
-                "Ing.": [{"F": "Ing."}],
-                "Dipl.-Ing.": [{"F": "Dipl.-Ing."}],
-
-                # companies
-                "Co.": [{"F": "Co."}],
-                "co.": [{"F": "co."}],
-                "Cie.": [{"F": "Cie."}],
-                "A.G.": [{"F": "A.G."}],
-                "G.m.b.H.": [{"F": "G.m.b.H."}],
-                "i.G.": [{"F": "i.G."}],
-                "e.V.": [{"F": "e.V."}],
-
-                # popular german abbreviations
-                "ggü.": [{"F": "ggü."}],
-                "ggf.": [{"F": "ggf."}],
-                "ggfs.": [{"F": "ggfs."}],
-                "Gebr.": [{"F": "Gebr."}],
-                "geb.": [{"F": "geb."}],
-                "gegr.": [{"F": "gegr."}],
-                "erm.": [{"F": "erm."}],
-                "engl.": [{"F": "engl."}],
-                "ehem.": [{"F": "ehem."}],
-                "Biol.": [{"F": "Biol."}],
-                "biol.": [{"F": "biol."}],
-                "Abk.": [{"F": "Abk."}],
-                "Abb.": [{"F": "Abb."}],
-                "abzgl.": [{"F": "abzgl."}],
-                "Hbf.": [{"F": "Hbf."}],
-                "Bhf.": [{"F": "Bhf."}],
-                "Bf.": [{"F": "Bf."}],
-                "i.V.": [{"F": "i.V."}],
-                "inkl.": [{"F": "inkl."}],
-                "insb.": [{"F": "insb."}],
-                "z.B.": [{"F": "z.B."}],
-                "i.Tr.": [{"F": "i.Tr."}],
-                "Jhd.": [{"F": "Jhd."}],
-                "jur.": [{"F": "jur."}],
-                "lt.": [{"F": "lt."}],
-                "nat.": [{"F": "nat."}],
-                "u.a.": [{"F": "u.a."}],
-                "u.s.w.": [{"F": "u.s.w."}],
-                "Nr.": [{"F": "Nr."}],
-                "Univ.": [{"F": "Univ."}],
-                "vgl.": [{"F": "vgl."}],
-                "zzgl.": [{"F": "zzgl."}],
-                "z.Z.": [{"F": "z.Z."}],
-                "betr.": [{"F": "betr."}],
-                "ehem.": [{"F": "ehem."}],
-
-                # popular latin abbreviations
-                "vs.": [{"F": "vs."}],
-                "adv.": [{"F": "adv."}],
-                "Chr.": [{"F": "Chr."}],
-                "A.C.": [{"F": "A.C."}],
-                "A.D.": [{"F": "A.D."}],
-                "e.g.": [{"F": "e.g."}],
-                "i.e.": [{"F": "i.e."}],
-                "al.": [{"F": "al."}],
-                "p.a.": [{"F": "p.a."}],
-                "P.S.": [{"F": "P.S."}],
-                "q.e.d.": [{"F": "q.e.d."}],
-                "R.I.P.": [{"F": "R.I.P."}],
-                "etc.": [{"F": "etc."}],
-                "incl.": [{"F": "incl."}],
-
-                # popular english abbreviations
-                "D.C.": [{"F": "D.C."}],
-                "N.Y.": [{"F": "N.Y."}],
-                "N.Y.C.": [{"F": "N.Y.C."}],
-
-                # dates
-                "Jan.": [{"F": "Jan."}],
-                "Feb.": [{"F": "Feb."}],
-                "Mrz.": [{"F": "Mrz."}],
-                "Mär.": [{"F": "Mär."}],
-                "Apr.": [{"F": "Apr."}],
-                "Jun.": [{"F": "Jun."}],
-                "Jul.": [{"F": "Jul."}],
-                "Aug.": [{"F": "Aug."}],
-                "Sep.": [{"F": "Sep."}],
-                "Sept.": [{"F": "Sept."}],
-                "Okt.": [{"F": "Okt."}],
-                "Nov.": [{"F": "Nov."}],
-                "Dez.": [{"F": "Dez."}],
-                "Mo.": [{"F": "Mo."}],
-                "Di.": [{"F": "Di."}],
-                "Mi.": [{"F": "Mi."}],
-                "Do.": [{"F": "Do."}],
-                "Fr.": [{"F": "Fr."}],
-                "Sa.": [{"F": "Sa."}],
-                "So.": [{"F": "So."}],
-
-                # smileys
-                ":)":    [{"F": ":)"}],
-                "<3":    [{"F": "<3"}],
-                ";)":    [{"F": ";)"}],
-                "(:":    [{"F": "(:"}],
-                ":(":    [{"F": ":("}],
-                "-_-":   [{"F": "-_-"}],
-                "=)":    [{"F": "=)"}],
-                ":/":    [{"F": ":/"}],
-                ":>":    [{"F": ":>"}],
-                ";-)":   [{"F": ";-)"}],
-                ":Y":    [{"F": ":Y"}],
-                ":P":    [{"F": ":P"}],
-                ":-P":   [{"F": ":-P"}],
-                ":3":    [{"F": ":3"}],
-                "=3":    [{"F": "=3"}],
-                "xD":    [{"F": "xD"}],
-                "^_^":   [{"F": "^_^"}],
-                "=]":    [{"F": "=]"}],
-                "=D":    [{"F": "=D"}],
-                "<333":  [{"F": "<333"}],
-                ":))":   [{"F": ":))"}],
-                ":0":    [{"F": ":0"}],
-                "-__-":  [{"F": "-__-"}],
-                "xDD":   [{"F": "xDD"}],
-                "o_o":   [{"F": "o_o"}],
-                "o_O":   [{"F": "o_O"}],
-                "V_V":   [{"F": "V_V"}],
-                "=[[":   [{"F": "=[["}],
-                "<33":   [{"F": "<33"}],
-                ";p":    [{"F": ";p"}],
-                ";D":    [{"F": ";D"}],
-                ";-p":   [{"F": ";-p"}],
-                ";(":    [{"F": ";("}],
-                ":p":    [{"F": ":p"}],
-                ":]":    [{"F": ":]"}],
-                ":O":    [{"F": ":O"}],
-                ":-/":   [{"F": ":-/"}],
-                ":-)":   [{"F": ":-)"}],
-                ":(((":  [{"F": ":((("}],
-                ":((":   [{"F": ":(("}],
-                ":')":   [{"F": ":')"}],
-                "(^_^)": [{"F": "(^_^)"}],
-                "(=":    [{"F": "(="}],
-                "o.O":   [{"F": "o.O"}],
-
-                "a.": [{"F": "a."}],
-                "b.": [{"F": "b."}],
-                "c.": [{"F": "c."}],
-                "d.": [{"F": "d."}],
-                "e.": [{"F": "e."}],
-                "f.": [{"F": "f."}],
-                "g.": [{"F": "g."}],
-                "h.": [{"F": "h."}],
-                "i.": [{"F": "i."}],
-                "j.": [{"F": "j."}],
-                "k.": [{"F": "k."}],
-                "l.": [{"F": "l."}],
-                "m.": [{"F": "m."}],
-                "n.": [{"F": "n."}],
-                "o.": [{"F": "o."}],
-                "p.": [{"F": "p."}],
-                "q.": [{"F": "q."}],
-                "r.": [{"F": "r."}],
-                "s.": [{"F": "s."}],
-                "t.": [{"F": "t."}],
-                "u.": [{"F": "u."}],
-                "v.": [{"F": "v."}],
-                "w.": [{"F": "w."}],
-                "x.": [{"F": "x."}],
-                "y.": [{"F": "y."}],
-                "z.": [{"F": "z."}],
-}
-
-def get_double_contractions(ending):
-    endings = []
-
-    ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
-
-    while ends_with_contraction:
-        for contraction in contractions:
-            if ending.endswith(contraction):
-                endings.append(contraction)
-                ending = ending.rstrip(contraction)
-        ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
-
-    endings.reverse() # reverse because the last ending is put in the list first
-    return endings
-
-def get_token_properties(token, capitalize=False, remove_contractions=False):
-    props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop
-    if capitalize:
-        token = token.capitalize()
-    if remove_contractions:
-        token = token.replace("'", "")
-
-    props["F"] = token
-    return props
-
-
-def create_entry(token, endings, capitalize=False, remove_contractions=False):
-    properties = []
-    properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions))
-    for e in endings:
-        properties.append(get_token_properties(e, remove_contractions=remove_contractions))
-    return properties
-
-
-FIELDNAMES = ['F','L','pos']
-def read_hardcoded(stream):
-    hc_specials = {}
-    for line in stream:
-        line = line.strip()
-        if line.startswith('#') or not line:
-            continue
-        key,_,rest = line.partition('\t')
-        values = []
-        for annotation in zip(*[ e.split('|') for e in rest.split('\t') ]):
-            values.append({ k:v for k,v in itertools.izip_longest(FIELDNAMES,annotation) if v })
-        hc_specials[key] = values
-    return hc_specials
-
-
-def generate_specials():
-
-    specials = {}
-
-    for token in starting_tokens:
-        possible_endings = starting_tokens[token]
-        for ending in possible_endings:
-
-            endings = []
-            if ending.count("'") > 1:
-                endings.extend(get_double_contractions(ending))
-            else:
-                endings.append(ending)
-
-            exceptions = possible_endings[ending]
-
-            if "lower" not in exceptions:
-                special = token + ending
-                specials[special] = create_entry(token, endings)
-
-            if "upper" not in exceptions:
-                special = token.capitalize() + ending
-                specials[special] = create_entry(token, endings, capitalize=True)
-
-            if "contrLower" not in exceptions:
-                special = token + ending.replace("'", "")
-                specials[special] = create_entry(token, endings, remove_contractions=True)
-
-            if "contrUpper" not in exceptions:
-                special = token.capitalize() + ending.replace("'", "")
-                specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True)
-
-    # add in hardcoded specials
-    # changed it so it generates them from a file
-    with io.open('abbrev.de.tab','r',encoding='utf8') as abbrev_:
-        hc_specials = read_hardcoded(abbrev_)
-    specials = dict(specials, **hc_specials)
-
-    return specials
-
-if __name__ == "__main__":
-    specials = generate_specials()
-    with open("specials.json", "w") as f:
-        json.dump(specials, f, sort_keys=True, indent=4, separators=(',', ': '))
diff --git a/lang_data/de/infix.txt b/lang_data/de/infix.txt
deleted file mode 100644
index 8398d5d42..000000000
--- a/lang_data/de/infix.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-\.\.\.
-(?<=[a-z])\.(?=[A-Z])
-(?<=[a-zöäüßA-ZÖÄÜ"]):(?=[a-zöäüßA-ZÖÄÜ])
-(?<=[a-zöäüßA-ZÖÄÜ"])>(?=[a-zöäüßA-ZÖÄÜ])
-(?<=[a-zöäüßA-ZÖÄÜ"])<(?=[a-zöäüßA-ZÖÄÜ])
-(?<=[a-zöäüßA-ZÖÄÜ"])=(?=[a-zöäüßA-ZÖÄÜ])
diff --git a/lang_data/de/lemma_rules.json b/lang_data/de/lemma_rules.json
deleted file mode 100644
index 0967ef424..000000000
--- a/lang_data/de/lemma_rules.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/lang_data/de/morphs.json b/lang_data/de/morphs.json
deleted file mode 100644
index ae024add2..000000000
--- a/lang_data/de/morphs.json
+++ /dev/null
@@ -1,71 +0,0 @@
-{
-    "PRP": {
-        "ich":     {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 1},
-        "meiner":  {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2},
-        "mir":     {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3},
-        "mich":    {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 4},
-        "du":      {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
-        "deiner":  {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
-        "dir":     {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
-        "dich":    {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
-        "er":      {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1},
-        "seiner":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2},
-        "ihm":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3},
-        "ihn":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4},
-        "sie":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1},
-        "ihrer":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2},
-        "ihr":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3},
-        "sie":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4},
-        "es":      {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1},
-        "seiner":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2},
-        "ihm":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3},
-        "es":      {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4},
-        "wir":     {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
-        "unser":   {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
-        "uns":     {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
-        "uns":     {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
-        "ihr":     {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
-        "euer":    {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
-        "euch":    {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
-        "euch":    {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
-        "sie":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1},
-        "ihrer":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2},
-        "ihnen":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3},
-        "sie":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4}
-    },
-
-    "PRP$": {
-        "mein":    {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
-        "meines":  {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
-        "meinem":  {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
-        "meinen":  {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
-        "dein":    {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
-        "deines":  {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
-        "deinem":  {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
-        "deinen":  {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
-        "sein":    {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1},
-        "seines":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2},
-        "seinem":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3},
-        "seinen":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4},
-        "ihr":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1},
-        "ihrer":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2},
-        "ihrem":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3},
-        "ihren":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4},
-        "sein":    {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1},
-        "seines":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2},
-        "seinem":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3},
-        "seinen":  {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4},
-        "unser":   {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
-        "unseres": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
-        "unserem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
-        "unseren": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
-        "euer":    {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
-        "eures":   {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
-        "eurem":   {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
-        "euren":   {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
-        "ihr":     {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1},
-        "ihres":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2},
-        "ihrem":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3},
-        "ihren":   {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4}
-    }
-}
diff --git a/lang_data/de/prefix.txt b/lang_data/de/prefix.txt
deleted file mode 100644
index e37542a9c..000000000
--- a/lang_data/de/prefix.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-,
-"
-(
-[
-{
-*
-<
->
-$
-£
-„
-“
-'
-``
-`
-#
-US$
-C$
-A$
-a-
-‘
-....
-...
-‚
-»
-_
-§
diff --git a/lang_data/de/sample.txt b/lang_data/de/sample.txt
deleted file mode 100644
index 12c0bb787..000000000
--- a/lang_data/de/sample.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
-
-Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.
diff --git a/lang_data/de/specials.json b/lang_data/de/specials.json
deleted file mode 100644
index 282ec6df4..000000000
--- a/lang_data/de/specials.json
+++ /dev/null
@@ -1,1483 +0,0 @@
-{
-    "''": [
-        {
-            "F": "''"
-        }
-    ],
-    "'S": [
-        {
-            "F": "'S",
-            "L": "es"
-        }
-    ],
-    "'n": [
-        {
-            "F": "'n",
-            "L": "ein"
-        }
-    ],
-    "'ne": [
-        {
-            "F": "'ne",
-            "L": "eine"
-        }
-    ],
-    "'nen": [
-        {
-            "F": "'nen",
-            "L": "einen"
-        }
-    ],
-    "'s": [
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "(:": [
-        {
-            "F": "(:"
-        }
-    ],
-    "(=": [
-        {
-            "F": "(="
-        }
-    ],
-    "(^_^)": [
-        {
-            "F": "(^_^)"
-        }
-    ],
-    "-_-": [
-        {
-            "F": "-_-"
-        }
-    ],
-    "-__-": [
-        {
-            "F": "-__-"
-        }
-    ],
-    ":')": [
-        {
-            "F": ":')"
-        }
-    ],
-    ":(": [
-        {
-            "F": ":("
-        }
-    ],
-    ":((": [
-        {
-            "F": ":(("
-        }
-    ],
-    ":(((": [
-        {
-            "F": ":((("
-        }
-    ],
-    ":)": [
-        {
-            "F": ":)"
-        }
-    ],
-    ":))": [
-        {
-            "F": ":))"
-        }
-    ],
-    ":-)": [
-        {
-            "F": ":-)"
-        }
-    ],
-    ":-/": [
-        {
-            "F": ":-/"
-        }
-    ],
-    ":-P": [
-        {
-            "F": ":-P"
-        }
-    ],
-    ":/": [
-        {
-            "F": ":/"
-        }
-    ],
-    ":0": [
-        {
-            "F": ":0"
-        }
-    ],
-    ":3": [
-        {
-            "F": ":3"
-        }
-    ],
-    ":>": [
-        {
-            "F": ":>"
-        }
-    ],
-    ":O": [
-        {
-            "F": ":O"
-        }
-    ],
-    ":P": [
-        {
-            "F": ":P"
-        }
-    ],
-    ":Y": [
-        {
-            "F": ":Y"
-        }
-    ],
-    ":]": [
-        {
-            "F": ":]"
-        }
-    ],
-    ":p": [
-        {
-            "F": ":p"
-        }
-    ],
-    ";(": [
-        {
-            "F": ";("
-        }
-    ],
-    ";)": [
-        {
-            "F": ";)"
-        }
-    ],
-    ";-)": [
-        {
-            "F": ";-)"
-        }
-    ],
-    ";-p": [
-        {
-            "F": ";-p"
-        }
-    ],
-    ";D": [
-        {
-            "F": ";D"
-        }
-    ],
-    ";p": [
-        {
-            "F": ";p"
-        }
-    ],
-    "<3": [
-        {
-            "F": "<3"
-        }
-    ],
-    "<33": [
-        {
-            "F": "<33"
-        }
-    ],
-    "<333": [
-        {
-            "F": "<333"
-        }
-    ],
-    "<space>": [
-        {
-            "F": "SP"
-        }
-    ],
-    "=)": [
-        {
-            "F": "=)"
-        }
-    ],
-    "=3": [
-        {
-            "F": "=3"
-        }
-    ],
-    "=D": [
-        {
-            "F": "=D"
-        }
-    ],
-    "=[[": [
-        {
-            "F": "=[["
-        }
-    ],
-    "=]": [
-        {
-            "F": "=]"
-        }
-    ],
-    "A.C.": [
-        {
-            "F": "A.C."
-        }
-    ],
-    "A.D.": [
-        {
-            "F": "A.D."
-        }
-    ],
-    "A.G.": [
-        {
-            "F": "A.G."
-        }
-    ],
-    "Abb.": [
-        {
-            "F": "Abb."
-        }
-    ],
-    "Abk.": [
-        {
-            "F": "Abk."
-        }
-    ],
-    "Abs.": [
-        {
-            "F": "Abs."
-        }
-    ],
-    "Abt.": [
-        {
-            "F": "Abt."
-        }
-    ],
-    "Apr.": [
-        {
-            "F": "Apr."
-        }
-    ],
-    "Aug.": [
-        {
-            "F": "Aug."
-        }
-    ],
-    "B.A.": [
-        {
-            "F": "B.A."
-        }
-    ],
-    "B.Sc.": [
-        {
-            "F": "B.Sc."
-        }
-    ],
-    "Bd.": [
-        {
-            "F": "Bd."
-        }
-    ],
-    "Betr.": [
-        {
-            "F": "Betr."
-        }
-    ],
-    "Bf.": [
-        {
-            "F": "Bf."
-        }
-    ],
-    "Bhf.": [
-        {
-            "F": "Bhf."
-        }
-    ],
-    "Biol.": [
-        {
-            "F": "Biol."
-        }
-    ],
-    "Bsp.": [
-        {
-            "F": "Bsp."
-        }
-    ],
-    "Chr.": [
-        {
-            "F": "Chr."
-        }
-    ],
-    "Cie.": [
-        {
-            "F": "Cie."
-        }
-    ],
-    "Co.": [
-        {
-            "F": "Co."
-        }
-    ],
-    "D.C.": [
-        {
-            "F": "D.C."
-        }
-    ],
-    "Dez.": [
-        {
-            "F": "Dez."
-        }
-    ],
-    "Di.": [
-        {
-            "F": "Di."
-        }
-    ],
-    "Dipl.": [
-        {
-            "F": "Dipl."
-        }
-    ],
-    "Dipl.-Ing.": [
-        {
-            "F": "Dipl.-Ing."
-        }
-    ],
-    "Do.": [
-        {
-            "F": "Do."
-        }
-    ],
-    "Dr.": [
-        {
-            "F": "Dr."
-        }
-    ],
-    "Fa.": [
-        {
-            "F": "Fa."
-        }
-    ],
-    "Fam.": [
-        {
-            "F": "Fam."
-        }
-    ],
-    "Feb.": [
-        {
-            "F": "Feb."
-        }
-    ],
-    "Fr.": [
-        {
-            "F": "Fr."
-        }
-    ],
-    "Frl.": [
-        {
-            "F": "Frl."
-        }
-    ],
-    "G.m.b.H.": [
-        {
-            "F": "G.m.b.H."
-        }
-    ],
-    "Gebr.": [
-        {
-            "F": "Gebr."
-        }
-    ],
-    "Hbf.": [
-        {
-            "F": "Hbf."
-        }
-    ],
-    "Hg.": [
-        {
-            "F": "Hg."
-        }
-    ],
-    "Hr.": [
-        {
-            "F": "Hr."
-        }
-    ],
-    "Hrgs.": [
-        {
-            "F": "Hrgs."
-        }
-    ],
-    "Hrn.": [
-        {
-            "F": "Hrn."
-        }
-    ],
-    "Hrsg.": [
-        {
-            "F": "Hrsg."
-        }
-    ],
-    "Ing.": [
-        {
-            "F": "Ing."
-        }
-    ],
-    "Jan.": [
-        {
-            "F": "Jan."
-        }
-    ],
-    "Jh.": [
-        {
-            "F": "Jh."
-        }
-    ],
-    "Jhd.": [
-        {
-            "F": "Jhd."
-        }
-    ],
-    "Jr.": [
-        {
-            "F": "Jr."
-        }
-    ],
-    "Jul.": [
-        {
-            "F": "Jul."
-        }
-    ],
-    "Jun.": [
-        {
-            "F": "Jun."
-        }
-    ],
-    "K.O.": [
-        {
-            "F": "K.O."
-        }
-    ],
-    "L.A.": [
-        {
-            "F": "L.A."
-        }
-    ],
-    "M.A.": [
-        {
-            "F": "M.A."
-        }
-    ],
-    "M.Sc.": [
-        {
-            "F": "M.Sc."
-        }
-    ],
-    "Mi.": [
-        {
-            "F": "Mi."
-        }
-    ],
-    "Mio.": [
-        {
-            "F": "Mio."
-        }
-    ],
-    "Mo.": [
-        {
-            "F": "Mo."
-        }
-    ],
-    "Mr.": [
-        {
-            "F": "Mr."
-        }
-    ],
-    "Mrd.": [
-        {
-            "F": "Mrd."
-        }
-    ],
-    "Mrz.": [
-        {
-            "F": "Mrz."
-        }
-    ],
-    "MwSt.": [
-        {
-            "F": "MwSt."
-        }
-    ],
-    "M\u00e4r.": [
-        {
-            "F": "M\u00e4r."
-        }
-    ],
-    "N.Y.": [
-        {
-            "F": "N.Y."
-        }
-    ],
-    "N.Y.C.": [
-        {
-            "F": "N.Y.C."
-        }
-    ],
-    "Nov.": [
-        {
-            "F": "Nov."
-        }
-    ],
-    "Nr.": [
-        {
-            "F": "Nr."
-        }
-    ],
-    "O.K.": [
-        {
-            "F": "O.K."
-        }
-    ],
-    "Okt.": [
-        {
-            "F": "Okt."
-        }
-    ],
-    "Orig.": [
-        {
-            "F": "Orig."
-        }
-    ],
-    "P.S.": [
-        {
-            "F": "P.S."
-        }
-    ],
-    "Pkt.": [
-        {
-            "F": "Pkt."
-        }
-    ],
-    "Prof.": [
-        {
-            "F": "Prof."
-        }
-    ],
-    "R.I.P.": [
-        {
-            "F": "R.I.P."
-        }
-    ],
-    "Red.": [
-        {
-            "F": "Red."
-        }
-    ],
-    "S'": [
-        {
-            "F": "S'",
-            "L": "sie"
-        }
-    ],
-    "Sa.": [
-        {
-            "F": "Sa."
-        }
-    ],
-    "Sep.": [
-        {
-            "F": "Sep."
-        }
-    ],
-    "Sept.": [
-        {
-            "F": "Sept."
-        }
-    ],
-    "So.": [
-        {
-            "F": "So."
-        }
-    ],
-    "St.": [
-        {
-            "F": "St."
-        }
-    ],
-    "Std.": [
-        {
-            "F": "Std."
-        }
-    ],
-    "Str.": [
-        {
-            "F": "Str."
-        }
-    ],
-    "Tel.": [
-        {
-            "F": "Tel."
-        }
-    ],
-    "Tsd.": [
-        {
-            "F": "Tsd."
-        }
-    ],
-    "U.S.": [
-        {
-            "F": "U.S."
-        }
-    ],
-    "U.S.A.": [
-        {
-            "F": "U.S.A."
-        }
-    ],
-    "U.S.S.": [
-        {
-            "F": "U.S.S."
-        }
-    ],
-    "Univ.": [
-        {
-            "F": "Univ."
-        }
-    ],
-    "V_V": [
-        {
-            "F": "V_V"
-        }
-    ],
-    "Vol.": [
-        {
-            "F": "Vol."
-        }
-    ],
-    "\\\")": [
-        {
-            "F": "\\\")"
-        }
-    ],
-    "\\n": [
-        {
-            "F": "\\n",
-            "L": "<nl>",
-            "pos": "SP"
-        }
-    ],
-    "\\t": [
-        {
-            "F": "\\t",
-            "L": "<tab>",
-            "pos": "SP"
-        }
-    ],
-    "^_^": [
-        {
-            "F": "^_^"
-        }
-    ],
-    "a.": [
-        {
-            "F": "a."
-        }
-    ],
-    "a.D.": [
-        {
-            "F": "a.D."
-        }
-    ],
-    "a.M.": [
-        {
-            "F": "a.M."
-        }
-    ],
-    "a.Z.": [
-        {
-            "F": "a.Z."
-        }
-    ],
-    "abzgl.": [
-        {
-            "F": "abzgl."
-        }
-    ],
-    "adv.": [
-        {
-            "F": "adv."
-        }
-    ],
-    "al.": [
-        {
-            "F": "al."
-        }
-    ],
-    "allg.": [
-        {
-            "F": "allg."
-        }
-    ],
-    "auf'm": [
-        {
-            "F": "auf",
-            "L": "auf"
-        },
-        {
-            "F": "'m",
-            "L": "dem"
-        }
-    ],
-    "b.": [
-        {
-            "F": "b."
-        }
-    ],
-    "betr.": [
-        {
-            "F": "betr."
-        }
-    ],
-    "biol.": [
-        {
-            "F": "biol."
-        }
-    ],
-    "bspw.": [
-        {
-            "F": "bspw."
-        }
-    ],
-    "bzgl.": [
-        {
-            "F": "bzgl."
-        }
-    ],
-    "bzw.": [
-        {
-            "F": "bzw."
-        }
-    ],
-    "c.": [
-        {
-            "F": "c."
-        }
-    ],
-    "ca.": [
-        {
-            "F": "ca."
-        }
-    ],
-    "co.": [
-        {
-            "F": "co."
-        }
-    ],
-    "d.": [
-        {
-            "F": "d."
-        }
-    ],
-    "d.h.": [
-        {
-            "F": "d.h."
-        }
-    ],
-    "dgl.": [
-        {
-            "F": "dgl."
-        }
-    ],
-    "du's": [
-        {
-            "F": "du",
-            "L": "du"
-        },
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "e.": [
-        {
-            "F": "e."
-        }
-    ],
-    "e.V.": [
-        {
-            "F": "e.V."
-        }
-    ],
-    "e.g.": [
-        {
-            "F": "e.g."
-        }
-    ],
-    "ebd.": [
-        {
-            "F": "ebd."
-        }
-    ],
-    "ehem.": [
-        {
-            "F": "ehem."
-        }
-    ],
-    "eigtl.": [
-        {
-            "F": "eigtl."
-        }
-    ],
-    "engl.": [
-        {
-            "F": "engl."
-        }
-    ],
-    "entspr.": [
-        {
-            "F": "entspr."
-        }
-    ],
-    "er's": [
-        {
-            "F": "er",
-            "L": "er"
-        },
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "erm.": [
-        {
-            "F": "erm."
-        }
-    ],
-    "etc.": [
-        {
-            "F": "etc."
-        }
-    ],
-    "ev.": [
-        {
-            "F": "ev."
-        }
-    ],
-    "evtl.": [
-        {
-            "F": "evtl."
-        }
-    ],
-    "f.": [
-        {
-            "F": "f."
-        }
-    ],
-    "frz.": [
-        {
-            "F": "frz."
-        }
-    ],
-    "g.": [
-        {
-            "F": "g."
-        }
-    ],
-    "geb.": [
-        {
-            "F": "geb."
-        }
-    ],
-    "gegr.": [
-        {
-            "F": "gegr."
-        }
-    ],
-    "gem.": [
-        {
-            "F": "gem."
-        }
-    ],
-    "ggf.": [
-        {
-            "F": "ggf."
-        }
-    ],
-    "ggfs.": [
-        {
-            "F": "ggfs."
-        }
-    ],
-    "gg\u00fc.": [
-        {
-            "F": "gg\u00fc."
-        }
-    ],
-    "h.": [
-        {
-            "F": "h."
-        }
-    ],
-    "h.c.": [
-        {
-            "F": "h.c."
-        }
-    ],
-    "hinter'm": [
-        {
-            "F": "hinter",
-            "L": "hinter"
-        },
-        {
-            "F": "'m",
-            "L": "dem"
-        }
-    ],
-    "hrsg.": [
-        {
-            "F": "hrsg."
-        }
-    ],
-    "i.": [
-        {
-            "F": "i."
-        }
-    ],
-    "i.A.": [
-        {
-            "F": "i.A."
-        }
-    ],
-    "i.G.": [
-        {
-            "F": "i.G."
-        }
-    ],
-    "i.O.": [
-        {
-            "F": "i.O."
-        }
-    ],
-    "i.Tr.": [
-        {
-            "F": "i.Tr."
-        }
-    ],
-    "i.V.": [
-        {
-            "F": "i.V."
-        }
-    ],
-    "i.d.R.": [
-        {
-            "F": "i.d.R."
-        }
-    ],
-    "i.e.": [
-        {
-            "F": "i.e."
-        }
-    ],
-    "ich's": [
-        {
-            "F": "ich",
-            "L": "ich"
-        },
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "ihr's": [
-        {
-            "F": "ihr",
-            "L": "ihr"
-        },
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "incl.": [
-        {
-            "F": "incl."
-        }
-    ],
-    "inkl.": [
-        {
-            "F": "inkl."
-        }
-    ],
-    "insb.": [
-        {
-            "F": "insb."
-        }
-    ],
-    "j.": [
-        {
-            "F": "j."
-        }
-    ],
-    "jr.": [
-        {
-            "F": "jr."
-        }
-    ],
-    "jun.": [
-        {
-            "F": "jun."
-        }
-    ],
-    "jur.": [
-        {
-            "F": "jur."
-        }
-    ],
-    "k.": [
-        {
-            "F": "k."
-        }
-    ],
-    "kath.": [
-        {
-            "F": "kath."
-        }
-    ],
-    "l.": [
-        {
-            "F": "l."
-        }
-    ],
-    "lat.": [
-        {
-            "F": "lat."
-        }
-    ],
-    "lt.": [
-        {
-            "F": "lt."
-        }
-    ],
-    "m.": [
-        {
-            "F": "m."
-        }
-    ],
-    "m.E.": [
-        {
-            "F": "m.E."
-        }
-    ],
-    "m.M.": [
-        {
-            "F": "m.M."
-        }
-    ],
-    "max.": [
-        {
-            "F": "max."
-        }
-    ],
-    "min.": [
-        {
-            "F": "min."
-        }
-    ],
-    "mind.": [
-        {
-            "F": "mind."
-        }
-    ],
-    "mtl.": [
-        {
-            "F": "mtl."
-        }
-    ],
-    "n.": [
-        {
-            "F": "n."
-        }
-    ],
-    "n.Chr.": [
-        {
-            "F": "n.Chr."
-        }
-    ],
-    "nat.": [
-        {
-            "F": "nat."
-        }
-    ],
-    "o.": [
-        {
-            "F": "o."
-        }
-    ],
-    "o.O": [
-        {
-            "F": "o.O"
-        }
-    ],
-    "o.a.": [
-        {
-            "F": "o.a."
-        }
-    ],
-    "o.g.": [
-        {
-            "F": "o.g."
-        }
-    ],
-    "o.k.": [
-        {
-            "F": "o.k."
-        }
-    ],
-    "o.\u00c4.": [
-        {
-            "F": "o.\u00c4."
-        }
-    ],
-    "o.\u00e4.": [
-        {
-            "F": "o.\u00e4."
-        }
-    ],
-    "o_O": [
-        {
-            "F": "o_O"
-        }
-    ],
-    "o_o": [
-        {
-            "F": "o_o"
-        }
-    ],
-    "orig.": [
-        {
-            "F": "orig."
-        }
-    ],
-    "p.": [
-        {
-            "F": "p."
-        }
-    ],
-    "p.a.": [
-        {
-            "F": "p.a."
-        }
-    ],
-    "p.s.": [
-        {
-            "F": "p.s."
-        }
-    ],
-    "pers.": [
-        {
-            "F": "pers."
-        }
-    ],
-    "phil.": [
-        {
-            "F": "phil."
-        }
-    ],
-    "q.": [
-        {
-            "F": "q."
-        }
-    ],
-    "q.e.d.": [
-        {
-            "F": "q.e.d."
-        }
-    ],
-    "r.": [
-        {
-            "F": "r."
-        }
-    ],
-    "rer.": [
-        {
-            "F": "rer."
-        }
-    ],
-    "r\u00f6m.": [
-        {
-            "F": "r\u00f6m."
-        }
-    ],
-    "s'": [
-        {
-            "F": "s'",
-            "L": "sie"
-        }
-    ],
-    "s.": [
-        {
-            "F": "s."
-        }
-    ],
-    "s.o.": [
-        {
-            "F": "s.o."
-        }
-    ],
-    "sen.": [
-        {
-            "F": "sen."
-        }
-    ],
-    "sie's": [
-        {
-            "F": "sie",
-            "L": "sie"
-        },
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "sog.": [
-        {
-            "F": "sog."
-        }
-    ],
-    "std.": [
-        {
-            "F": "std."
-        }
-    ],
-    "stellv.": [
-        {
-            "F": "stellv."
-        }
-    ],
-    "t.": [
-        {
-            "F": "t."
-        }
-    ],
-    "t\u00e4gl.": [
-        {
-            "F": "t\u00e4gl."
-        }
-    ],
-    "u.": [
-        {
-            "F": "u."
-        }
-    ],
-    "u.U.": [
-        {
-            "F": "u.U."
-        }
-    ],
-    "u.a.": [
-        {
-            "F": "u.a."
-        }
-    ],
-    "u.s.w.": [
-        {
-            "F": "u.s.w."
-        }
-    ],
-    "u.v.m.": [
-        {
-            "F": "u.v.m."
-        }
-    ],
-    "unter'm": [
-        {
-            "F": "unter",
-            "L": "unter"
-        },
-        {
-            "F": "'m",
-            "L": "dem"
-        }
-    ],
-    "usf.": [
-        {
-            "F": "usf."
-        }
-    ],
-    "usw.": [
-        {
-            "F": "usw."
-        }
-    ],
-    "uvm.": [
-        {
-            "F": "uvm."
-        }
-    ],
-    "v.": [
-        {
-            "F": "v."
-        }
-    ],
-    "v.Chr.": [
-        {
-            "F": "v.Chr."
-        }
-    ],
-    "v.a.": [
-        {
-            "F": "v.a."
-        }
-    ],
-    "v.l.n.r.": [
-        {
-            "F": "v.l.n.r."
-        }
-    ],
-    "vgl.": [
-        {
-            "F": "vgl."
-        }
-    ],
-    "vllt.": [
-        {
-            "F": "vllt."
-        }
-    ],
-    "vlt.": [
-        {
-            "F": "vlt."
-        }
-    ],
-    "vor'm": [
-        {
-            "F": "vor",
-            "L": "vor"
-        },
-        {
-            "F": "'m",
-            "L": "dem"
-        }
-    ],
-    "vs.": [
-        {
-            "F": "vs."
-        }
-    ],
-    "w.": [
-        {
-            "F": "w."
-        }
-    ],
-    "wir's": [
-        {
-            "F": "wir",
-            "L": "wir"
-        },
-        {
-            "F": "'s",
-            "L": "es"
-        }
-    ],
-    "wiss.": [
-        {
-            "F": "wiss."
-        }
-    ],
-    "x.": [
-        {
-            "F": "x."
-        }
-    ],
-    "xD": [
-        {
-            "F": "xD"
-        }
-    ],
-    "xDD": [
-        {
-            "F": "xDD"
-        }
-    ],
-    "y.": [
-        {
-            "F": "y."
-        }
-    ],
-    "z.": [
-        {
-            "F": "z."
-        }
-    ],
-    "z.B.": [
-        {
-            "F": "z.B."
-        }
-    ],
-    "z.Bsp.": [
-        {
-            "F": "z.Bsp."
-        }
-    ],
-    "z.T.": [
-        {
-            "F": "z.T."
-        }
-    ],
-    "z.Z.": [
-        {
-            "F": "z.Z."
-        }
-    ],
-    "z.Zt.": [
-        {
-            "F": "z.Zt."
-        }
-    ],
-    "z.b.": [
-        {
-            "F": "z.b."
-        }
-    ],
-    "zzgl.": [
-        {
-            "F": "zzgl."
-        }
-    ],
-    "\u00e4.": [
-        {
-            "F": "\u00e4."
-        }
-    ],
-    "\u00f6.": [
-        {
-            "F": "\u00f6."
-        }
-    ],
-    "\u00f6sterr.": [
-        {
-            "F": "\u00f6sterr."
-        }
-    ],
-    "\u00fc.": [
-        {
-            "F": "\u00fc."
-        }
-    ],
-    "\u00fcber'm": [
-        {
-            "F": "\u00fcber",
-            "L": "\u00fcber"
-        },
-        {
-            "F": "'m",
-            "L": "dem"
-        }
-    ]
-}
\ No newline at end of file
diff --git a/lang_data/de/suffix.txt b/lang_data/de/suffix.txt
deleted file mode 100644
index aeecb85a2..000000000
--- a/lang_data/de/suffix.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-,
-\"
-\)
-\]
-\}
-\*
-\!
-\?
-%
-\$
->
-:
-;
-'
-”
-“
-«
-_
-''
-'s
-'S
-’s
-’S
-’
-‘
-°
-€
-\.\.
-\.\.\.
-\.\.\.\.
-(?<=[a-zäöüßÖÄÜ)\]"'´«‘’%\)²“”])\.
-\-\-
-´
-(?<=[0-9])km²
-(?<=[0-9])m²
-(?<=[0-9])cm²
-(?<=[0-9])mm²
-(?<=[0-9])km³
-(?<=[0-9])m³
-(?<=[0-9])cm³
-(?<=[0-9])mm³
-(?<=[0-9])ha
-(?<=[0-9])km
-(?<=[0-9])m
-(?<=[0-9])cm
-(?<=[0-9])mm
-(?<=[0-9])µm
-(?<=[0-9])nm
-(?<=[0-9])yd
-(?<=[0-9])in
-(?<=[0-9])ft
-(?<=[0-9])kg
-(?<=[0-9])g
-(?<=[0-9])mg
-(?<=[0-9])µg
-(?<=[0-9])t
-(?<=[0-9])lb
-(?<=[0-9])oz
-(?<=[0-9])m/s
-(?<=[0-9])km/h
-(?<=[0-9])mph
-(?<=[0-9])°C
-(?<=[0-9])°K
-(?<=[0-9])°F
-(?<=[0-9])hPa
-(?<=[0-9])Pa
-(?<=[0-9])mbar
-(?<=[0-9])mb
-(?<=[0-9])T
-(?<=[0-9])G
-(?<=[0-9])M
-(?<=[0-9])K
-(?<=[0-9])kb
diff --git a/lang_data/de/tag_map.json b/lang_data/de/tag_map.json
deleted file mode 100644
index 29da20a39..000000000
--- a/lang_data/de/tag_map.json
+++ /dev/null
@@ -1,59 +0,0 @@
-{
-"$(": {"pos": "PUNCT", "PunctType": "Brck"},
-"$,": {"pos": "PUNCT", "PunctType": "Comm"},
-"$.": {"pos": "PUNCT", "PunctType": "Peri"},
-"ADJA":	{"pos": "ADJ"},
-"ADJD":	{"pos": "ADJ", "Variant": "Short"},
-"ADV":	{"pos": "ADV"},
-"APPO":	{"pos": "ADP", "AdpType": "Post"},
-"APPR":	{"pos": "ADP", "AdpType": "Prep"},
-"APPRART":	{"pos": "ADP", "AdpType": "Prep", "PronType": "Art"},
-"APZR":	{"pos": "ADP", "AdpType": "Circ"},
-"ART":	{"pos": "DET", "PronType": "Art"},
-"CARD":	{"pos": "NUM", "NumType": "Card"},
-"FM":	{"pos": "X", "Foreign": "Yes"},
-"ITJ":	{"pos": "INTJ"},
-"KOKOM": {"pos": "CONJ", "ConjType": "Comp"},
-"KON": {"pos": "CONJ"},
-"KOUI":	{"pos": "SCONJ"},
-"KOUS":	{"pos": "SCONJ"},
-"NE": {"pos": "PROPN"},
-"NNE": {"pos": "PROPN"},
-"NN": {"pos": "NOUN"},
-"PAV": {"pos": "ADV", "PronType": "Dem"},
-"PROAV": {"pos": "ADV", "PronType": "Dem"},
-"PDAT":	{"pos": "DET", "PronType": "Dem"},
-"PDS": {"pos": "PRON", "PronType": "Dem"},
-"PIAT":	{"pos": "DET", "PronType": "Ind,Neg,Tot"},
-"PIDAT":	{"pos": "DET", "AdjType": "Pdt", "PronType": "Ind,Neg,Tot"},
-"PIS":	{"pos": "PRON", "PronType": "Ind,Neg,Tot"},
-"PPER":	{"pos": "PRON", "PronType": "Prs"},
-"PPOSAT":	{"pos": "DET", "Poss": "Yes", "PronType": "Prs"},
-"PPOSS":	{"pos": "PRON", "Poss": "Yes", "PronType": "Prs"},
-"PRELAT":	{"pos": "DET", "PronType": "Rel"},
-"PRELS":	{"pos": "PRON", "PronType": "Rel"},
-"PRF":	{"pos": "PRON", "PronType": "Prs", "Reflex": "Yes"},
-"PTKA":	{"pos": "PART"},
-"PTKANT":	{"pos": "PART", "PartType": "Res"},
-"PTKNEG":	{"pos": "PART", "Negative": "Neg"},
-"PTKVZ":	{"pos": "PART", "PartType": "Vbp"},
-"PTKZU":	{"pos": "PART", "PartType": "Inf"},
-"PWAT":	{"pos": "DET", "PronType": "Int"},
-"PWAV":	{"pos": "ADV", "PronType": "Int"},
-"PWS":	{"pos": "PRON", "PronType": "Int"},
-"TRUNC":	{"pos": "X", "Hyph": "Yes"},
-"VAFIN":	{"pos": "AUX", "Mood": "Ind", "VerbForm": "Fin"},
-"VAIMP":	{"pos": "AUX", "Mood": "Imp", "VerbForm": "Fin"},
-"VAINF":	{"pos": "AUX", "VerbForm": "Inf"},
-"VAPP":	{"pos": "AUX", "Aspect": "Perf", "VerbForm": "Part"},
-"VMFIN":	{"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin", "VerbType": "Mod"},
-"VMINF":	{"pos": "VERB", "VerbForm": "Inf", "VerbType": "Mod"},
-"VMPP":	{"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part", "VerbType": "Mod"},
-"VVFIN":	{"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin"},
-"VVIMP":	{"pos": "VERB", "Mood": "Imp", "VerbForm": "Fin"},
-"VVINF":	{"pos": "VERB", "VerbForm": "Inf"},
-"VVIZU":	{"pos": "VERB", "VerbForm": "Inf"},
-"VVPP":	{"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part"},
-"XY":	{"pos": "X"},
-"SP": {"pos": "SPACE"}
-}
diff --git a/lang_data/en/LICENSE b/lang_data/en/LICENSE
deleted file mode 100644
index 4f49c2dff..000000000
--- a/lang_data/en/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-WordNet Release 3.0 This software and database is being provided to you, the
-LICENSEE, by Princeton University under the following license. By obtaining,
-using and/or copying this software and database, you agree that you have read,
-understood, and will comply with these terms and conditions.: Permission to
-use, copy, modify and distribute this software and database and its
-documentation for any purpose and without fee or royalty is hereby granted,
-provided that you agree to comply with the following copyright notice and
-statements, including the disclaimer, and that the same appear on ALL copies of
-the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton
-University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS"
-AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO
-REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
-PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR
-DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS
-OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used
-in advertising or publicity pertaining to distribution of the software and/or
-database. Title to copyright in this software, database and any associated
-documentation shall at all times remain with Princeton University and LICENSEE
-agrees to preserve same.
diff --git a/lang_data/en/gazetteer.json b/lang_data/en/gazetteer.json
deleted file mode 100644
index d52fed839..000000000
--- a/lang_data/en/gazetteer.json
+++ /dev/null
@@ -1,194 +0,0 @@
-{
-	"Reddit": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "reddit"}]
-		]
-	],
-	"SeptemberElevenAttacks": [
-		"EVENT",
-		{},
-		[
-			[
-				{"orth": "9/11"}
-			],
-			[
-				{"lower": "september"},
-				{"orth": "11"}
-			]
-		]
-	],
-	"Linux": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "linux"}]
-		]
-	],
-	"Haskell": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "haskell"}]
-		]
-	],
-	"HaskellCurry": [
-		"PERSON",
-		{},
-		[
-			[
-				{"lower": "haskell"},
-				{"lower": "curry"}
-			]
-		]
-	],
-	"Javascript": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "javascript"}]
-		]
-	],
-	"CSS": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "css"}],
-			[{"lower": "css3"}]
-		]
-	],
-	"displaCy": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "displacy"}]
-		]
-	],
-	"spaCy": [
-		"PRODUCT",
-		{},
-		[
-			[{"orth": "spaCy"}]
-		]
-	],
-
-    "HTML": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "html"}],
-			[{"lower": "html5"}]
-		]
-	],
-    "Python": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Python"}]
-        ]
-    ],
-    "Ruby": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Ruby"}]
-        ]
-    ],
-    "Digg": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "digg"}]
-        ]
-    ],
-     "FoxNews": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Fox"}],
-            [{"orth": "News"}]
-        ]
-    ],
-    "Google": [
-        "ORG",
-        {},
-        [
-            [{"lower": "google"}]
-        ]
-    ],
-    "Mac": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "mac"}]
-        ]
-    ],
-    "Wikipedia": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "wikipedia"}]
-        ]
-    ],
-    "Windows": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Windows"}]
-        ]
-    ],
-     "Dell": [
-        "ORG",
-        {},
-        [
-            [{"lower": "dell"}]
-        ]
-    ],
-    "Facebook": [
-        "ORG",
-        {},
-        [
-            [{"lower": "facebook"}]
-        ]
-    ],
-     "Blizzard": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Blizzard"}]
-        ]
-    ],
-    "Ubuntu": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Ubuntu"}]
-        ]
-    ],
-    "Youtube": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "youtube"}]
-        ]
-    ],
-    "false_positives": [
-        null,
-        {},
-        [
-            [{"orth": "Shit"}],
-            [{"orth": "Weed"}],
-            [{"orth": "Cool"}],
-            [{"orth": "Btw"}],
-            [{"orth": "Bah"}],
-            [{"orth": "Bullshit"}],
-            [{"orth": "Lol"}],
-            [{"orth": "Yo"}, {"lower": "dawg"}],
-            [{"orth": "Yay"}],
-            [{"orth": "Ahh"}],
-            [{"orth": "Yea"}],
-            [{"orth": "Bah"}]
-        ]
-    ]
-}
diff --git a/lang_data/en/generate_specials.py b/lang_data/en/generate_specials.py
deleted file mode 100644
index a48f8f69d..000000000
--- a/lang_data/en/generate_specials.py
+++ /dev/null
@@ -1,422 +0,0 @@
-# -#- coding: utf-8 -*-
-import json
-
-contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
-
-# contains the lemmas, parts of speech, number, and tenspect of
-# potential tokens generated after splitting contractions off
-token_properties = { 
-
-            "ai": {"L": "be", "pos": "VBP", "number": 2},
-            "are": {"L": "be", "pos": "VBP", "number": 2},
-            "ca": {"L": "can", "pos": "MD"},
-            "can": {"L": "can", "pos": "MD"},
-            "could": {"pos": "MD", "L": "could"},
-            "'d": {"L": "would", "pos": "MD"},
-            "did": {"L": "do", "pos": "VBD"},
-            "do": {"L": "do"},
-            "does": {"L": "do", "pos": "VBZ"},
-            "had": {"L": "have", "pos": "VBD"},
-            "has": {"L": "have", "pos": "VBZ"},
-            "have": {"pos": "VB"},
-            "he": {"L": "-PRON-", "pos": "PRP"},
-            "how": {},
-            "i": {"L": "-PRON-", "pos": "PRP"},
-            "is": {"L": "be", "pos": "VBZ"},
-            "it": {"L": "-PRON-", "pos": "PRP"},
-            "'ll": {"L": "will", "pos": "MD"},
-            "'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1},
-            "'ma": {},
-            "might": {},
-            "must": {},
-            "need": {}, 
-            "not": {"L": "not", "pos": "RB"},
-            "'nt": {"L": "not", "pos": "RB"},
-            "n't": {"L": "not", "pos": "RB"},
-            "'re": {"L": "be", "pos": "VBZ"},
-            "'s": {},                                       # no POS or lemma for s?
-            "sha": {"L": "shall", "pos": "MD"},
-            "she": {"L": "-PRON-", "pos": "PRP"},
-            "should": {},
-            "that": {},
-            "there": {},
-            "they": {"L": "-PRON-", "pos": "PRP"},
-            "was": {},
-            "we": {"L": "-PRON-", "pos": "PRP"},
-            "were": {},
-            "what": {},
-            "when": {},
-            "where": {},
-            "who": {},
-            "why": {},
-            "wo": {},
-            "would": {},
-            "you": {"L": "-PRON-", "pos": "PRP"},
-            "'ve": {"L": "have", "pos": "VB"}
-}
-
-# contains starting tokens with their potential contractions
-# each potential contraction has a list of exceptions
-    # lower - don't generate the lowercase version
-    # upper - don't generate the uppercase version
-    # contrLower - don't generate the lowercase version with apostrophe (') removed
-    # contrUpper - dont' generate the uppercase version with apostrophe (') removed
-# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so 
-# we add "contrLower" and "contrUpper" to the exceptions list
-starting_tokens = {
-
-                "ai": {"n't": []}, 
-                "are": {"n't": []}, 
-                "ca": {"n't": []},
-                "can": {"not": []},
-                "could": {"'ve": [], "n't": [], "n't've": []},
-                "did": {"n't": []},
-                "does": {"n't": []},
-                "do": {"n't": []},
-                "had": {"n't": [], "n't've": []},
-                "has": {"n't": []},
-                "have": {"n't": []},
-                "he": {"'d": [], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []},
-                "how": {"'d": [], "'ll": [], "'s": []},
-                "i": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'m": [], "'ma": [], "'ve": []},
-                "is": {"n't": []},
-                "it": {"'d": [], "'d've": [], "'ll": [], "'s": ["contrLower", "contrUpper"]},
-                "might": {"n't": [], "n't've": [], "'ve": []},
-                "must": {"n't": [], "'ve": []},
-                "need": {"n't": []},
-                "not": {"'ve": []},
-                "sha": {"n't": []},
-                "she": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []},
-                "should": {"'ve": [], "n't": [], "n't've": []},
-                "that": {"'s": []},
-                "there": {"'d": [], "'d've": [], "'s": ["contrLower", "contrUpper"], "'ll": []},
-                "they": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []},
-                "was": {"n't": []},
-                "we": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'re": ["contrLower", "contrUpper"], "'ve": []},
-                "were": {"n't": []},
-                "what": {"'ll": [], "'re": [], "'s": [], "'ve": []},
-                "when": {"'s": []},
-                "where": {"'d": [], "'s": [], "'ve": []},
-                "who": {"'d": [], "'ll": [], "'re": ["contrLower", "contrUpper"], "'s": [], "'ve": []},
-                "why": {"'ll": [], "'re": [], "'s": []},
-                "wo": {"n't": []},
-                "would": {"'ve": [], "n't": [], "n't've": []},
-                "you": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []}
-
-                }
-
-# other specials that don't really have contractions
-# so they are hardcoded
-hardcoded_specials = {
-                "let's": [{"F": "let"}, {"F": "'s", "L": "us"}],
-                "Let's": [{"F": "Let"}, {"F": "'s", "L": "us"}],
-
-                "'s":  [{"F": "'s", "L": "'s"}],
-
-                "'S":  [{"F": "'S", "L": "'s"}],
-                u"\u2018s": [{"F": u"\u2018s", "L": "'s"}],
-                u"\u2018S": [{"F": u"\u2018S", "L": "'s"}],
-
-                "'em": [{"F": "'em"}],
-
-                "'ol": [{"F": "'ol"}],
-
-                "vs.": [{"F": "vs."}],
-
-                "Ms.": [{"F": "Ms."}],
-                "Mr.": [{"F": "Mr."}],
-                "Dr.": [{"F": "Dr."}],
-                "Mrs.": [{"F": "Mrs."}],
-                "Messrs.": [{"F": "Messrs."}],
-                "Gov.": [{"F": "Gov."}],
-                "Gen.": [{"F": "Gen."}],
-
-                "Mt.": [{"F": "Mt.", "L": "Mount"}],
-
-                "''": [{"F": "''"}],
-
-                "—": [{"F": "—", "L": "--", "pos": ":"}],
-
-                "Corp.": [{"F": "Corp."}],
-                "Inc.": [{"F": "Inc."}],
-                "Co.": [{"F": "Co."}],
-                "co.": [{"F": "co."}],
-                "Ltd.": [{"F": "Ltd."}],
-                "Bros.": [{"F": "Bros."}],
-
-                "Rep.": [{"F": "Rep."}],
-                "Sen.": [{"F": "Sen."}],
-                "Jr.": [{"F": "Jr."}],
-                "Rev.": [{"F": "Rev."}],
-                "Adm.": [{"F": "Adm."}],
-                "St.": [{"F": "St."}],
-
-                "a.m.": [{"F": "a.m."}],
-                "p.m.": [{"F": "p.m."}],
-
-                "1a.m.": [{"F": "1"}, {"F": "a.m."}],
-                "2a.m.": [{"F": "2"}, {"F": "a.m."}],
-                "3a.m.": [{"F": "3"}, {"F": "a.m."}],
-                "4a.m.": [{"F": "4"}, {"F": "a.m."}],
-                "5a.m.": [{"F": "5"}, {"F": "a.m."}],
-                "6a.m.": [{"F": "6"}, {"F": "a.m."}],
-                "7a.m.": [{"F": "7"}, {"F": "a.m."}],
-                "8a.m.": [{"F": "8"}, {"F": "a.m."}],
-                "9a.m.": [{"F": "9"}, {"F": "a.m."}],
-                "10a.m.": [{"F": "10"}, {"F": "a.m."}],
-                "11a.m.": [{"F": "11"}, {"F": "a.m."}],
-                "12a.m.": [{"F": "12"}, {"F": "a.m."}],
-                "1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
-                "2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
-                "3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
-                "4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
-                "5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
-                "6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
-                "7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
-                "8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
-                "9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
-                "10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
-                "11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
-                "12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
-
-
-                "p.m.": [{"F": "p.m."}],
-                "1p.m.": [{"F": "1"}, {"F": "p.m."}],
-                "2p.m.": [{"F": "2"}, {"F": "p.m."}],
-                "3p.m.": [{"F": "3"}, {"F": "p.m."}],
-                "4p.m.": [{"F": "4"}, {"F": "p.m."}],
-                "5p.m.": [{"F": "5"}, {"F": "p.m."}],
-                "6p.m.": [{"F": "6"}, {"F": "p.m."}],
-                "7p.m.": [{"F": "7"}, {"F": "p.m."}],
-                "8p.m.": [{"F": "8"}, {"F": "p.m."}],
-                "9p.m.": [{"F": "9"}, {"F": "p.m."}],
-                "10p.m.": [{"F": "10"}, {"F": "p.m."}],
-                "11p.m.": [{"F": "11"}, {"F": "p.m."}],
-                "12p.m.": [{"F": "12"}, {"F": "p.m."}],
-                "1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
-                "2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
-                "3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
-                "4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
-                "5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
-                "6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
-                "7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
-                "8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
-                "9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
-                "10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
-                "11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
-                "12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
-
-                "Jan.": [{"F": "Jan."}],
-                "Feb.": [{"F": "Feb."}],
-                "Mar.": [{"F": "Mar."}],
-                "Apr.": [{"F": "Apr."}],
-                "May.": [{"F": "May."}],
-                "Jun.": [{"F": "Jun."}],
-                "Jul.": [{"F": "Jul."}],
-                "Aug.": [{"F": "Aug."}],
-                "Sep.": [{"F": "Sep."}],
-                "Sept.": [{"F": "Sept."}],
-                "Oct.": [{"F": "Oct."}],
-                "Nov.": [{"F": "Nov."}],
-                "Dec.": [{"F": "Dec."}],
-
-                "Ala.": [{"F": "Ala."}],
-                "Ariz.": [{"F": "Ariz."}],
-                "Ark.": [{"F":  "Ark."}],
-                "Calif.": [{"F": "Calif."}],
-                "Colo.": [{"F": "Colo."}],
-                "Conn.": [{"F": "Conn."}],
-                "Del.": [{"F":  "Del."}],
-                "D.C.": [{"F": "D.C."}],
-                "Fla.": [{"F":  "Fla."}],
-                "Ga.": [{"F": "Ga."}],
-                "Ill.": [{"F": "Ill."}],
-                "Ind.": [{"F": "Ind."}],
-                "Kans.": [{"F": "Kans."}],
-                "Kan.": [{"F": "Kan."}],
-                "Ky.": [{"F": "Ky."}],
-                "La.": [{"F": "La."}],
-                "Md.": [{"F": "Md."}],
-                "Mass.": [{"F": "Mass."}],
-                "Mich.": [{"F": "Mich."}],
-                "Minn.": [{"F": "Minn."}],
-                "Miss.": [{"F": "Miss."}],
-                "Mo.": [{"F": "Mo."}],
-                "Mont.": [{"F": "Mont."}],
-                "Nebr.": [{"F": "Nebr."}],
-                "Neb.": [{"F": "Neb."}],
-                "Nev.": [{"F":  "Nev."}],
-                "N.H.": [{"F": "N.H."}],
-                "N.J.": [{"F": "N.J."}],
-                "N.M.": [{"F": "N.M."}],
-                "N.Y.": [{"F": "N.Y."}],
-                "N.C.": [{"F": "N.C."}],
-                "N.D.": [{"F": "N.D."}],
-                "Okla.": [{"F": "Okla."}],
-                "Ore.": [{"F": "Ore."}],
-                "Pa.": [{"F": "Pa."}],
-                "Tenn.": [{"F": "Tenn."}],
-                "Va.": [{"F": "Va."}],
-                "Wash.": [{"F": "Wash."}],
-                "Wis.": [{"F": "Wis."}],
-
-                ":)":  [{"F": ":)"}],
-                "<3":  [{"F": "<3"}],
-                ";)":  [{"F": ";)"}],
-                "(:":  [{"F": "(:"}],
-                ":(":  [{"F": ":("}],
-                "-_-": [{"F": "-_-"}],
-                "=)":  [{"F": "=)"}],
-                ":/":  [{"F": ":/"}],
-                ":>":  [{"F": ":>"}],
-                ";-)": [{"F": ";-)"}],
-                ":Y":  [{"F": ":Y"}],
-                ":P":  [{"F": ":P"}],
-                ":-P": [{"F": ":-P"}],
-                ":3":  [{"F": ":3"}],
-                "=3":  [{"F": "=3"}],
-                "xD":  [{"F": "xD"}],
-                "^_^": [{"F": "^_^"}],
-                "=]":  [{"F": "=]"}],
-                "=D":  [{"F": "=D"}],
-                "<333":    [{"F": "<333"}],
-                ":))": [{"F": ":))"}],
-                ":0":  [{"F": ":0"}],
-                "-__-":    [{"F": "-__-"}],
-                "xDD": [{"F": "xDD"}],
-                "o_o": [{"F": "o_o"}],
-                "o_O": [{"F": "o_O"}],
-                "V_V": [{"F": "V_V"}],
-                "=[[": [{"F": "=[["}],
-                "<33": [{"F": "<33"}],
-                ";p":  [{"F": ";p"}],
-                ";D":  [{"F": ";D"}],
-                ";-p": [{"F": ";-p"}],
-                ";(":  [{"F": ";("}],
-                ":p":  [{"F": ":p"}],
-                ":]":  [{"F": ":]"}],
-                ":O":  [{"F": ":O"}],
-                ":-/": [{"F": ":-/"}],
-                ":-)": [{"F": ":-)"}],
-                ":(((":    [{"F": ":((("}],
-                ":((": [{"F": ":(("}],
-                ":')": [{"F": ":')"}],
-                "(^_^)":   [{"F": "(^_^)"}],
-                "(=":  [{"F": "(="}],
-                "o.O": [{"F": "o.O"}],
-                "\")": [{"F": "\")"}],
-                "a.": [{"F": "a."}],
-                "b.": [{"F": "b."}],
-                "c.": [{"F": "c."}],
-                "d.": [{"F": "d."}],
-                "e.": [{"F": "e."}],
-                "f.": [{"F": "f."}],
-                "g.": [{"F": "g."}],
-                "h.": [{"F": "h."}],
-                "i.": [{"F": "i."}],
-                "j.": [{"F": "j."}],
-                "k.": [{"F": "k."}],
-                "l.": [{"F": "l."}],
-                "m.": [{"F": "m."}],
-                "n.": [{"F": "n."}],
-                "o.": [{"F": "o."}],
-                "p.": [{"F": "p."}],
-                "q.": [{"F": "q."}],
-                "r.": [{"F": "r."}],
-                "s.": [{"F": "s."}],
-                "t.": [{"F": "t."}],
-                "u.": [{"F": "u."}],
-                "v.": [{"F": "v."}],
-                "w.": [{"F": "w."}],
-                "x.": [{"F": "x."}],
-                "y.": [{"F": "y."}],
-                "z.": [{"F": "z."}],
-
-                "i.e.": [{"F": "i.e."}],
-                "I.e.": [{"F": "I.e."}],
-                "I.E.": [{"F": "I.E."}],
-                "e.g.": [{"F": "e.g."}],
-                "E.g.": [{"F": "E.g."}],
-                "E.G.": [{"F": "E.G."}],
-                "\n": [{"F": "\n", "pos": "SP"}],
-                "\t": [{"F": "\t", "pos": "SP"}],
-                " ": [{"F": " ", "pos": "SP"}],
-                u"\u00a0": [{"F": u"\u00a0", "pos": "SP", "L": "  "}]
-
-}
-
-def get_double_contractions(ending):
-    endings = []
-
-    ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
-
-    while ends_with_contraction:
-        for contraction in contractions:
-            if ending.endswith(contraction):
-                endings.append(contraction)
-                ending = ending.rstrip(contraction)
-        ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
-
-    endings.reverse() # reverse because the last ending is put in the list first
-    return endings
-
-def get_token_properties(token, capitalize=False, remove_contractions=False):
-    props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop
-    if capitalize:
-        token = token.capitalize()
-    if remove_contractions:
-        token = token.replace("'", "")
-
-    props["F"] = token
-    return props
-
-def create_entry(token, endings, capitalize=False, remove_contractions=False):
-    
-    properties = []
-    properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions))
-    for e in endings:
-        properties.append(get_token_properties(e, remove_contractions=remove_contractions))
-    return properties
-
-def generate_specials():
-
-    specials = {}
-
-    for token in starting_tokens:
-        possible_endings = starting_tokens[token]
-        for ending in possible_endings:
-
-            endings = []
-            if ending.count("'") > 1:
-                endings.extend(get_double_contractions(ending))
-            else:
-                endings.append(ending)
-
-            exceptions = possible_endings[ending]
-
-            if "lower" not in exceptions:
-                special = token + ending
-                specials[special] = create_entry(token, endings)
-
-            if "upper" not in exceptions:
-                special = token.capitalize() + ending
-                specials[special] = create_entry(token, endings, capitalize=True)
-
-            if "contrLower" not in exceptions:
-                special = token + ending.replace("'", "")
-                specials[special] = create_entry(token, endings, remove_contractions=True)
-
-            if "contrUpper" not in exceptions:
-                special = token.capitalize() + ending.replace("'", "")
-                specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True)
-
-    # add in hardcoded specials
-    specials = dict(specials, **hardcoded_specials)
-
-    return specials
-
-if __name__ == "__main__":
-    specials = generate_specials()
-    with open("specials.json", "w") as file_:
-        file_.write(json.dumps(specials, indent=2))
-
diff --git a/lang_data/en/infix.txt b/lang_data/en/infix.txt
deleted file mode 100644
index b9b0230a7..000000000
--- a/lang_data/en/infix.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-\.\.\.+
-(?<=[a-z])\.(?=[A-Z])
-(?<=[a-zA-Z])-(?=[a-zA-z])
-(?<=[a-zA-Z])--(?=[a-zA-z])
-(?<=[0-9])-(?=[0-9])
-(?<=[A-Za-z]),(?=[A-Za-z])
diff --git a/lang_data/en/lemma_rules.json b/lang_data/en/lemma_rules.json
deleted file mode 100644
index 1e76436cd..000000000
--- a/lang_data/en/lemma_rules.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-    "noun": [
-        ["s", ""],
-        ["ses", "s"],
-        ["ves", "f"],
-        ["xes", "x"],
-        ["zes", "z"],
-        ["ches", "ch"],
-        ["shes", "sh"],
-        ["men", "man"],
-        ["ies", "y"]
-    ],
-
-    "verb": [
-        ["s", ""],
-        ["ies", "y"],
-        ["es", "e"],
-        ["es", ""],
-        ["ed", "e"],
-        ["ed", ""],
-        ["ing", "e"],
-        ["ing", ""]
-    ],
-
-    "adj": [
-        ["er", ""],
-        ["est", ""],
-        ["er", "e"],
-        ["est", "e"]
-    ],
-
-    "punct": [
-        ["“", "\""],
-        ["”", "\""],
-        ["\u2018", "'"],
-        ["\u2019", "'"]
-    ]
-}
diff --git a/lang_data/en/morphs.json b/lang_data/en/morphs.json
deleted file mode 100644
index 059381b27..000000000
--- a/lang_data/en/morphs.json
+++ /dev/null
@@ -1,59 +0,0 @@
-{
-    "PRP": {
-        "I":          {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Sing",                   "Case": "Nom"},
-        "me":         {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Sing",                   "Case": "Acc"},
-        "you":        {"L": "-PRON-", "PronType": "Prs", "Person": "Two"},
-        "he":         {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Nom"},
-        "him":        {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"},
-        "she":        {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem",  "Case": "Nom"},
-        "her":        {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem",  "Case": "Acc"},
-        "it":         {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"},
-        "we":         {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Plur",                   "Case": "Nom"},
-        "us":         {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Plur",                   "Case": "Acc"},
-        "they":       {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur",                   "Case": "Nom"},
-        "them":       {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur",                   "Case": "Acc"},
-        
-        "mine":       {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Sing",                   "Poss": "Yes", "Reflex": "Yes"},
-        "yours":      {"L": "-PRON-", "PronType": "Prs", "Person": "Two",                                       "Poss": "Yes", "Reflex": "Yes"},
-        "his":        {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Poss": "Yes", "Reflex": "Yes"},
-        "hers":       {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem",  "Poss": "Yes", "Reflex": "Yes"},
-        "its":        {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut", "Poss": "Yes", "Reflex": "Yes"},
-        "ours":       {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Plur",                   "Poss": "Yes", "Reflex": "Yes"},
-        "yours":      {"L": "-PRON-", "PronType": "Prs", "Person": "Two",   "Number": "Plur",                   "Poss": "Yes", "Reflex": "Yes"},
-        "theirs":     {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur",                   "Poss": "Yes", "Reflex": "Yes"},
-        
-        "myself":     {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Sing",  "Case": "Acc",                  "Reflex": "Yes"},
-        "yourself":   {"L": "-PRON-", "PronType": "Prs", "Person": "Two",                     "Case": "Acc",                   "Reflex": "Yes"},
-        "himself":    {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Masc", "Reflex": "Yes"},
-        "herself":    {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Fem",  "Reflex": "Yes"},
-        "itself":     {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Neut", "Reflex": "Yes"},
-        "themself":   {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc",                   "Reflex": "Yes"},
-        "ourselves":  {"L": "-PRON-", "PronType": "Prs", "Person": "One",   "Number": "Plur", "Case": "Acc",                   "Reflex": "Yes"},
-        "yourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Two",                     "Case": "Acc",                   "Reflex": "Yes"},
-        "themselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc",                   "Reflex": "Yes"}
- 
-    },
-
-    "PRP$": {
-        "my":    {"L": "-PRON-", "Person": "One",   "Number": "Sing",                   "PronType": "Prs", "Poss": "Yes"},
-        "your":  {"L": "-PRON-", "Person": "Two",                                       "PronType": "Prs", "Poss": "Yes"},
-        "his":   {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Masc", "PronType": "Prs", "Poss": "Yes"},
-        "her":   {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Fem",  "PronType": "Prs", "Poss": "Yes"},
-        "its":   {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Neut", "PronType": "Prs", "Poss": "Yes"},
-        "our":   {"L": "-PRON-", "Person": "One",   "Number": "Plur",                   "PronType": "Prs", "Poss": "Yes"},
-        "their": {"L": "-PRON-", "Person": "Three", "Number": "Plur",                   "PronType": "Prs", "Poss": "Yes"}
-    },
-
-    "VBZ": {
-        "am":  {"L": "be", "VerbForm": "Fin", "Person": "One",   "Tense": "Pres", "Mood": "Ind"},
-        "are": {"L": "be", "VerbForm": "Fin", "Person": "Two",   "Tense": "Pres", "Mood": "Ind"},
-        "is":  {"L": "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
-    },
-    "VBP": {
-        "are":  {"L": "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
-    },
-    "VBD": {
-        "was":  {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
-        "were": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
-    }
-}
diff --git a/lang_data/en/prefix.txt b/lang_data/en/prefix.txt
deleted file mode 100644
index 48c4fc549..000000000
--- a/lang_data/en/prefix.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-,
-"
-(
-[
-{
-*
-<
-$
-£
-“
-'
-``
-`
-#
-US$
-C$
-A$
-a-
-‘
-....
-...
diff --git a/lang_data/en/specials.json b/lang_data/en/specials.json
deleted file mode 100644
index 3600717ad..000000000
--- a/lang_data/en/specials.json
+++ /dev/null
@@ -1,4924 +0,0 @@
-{
-  "d.": [
-    {
-      "F": "d."
-    }
-  ], 
-  "Theydve": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  ":/": [
-    {
-      "F": ":/"
-    }
-  ], 
-  "shouldn't've": [
-    {
-      "F": "should"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "There'll": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "E.G.": [
-    {
-      "F": "E.G."
-    }
-  ], 
-  "howll": [
-    {
-      "F": "how"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "6a.m.": [
-    {
-      "F": "6"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "Ore.": [
-    {
-      "F": "Ore."
-    }
-  ], 
-  "Hadn't've": [
-    {
-      "F": "Had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  ":>": [
-    {
-      "F": ":>"
-    }
-  ], 
-  "3p.m.": [
-    {
-      "F": "3"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "who'll": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "5a.m.": [
-    {
-      "F": "5"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  ":(": [
-    {
-      "F": ":("
-    }
-  ], 
-  ":0": [
-    {
-      "F": ":0"
-    }
-  ], 
-  "10a.m.": [
-    {
-      "F": "10"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "aint": [
-    {
-      "F": "ai", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  " ": [
-    {
-      "pos": "SP", 
-      "F": " "
-    }
-  ], 
-  "Dec.": [
-    {
-      "F": "Dec."
-    }
-  ], 
-  "Shouldnt": [
-    {
-      "F": "Should"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Ky.": [
-    {
-      "F": "Ky."
-    }
-  ], 
-  "when's": [
-    {
-      "F": "when"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Didnt": [
-    {
-      "F": "Did", 
-      "L": "do", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "itll": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Who're": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "=D": [
-    {
-      "F": "=D"
-    }
-  ], 
-  "Ain't": [
-    {
-      "F": "Ai", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Can't": [
-    {
-      "F": "Ca", 
-      "L": "can", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Whyre": [
-    {
-      "F": "Why"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "Aren't": [
-    {
-      "F": "Are", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Neednt": [
-    {
-      "F": "Need"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "should've": [
-    {
-      "F": "should"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "shouldn't": [
-    {
-      "F": "should"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Idve": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "weve": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Va.": [
-    {
-      "F": "Va."
-    }
-  ], 
-  "D.C.": [
-    {
-      "F": "D.C."
-    }
-  ], 
-  "3am": [
-    {
-      "F": "3"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "Ive": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Md.": [
-    {
-      "F": "Md."
-    }
-  ], 
-  ";D": [
-    {
-      "F": ";D"
-    }
-  ], 
-  "Mrs.": [
-    {
-      "F": "Mrs."
-    }
-  ], 
-  "Minn.": [
-    {
-      "F": "Minn."
-    }
-  ], 
-  "they'd": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Youdve": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "theyve": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Weren't": [
-    {
-      "F": "Were"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "werent": [
-    {
-      "F": "were"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "whyre": [
-    {
-      "F": "why"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "g.": [
-    {
-      "F": "g."
-    }
-  ], 
-  "I'm": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "pos": "VBP", 
-      "F": "'m", 
-      "tenspect": 1, 
-      "number": 1, 
-      "L": "be"
-    }
-  ], 
-  ":p": [
-    {
-      "F": ":p"
-    }
-  ], 
-  "She'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "She"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "not've": [
-    {
-      "F": "not", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "we'll": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  ":O": [
-    {
-      "F": ":O"
-    }
-  ], 
-  "<33": [
-    {
-      "F": "<33"
-    }
-  ], 
-  "Don't": [
-    {
-      "L": "do", 
-      "F": "Do"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Whyll": [
-    {
-      "F": "Why"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "''": [
-    {
-      "F": "''"
-    }
-  ], 
-  "they've": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "t.": [
-    {
-      "F": "t."
-    }
-  ], 
-  "wasn't": [
-    {
-      "F": "was"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "could've": [
-    {
-      "pos": "MD", 
-      "F": "could"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "what've": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "havent": [
-    {
-      "pos": "VB", 
-      "F": "have"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Who've": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "11am": [
-    {
-      "F": "11"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "Shan't": [
-    {
-      "F": "Sha"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "i'll": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "i.e.": [
-    {
-      "F": "i.e."
-    }
-  ], 
-  "you'd": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "w.": [
-    {
-      "F": "w."
-    }
-  ], 
-  "whens": [
-    {
-      "F": "when"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "whys": [
-    {
-      "F": "why"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "6pm": [
-    {
-      "F": "6"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "4p.m.": [
-    {
-      "F": "4"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "Whereve": [
-    {
-      "F": "Where"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "o_o": [
-    {
-      "F": "o_o"
-    }
-  ], 
-  "Mo.": [
-    {
-      "F": "Mo."
-    }
-  ], 
-  "Kan.": [
-    {
-      "F": "Kan."
-    }
-  ], 
-  "\u00a0": [
-    {
-      "pos": "SP", 
-      "L": "  ", 
-      "F": "\u00a0"
-    }
-  ], 
-  "there'd": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "N.H.": [
-    {
-      "F": "N.H."
-    }
-  ], 
-  "(^_^)": [
-    {
-      "F": "(^_^)"
-    }
-  ], 
-  "Mont.": [
-    {
-      "F": "Mont."
-    }
-  ], 
-  "hadn't've": [
-    {
-      "F": "had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "whatll": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "wouldn't've": [
-    {
-      "F": "would"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "there's": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "2pm": [
-    {
-      "F": "2"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "Who'll": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "o_O": [
-    {
-      "F": "o_O"
-    }
-  ], 
-  "Nev.": [
-    {
-      "F": "Nev."
-    }
-  ], 
-  "youll": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "wouldve": [
-    {
-      "F": "would"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Nov.": [
-    {
-      "F": "Nov."
-    }
-  ], 
-  "z.": [
-    {
-      "F": "z."
-    }
-  ], 
-  "xDD": [
-    {
-      "F": "xDD"
-    }
-  ], 
-  "Sen.": [
-    {
-      "F": "Sen."
-    }
-  ], 
-  "Wouldnt": [
-    {
-      "F": "Would"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Thered": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Youre": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "Couldn't've": [
-    {
-      "pos": "MD", 
-      "F": "Could"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "who're": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "Whys": [
-    {
-      "F": "Why"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "mightn't've": [
-    {
-      "F": "might"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Wholl": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "hadn't": [
-    {
-      "F": "had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Havent": [
-    {
-      "pos": "VB", 
-      "F": "Have"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Whatve": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  ":)": [
-    {
-      "F": ":)"
-    }
-  ], 
-  "o.O": [
-    {
-      "F": "o.O"
-    }
-  ], 
-  "Thats": [
-    {
-      "F": "That"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  ":((": [
-    {
-      "F": ":(("
-    }
-  ], 
-  "Gov.": [
-    {
-      "F": "Gov."
-    }
-  ], 
-  "Howll": [
-    {
-      "F": "How"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "p.": [
-    {
-      "F": "p."
-    }
-  ], 
-  "wouldn't": [
-    {
-      "F": "would"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "9pm": [
-    {
-      "F": "9"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "You'll": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Ala.": [
-    {
-      "F": "Ala."
-    }
-  ], 
-  "12am": [
-    {
-      "F": "12"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "=]": [
-    {
-      "F": "=]"
-    }
-  ], 
-  "Cant": [
-    {
-      "F": "Ca", 
-      "L": "can", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "i'd": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "a.m.": [
-    {
-      "F": "a.m."
-    }
-  ], 
-  "weren't": [
-    {
-      "F": "were"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "would've": [
-    {
-      "F": "would"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "i'm": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "pos": "VBP", 
-      "F": "'m", 
-      "tenspect": 1, 
-      "number": 1, 
-      "L": "be"
-    }
-  ], 
-  "why'll": [
-    {
-      "F": "why"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "we'd've": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Shouldve": [
-    {
-      "F": "Should"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "can't": [
-    {
-      "F": "ca", 
-      "L": "can", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "thats": [
-    {
-      "F": "that"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "1p.m.": [
-    {
-      "F": "1"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "12a.m.": [
-    {
-      "F": "12"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "Hes": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "Needn't": [
-    {
-      "F": "Need"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "It's": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "St.": [
-    {
-      "F": "St."
-    }
-  ], 
-  "Why're": [
-    {
-      "F": "Why"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  ":(((": [
-    {
-      "F": ":((("
-    }
-  ], 
-  "Hed": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Mt.": [
-    {
-      "L": "Mount", 
-      "F": "Mt."
-    }
-  ], 
-  "couldn't": [
-    {
-      "pos": "MD", 
-      "F": "could"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "What've": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "4a.m.": [
-    {
-      "F": "4"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "Ind.": [
-    {
-      "F": "Ind."
-    }
-  ], 
-  "It'd": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "<3": [
-    {
-      "F": "<3"
-    }
-  ], 
-  "theydve": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "aren't": [
-    {
-      "F": "are", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Mightn't": [
-    {
-      "F": "Might"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "'S": [
-    {
-      "L": "'s", 
-      "F": "'S"
-    }
-  ], 
-  "I've": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Whered": [
-    {
-      "F": "Where"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Itdve": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "I'ma": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "'ma"
-    }
-  ], 
-  "whos": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "They'd": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "What'll": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  ":Y": [
-    {
-      "F": ":Y"
-    }
-  ], 
-  "You've": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Mustve": [
-    {
-      "F": "Must"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "whod": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "mightntve": [
-    {
-      "F": "might"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "I'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Must've": [
-    {
-      "F": "Must"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "it'd": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Ark.": [
-    {
-      "F": "Ark."
-    }
-  ], 
-  "Wis.": [
-    {
-      "F": "Wis."
-    }
-  ], 
-  "6p.m.": [
-    {
-      "F": "6"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "what're": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "N.C.": [
-    {
-      "F": "N.C."
-    }
-  ], 
-  "Wasn't": [
-    {
-      "F": "Was"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "what's": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "he'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Jan.": [
-    {
-      "F": "Jan."
-    }
-  ], 
-  "She'd": [
-    {
-      "L": "-PRON-", 
-      "F": "She"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "shedve": [
-    {
-      "L": "-PRON-", 
-      "F": "she"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Tenn.": [
-    {
-      "F": "Tenn."
-    }
-  ], 
-  "ain't": [
-    {
-      "F": "ai", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Wash.": [
-    {
-      "F": "Wash."
-    }
-  ], 
-  "She's": [
-    {
-      "L": "-PRON-", 
-      "F": "She"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "i'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "2a.m.": [
-    {
-      "F": "2"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "We'd've": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "must've": [
-    {
-      "F": "must"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "That's": [
-    {
-      "F": "That"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Sept.": [
-    {
-      "F": "Sept."
-    }
-  ], 
-  "whatre": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "you'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Dont": [
-    {
-      "L": "do", 
-      "F": "Do"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "i.": [
-    {
-      "F": "i."
-    }
-  ], 
-  "Jun.": [
-    {
-      "F": "Jun."
-    }
-  ], 
-  "thered": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Youd": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "couldn't've": [
-    {
-      "pos": "MD", 
-      "F": "could"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Whens": [
-    {
-      "F": "When"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "8a.m.": [
-    {
-      "F": "8"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "Isnt": [
-    {
-      "F": "Is", 
-      "L": "be", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "mightve": [
-    {
-      "F": "might"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "'ol": [
-    {
-      "F": "'ol"
-    }
-  ], 
-  "2p.m.": [
-    {
-      "F": "2"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "9a.m.": [
-    {
-      "F": "9"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "q.": [
-    {
-      "F": "q."
-    }
-  ], 
-  "didnt": [
-    {
-      "F": "did", 
-      "L": "do", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "ive": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "It'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "e.g.": [
-    {
-      "F": "e.g."
-    }
-  ], 
-  "\t": [
-    {
-      "pos": "SP", 
-      "F": "\t"
-    }
-  ], 
-  "Mich.": [
-    {
-      "F": "Mich."
-    }
-  ], 
-  "Itll": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "didn't": [
-    {
-      "F": "did", 
-      "L": "do", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "3pm": [
-    {
-      "F": "3"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "Jul.": [
-    {
-      "F": "Jul."
-    }
-  ], 
-  "7pm": [
-    {
-      "F": "7"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "cant": [
-    {
-      "F": "ca", 
-      "L": "can", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Miss.": [
-    {
-      "F": "Miss."
-    }
-  ], 
-  "im": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "pos": "VBP", 
-      "F": "m", 
-      "tenspect": 1, 
-      "number": 1, 
-      "L": "be"
-    }
-  ], 
-  "Ariz.": [
-    {
-      "F": "Ariz."
-    }
-  ], 
-  "they'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "f.": [
-    {
-      "F": "f."
-    }
-  ], 
-  "Co.": [
-    {
-      "F": "Co."
-    }
-  ], 
-  "Hadntve": [
-    {
-      "F": "Had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Weve": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "1a.m.": [
-    {
-      "F": "1"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "=3": [
-    {
-      "F": "=3"
-    }
-  ], 
-  "Mightnt": [
-    {
-      "F": "Might"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "1pm": [
-    {
-      "F": "1"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "youdve": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Shedve": [
-    {
-      "L": "-PRON-", 
-      "F": "She"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "theyd": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Ill.": [
-    {
-      "F": "Ill."
-    }
-  ], 
-  "N.D.": [
-    {
-      "F": "N.D."
-    }
-  ], 
-  "Cannot": [
-    {
-      "F": "Can", 
-      "L": "can", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "not", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "s.": [
-    {
-      "F": "s."
-    }
-  ], 
-  "Hadn't": [
-    {
-      "F": "Had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "What're": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "He'll": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "wholl": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "They're": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "Neb.": [
-    {
-      "F": "Neb."
-    }
-  ], 
-  "shouldnt": [
-    {
-      "F": "should"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "\n": [
-    {
-      "pos": "SP", 
-      "F": "\n"
-    }
-  ], 
-  "whered": [
-    {
-      "F": "where"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "7a.m.": [
-    {
-      "F": "7"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "youve": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "4am": [
-    {
-      "F": "4"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "v.": [
-    {
-      "F": "v."
-    }
-  ], 
-  "notve": [
-    {
-      "F": "not", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "couldve": [
-    {
-      "pos": "MD", 
-      "F": "could"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "mustve": [
-    {
-      "F": "must"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Youve": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "therell": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "might've": [
-    {
-      "F": "might"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Mustn't": [
-    {
-      "F": "Must"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "wheres": [
-    {
-      "F": "where"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "they're": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "idve": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "hows": [
-    {
-      "F": "how"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "Fla.": [
-    {
-      "F": "Fla."
-    }
-  ], 
-  "N.M.": [
-    {
-      "F": "N.M."
-    }
-  ], 
-  "youre": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "Didn't": [
-    {
-      "F": "Did", 
-      "L": "do", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Couldve": [
-    {
-      "pos": "MD", 
-      "F": "Could"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "10p.m.": [
-    {
-      "F": "10"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "Del.": [
-    {
-      "F": "Del."
-    }
-  ], 
-  "Oct.": [
-    {
-      "F": "Oct."
-    }
-  ], 
-  "Rep.": [
-    {
-      "F": "Rep."
-    }
-  ], 
-  "cannot": [
-    {
-      "F": "can", 
-      "L": "can", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "not", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Im": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "pos": "VBP", 
-      "F": "m", 
-      "tenspect": 1, 
-      "number": 1, 
-      "L": "be"
-    }
-  ], 
-  "howd": [
-    {
-      "F": "how"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Okla.": [
-    {
-      "F": "Okla."
-    }
-  ], 
-  "Feb.": [
-    {
-      "F": "Feb."
-    }
-  ], 
-  "you've": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "You're": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "she'll": [
-    {
-      "L": "-PRON-", 
-      "F": "she"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Theyll": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "don't": [
-    {
-      "L": "do", 
-      "F": "do"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "itd": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  ":-)": [
-    {
-      "F": ":-)"
-    }
-  ], 
-  "Hedve": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "isnt": [
-    {
-      "F": "is", 
-      "L": "be", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "won't": [
-    {
-      "F": "wo"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "We're": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "3a.m.": [
-    {
-      "F": "3"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "^_^": [
-    {
-      "F": "^_^"
-    }
-  ], 
-  "\u2018S": [
-    {
-      "L": "'s", 
-      "F": "\u2018S"
-    }
-  ], 
-  "9p.m.": [
-    {
-      "F": "9"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "dont": [
-    {
-      "L": "do", 
-      "F": "do"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "ima": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "ma"
-    }
-  ], 
-  "Let's": [
-    {
-      "F": "Let"
-    }, 
-    {
-      "L": "us", 
-      "F": "'s"
-    }
-  ], 
-  "he's": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "we've": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "What's": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Who's": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "-__-": [
-    {
-      "F": "-__-"
-    }
-  ], 
-  "hedve": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "he'd": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "When's": [
-    {
-      "F": "When"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Mightn't've": [
-    {
-      "F": "Might"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "We've": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "\u2018s": [
-    {
-      "L": "'s", 
-      "F": "\u2018s"
-    }
-  ], 
-  "Couldntve": [
-    {
-      "pos": "MD", 
-      "F": "Could"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Who'd": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  ":-/": [
-    {
-      "F": ":-/"
-    }
-  ], 
-  "haven't": [
-    {
-      "pos": "VB", 
-      "F": "have"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Gen.": [
-    {
-      "F": "Gen."
-    }
-  ], 
-  "(:": [
-    {
-      "F": "(:"
-    }
-  ], 
-  "arent": [
-    {
-      "F": "are", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "You'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "c.": [
-    {
-      "F": "c."
-    }
-  ], 
-  "(=": [
-    {
-      "F": "(="
-    }
-  ], 
-  "Wouldn't": [
-    {
-      "F": "Would"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "who's": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "12p.m.": [
-    {
-      "F": "12"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "5am": [
-    {
-      "F": "5"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "Mightve": [
-    {
-      "F": "Might"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Theredve": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "theredve": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Messrs.": [
-    {
-      "F": "Messrs."
-    }
-  ], 
-  "who'd": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Where's": [
-    {
-      "F": "Where"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "wont": [
-    {
-      "F": "wo"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "she'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "she"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "10pm": [
-    {
-      "F": "10"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "Corp.": [
-    {
-      "F": "Corp."
-    }
-  ], 
-  "Aug.": [
-    {
-      "F": "Aug."
-    }
-  ], 
-  "-_-": [
-    {
-      "F": "-_-"
-    }
-  ], 
-  "y.": [
-    {
-      "F": "y."
-    }
-  ], 
-  "Should've": [
-    {
-      "F": "Should"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "11pm": [
-    {
-      "F": "11"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "8am": [
-    {
-      "F": "8"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "theyre": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "l.": [
-    {
-      "F": "l."
-    }
-  ], 
-  "Wouldntve": [
-    {
-      "F": "Would"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Ga.": [
-    {
-      "F": "Ga."
-    }
-  ], 
-  "1am": [
-    {
-      "F": "1"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "Where've": [
-    {
-      "F": "Where"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "11a.m.": [
-    {
-      "F": "11"
-    }, 
-    {
-      "F": "a.m."
-    }
-  ], 
-  "mustn't": [
-    {
-      "F": "must"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "isn't": [
-    {
-      "F": "is", 
-      "L": "be", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Bros.": [
-    {
-      "F": "Bros."
-    }
-  ], 
-  "Aint": [
-    {
-      "F": "Ai", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "why's": [
-    {
-      "F": "why"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "V_V": [
-    {
-      "F": "V_V"
-    }
-  ], 
-  ";p": [
-    {
-      "F": ";p"
-    }
-  ], 
-  "There'd": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "They'll": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "b.": [
-    {
-      "F": "b."
-    }
-  ], 
-  "how'll": [
-    {
-      "F": "how"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Wedve": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "couldntve": [
-    {
-      "pos": "MD", 
-      "F": "could"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "12pm": [
-    {
-      "F": "12"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "There's": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "we'd": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Dr.": [
-    {
-      "F": "Dr."
-    }
-  ], 
-  "Whod": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  ":-P": [
-    {
-      "F": ":-P"
-    }
-  ], 
-  "whatve": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Wouldve": [
-    {
-      "F": "Would"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "o.": [
-    {
-      "F": "o."
-    }
-  ], 
-  "there'll": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  ":]": [
-    {
-      "F": ":]"
-    }
-  ], 
-  "needn't": [
-    {
-      "F": "need"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "shouldntve": [
-    {
-      "F": "should"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "why're": [
-    {
-      "F": "why"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "p.m.": [
-    {
-      "F": "p.m."
-    }
-  ], 
-  "Doesnt": [
-    {
-      "F": "Does", 
-      "L": "do", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "whereve": [
-    {
-      "F": "where"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "they'll": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "I'd": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Might've": [
-    {
-      "F": "Might"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "mightnt": [
-    {
-      "F": "might"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Kans.": [
-    {
-      "F": "Kans."
-    }
-  ], 
-  "Not've": [
-    {
-      "F": "Not", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "e.": [
-    {
-      "F": "e."
-    }
-  ], 
-  "mightn't": [
-    {
-      "F": "might"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "you're": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "Mar.": [
-    {
-      "F": "Mar."
-    }
-  ], 
-  "They've": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "\")": [
-    {
-      "F": "\")"
-    }
-  ], 
-  "what'll": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Calif.": [
-    {
-      "F": "Calif."
-    }
-  ], 
-  "Could've": [
-    {
-      "pos": "MD", 
-      "F": "Could"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Would've": [
-    {
-      "F": "Would"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  ";)": [
-    {
-      "F": ";)"
-    }
-  ], 
-  ";(": [
-    {
-      "F": ";("
-    }
-  ], 
-  "Isn't": [
-    {
-      "F": "Is", 
-      "L": "be", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "let's": [
-    {
-      "F": "let"
-    }, 
-    {
-      "L": "us", 
-      "F": "'s"
-    }
-  ], 
-  "'em": [
-    {
-      "F": "'em"
-    }
-  ], 
-  "She'll": [
-    {
-      "L": "-PRON-", 
-      "F": "She"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "I.E.": [
-    {
-      "F": "I.E."
-    }
-  ], 
-  "You'd": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "wouldnt": [
-    {
-      "F": "would"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "6am": [
-    {
-      "F": "6"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  ":P": [
-    {
-      "F": ":P"
-    }
-  ], 
-  "Why'll": [
-    {
-      "F": "Why"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Where'd": [
-    {
-      "F": "Where"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Theyre": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "11p.m.": [
-    {
-      "F": "11"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "Won't": [
-    {
-      "F": "Wo"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Couldn't": [
-    {
-      "pos": "MD", 
-      "F": "Could"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "it's": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "r.": [
-    {
-      "F": "r."
-    }
-  ], 
-  "it'll": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "They'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Ima": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "ma"
-    }
-  ], 
-  "5pm": [
-    {
-      "F": "5"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "10am": [
-    {
-      "F": "10"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "m.": [
-    {
-      "F": "m."
-    }
-  ], 
-  "whats": [
-    {
-      "F": "what"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "How's": [
-    {
-      "F": "How"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Sep.": [
-    {
-      "F": "Sep."
-    }
-  ], 
-  "Shouldntve": [
-    {
-      "F": "Should"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "youd": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Whatll": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Wouldn't've": [
-    {
-      "F": "Would"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "How'd": [
-    {
-      "F": "How"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "doesnt": [
-    {
-      "F": "does", 
-      "L": "do", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "h.": [
-    {
-      "F": "h."
-    }
-  ], 
-  "Shouldn't": [
-    {
-      "F": "Should"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "He'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Mightntve": [
-    {
-      "F": "Might"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "couldnt": [
-    {
-      "pos": "MD", 
-      "F": "could"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Haven't": [
-    {
-      "pos": "VB", 
-      "F": "Have"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "<333": [
-    {
-      "F": "<333"
-    }
-  ], 
-  "doesn't": [
-    {
-      "F": "does", 
-      "L": "do", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Hasn't": [
-    {
-      "F": "Has"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "how's": [
-    {
-      "F": "how"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "hes": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "=[[": [
-    {
-      "F": "=[["
-    }
-  ], 
-  "xD": [
-    {
-      "F": "xD"
-    }
-  ], 
-  "he'll": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "hed": [
-    {
-      "L": "-PRON-", 
-      "F": "he"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "7p.m.": [
-    {
-      "F": "7"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "how'd": [
-    {
-      "F": "how"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "u.": [
-    {
-      "F": "u."
-    }
-  ], 
-  "we're": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "'re"
-    }
-  ], 
-  "vs.": [
-    {
-      "F": "vs."
-    }
-  ], 
-  "Hadnt": [
-    {
-      "F": "Had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Shant": [
-    {
-      "F": "Sha"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Theyve": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Hows": [
-    {
-      "F": "How"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "We'll": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "N.Y.": [
-    {
-      "F": "N.Y."
-    }
-  ], 
-  "x.": [
-    {
-      "F": "x."
-    }
-  ], 
-  "8p.m.": [
-    {
-      "F": "8"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "i've": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Whove": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "2am": [
-    {
-      "F": "2"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "La.": [
-    {
-      "F": "La."
-    }
-  ], 
-  "i'ma": [
-    {
-      "L": "-PRON-", 
-      "F": "i"
-    }, 
-    {
-      "F": "'ma"
-    }
-  ], 
-  "N.J.": [
-    {
-      "F": "N.J."
-    }
-  ], 
-  "Nebr.": [
-    {
-      "F": "Nebr."
-    }
-  ], 
-  "Howd": [
-    {
-      "F": "How"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "hadnt": [
-    {
-      "F": "had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "shant": [
-    {
-      "F": "sha"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "There'd've": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Inc.": [
-    {
-      "F": "Inc."
-    }
-  ], 
-  "I'll": [
-    {
-      "L": "-PRON-", 
-      "F": "I"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Why's": [
-    {
-      "F": "Why"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Adm.": [
-    {
-      "F": "Adm."
-    }
-  ], 
-  "Shouldn't've": [
-    {
-      "F": "Should"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "n.": [
-    {
-      "F": "n."
-    }
-  ], 
-  "Wasnt": [
-    {
-      "F": "Was"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "whove": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  ";-p": [
-    {
-      "F": ";-p"
-    }
-  ], 
-  "hasn't": [
-    {
-      "F": "has"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "wouldntve": [
-    {
-      "F": "would"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Wheres": [
-    {
-      "F": "Where"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "How'll": [
-    {
-      "F": "How"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "there'd've": [
-    {
-      "F": "there"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Whos": [
-    {
-      "F": "Who"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "shes": [
-    {
-      "L": "-PRON-", 
-      "F": "she"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "Doesn't": [
-    {
-      "F": "Does", 
-      "L": "do", 
-      "pos": "VBZ"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Arent": [
-    {
-      "F": "Are", 
-      "pos": "VBP", 
-      "number": 2, 
-      "L": "be"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Hasnt": [
-    {
-      "F": "Has"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "j.": [
-    {
-      "F": "j."
-    }
-  ], 
-  "He's": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "wasnt": [
-    {
-      "F": "was"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "whyll": [
-    {
-      "F": "why"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "co.": [
-    {
-      "F": "co."
-    }
-  ], 
-  "mustnt": [
-    {
-      "F": "must"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "He'd": [
-    {
-      "L": "-PRON-", 
-      "F": "He"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "I.e.": [
-    {
-      "F": "I.e."
-    }
-  ], 
-  "Shes": [
-    {
-      "L": "-PRON-", 
-      "F": "She"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "where've": [
-    {
-      "F": "where"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Youll": [
-    {
-      "L": "-PRON-", 
-      "F": "You"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Apr.": [
-    {
-      "F": "Apr."
-    }
-  ], 
-  ":')": [
-    {
-      "F": ":')"
-    }
-  ], 
-  "Conn.": [
-    {
-      "F": "Conn."
-    }
-  ], 
-  "8pm": [
-    {
-      "F": "8"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  "9am": [
-    {
-      "F": "9"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "hasnt": [
-    {
-      "F": "has"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "theyll": [
-    {
-      "L": "-PRON-", 
-      "F": "they"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "it'd've": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "itdve": [
-    {
-      "L": "-PRON-", 
-      "F": "it"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Jr.": [
-    {
-      "F": "Jr."
-    }
-  ], 
-  "Rev.": [
-    {
-      "F": "Rev."
-    }
-  ], 
-  "k.": [
-    {
-      "F": "k."
-    }
-  ], 
-  "wedve": [
-    {
-      "F": "we"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "=)": [
-    {
-      "F": "=)"
-    }
-  ], 
-  "Colo.": [
-    {
-      "F": "Colo."
-    }
-  ], 
-  "Mr.": [
-    {
-      "F": "Mr."
-    }
-  ], 
-  "Werent": [
-    {
-      "F": "Were"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Therell": [
-    {
-      "F": "There"
-    }, 
-    {
-      "F": "ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "shan't": [
-    {
-      "F": "sha"
-    }, 
-    {
-      "F": "n't", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  ";-)": [
-    {
-      "F": ";-)"
-    }
-  ], 
-  "Wont": [
-    {
-      "F": "Wo"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "hadntve": [
-    {
-      "F": "had", 
-      "L": "have", 
-      "pos": "VBD"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "who've": [
-    {
-      "F": "who"
-    }, 
-    {
-      "F": "'ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "Whatre": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "re"
-    }
-  ], 
-  "'s": [
-    {
-      "L": "'s", 
-      "F": "'s"
-    }
-  ], 
-  "where'd": [
-    {
-      "F": "where"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "shouldve": [
-    {
-      "F": "should"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "a.": [
-    {
-      "F": "a."
-    }
-  ], 
-  "where's": [
-    {
-      "F": "where"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Ltd.": [
-    {
-      "F": "Ltd."
-    }
-  ], 
-  "Mass.": [
-    {
-      "F": "Mass."
-    }
-  ], 
-  "neednt": [
-    {
-      "F": "need"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Pa.": [
-    {
-      "F": "Pa."
-    }
-  ], 
-  "It'll": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "7am": [
-    {
-      "F": "7"
-    }, 
-    {
-      "L": "a.m.", 
-      "F": "am"
-    }
-  ], 
-  "We'd": [
-    {
-      "F": "We"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Whats": [
-    {
-      "F": "What"
-    }, 
-    {
-      "F": "s"
-    }
-  ], 
-  "\u2014": [
-    {
-      "pos": ":", 
-      "L": "--", 
-      "F": "\u2014"
-    }
-  ], 
-  "E.g.": [
-    {
-      "F": "E.g."
-    }
-  ], 
-  "Ms.": [
-    {
-      "F": "Ms."
-    }
-  ], 
-  ":3": [
-    {
-      "F": ":3"
-    }
-  ], 
-  "5p.m.": [
-    {
-      "F": "5"
-    }, 
-    {
-      "F": "p.m."
-    }
-  ], 
-  "Itd": [
-    {
-      "L": "-PRON-", 
-      "F": "It"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "May.": [
-    {
-      "F": "May."
-    }
-  ], 
-  "she'd": [
-    {
-      "L": "-PRON-", 
-      "F": "she"
-    }, 
-    {
-      "F": "'d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "Mustnt": [
-    {
-      "F": "Must"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "Notve": [
-    {
-      "F": "Not", 
-      "L": "not", 
-      "pos": "RB"
-    }, 
-    {
-      "F": "ve", 
-      "L": "have", 
-      "pos": "VB"
-    }
-  ], 
-  "you'll": [
-    {
-      "L": "-PRON-", 
-      "F": "you"
-    }, 
-    {
-      "F": "'ll", 
-      "L": "will", 
-      "pos": "MD"
-    }
-  ], 
-  "Theyd": [
-    {
-      "L": "-PRON-", 
-      "F": "They"
-    }, 
-    {
-      "F": "d", 
-      "L": "would", 
-      "pos": "MD"
-    }
-  ], 
-  "she's": [
-    {
-      "L": "-PRON-", 
-      "F": "she"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "Couldnt": [
-    {
-      "pos": "MD", 
-      "F": "Could"
-    }, 
-    {
-      "F": "nt", 
-      "L": "not", 
-      "pos": "RB"
-    }
-  ], 
-  "that's": [
-    {
-      "F": "that"
-    }, 
-    {
-      "F": "'s"
-    }
-  ], 
-  "4pm": [
-    {
-      "F": "4"
-    }, 
-    {
-      "L": "p.m.", 
-      "F": "pm"
-    }
-  ], 
-  ":))": [
-    {
-      "F": ":))"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/lang_data/en/suffix.txt b/lang_data/en/suffix.txt
deleted file mode 100644
index d8c6bc2c2..000000000
--- a/lang_data/en/suffix.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-,
-\"
-\)
-\]
-\}
-\*
-\!
-\?
-%
-\$
->
-:
-;
-'
-”
-''
-'s
-'S
-’s
-’S
-’
-\.\.
-\.\.\.
-\.\.\.\.
-(?<=[a-z0-9)\]"'%\)])\.
-(?<=[0-9])km
diff --git a/lang_data/en/tag_map.json b/lang_data/en/tag_map.json
deleted file mode 100644
index f913f38fe..000000000
--- a/lang_data/en/tag_map.json
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-".": {"pos": "punct", "puncttype": "peri"},
-",": {"pos": "punct", "puncttype": "comm"},
-"-LRB-": {"pos": "punct", "puncttype": "brck", "punctside": "ini"},
-"-RRB-": {"pos": "punct", "puncttype": "brck", "punctside": "fin"},
-"``": {"pos": "punct", "puncttype": "quot", "punctside": "ini"},
-"\"\"": {"pos": "punct", "puncttype": "quot", "punctside": "fin"},
-"''": {"pos": "punct", "puncttype": "quot", "punctside": "fin"},
-":": {"pos": "punct"},
-"$": {"pos": "sym", "other": {"symtype": "currency"}},
-"#": {"pos": "sym", "other": {"symtype": "numbersign"}},
-"AFX": {"pos": "adj",  "hyph": "hyph"},
-"CC": {"pos": "conj", "conjtype": "coor"},
-"CD": {"pos": "num", "numtype": "card"},
-"DT": {"pos": "det"},
-"EX": {"pos": "adv", "advtype": "ex"},
-"FW": {"pos": "x", "foreign": "foreign"},
-"HYPH": {"pos": "punct", "puncttype": "dash"},
-"IN": {"pos": "adp"},
-"JJ": {"pos": "adj", "degree": "pos"},
-"JJR": {"pos": "adj", "degree": "comp"},
-"JJS": {"pos": "adj", "degree": "sup"},
-"LS": {"pos": "punct", "numtype": "ord"},
-"MD": {"pos": "verb", "verbtype": "mod"},
-"NIL": {"pos": ""},
-"NN": {"pos": "noun", "number": "sing"},
-"NNP": {"pos": "propn", "nountype": "prop", "number": "sing"},
-"NNPS": {"pos": "propn", "nountype": "prop", "number": "plur"},
-"NNS": {"pos": "noun", "number": "plur"},
-"PDT": {"pos": "adj", "adjtype": "pdt", "prontype": "prn"},
-"POS": {"pos": "part", "poss": "poss"},
-"PRP": {"pos": "pron", "prontype": "prs"},
-"PRP$": {"pos": "adj", "prontype": "prs", "poss": "poss"},
-"RB": {"pos": "adv", "degree": "pos"},
-"RBR": {"pos": "adv", "degree": "comp"},
-"RBS": {"pos": "adv", "degree": "sup"},
-"RP": {"pos": "part"},
-"SYM": {"pos": "sym"},
-"TO": {"pos": "part", "parttype": "inf", "verbform": "inf"},
-"UH": {"pos": "intJ"},
-"VB": {"pos": "verb", "verbform": "inf"},
-"VBD": {"pos": "verb", "verbform": "fin", "tense": "past"},
-"VBG": {"pos": "verb", "verbform": "part", "tense": "pres", "aspect": "prog"},
-"VBN": {"pos": "verb", "verbform": "part", "tense": "past", "aspect": "perf"},
-"VBP": {"pos": "verb", "verbform": "fin", "tense": "pres"},
-"VBZ": {"pos": "verb", "verbform": "fin", "tense": "pres", "number": "sing", "person": 3},
-"WDT": {"pos": "adj", "prontype": "int|rel"},
-"WP": {"pos": "noun", "prontype": "int|rel"},
-"WP$": {"pos": "adj", "poss": "poss", "prontype": "int|rel"},
-"WRB": {"pos": "adv", "prontype": "int|rel"},
-"SP": {"pos": "space"},
-"ADD": {"pos": "x"},
-"NFP": {"pos": "punct"},
-"GW": {"pos": "x"},
-"AFX": {"pos": "x"},
-"HYPH": {"pos": "punct"},
-"XX": {"pos": "x"},
-"BES": {"pos": "verb"},
-"HVS": {"pos": "verb"}
-}
diff --git a/lang_data/fi/infix.txt b/lang_data/fi/infix.txt
deleted file mode 100644
index 37eca7350..000000000
--- a/lang_data/fi/infix.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-\.\.\.
-(?<=[a-z])\.(?=[A-Z])
-(?<=[a-zA-Z])-(?=[a-zA-z])
diff --git a/lang_data/fi/lemma_rules.json b/lang_data/fi/lemma_rules.json
deleted file mode 100644
index 0967ef424..000000000
--- a/lang_data/fi/lemma_rules.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/lang_data/fi/morphs.json b/lang_data/fi/morphs.json
deleted file mode 100644
index e69de29bb..000000000
diff --git a/lang_data/fi/prefix.txt b/lang_data/fi/prefix.txt
deleted file mode 100644
index 48c4fc549..000000000
--- a/lang_data/fi/prefix.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-,
-"
-(
-[
-{
-*
-<
-$
-£
-“
-'
-``
-`
-#
-US$
-C$
-A$
-a-
-‘
-....
-...
diff --git a/lang_data/fi/sample.txt b/lang_data/fi/sample.txt
deleted file mode 100644
index 12c0bb787..000000000
--- a/lang_data/fi/sample.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
-
-Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.
diff --git a/lang_data/fi/specials.json b/lang_data/fi/specials.json
deleted file mode 100644
index 0e0986339..000000000
--- a/lang_data/fi/specials.json
+++ /dev/null
@@ -1,149 +0,0 @@
-{
-"a.m.": [{"F": "a.m."}],
-"p.m.": [{"F": "p.m."}],
-
-"1a.m.": [{"F": "1"}, {"F": "a.m."}],
-"2a.m.": [{"F": "2"}, {"F": "a.m."}],
-"3a.m.": [{"F": "3"}, {"F": "a.m."}],
-"4a.m.": [{"F": "4"}, {"F": "a.m."}],
-"5a.m.": [{"F": "5"}, {"F": "a.m."}],
-"6a.m.": [{"F": "6"}, {"F": "a.m."}],
-"7a.m.": [{"F": "7"}, {"F": "a.m."}],
-"8a.m.": [{"F": "8"}, {"F": "a.m."}],
-"9a.m.": [{"F": "9"}, {"F": "a.m."}],
-"10a.m.": [{"F": "10"}, {"F": "a.m."}],
-"11a.m.": [{"F": "11"}, {"F": "a.m."}],
-"12a.m.": [{"F": "12"}, {"F": "a.m."}],
-"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
-"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
-"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
-"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
-"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
-"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
-"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
-"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
-"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
-"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
-"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
-"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
-
-
-"1p.m.": [{"F": "1"}, {"F": "p.m."}],
-"2p.m.": [{"F": "2"}, {"F": "p.m."}],
-"3p.m.": [{"F": "3"}, {"F": "p.m."}],
-"4p.m.": [{"F": "4"}, {"F": "p.m."}],
-"5p.m.": [{"F": "5"}, {"F": "p.m."}],
-"6p.m.": [{"F": "6"}, {"F": "p.m."}],
-"7p.m.": [{"F": "7"}, {"F": "p.m."}],
-"8p.m.": [{"F": "8"}, {"F": "p.m."}],
-"9p.m.": [{"F": "9"}, {"F": "p.m."}],
-"10p.m.": [{"F": "10"}, {"F": "p.m."}],
-"11p.m.": [{"F": "11"}, {"F": "p.m."}],
-"12p.m.": [{"F": "12"}, {"F": "p.m."}],
-"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
-"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
-"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
-"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
-"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
-"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
-"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
-"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
-"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
-"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
-"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
-"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
-
-"Jan.": [{"F": "Jan.", "L": "Januar"}],
-"Feb.": [{"F": "Feb.", "L": "Februar"}],
-"Mär.": [{"F": "Mär.", "L": "März"}],
-"Apr.": [{"F": "Apr.", "L": "April"}],
-"Mai.": [{"F": "Mai.", "L": "Mai"}],
-"Jun.": [{"F": "Jun.", "L": "Juni"}],
-"Jul.": [{"F": "Jul.", "L": "Juli"}],
-"Aug.": [{"F": "Aug.", "L": "August"}],
-"Sep.": [{"F": "Sep.", "L": "September"}],
-"Sept.": [{"F": "Sept.", "L": "September"}],
-"Okt.": [{"F": "Okt.", "L": "Oktober"}],
-"Nov.": [{"F": "Nov.", "L": "November"}],
-"Dez.": [{"F": "Dez.", "L": "Dezember"}],
-
-":)":  [{"F": ":)"}],
-"<3":  [{"F": "<3"}],
-";)":  [{"F": ";)"}],
-"(:":  [{"F": "(:"}],
-":(":  [{"F": ":("}],
-"-_-": [{"F": "-_-"}],
-"=)":  [{"F": "=)"}],
-":/":  [{"F": ":/"}],
-":>":  [{"F": ":>"}],
-";-)": [{"F": ";-)"}],
-":Y":  [{"F": ":Y"}],
-":P":  [{"F": ":P"}],
-":-P": [{"F": ":-P"}],
-":3":  [{"F": ":3"}],
-"=3":  [{"F": "=3"}],
-"xD":  [{"F": "xD"}],
-"^_^": [{"F": "^_^"}],
-"=]":  [{"F": "=]"}],
-"=D":  [{"F": "=D"}],
-"<333":    [{"F": "<333"}],
-":))": [{"F": ":))"}],
-":0":  [{"F": ":0"}],
-"-__-":    [{"F": "-__-"}],
-"xDD": [{"F": "xDD"}],
-"o_o": [{"F": "o_o"}],
-"o_O": [{"F": "o_O"}],
-"V_V": [{"F": "V_V"}],
-"=[[": [{"F": "=[["}],
-"<33": [{"F": "<33"}],
-";p":  [{"F": ";p"}],
-";D":  [{"F": ";D"}],
-";-p": [{"F": ";-p"}],
-";(":  [{"F": ";("}],
-":p":  [{"F": ":p"}],
-":]":  [{"F": ":]"}],
-":O":  [{"F": ":O"}],
-":-/": [{"F": ":-/"}],
-":-)": [{"F": ":-)"}],
-":(((":    [{"F": ":((("}],
-":((": [{"F": ":(("}],
-":')": [{"F": ":')"}],
-"(^_^)":   [{"F": "(^_^)"}],
-"(=":  [{"F": "(="}],
-"o.O": [{"F": "o.O"}],
-"\")": [{"F": "\")"}],
-"a.": [{"F": "a."}],
-"b.": [{"F": "b."}],
-"c.": [{"F": "c."}],
-"d.": [{"F": "d."}],
-"e.": [{"F": "e."}],
-"f.": [{"F": "f."}],
-"g.": [{"F": "g."}],
-"h.": [{"F": "h."}],
-"i.": [{"F": "i."}],
-"j.": [{"F": "j."}],
-"k.": [{"F": "k."}],
-"l.": [{"F": "l."}],
-"m.": [{"F": "m."}],
-"n.": [{"F": "n."}],
-"o.": [{"F": "o."}],
-"p.": [{"F": "p."}],
-"q.": [{"F": "q."}],
-"s.": [{"F": "s."}],
-"t.": [{"F": "t."}],
-"u.": [{"F": "u."}],
-"v.": [{"F": "v."}],
-"w.": [{"F": "w."}],
-"x.": [{"F": "x."}],
-"y.": [{"F": "y."}],
-"z.": [{"F": "z."}],
-
-"z.b.": [{"F": "z.b."}],
-"e.h.": [{"F": "I.e."}],
-"o.ä.": [{"F": "I.E."}],
-"bzw.": [{"F": "bzw."}],
-"usw.": [{"F": "usw."}],
-"\n": [{"F": "\n", "pos": "SP"}],
-"\t": [{"F": "\t", "pos": "SP"}],
-" ": [{"F": " ", "pos": "SP"}]
-}
diff --git a/lang_data/fi/suffix.txt b/lang_data/fi/suffix.txt
deleted file mode 100644
index d8c6bc2c2..000000000
--- a/lang_data/fi/suffix.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-,
-\"
-\)
-\]
-\}
-\*
-\!
-\?
-%
-\$
->
-:
-;
-'
-”
-''
-'s
-'S
-’s
-’S
-’
-\.\.
-\.\.\.
-\.\.\.\.
-(?<=[a-z0-9)\]"'%\)])\.
-(?<=[0-9])km
diff --git a/lang_data/fi/tag_map.json b/lang_data/fi/tag_map.json
deleted file mode 100644
index 4451d0fa0..000000000
--- a/lang_data/fi/tag_map.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-    "NOUN": {"pos": "NOUN"},
-    "VERB": {"pos": "VERB"},
-    "PUNCT": {"pos": "PUNCT"},
-    "ADV": {"pos": "ADV"},
-    "ADJ": {"pos": "ADJ"},
-    "PRON": {"pos": "PRON"},
-    "PROPN": {"pos": "PROPN"},
-    "CONJ": {"pos": "CONJ"},
-    "NUM": {"pos": "NUM"},
-    "AUX": {"pos": "AUX"},
-    "SCONJ": {"pos": "SCONJ"},
-    "ADP": {"pos": "ADP"},
-    "SYM": {"pos": "SYM"},
-    "X": {"pos": "X"},
-    "INTJ": {"pos": "INTJ"},
-    "DET": {"pos": "DET"},
-    "PART": {"pos": "PART"}
-}
diff --git a/lang_data/it/infix.txt b/lang_data/it/infix.txt
deleted file mode 100644
index 37eca7350..000000000
--- a/lang_data/it/infix.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-\.\.\.
-(?<=[a-z])\.(?=[A-Z])
-(?<=[a-zA-Z])-(?=[a-zA-z])
diff --git a/lang_data/it/morphs.json b/lang_data/it/morphs.json
deleted file mode 100644
index e69de29bb..000000000
diff --git a/lang_data/it/prefix.txt b/lang_data/it/prefix.txt
deleted file mode 100644
index 48c4fc549..000000000
--- a/lang_data/it/prefix.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-,
-"
-(
-[
-{
-*
-<
-$
-£
-“
-'
-``
-`
-#
-US$
-C$
-A$
-a-
-‘
-....
-...
diff --git a/lang_data/it/specials.json b/lang_data/it/specials.json
deleted file mode 100644
index 0e0986339..000000000
--- a/lang_data/it/specials.json
+++ /dev/null
@@ -1,149 +0,0 @@
-{
-"a.m.": [{"F": "a.m."}],
-"p.m.": [{"F": "p.m."}],
-
-"1a.m.": [{"F": "1"}, {"F": "a.m."}],
-"2a.m.": [{"F": "2"}, {"F": "a.m."}],
-"3a.m.": [{"F": "3"}, {"F": "a.m."}],
-"4a.m.": [{"F": "4"}, {"F": "a.m."}],
-"5a.m.": [{"F": "5"}, {"F": "a.m."}],
-"6a.m.": [{"F": "6"}, {"F": "a.m."}],
-"7a.m.": [{"F": "7"}, {"F": "a.m."}],
-"8a.m.": [{"F": "8"}, {"F": "a.m."}],
-"9a.m.": [{"F": "9"}, {"F": "a.m."}],
-"10a.m.": [{"F": "10"}, {"F": "a.m."}],
-"11a.m.": [{"F": "11"}, {"F": "a.m."}],
-"12a.m.": [{"F": "12"}, {"F": "a.m."}],
-"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
-"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
-"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
-"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
-"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
-"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
-"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
-"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
-"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
-"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
-"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
-"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
-
-
-"1p.m.": [{"F": "1"}, {"F": "p.m."}],
-"2p.m.": [{"F": "2"}, {"F": "p.m."}],
-"3p.m.": [{"F": "3"}, {"F": "p.m."}],
-"4p.m.": [{"F": "4"}, {"F": "p.m."}],
-"5p.m.": [{"F": "5"}, {"F": "p.m."}],
-"6p.m.": [{"F": "6"}, {"F": "p.m."}],
-"7p.m.": [{"F": "7"}, {"F": "p.m."}],
-"8p.m.": [{"F": "8"}, {"F": "p.m."}],
-"9p.m.": [{"F": "9"}, {"F": "p.m."}],
-"10p.m.": [{"F": "10"}, {"F": "p.m."}],
-"11p.m.": [{"F": "11"}, {"F": "p.m."}],
-"12p.m.": [{"F": "12"}, {"F": "p.m."}],
-"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
-"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
-"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
-"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
-"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
-"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
-"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
-"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
-"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
-"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
-"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
-"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
-
-"Jan.": [{"F": "Jan.", "L": "Januar"}],
-"Feb.": [{"F": "Feb.", "L": "Februar"}],
-"Mär.": [{"F": "Mär.", "L": "März"}],
-"Apr.": [{"F": "Apr.", "L": "April"}],
-"Mai.": [{"F": "Mai.", "L": "Mai"}],
-"Jun.": [{"F": "Jun.", "L": "Juni"}],
-"Jul.": [{"F": "Jul.", "L": "Juli"}],
-"Aug.": [{"F": "Aug.", "L": "August"}],
-"Sep.": [{"F": "Sep.", "L": "September"}],
-"Sept.": [{"F": "Sept.", "L": "September"}],
-"Okt.": [{"F": "Okt.", "L": "Oktober"}],
-"Nov.": [{"F": "Nov.", "L": "November"}],
-"Dez.": [{"F": "Dez.", "L": "Dezember"}],
-
-":)":  [{"F": ":)"}],
-"<3":  [{"F": "<3"}],
-";)":  [{"F": ";)"}],
-"(:":  [{"F": "(:"}],
-":(":  [{"F": ":("}],
-"-_-": [{"F": "-_-"}],
-"=)":  [{"F": "=)"}],
-":/":  [{"F": ":/"}],
-":>":  [{"F": ":>"}],
-";-)": [{"F": ";-)"}],
-":Y":  [{"F": ":Y"}],
-":P":  [{"F": ":P"}],
-":-P": [{"F": ":-P"}],
-":3":  [{"F": ":3"}],
-"=3":  [{"F": "=3"}],
-"xD":  [{"F": "xD"}],
-"^_^": [{"F": "^_^"}],
-"=]":  [{"F": "=]"}],
-"=D":  [{"F": "=D"}],
-"<333":    [{"F": "<333"}],
-":))": [{"F": ":))"}],
-":0":  [{"F": ":0"}],
-"-__-":    [{"F": "-__-"}],
-"xDD": [{"F": "xDD"}],
-"o_o": [{"F": "o_o"}],
-"o_O": [{"F": "o_O"}],
-"V_V": [{"F": "V_V"}],
-"=[[": [{"F": "=[["}],
-"<33": [{"F": "<33"}],
-";p":  [{"F": ";p"}],
-";D":  [{"F": ";D"}],
-";-p": [{"F": ";-p"}],
-";(":  [{"F": ";("}],
-":p":  [{"F": ":p"}],
-":]":  [{"F": ":]"}],
-":O":  [{"F": ":O"}],
-":-/": [{"F": ":-/"}],
-":-)": [{"F": ":-)"}],
-":(((":    [{"F": ":((("}],
-":((": [{"F": ":(("}],
-":')": [{"F": ":')"}],
-"(^_^)":   [{"F": "(^_^)"}],
-"(=":  [{"F": "(="}],
-"o.O": [{"F": "o.O"}],
-"\")": [{"F": "\")"}],
-"a.": [{"F": "a."}],
-"b.": [{"F": "b."}],
-"c.": [{"F": "c."}],
-"d.": [{"F": "d."}],
-"e.": [{"F": "e."}],
-"f.": [{"F": "f."}],
-"g.": [{"F": "g."}],
-"h.": [{"F": "h."}],
-"i.": [{"F": "i."}],
-"j.": [{"F": "j."}],
-"k.": [{"F": "k."}],
-"l.": [{"F": "l."}],
-"m.": [{"F": "m."}],
-"n.": [{"F": "n."}],
-"o.": [{"F": "o."}],
-"p.": [{"F": "p."}],
-"q.": [{"F": "q."}],
-"s.": [{"F": "s."}],
-"t.": [{"F": "t."}],
-"u.": [{"F": "u."}],
-"v.": [{"F": "v."}],
-"w.": [{"F": "w."}],
-"x.": [{"F": "x."}],
-"y.": [{"F": "y."}],
-"z.": [{"F": "z."}],
-
-"z.b.": [{"F": "z.b."}],
-"e.h.": [{"F": "I.e."}],
-"o.ä.": [{"F": "I.E."}],
-"bzw.": [{"F": "bzw."}],
-"usw.": [{"F": "usw."}],
-"\n": [{"F": "\n", "pos": "SP"}],
-"\t": [{"F": "\t", "pos": "SP"}],
-" ": [{"F": " ", "pos": "SP"}]
-}
diff --git a/lang_data/it/suffix.txt b/lang_data/it/suffix.txt
deleted file mode 100644
index d8c6bc2c2..000000000
--- a/lang_data/it/suffix.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-,
-\"
-\)
-\]
-\}
-\*
-\!
-\?
-%
-\$
->
-:
-;
-'
-”
-''
-'s
-'S
-’s
-’S
-’
-\.\.
-\.\.\.
-\.\.\.\.
-(?<=[a-z0-9)\]"'%\)])\.
-(?<=[0-9])km
diff --git a/lang_data/it/tag_map.json b/lang_data/it/tag_map.json
deleted file mode 100644
index 92f11e457..000000000
--- a/lang_data/it/tag_map.json
+++ /dev/null
@@ -1,44 +0,0 @@
-{
-"S": {"pos": "NOUN"},
-"E":   {"pos": "ADP"},
-"RD":  {"pos": "DET"},
-"V":   {"pos": "VERB"},
-"_":   {"pos": "NO_TAG"},
-"A":   {"pos": "ADJ"},
-"SP":  {"pos": "PROPN"},
-"FF":  {"pos": "PUNCT"},
-"FS":  {"pos": "PUNCT"},
-"B":   {"pos": "ADV"},
-"CC":  {"pos": "CONJ"},
-"FB":  {"pos": "PUNCT"},
-"VA":  {"pos": "AUX"},
-"PC":  {"pos": "PRON"},
-"N":   {"pos": "NUM"},
-"RI":  {"pos": "DET"},
-"PR":  {"pos": "PRON"},
-"CS":  {"pos": "SCONJ"},
-"BN":  {"pos": "ADV"},
-"AP":  {"pos": "DET"},
-"VM":  {"pos": "AUX"},
-"DI":  {"pos": "DET"},
-"FC":  {"pos": "PUNCT"},
-"PI":  {"pos": "PRON"},
-"DD":  {"pos": "DET"},
-"DQ":  {"pos": "DET"},
-"PQ":  {"pos": "PRON"},
-"PD":  {"pos": "PRON"},
-"NO":  {"pos": "ADJ"},
-"PE":  {"pos": "PRON"},
-"T":   {"pos": "DET"},
-"X":   {"pos": "SYM"},
-"SW":  {"pos": "X"},
-"NO":  {"pos": "PRON"},
-"I":   {"pos": "INTJ"},
-"X":   {"pos": "X"},
-"DR":  {"pos": "DET"},
-"EA":  {"pos": "ADP"},
-"PP":  {"pos": "PRON"},
-"X":   {"pos": "NUM"},
-"DE":  {"pos": "DET"},
-"X":   {"pos": "PART"}
-}
diff --git a/lang_data/zh/gazetteer.json b/lang_data/zh/gazetteer.json
deleted file mode 100644
index d52fed839..000000000
--- a/lang_data/zh/gazetteer.json
+++ /dev/null
@@ -1,194 +0,0 @@
-{
-	"Reddit": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "reddit"}]
-		]
-	],
-	"SeptemberElevenAttacks": [
-		"EVENT",
-		{},
-		[
-			[
-				{"orth": "9/11"}
-			],
-			[
-				{"lower": "september"},
-				{"orth": "11"}
-			]
-		]
-	],
-	"Linux": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "linux"}]
-		]
-	],
-	"Haskell": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "haskell"}]
-		]
-	],
-	"HaskellCurry": [
-		"PERSON",
-		{},
-		[
-			[
-				{"lower": "haskell"},
-				{"lower": "curry"}
-			]
-		]
-	],
-	"Javascript": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "javascript"}]
-		]
-	],
-	"CSS": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "css"}],
-			[{"lower": "css3"}]
-		]
-	],
-	"displaCy": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "displacy"}]
-		]
-	],
-	"spaCy": [
-		"PRODUCT",
-		{},
-		[
-			[{"orth": "spaCy"}]
-		]
-	],
-
-    "HTML": [
-		"PRODUCT",
-		{},
-		[
-			[{"lower": "html"}],
-			[{"lower": "html5"}]
-		]
-	],
-    "Python": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Python"}]
-        ]
-    ],
-    "Ruby": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Ruby"}]
-        ]
-    ],
-    "Digg": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "digg"}]
-        ]
-    ],
-     "FoxNews": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Fox"}],
-            [{"orth": "News"}]
-        ]
-    ],
-    "Google": [
-        "ORG",
-        {},
-        [
-            [{"lower": "google"}]
-        ]
-    ],
-    "Mac": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "mac"}]
-        ]
-    ],
-    "Wikipedia": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "wikipedia"}]
-        ]
-    ],
-    "Windows": [
-        "PRODUCT",
-        {},
-        [
-            [{"orth": "Windows"}]
-        ]
-    ],
-     "Dell": [
-        "ORG",
-        {},
-        [
-            [{"lower": "dell"}]
-        ]
-    ],
-    "Facebook": [
-        "ORG",
-        {},
-        [
-            [{"lower": "facebook"}]
-        ]
-    ],
-     "Blizzard": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Blizzard"}]
-        ]
-    ],
-    "Ubuntu": [
-        "ORG",
-        {},
-        [
-            [{"orth": "Ubuntu"}]
-        ]
-    ],
-    "Youtube": [
-        "PRODUCT",
-        {},
-        [
-            [{"lower": "youtube"}]
-        ]
-    ],
-    "false_positives": [
-        null,
-        {},
-        [
-            [{"orth": "Shit"}],
-            [{"orth": "Weed"}],
-            [{"orth": "Cool"}],
-            [{"orth": "Btw"}],
-            [{"orth": "Bah"}],
-            [{"orth": "Bullshit"}],
-            [{"orth": "Lol"}],
-            [{"orth": "Yo"}, {"lower": "dawg"}],
-            [{"orth": "Yay"}],
-            [{"orth": "Ahh"}],
-            [{"orth": "Yea"}],
-            [{"orth": "Bah"}]
-        ]
-    ]
-}
diff --git a/lang_data/zh/infix.txt b/lang_data/zh/infix.txt
deleted file mode 100644
index aa36da8e9..000000000
--- a/lang_data/zh/infix.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-\.\.\.
-(?<=[a-z])\.(?=[A-Z])
-(?<=[a-zA-Z])-(?=[a-zA-z])
-(?<=[a-zA-Z])--(?=[a-zA-z])
-(?<=[0-9])-(?=[0-9])
-(?<=[A-Za-z]),(?=[A-Za-z])
diff --git a/lang_data/zh/morphs.json b/lang_data/zh/morphs.json
deleted file mode 100644
index 0967ef424..000000000
--- a/lang_data/zh/morphs.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/lang_data/zh/prefix.txt b/lang_data/zh/prefix.txt
deleted file mode 100644
index 48c4fc549..000000000
--- a/lang_data/zh/prefix.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-,
-"
-(
-[
-{
-*
-<
-$
-£
-“
-'
-``
-`
-#
-US$
-C$
-A$
-a-
-‘
-....
-...
diff --git a/lang_data/zh/specials.json b/lang_data/zh/specials.json
deleted file mode 100644
index 0967ef424..000000000
--- a/lang_data/zh/specials.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/lang_data/zh/suffix.txt b/lang_data/zh/suffix.txt
deleted file mode 100644
index d8c6bc2c2..000000000
--- a/lang_data/zh/suffix.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-,
-\"
-\)
-\]
-\}
-\*
-\!
-\?
-%
-\$
->
-:
-;
-'
-”
-''
-'s
-'S
-’s
-’S
-’
-\.\.
-\.\.\.
-\.\.\.\.
-(?<=[a-z0-9)\]"'%\)])\.
-(?<=[0-9])km
diff --git a/lang_data/zh/tag_map.json b/lang_data/zh/tag_map.json
deleted file mode 100644
index afc0c722c..000000000
--- a/lang_data/zh/tag_map.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-    "NR":   {"pos": "PROPN"},
-    "AD":   {"pos": "ADV"},
-    "NN":   {"pos": "NOUN"},
-    "CD":   {"pos": "NUM"},
-    "DEG":  {"pos": "PART"},
-    "PN":   {"pos": "PRON"},
-    "M":    {"pos": "PART"},
-    "JJ":   {"pos": "ADJ"},
-    "DEC":  {"pos": "PART"},
-    "NT":   {"pos": "NOUN"},
-    "DT":   {"pos": "DET"},
-    "LC":   {"pos": "PART"},
-    "CC":   {"pos": "CONJ"},
-    "AS":   {"pos": "PART"},
-    "SP":   {"pos": "PART"},
-    "IJ":   {"pos": "INTJ"},
-    "OD":   {"pos": "NUM"},
-    "MSP":  {"pos": "PART"},
-    "CS":   {"pos": "SCONJ"},
-    "ETC":  {"pos": "PART"},
-    "DEV":  {"pos": "PART"},
-    "BA":   {"pos": "AUX"},
-    "SB":   {"pos": "AUX"},
-    "DER":  {"pos": "PART"},
-    "LB":   {"pos": "AUX"},
-    "P":    {"pos": "ADP"},
-    "URL":  {"pos": "SYM"},
-    "FRAG": {"pos": "X"},
-    "X":    {"pos": "X"},
-    "ON":   {"pos": "X"},
-    "FW":   {"pos": "X"},
-    "VC":   {"pos": "VERB"},
-    "VV":   {"pos": "VERB"},
-    "VA":   {"pos": "VERB"},
-    "VE":   {"pos": "VERB"},
-    "PU":   {"pos": "PUNCT"},
-    "SP":   {"pos": "SPACE"},
-    "NP":   {"pos": "X"},
-    "_":    {"pos": "X"},
-    "VP":   {"pos": "X"},
-    "CHAR": {"pos": "X"}
-}