Remove old lang_data directory

This commit is contained in:
Ines Montani 2016-12-07 20:29:35 +01:00
parent 79dce0aabe
commit 4a1e206064
42 changed files with 0 additions and 9097 deletions

View File

@ -1,319 +0,0 @@
# surface form lemma pos
# multiple values are separated by |
# empty lines and lines starting with # are being ignored
'' ''
\") \")
\n \n <nl> SP
\t \t <tab> SP
<space> SP
# example: Wie geht's?
's 's es
'S 'S es
# example: Haste mal 'nen Euro?
'n 'n ein
'ne 'ne eine
'nen 'nen einen
# example: Kommen S nur herein!
s' s' sie
S' S' sie
# example: Da haben wir's!
ich's ich|'s ich|es
du's du|'s du|es
er's er|'s er|es
sie's sie|'s sie|es
wir's wir|'s wir|es
ihr's ihr|'s ihr|es
# example: Die katze auf'm dach.
auf'm auf|'m auf|dem
unter'm unter|'m unter|dem
über'm über|'m über|dem
vor'm vor|'m vor|dem
hinter'm hinter|'m hinter|dem
# persons
B.A. B.A.
B.Sc. B.Sc.
Dipl. Dipl.
Dipl.-Ing. Dipl.-Ing.
Dr. Dr.
Fr. Fr.
Frl. Frl.
Hr. Hr.
Hrn. Hrn.
Frl. Frl.
Prof. Prof.
St. St.
Hrgs. Hrgs.
Hg. Hg.
a.Z. a.Z.
a.D. a.D.
h.c. h.c.
Jr. Jr.
jr. jr.
jun. jun.
sen. sen.
rer. rer.
Ing. Ing.
M.A. M.A.
Mr. Mr.
M.Sc. M.Sc.
nat. nat.
phil. phil.
# companies
Co. Co.
co. co.
Cie. Cie.
A.G. A.G.
G.m.b.H. G.m.b.H.
i.G. i.G.
e.V. e.V.
# popular german abbreviations
Abb. Abb.
Abk. Abk.
Abs. Abs.
Abt. Abt.
abzgl. abzgl.
allg. allg.
a.M. a.M.
Bd. Bd.
betr. betr.
Betr. Betr.
Biol. Biol.
biol. biol.
Bf. Bf.
Bhf. Bhf.
Bsp. Bsp.
bspw. bspw.
bzgl. bzgl.
bzw. bzw.
d.h. d.h.
dgl. dgl.
ebd. ebd.
ehem. ehem.
eigtl. eigtl.
entspr. entspr.
erm. erm.
ev. ev.
evtl. evtl.
Fa. Fa.
Fam. Fam.
geb. geb.
Gebr. Gebr.
gem. gem.
ggf. ggf.
ggü. ggü.
ggfs. ggfs.
gegr. gegr.
Hbf. Hbf.
Hrsg. Hrsg.
hrsg. hrsg.
i.A. i.A.
i.d.R. i.d.R.
inkl. inkl.
insb. insb.
i.O. i.O.
i.Tr. i.Tr.
i.V. i.V.
jur. jur.
kath. kath.
K.O. K.O.
lt. lt.
max. max.
m.E. m.E.
m.M. m.M.
mtl. mtl.
min. min.
mind. mind.
MwSt. MwSt.
Nr. Nr.
o.a. o.a.
o.ä. o.ä.
o.Ä. o.Ä.
o.g. o.g.
o.k. o.k.
O.K. O.K.
Orig. Orig.
orig. orig.
pers. pers.
Pkt. Pkt.
Red. Red.
röm. röm.
s.o. s.o.
sog. sog.
std. std.
stellv. stellv.
Str. Str.
tägl. tägl.
Tel. Tel.
u.a. u.a.
usf. usf.
u.s.w. u.s.w.
usw. usw.
u.U. u.U.
u.v.m. u.v.m.
uvm. uvm.
v.a. v.a.
vgl. vgl.
vllt. vllt.
v.l.n.r. v.l.n.r.
vlt. vlt.
Vol. Vol.
wiss. wiss.
Univ. Univ.
z.B. z.B.
z.b. z.b.
z.Bsp. z.Bsp.
z.T. z.T.
z.Z. z.Z.
zzgl. zzgl.
z.Zt. z.Zt.
# popular latin abbreviations
vs. vs.
adv. adv.
Chr. Chr.
A.C. A.C.
A.D. A.D.
e.g. e.g.
i.e. i.e.
al. al.
p.a. p.a.
P.S. P.S.
q.e.d. q.e.d.
R.I.P. R.I.P.
etc. etc.
incl. incl.
ca. ca.
n.Chr. n.Chr.
p.s. p.s.
v.Chr. v.Chr.
# popular english abbreviations
D.C. D.C.
N.Y. N.Y.
N.Y.C. N.Y.C.
U.S. U.S.
U.S.A. U.S.A.
L.A. L.A.
U.S.S. U.S.S.
# dates & time
Jan. Jan.
Feb. Feb.
Mrz. Mrz.
Mär. Mär.
Apr. Apr.
Jun. Jun.
Jul. Jul.
Aug. Aug.
Sep. Sep.
Sept. Sept.
Okt. Okt.
Nov. Nov.
Dez. Dez.
Mo. Mo.
Di. Di.
Mi. Mi.
Do. Do.
Fr. Fr.
Sa. Sa.
So. So.
Std. Std.
Jh. Jh.
Jhd. Jhd.
# numbers
Tsd. Tsd.
Mio. Mio.
Mrd. Mrd.
# countries & languages
engl. engl.
frz. frz.
lat. lat.
österr. österr.
# smileys
:) :)
<3 <3
;) ;)
(: (:
:( :(
-_- -_-
=) =)
:/ :/
:> :>
;-) ;-)
:Y :Y
:P :P
:-P :-P
:3 :3
=3 =3
xD xD
^_^ ^_^
=] =]
=D =D
<333 <333
:)) :))
:0 :0
-__- -__-
xDD xDD
o_o o_o
o_O o_O
V_V V_V
=[[ =[[
<33 <33
;p ;p
;D ;D
;-p ;-p
;( ;(
:p :p
:] :]
:O :O
:-/ :-/
:-) :-)
:((( :(((
:(( :((
:') :')
(^_^) (^_^)
(= (=
o.O o.O
# single letters
a. a.
b. b.
c. c.
d. d.
e. e.
f. f.
g. g.
h. h.
i. i.
j. j.
k. k.
l. l.
m. m.
n. n.
o. o.
p. p.
q. q.
r. r.
s. s.
t. t.
u. u.
v. v.
w. w.
x. x.
y. y.
z. z.
ä. ä.
ö. ö.
ü. ü.

View File

@ -1,194 +0,0 @@
{
"Reddit": [
"PRODUCT",
{},
[
[{"lower": "reddit"}]
]
],
"SeptemberElevenAttacks": [
"EVENT",
{},
[
[
{"orth": "9/11"}
],
[
{"lower": "september"},
{"orth": "11"}
]
]
],
"Linux": [
"PRODUCT",
{},
[
[{"lower": "linux"}]
]
],
"Haskell": [
"PRODUCT",
{},
[
[{"lower": "haskell"}]
]
],
"HaskellCurry": [
"PERSON",
{},
[
[
{"lower": "haskell"},
{"lower": "curry"}
]
]
],
"Javascript": [
"PRODUCT",
{},
[
[{"lower": "javascript"}]
]
],
"CSS": [
"PRODUCT",
{},
[
[{"lower": "css"}],
[{"lower": "css3"}]
]
],
"displaCy": [
"PRODUCT",
{},
[
[{"lower": "displacy"}]
]
],
"spaCy": [
"PRODUCT",
{},
[
[{"orth": "spaCy"}]
]
],
"HTML": [
"PRODUCT",
{},
[
[{"lower": "html"}],
[{"lower": "html5"}]
]
],
"Python": [
"PRODUCT",
{},
[
[{"orth": "Python"}]
]
],
"Ruby": [
"PRODUCT",
{},
[
[{"orth": "Ruby"}]
]
],
"Digg": [
"PRODUCT",
{},
[
[{"lower": "digg"}]
]
],
"FoxNews": [
"ORG",
{},
[
[{"orth": "Fox"}],
[{"orth": "News"}]
]
],
"Google": [
"ORG",
{},
[
[{"lower": "google"}]
]
],
"Mac": [
"PRODUCT",
{},
[
[{"lower": "mac"}]
]
],
"Wikipedia": [
"PRODUCT",
{},
[
[{"lower": "wikipedia"}]
]
],
"Windows": [
"PRODUCT",
{},
[
[{"orth": "Windows"}]
]
],
"Dell": [
"ORG",
{},
[
[{"lower": "dell"}]
]
],
"Facebook": [
"ORG",
{},
[
[{"lower": "facebook"}]
]
],
"Blizzard": [
"ORG",
{},
[
[{"orth": "Blizzard"}]
]
],
"Ubuntu": [
"ORG",
{},
[
[{"orth": "Ubuntu"}]
]
],
"Youtube": [
"PRODUCT",
{},
[
[{"lower": "youtube"}]
]
],
"false_positives": [
null,
{},
[
[{"orth": "Shit"}],
[{"orth": "Weed"}],
[{"orth": "Cool"}],
[{"orth": "Btw"}],
[{"orth": "Bah"}],
[{"orth": "Bullshit"}],
[{"orth": "Lol"}],
[{"orth": "Yo"}, {"lower": "dawg"}],
[{"orth": "Yay"}],
[{"orth": "Ahh"}],
[{"orth": "Yea"}],
[{"orth": "Bah"}]
]
]
}

View File

@ -1,334 +0,0 @@
# coding=utf8
import json
import io
import itertools
contractions = {}
# contains the lemmas, parts of speech, number, and tenspect of
# potential tokens generated after splitting contractions off
token_properties = {}
# contains starting tokens with their potential contractions
# each potential contraction has a list of exceptions
# lower - don't generate the lowercase version
# upper - don't generate the uppercase version
# contrLower - don't generate the lowercase version with apostrophe (') removed
# contrUpper - dont' generate the uppercase version with apostrophe (') removed
# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so
# we add "contrLower" and "contrUpper" to the exceptions list
starting_tokens = {}
# other specials that don't really have contractions
# so they are hardcoded
hardcoded_specials = {
"''": [{"F": "''"}],
"\")": [{"F": "\")"}],
"\n": [{"F": "\n", "pos": "SP"}],
"\t": [{"F": "\t", "pos": "SP"}],
" ": [{"F": " ", "pos": "SP"}],
# example: Wie geht's?
"'s": [{"F": "'s", "L": "es"}],
"'S": [{"F": "'S", "L": "es"}],
# example: Haste mal 'nen Euro?
"'n": [{"F": "'n", "L": "ein"}],
"'ne": [{"F": "'ne", "L": "eine"}],
"'nen": [{"F": "'nen", "L": "einen"}],
# example: Kommen S nur herein!
"s'": [{"F": "s'", "L": "sie"}],
"S'": [{"F": "S'", "L": "sie"}],
# example: Da haben wir's!
"ich's": [{"F": "ich"}, {"F": "'s", "L": "es"}],
"du's": [{"F": "du"}, {"F": "'s", "L": "es"}],
"er's": [{"F": "er"}, {"F": "'s", "L": "es"}],
"sie's": [{"F": "sie"}, {"F": "'s", "L": "es"}],
"wir's": [{"F": "wir"}, {"F": "'s", "L": "es"}],
"ihr's": [{"F": "ihr"}, {"F": "'s", "L": "es"}],
# example: Die katze auf'm dach.
"auf'm": [{"F": "auf"}, {"F": "'m", "L": "dem"}],
"unter'm": [{"F": "unter"}, {"F": "'m", "L": "dem"}],
"über'm": [{"F": "über"}, {"F": "'m", "L": "dem"}],
"vor'm": [{"F": "vor"}, {"F": "'m", "L": "dem"}],
"hinter'm": [{"F": "hinter"}, {"F": "'m", "L": "dem"}],
# persons
"Fr.": [{"F": "Fr."}],
"Hr.": [{"F": "Hr."}],
"Frl.": [{"F": "Frl."}],
"Prof.": [{"F": "Prof."}],
"Dr.": [{"F": "Dr."}],
"St.": [{"F": "St."}],
"Hrgs.": [{"F": "Hrgs."}],
"Hg.": [{"F": "Hg."}],
"a.Z.": [{"F": "a.Z."}],
"a.D.": [{"F": "a.D."}],
"A.D.": [{"F": "A.D."}],
"h.c.": [{"F": "h.c."}],
"jun.": [{"F": "jun."}],
"sen.": [{"F": "sen."}],
"rer.": [{"F": "rer."}],
"Dipl.": [{"F": "Dipl."}],
"Ing.": [{"F": "Ing."}],
"Dipl.-Ing.": [{"F": "Dipl.-Ing."}],
# companies
"Co.": [{"F": "Co."}],
"co.": [{"F": "co."}],
"Cie.": [{"F": "Cie."}],
"A.G.": [{"F": "A.G."}],
"G.m.b.H.": [{"F": "G.m.b.H."}],
"i.G.": [{"F": "i.G."}],
"e.V.": [{"F": "e.V."}],
# popular german abbreviations
"ggü.": [{"F": "ggü."}],
"ggf.": [{"F": "ggf."}],
"ggfs.": [{"F": "ggfs."}],
"Gebr.": [{"F": "Gebr."}],
"geb.": [{"F": "geb."}],
"gegr.": [{"F": "gegr."}],
"erm.": [{"F": "erm."}],
"engl.": [{"F": "engl."}],
"ehem.": [{"F": "ehem."}],
"Biol.": [{"F": "Biol."}],
"biol.": [{"F": "biol."}],
"Abk.": [{"F": "Abk."}],
"Abb.": [{"F": "Abb."}],
"abzgl.": [{"F": "abzgl."}],
"Hbf.": [{"F": "Hbf."}],
"Bhf.": [{"F": "Bhf."}],
"Bf.": [{"F": "Bf."}],
"i.V.": [{"F": "i.V."}],
"inkl.": [{"F": "inkl."}],
"insb.": [{"F": "insb."}],
"z.B.": [{"F": "z.B."}],
"i.Tr.": [{"F": "i.Tr."}],
"Jhd.": [{"F": "Jhd."}],
"jur.": [{"F": "jur."}],
"lt.": [{"F": "lt."}],
"nat.": [{"F": "nat."}],
"u.a.": [{"F": "u.a."}],
"u.s.w.": [{"F": "u.s.w."}],
"Nr.": [{"F": "Nr."}],
"Univ.": [{"F": "Univ."}],
"vgl.": [{"F": "vgl."}],
"zzgl.": [{"F": "zzgl."}],
"z.Z.": [{"F": "z.Z."}],
"betr.": [{"F": "betr."}],
"ehem.": [{"F": "ehem."}],
# popular latin abbreviations
"vs.": [{"F": "vs."}],
"adv.": [{"F": "adv."}],
"Chr.": [{"F": "Chr."}],
"A.C.": [{"F": "A.C."}],
"A.D.": [{"F": "A.D."}],
"e.g.": [{"F": "e.g."}],
"i.e.": [{"F": "i.e."}],
"al.": [{"F": "al."}],
"p.a.": [{"F": "p.a."}],
"P.S.": [{"F": "P.S."}],
"q.e.d.": [{"F": "q.e.d."}],
"R.I.P.": [{"F": "R.I.P."}],
"etc.": [{"F": "etc."}],
"incl.": [{"F": "incl."}],
# popular english abbreviations
"D.C.": [{"F": "D.C."}],
"N.Y.": [{"F": "N.Y."}],
"N.Y.C.": [{"F": "N.Y.C."}],
# dates
"Jan.": [{"F": "Jan."}],
"Feb.": [{"F": "Feb."}],
"Mrz.": [{"F": "Mrz."}],
"Mär.": [{"F": "Mär."}],
"Apr.": [{"F": "Apr."}],
"Jun.": [{"F": "Jun."}],
"Jul.": [{"F": "Jul."}],
"Aug.": [{"F": "Aug."}],
"Sep.": [{"F": "Sep."}],
"Sept.": [{"F": "Sept."}],
"Okt.": [{"F": "Okt."}],
"Nov.": [{"F": "Nov."}],
"Dez.": [{"F": "Dez."}],
"Mo.": [{"F": "Mo."}],
"Di.": [{"F": "Di."}],
"Mi.": [{"F": "Mi."}],
"Do.": [{"F": "Do."}],
"Fr.": [{"F": "Fr."}],
"Sa.": [{"F": "Sa."}],
"So.": [{"F": "So."}],
# smileys
":)": [{"F": ":)"}],
"<3": [{"F": "<3"}],
";)": [{"F": ";)"}],
"(:": [{"F": "(:"}],
":(": [{"F": ":("}],
"-_-": [{"F": "-_-"}],
"=)": [{"F": "=)"}],
":/": [{"F": ":/"}],
":>": [{"F": ":>"}],
";-)": [{"F": ";-)"}],
":Y": [{"F": ":Y"}],
":P": [{"F": ":P"}],
":-P": [{"F": ":-P"}],
":3": [{"F": ":3"}],
"=3": [{"F": "=3"}],
"xD": [{"F": "xD"}],
"^_^": [{"F": "^_^"}],
"=]": [{"F": "=]"}],
"=D": [{"F": "=D"}],
"<333": [{"F": "<333"}],
":))": [{"F": ":))"}],
":0": [{"F": ":0"}],
"-__-": [{"F": "-__-"}],
"xDD": [{"F": "xDD"}],
"o_o": [{"F": "o_o"}],
"o_O": [{"F": "o_O"}],
"V_V": [{"F": "V_V"}],
"=[[": [{"F": "=[["}],
"<33": [{"F": "<33"}],
";p": [{"F": ";p"}],
";D": [{"F": ";D"}],
";-p": [{"F": ";-p"}],
";(": [{"F": ";("}],
":p": [{"F": ":p"}],
":]": [{"F": ":]"}],
":O": [{"F": ":O"}],
":-/": [{"F": ":-/"}],
":-)": [{"F": ":-)"}],
":(((": [{"F": ":((("}],
":((": [{"F": ":(("}],
":')": [{"F": ":')"}],
"(^_^)": [{"F": "(^_^)"}],
"(=": [{"F": "(="}],
"o.O": [{"F": "o.O"}],
"a.": [{"F": "a."}],
"b.": [{"F": "b."}],
"c.": [{"F": "c."}],
"d.": [{"F": "d."}],
"e.": [{"F": "e."}],
"f.": [{"F": "f."}],
"g.": [{"F": "g."}],
"h.": [{"F": "h."}],
"i.": [{"F": "i."}],
"j.": [{"F": "j."}],
"k.": [{"F": "k."}],
"l.": [{"F": "l."}],
"m.": [{"F": "m."}],
"n.": [{"F": "n."}],
"o.": [{"F": "o."}],
"p.": [{"F": "p."}],
"q.": [{"F": "q."}],
"r.": [{"F": "r."}],
"s.": [{"F": "s."}],
"t.": [{"F": "t."}],
"u.": [{"F": "u."}],
"v.": [{"F": "v."}],
"w.": [{"F": "w."}],
"x.": [{"F": "x."}],
"y.": [{"F": "y."}],
"z.": [{"F": "z."}],
}
def get_double_contractions(ending):
endings = []
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
while ends_with_contraction:
for contraction in contractions:
if ending.endswith(contraction):
endings.append(contraction)
ending = ending.rstrip(contraction)
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
endings.reverse() # reverse because the last ending is put in the list first
return endings
def get_token_properties(token, capitalize=False, remove_contractions=False):
props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop
if capitalize:
token = token.capitalize()
if remove_contractions:
token = token.replace("'", "")
props["F"] = token
return props
def create_entry(token, endings, capitalize=False, remove_contractions=False):
properties = []
properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions))
for e in endings:
properties.append(get_token_properties(e, remove_contractions=remove_contractions))
return properties
FIELDNAMES = ['F','L','pos']
def read_hardcoded(stream):
hc_specials = {}
for line in stream:
line = line.strip()
if line.startswith('#') or not line:
continue
key,_,rest = line.partition('\t')
values = []
for annotation in zip(*[ e.split('|') for e in rest.split('\t') ]):
values.append({ k:v for k,v in itertools.izip_longest(FIELDNAMES,annotation) if v })
hc_specials[key] = values
return hc_specials
def generate_specials():
specials = {}
for token in starting_tokens:
possible_endings = starting_tokens[token]
for ending in possible_endings:
endings = []
if ending.count("'") > 1:
endings.extend(get_double_contractions(ending))
else:
endings.append(ending)
exceptions = possible_endings[ending]
if "lower" not in exceptions:
special = token + ending
specials[special] = create_entry(token, endings)
if "upper" not in exceptions:
special = token.capitalize() + ending
specials[special] = create_entry(token, endings, capitalize=True)
if "contrLower" not in exceptions:
special = token + ending.replace("'", "")
specials[special] = create_entry(token, endings, remove_contractions=True)
if "contrUpper" not in exceptions:
special = token.capitalize() + ending.replace("'", "")
specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True)
# add in hardcoded specials
# changed it so it generates them from a file
with io.open('abbrev.de.tab','r',encoding='utf8') as abbrev_:
hc_specials = read_hardcoded(abbrev_)
specials = dict(specials, **hc_specials)
return specials
if __name__ == "__main__":
specials = generate_specials()
with open("specials.json", "w") as f:
json.dump(specials, f, sort_keys=True, indent=4, separators=(',', ': '))

View File

@ -1,6 +0,0 @@
\.\.\.
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zöäüßA-ZÖÄÜ"]):(?=[a-zöäüßA-ZÖÄÜ])
(?<=[a-zöäüßA-ZÖÄÜ"])>(?=[a-zöäüßA-ZÖÄÜ])
(?<=[a-zöäüßA-ZÖÄÜ"])<(?=[a-zöäüßA-ZÖÄÜ])
(?<=[a-zöäüßA-ZÖÄÜ"])=(?=[a-zöäüßA-ZÖÄÜ])

View File

@ -1 +0,0 @@
{}

View File

@ -1,71 +0,0 @@
{
"PRP": {
"ich": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 1},
"meiner": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2},
"mir": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3},
"mich": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 4},
"du": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
"deiner": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
"dir": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
"dich": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
"er": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1},
"seiner": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2},
"ihm": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3},
"ihn": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4},
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1},
"ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2},
"ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3},
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4},
"es": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1},
"seiner": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2},
"ihm": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3},
"es": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4},
"wir": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
"unser": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
"uns": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
"uns": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
"ihr": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
"euer": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
"euch": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
"euch": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1},
"ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2},
"ihnen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3},
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4}
},
"PRP$": {
"mein": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
"meines": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
"meinem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
"meinen": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
"dein": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
"deines": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
"deinem": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
"deinen": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
"sein": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1},
"seines": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2},
"seinem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3},
"seinen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4},
"ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1},
"ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2},
"ihrem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3},
"ihren": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4},
"sein": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1},
"seines": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2},
"seinem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3},
"seinen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4},
"unser": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
"unseres": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
"unserem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
"unseren": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
"euer": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
"eures": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
"eurem": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
"euren": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
"ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1},
"ihres": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2},
"ihrem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3},
"ihren": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4}
}
}

View File

@ -1,27 +0,0 @@
,
"
(
[
{
*
<
>
$
£
'
``
`
#
US$
C$
A$
a-
....
...
»
_
§

View File

@ -1,3 +0,0 @@
Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.

File diff suppressed because it is too large Load Diff

View File

@ -1,73 +0,0 @@
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
«
_
''
's
'S
s
S
°
\.\.
\.\.\.
\.\.\.\.
(?<=[a-zäöüßÖÄÜ)\]"'´«‘’%\)²“”])\.
\-\-
´
(?<=[0-9])km²
(?<=[0-9])m²
(?<=[0-9])cm²
(?<=[0-9])mm²
(?<=[0-9])km³
(?<=[0-9])m³
(?<=[0-9])cm³
(?<=[0-9])mm³
(?<=[0-9])ha
(?<=[0-9])km
(?<=[0-9])m
(?<=[0-9])cm
(?<=[0-9])mm
(?<=[0-9])µm
(?<=[0-9])nm
(?<=[0-9])yd
(?<=[0-9])in
(?<=[0-9])ft
(?<=[0-9])kg
(?<=[0-9])g
(?<=[0-9])mg
(?<=[0-9])µg
(?<=[0-9])t
(?<=[0-9])lb
(?<=[0-9])oz
(?<=[0-9])m/s
(?<=[0-9])km/h
(?<=[0-9])mph
(?<=[0-9])°C
(?<=[0-9])°K
(?<=[0-9])°F
(?<=[0-9])hPa
(?<=[0-9])Pa
(?<=[0-9])mbar
(?<=[0-9])mb
(?<=[0-9])T
(?<=[0-9])G
(?<=[0-9])M
(?<=[0-9])K
(?<=[0-9])kb

View File

@ -1,59 +0,0 @@
{
"$(": {"pos": "PUNCT", "PunctType": "Brck"},
"$,": {"pos": "PUNCT", "PunctType": "Comm"},
"$.": {"pos": "PUNCT", "PunctType": "Peri"},
"ADJA": {"pos": "ADJ"},
"ADJD": {"pos": "ADJ", "Variant": "Short"},
"ADV": {"pos": "ADV"},
"APPO": {"pos": "ADP", "AdpType": "Post"},
"APPR": {"pos": "ADP", "AdpType": "Prep"},
"APPRART": {"pos": "ADP", "AdpType": "Prep", "PronType": "Art"},
"APZR": {"pos": "ADP", "AdpType": "Circ"},
"ART": {"pos": "DET", "PronType": "Art"},
"CARD": {"pos": "NUM", "NumType": "Card"},
"FM": {"pos": "X", "Foreign": "Yes"},
"ITJ": {"pos": "INTJ"},
"KOKOM": {"pos": "CONJ", "ConjType": "Comp"},
"KON": {"pos": "CONJ"},
"KOUI": {"pos": "SCONJ"},
"KOUS": {"pos": "SCONJ"},
"NE": {"pos": "PROPN"},
"NNE": {"pos": "PROPN"},
"NN": {"pos": "NOUN"},
"PAV": {"pos": "ADV", "PronType": "Dem"},
"PROAV": {"pos": "ADV", "PronType": "Dem"},
"PDAT": {"pos": "DET", "PronType": "Dem"},
"PDS": {"pos": "PRON", "PronType": "Dem"},
"PIAT": {"pos": "DET", "PronType": "Ind,Neg,Tot"},
"PIDAT": {"pos": "DET", "AdjType": "Pdt", "PronType": "Ind,Neg,Tot"},
"PIS": {"pos": "PRON", "PronType": "Ind,Neg,Tot"},
"PPER": {"pos": "PRON", "PronType": "Prs"},
"PPOSAT": {"pos": "DET", "Poss": "Yes", "PronType": "Prs"},
"PPOSS": {"pos": "PRON", "Poss": "Yes", "PronType": "Prs"},
"PRELAT": {"pos": "DET", "PronType": "Rel"},
"PRELS": {"pos": "PRON", "PronType": "Rel"},
"PRF": {"pos": "PRON", "PronType": "Prs", "Reflex": "Yes"},
"PTKA": {"pos": "PART"},
"PTKANT": {"pos": "PART", "PartType": "Res"},
"PTKNEG": {"pos": "PART", "Negative": "Neg"},
"PTKVZ": {"pos": "PART", "PartType": "Vbp"},
"PTKZU": {"pos": "PART", "PartType": "Inf"},
"PWAT": {"pos": "DET", "PronType": "Int"},
"PWAV": {"pos": "ADV", "PronType": "Int"},
"PWS": {"pos": "PRON", "PronType": "Int"},
"TRUNC": {"pos": "X", "Hyph": "Yes"},
"VAFIN": {"pos": "AUX", "Mood": "Ind", "VerbForm": "Fin"},
"VAIMP": {"pos": "AUX", "Mood": "Imp", "VerbForm": "Fin"},
"VAINF": {"pos": "AUX", "VerbForm": "Inf"},
"VAPP": {"pos": "AUX", "Aspect": "Perf", "VerbForm": "Part"},
"VMFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin", "VerbType": "Mod"},
"VMINF": {"pos": "VERB", "VerbForm": "Inf", "VerbType": "Mod"},
"VMPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part", "VerbType": "Mod"},
"VVFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin"},
"VVIMP": {"pos": "VERB", "Mood": "Imp", "VerbForm": "Fin"},
"VVINF": {"pos": "VERB", "VerbForm": "Inf"},
"VVIZU": {"pos": "VERB", "VerbForm": "Inf"},
"VVPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part"},
"XY": {"pos": "X"},
"SP": {"pos": "SPACE"}
}

View File

@ -1,20 +0,0 @@
WordNet Release 3.0 This software and database is being provided to you, the
LICENSEE, by Princeton University under the following license. By obtaining,
using and/or copying this software and database, you agree that you have read,
understood, and will comply with these terms and conditions.: Permission to
use, copy, modify and distribute this software and database and its
documentation for any purpose and without fee or royalty is hereby granted,
provided that you agree to comply with the following copyright notice and
statements, including the disclaimer, and that the same appear on ALL copies of
the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton
University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS"
AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO
REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR
DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS
OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used
in advertising or publicity pertaining to distribution of the software and/or
database. Title to copyright in this software, database and any associated
documentation shall at all times remain with Princeton University and LICENSEE
agrees to preserve same.

View File

@ -1,194 +0,0 @@
{
"Reddit": [
"PRODUCT",
{},
[
[{"lower": "reddit"}]
]
],
"SeptemberElevenAttacks": [
"EVENT",
{},
[
[
{"orth": "9/11"}
],
[
{"lower": "september"},
{"orth": "11"}
]
]
],
"Linux": [
"PRODUCT",
{},
[
[{"lower": "linux"}]
]
],
"Haskell": [
"PRODUCT",
{},
[
[{"lower": "haskell"}]
]
],
"HaskellCurry": [
"PERSON",
{},
[
[
{"lower": "haskell"},
{"lower": "curry"}
]
]
],
"Javascript": [
"PRODUCT",
{},
[
[{"lower": "javascript"}]
]
],
"CSS": [
"PRODUCT",
{},
[
[{"lower": "css"}],
[{"lower": "css3"}]
]
],
"displaCy": [
"PRODUCT",
{},
[
[{"lower": "displacy"}]
]
],
"spaCy": [
"PRODUCT",
{},
[
[{"orth": "spaCy"}]
]
],
"HTML": [
"PRODUCT",
{},
[
[{"lower": "html"}],
[{"lower": "html5"}]
]
],
"Python": [
"PRODUCT",
{},
[
[{"orth": "Python"}]
]
],
"Ruby": [
"PRODUCT",
{},
[
[{"orth": "Ruby"}]
]
],
"Digg": [
"PRODUCT",
{},
[
[{"lower": "digg"}]
]
],
"FoxNews": [
"ORG",
{},
[
[{"orth": "Fox"}],
[{"orth": "News"}]
]
],
"Google": [
"ORG",
{},
[
[{"lower": "google"}]
]
],
"Mac": [
"PRODUCT",
{},
[
[{"lower": "mac"}]
]
],
"Wikipedia": [
"PRODUCT",
{},
[
[{"lower": "wikipedia"}]
]
],
"Windows": [
"PRODUCT",
{},
[
[{"orth": "Windows"}]
]
],
"Dell": [
"ORG",
{},
[
[{"lower": "dell"}]
]
],
"Facebook": [
"ORG",
{},
[
[{"lower": "facebook"}]
]
],
"Blizzard": [
"ORG",
{},
[
[{"orth": "Blizzard"}]
]
],
"Ubuntu": [
"ORG",
{},
[
[{"orth": "Ubuntu"}]
]
],
"Youtube": [
"PRODUCT",
{},
[
[{"lower": "youtube"}]
]
],
"false_positives": [
null,
{},
[
[{"orth": "Shit"}],
[{"orth": "Weed"}],
[{"orth": "Cool"}],
[{"orth": "Btw"}],
[{"orth": "Bah"}],
[{"orth": "Bullshit"}],
[{"orth": "Lol"}],
[{"orth": "Yo"}, {"lower": "dawg"}],
[{"orth": "Yay"}],
[{"orth": "Ahh"}],
[{"orth": "Yea"}],
[{"orth": "Bah"}]
]
]
}

View File

@ -1,422 +0,0 @@
# -#- coding: utf-8 -*-
import json
contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
# contains the lemmas, parts of speech, number, and tenspect of
# potential tokens generated after splitting contractions off
token_properties = {
"ai": {"L": "be", "pos": "VBP", "number": 2},
"are": {"L": "be", "pos": "VBP", "number": 2},
"ca": {"L": "can", "pos": "MD"},
"can": {"L": "can", "pos": "MD"},
"could": {"pos": "MD", "L": "could"},
"'d": {"L": "would", "pos": "MD"},
"did": {"L": "do", "pos": "VBD"},
"do": {"L": "do"},
"does": {"L": "do", "pos": "VBZ"},
"had": {"L": "have", "pos": "VBD"},
"has": {"L": "have", "pos": "VBZ"},
"have": {"pos": "VB"},
"he": {"L": "-PRON-", "pos": "PRP"},
"how": {},
"i": {"L": "-PRON-", "pos": "PRP"},
"is": {"L": "be", "pos": "VBZ"},
"it": {"L": "-PRON-", "pos": "PRP"},
"'ll": {"L": "will", "pos": "MD"},
"'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1},
"'ma": {},
"might": {},
"must": {},
"need": {},
"not": {"L": "not", "pos": "RB"},
"'nt": {"L": "not", "pos": "RB"},
"n't": {"L": "not", "pos": "RB"},
"'re": {"L": "be", "pos": "VBZ"},
"'s": {}, # no POS or lemma for s?
"sha": {"L": "shall", "pos": "MD"},
"she": {"L": "-PRON-", "pos": "PRP"},
"should": {},
"that": {},
"there": {},
"they": {"L": "-PRON-", "pos": "PRP"},
"was": {},
"we": {"L": "-PRON-", "pos": "PRP"},
"were": {},
"what": {},
"when": {},
"where": {},
"who": {},
"why": {},
"wo": {},
"would": {},
"you": {"L": "-PRON-", "pos": "PRP"},
"'ve": {"L": "have", "pos": "VB"}
}
# contains starting tokens with their potential contractions
# each potential contraction has a list of exceptions
# lower - don't generate the lowercase version
# upper - don't generate the uppercase version
# contrLower - don't generate the lowercase version with apostrophe (') removed
# contrUpper - dont' generate the uppercase version with apostrophe (') removed
# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so
# we add "contrLower" and "contrUpper" to the exceptions list
starting_tokens = {
"ai": {"n't": []},
"are": {"n't": []},
"ca": {"n't": []},
"can": {"not": []},
"could": {"'ve": [], "n't": [], "n't've": []},
"did": {"n't": []},
"does": {"n't": []},
"do": {"n't": []},
"had": {"n't": [], "n't've": []},
"has": {"n't": []},
"have": {"n't": []},
"he": {"'d": [], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []},
"how": {"'d": [], "'ll": [], "'s": []},
"i": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'m": [], "'ma": [], "'ve": []},
"is": {"n't": []},
"it": {"'d": [], "'d've": [], "'ll": [], "'s": ["contrLower", "contrUpper"]},
"might": {"n't": [], "n't've": [], "'ve": []},
"must": {"n't": [], "'ve": []},
"need": {"n't": []},
"not": {"'ve": []},
"sha": {"n't": []},
"she": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []},
"should": {"'ve": [], "n't": [], "n't've": []},
"that": {"'s": []},
"there": {"'d": [], "'d've": [], "'s": ["contrLower", "contrUpper"], "'ll": []},
"they": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []},
"was": {"n't": []},
"we": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'re": ["contrLower", "contrUpper"], "'ve": []},
"were": {"n't": []},
"what": {"'ll": [], "'re": [], "'s": [], "'ve": []},
"when": {"'s": []},
"where": {"'d": [], "'s": [], "'ve": []},
"who": {"'d": [], "'ll": [], "'re": ["contrLower", "contrUpper"], "'s": [], "'ve": []},
"why": {"'ll": [], "'re": [], "'s": []},
"wo": {"n't": []},
"would": {"'ve": [], "n't": [], "n't've": []},
"you": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []}
}
# other specials that don't really have contractions
# so they are hardcoded
hardcoded_specials = {
"let's": [{"F": "let"}, {"F": "'s", "L": "us"}],
"Let's": [{"F": "Let"}, {"F": "'s", "L": "us"}],
"'s": [{"F": "'s", "L": "'s"}],
"'S": [{"F": "'S", "L": "'s"}],
u"\u2018s": [{"F": u"\u2018s", "L": "'s"}],
u"\u2018S": [{"F": u"\u2018S", "L": "'s"}],
"'em": [{"F": "'em"}],
"'ol": [{"F": "'ol"}],
"vs.": [{"F": "vs."}],
"Ms.": [{"F": "Ms."}],
"Mr.": [{"F": "Mr."}],
"Dr.": [{"F": "Dr."}],
"Mrs.": [{"F": "Mrs."}],
"Messrs.": [{"F": "Messrs."}],
"Gov.": [{"F": "Gov."}],
"Gen.": [{"F": "Gen."}],
"Mt.": [{"F": "Mt.", "L": "Mount"}],
"''": [{"F": "''"}],
"": [{"F": "", "L": "--", "pos": ":"}],
"Corp.": [{"F": "Corp."}],
"Inc.": [{"F": "Inc."}],
"Co.": [{"F": "Co."}],
"co.": [{"F": "co."}],
"Ltd.": [{"F": "Ltd."}],
"Bros.": [{"F": "Bros."}],
"Rep.": [{"F": "Rep."}],
"Sen.": [{"F": "Sen."}],
"Jr.": [{"F": "Jr."}],
"Rev.": [{"F": "Rev."}],
"Adm.": [{"F": "Adm."}],
"St.": [{"F": "St."}],
"a.m.": [{"F": "a.m."}],
"p.m.": [{"F": "p.m."}],
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
"p.m.": [{"F": "p.m."}],
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
"Jan.": [{"F": "Jan."}],
"Feb.": [{"F": "Feb."}],
"Mar.": [{"F": "Mar."}],
"Apr.": [{"F": "Apr."}],
"May.": [{"F": "May."}],
"Jun.": [{"F": "Jun."}],
"Jul.": [{"F": "Jul."}],
"Aug.": [{"F": "Aug."}],
"Sep.": [{"F": "Sep."}],
"Sept.": [{"F": "Sept."}],
"Oct.": [{"F": "Oct."}],
"Nov.": [{"F": "Nov."}],
"Dec.": [{"F": "Dec."}],
"Ala.": [{"F": "Ala."}],
"Ariz.": [{"F": "Ariz."}],
"Ark.": [{"F": "Ark."}],
"Calif.": [{"F": "Calif."}],
"Colo.": [{"F": "Colo."}],
"Conn.": [{"F": "Conn."}],
"Del.": [{"F": "Del."}],
"D.C.": [{"F": "D.C."}],
"Fla.": [{"F": "Fla."}],
"Ga.": [{"F": "Ga."}],
"Ill.": [{"F": "Ill."}],
"Ind.": [{"F": "Ind."}],
"Kans.": [{"F": "Kans."}],
"Kan.": [{"F": "Kan."}],
"Ky.": [{"F": "Ky."}],
"La.": [{"F": "La."}],
"Md.": [{"F": "Md."}],
"Mass.": [{"F": "Mass."}],
"Mich.": [{"F": "Mich."}],
"Minn.": [{"F": "Minn."}],
"Miss.": [{"F": "Miss."}],
"Mo.": [{"F": "Mo."}],
"Mont.": [{"F": "Mont."}],
"Nebr.": [{"F": "Nebr."}],
"Neb.": [{"F": "Neb."}],
"Nev.": [{"F": "Nev."}],
"N.H.": [{"F": "N.H."}],
"N.J.": [{"F": "N.J."}],
"N.M.": [{"F": "N.M."}],
"N.Y.": [{"F": "N.Y."}],
"N.C.": [{"F": "N.C."}],
"N.D.": [{"F": "N.D."}],
"Okla.": [{"F": "Okla."}],
"Ore.": [{"F": "Ore."}],
"Pa.": [{"F": "Pa."}],
"Tenn.": [{"F": "Tenn."}],
"Va.": [{"F": "Va."}],
"Wash.": [{"F": "Wash."}],
"Wis.": [{"F": "Wis."}],
":)": [{"F": ":)"}],
"<3": [{"F": "<3"}],
";)": [{"F": ";)"}],
"(:": [{"F": "(:"}],
":(": [{"F": ":("}],
"-_-": [{"F": "-_-"}],
"=)": [{"F": "=)"}],
":/": [{"F": ":/"}],
":>": [{"F": ":>"}],
";-)": [{"F": ";-)"}],
":Y": [{"F": ":Y"}],
":P": [{"F": ":P"}],
":-P": [{"F": ":-P"}],
":3": [{"F": ":3"}],
"=3": [{"F": "=3"}],
"xD": [{"F": "xD"}],
"^_^": [{"F": "^_^"}],
"=]": [{"F": "=]"}],
"=D": [{"F": "=D"}],
"<333": [{"F": "<333"}],
":))": [{"F": ":))"}],
":0": [{"F": ":0"}],
"-__-": [{"F": "-__-"}],
"xDD": [{"F": "xDD"}],
"o_o": [{"F": "o_o"}],
"o_O": [{"F": "o_O"}],
"V_V": [{"F": "V_V"}],
"=[[": [{"F": "=[["}],
"<33": [{"F": "<33"}],
";p": [{"F": ";p"}],
";D": [{"F": ";D"}],
";-p": [{"F": ";-p"}],
";(": [{"F": ";("}],
":p": [{"F": ":p"}],
":]": [{"F": ":]"}],
":O": [{"F": ":O"}],
":-/": [{"F": ":-/"}],
":-)": [{"F": ":-)"}],
":(((": [{"F": ":((("}],
":((": [{"F": ":(("}],
":')": [{"F": ":')"}],
"(^_^)": [{"F": "(^_^)"}],
"(=": [{"F": "(="}],
"o.O": [{"F": "o.O"}],
"\")": [{"F": "\")"}],
"a.": [{"F": "a."}],
"b.": [{"F": "b."}],
"c.": [{"F": "c."}],
"d.": [{"F": "d."}],
"e.": [{"F": "e."}],
"f.": [{"F": "f."}],
"g.": [{"F": "g."}],
"h.": [{"F": "h."}],
"i.": [{"F": "i."}],
"j.": [{"F": "j."}],
"k.": [{"F": "k."}],
"l.": [{"F": "l."}],
"m.": [{"F": "m."}],
"n.": [{"F": "n."}],
"o.": [{"F": "o."}],
"p.": [{"F": "p."}],
"q.": [{"F": "q."}],
"r.": [{"F": "r."}],
"s.": [{"F": "s."}],
"t.": [{"F": "t."}],
"u.": [{"F": "u."}],
"v.": [{"F": "v."}],
"w.": [{"F": "w."}],
"x.": [{"F": "x."}],
"y.": [{"F": "y."}],
"z.": [{"F": "z."}],
"i.e.": [{"F": "i.e."}],
"I.e.": [{"F": "I.e."}],
"I.E.": [{"F": "I.E."}],
"e.g.": [{"F": "e.g."}],
"E.g.": [{"F": "E.g."}],
"E.G.": [{"F": "E.G."}],
"\n": [{"F": "\n", "pos": "SP"}],
"\t": [{"F": "\t", "pos": "SP"}],
" ": [{"F": " ", "pos": "SP"}],
u"\u00a0": [{"F": u"\u00a0", "pos": "SP", "L": " "}]
}
def get_double_contractions(ending):
endings = []
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
while ends_with_contraction:
for contraction in contractions:
if ending.endswith(contraction):
endings.append(contraction)
ending = ending.rstrip(contraction)
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
endings.reverse() # reverse because the last ending is put in the list first
return endings
def get_token_properties(token, capitalize=False, remove_contractions=False):
props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop
if capitalize:
token = token.capitalize()
if remove_contractions:
token = token.replace("'", "")
props["F"] = token
return props
def create_entry(token, endings, capitalize=False, remove_contractions=False):
properties = []
properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions))
for e in endings:
properties.append(get_token_properties(e, remove_contractions=remove_contractions))
return properties
def generate_specials():
specials = {}
for token in starting_tokens:
possible_endings = starting_tokens[token]
for ending in possible_endings:
endings = []
if ending.count("'") > 1:
endings.extend(get_double_contractions(ending))
else:
endings.append(ending)
exceptions = possible_endings[ending]
if "lower" not in exceptions:
special = token + ending
specials[special] = create_entry(token, endings)
if "upper" not in exceptions:
special = token.capitalize() + ending
specials[special] = create_entry(token, endings, capitalize=True)
if "contrLower" not in exceptions:
special = token + ending.replace("'", "")
specials[special] = create_entry(token, endings, remove_contractions=True)
if "contrUpper" not in exceptions:
special = token.capitalize() + ending.replace("'", "")
specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True)
# add in hardcoded specials
specials = dict(specials, **hardcoded_specials)
return specials
if __name__ == "__main__":
specials = generate_specials()
with open("specials.json", "w") as file_:
file_.write(json.dumps(specials, indent=2))

View File

@ -1,6 +0,0 @@
\.\.\.+
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-z])
(?<=[a-zA-Z])--(?=[a-zA-z])
(?<=[0-9])-(?=[0-9])
(?<=[A-Za-z]),(?=[A-Za-z])

View File

@ -1,38 +0,0 @@
{
"noun": [
["s", ""],
["ses", "s"],
["ves", "f"],
["xes", "x"],
["zes", "z"],
["ches", "ch"],
["shes", "sh"],
["men", "man"],
["ies", "y"]
],
"verb": [
["s", ""],
["ies", "y"],
["es", "e"],
["es", ""],
["ed", "e"],
["ed", ""],
["ing", "e"],
["ing", ""]
],
"adj": [
["er", ""],
["est", ""],
["er", "e"],
["est", "e"]
],
"punct": [
["“", "\""],
["”", "\""],
["\u2018", "'"],
["\u2019", "'"]
]
}

View File

@ -1,59 +0,0 @@
{
"PRP": {
"I": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Nom"},
"me": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc"},
"you": {"L": "-PRON-", "PronType": "Prs", "Person": "Two"},
"he": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Nom"},
"him": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"},
"she": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Nom"},
"her": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Acc"},
"it": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"},
"we": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Nom"},
"us": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc"},
"they": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Nom"},
"them": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc"},
"mine": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Reflex": "Yes"},
"yours": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Poss": "Yes", "Reflex": "Yes"},
"his": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Poss": "Yes", "Reflex": "Yes"},
"hers": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Poss": "Yes", "Reflex": "Yes"},
"its": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut", "Poss": "Yes", "Reflex": "Yes"},
"ours": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"},
"yours": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"},
"theirs": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"},
"myself": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc", "Reflex": "Yes"},
"yourself": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Case": "Acc", "Reflex": "Yes"},
"himself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Masc", "Reflex": "Yes"},
"herself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Fem", "Reflex": "Yes"},
"itself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Neut", "Reflex": "Yes"},
"themself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Reflex": "Yes"},
"ourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc", "Reflex": "Yes"},
"yourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Case": "Acc", "Reflex": "Yes"},
"themselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc", "Reflex": "Yes"}
},
"PRP$": {
"my": {"L": "-PRON-", "Person": "One", "Number": "Sing", "PronType": "Prs", "Poss": "Yes"},
"your": {"L": "-PRON-", "Person": "Two", "PronType": "Prs", "Poss": "Yes"},
"his": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Masc", "PronType": "Prs", "Poss": "Yes"},
"her": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Fem", "PronType": "Prs", "Poss": "Yes"},
"its": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Neut", "PronType": "Prs", "Poss": "Yes"},
"our": {"L": "-PRON-", "Person": "One", "Number": "Plur", "PronType": "Prs", "Poss": "Yes"},
"their": {"L": "-PRON-", "Person": "Three", "Number": "Plur", "PronType": "Prs", "Poss": "Yes"}
},
"VBZ": {
"am": {"L": "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"are": {"L": "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"is": {"L": "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
},
"VBP": {
"are": {"L": "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
},
"VBD": {
"was": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"were": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
}
}

View File

@ -1,21 +0,0 @@
,
"
(
[
{
*
<
$
£
'
``
`
#
US$
C$
A$
a-
....
...

File diff suppressed because it is too large Load Diff

View File

@ -1,26 +0,0 @@
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
''
's
'S
s
S
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]"'%\)])\.
(?<=[0-9])km

View File

@ -1,60 +0,0 @@
{
".": {"pos": "punct", "puncttype": "peri"},
",": {"pos": "punct", "puncttype": "comm"},
"-LRB-": {"pos": "punct", "puncttype": "brck", "punctside": "ini"},
"-RRB-": {"pos": "punct", "puncttype": "brck", "punctside": "fin"},
"``": {"pos": "punct", "puncttype": "quot", "punctside": "ini"},
"\"\"": {"pos": "punct", "puncttype": "quot", "punctside": "fin"},
"''": {"pos": "punct", "puncttype": "quot", "punctside": "fin"},
":": {"pos": "punct"},
"$": {"pos": "sym", "other": {"symtype": "currency"}},
"#": {"pos": "sym", "other": {"symtype": "numbersign"}},
"AFX": {"pos": "adj", "hyph": "hyph"},
"CC": {"pos": "conj", "conjtype": "coor"},
"CD": {"pos": "num", "numtype": "card"},
"DT": {"pos": "det"},
"EX": {"pos": "adv", "advtype": "ex"},
"FW": {"pos": "x", "foreign": "foreign"},
"HYPH": {"pos": "punct", "puncttype": "dash"},
"IN": {"pos": "adp"},
"JJ": {"pos": "adj", "degree": "pos"},
"JJR": {"pos": "adj", "degree": "comp"},
"JJS": {"pos": "adj", "degree": "sup"},
"LS": {"pos": "punct", "numtype": "ord"},
"MD": {"pos": "verb", "verbtype": "mod"},
"NIL": {"pos": ""},
"NN": {"pos": "noun", "number": "sing"},
"NNP": {"pos": "propn", "nountype": "prop", "number": "sing"},
"NNPS": {"pos": "propn", "nountype": "prop", "number": "plur"},
"NNS": {"pos": "noun", "number": "plur"},
"PDT": {"pos": "adj", "adjtype": "pdt", "prontype": "prn"},
"POS": {"pos": "part", "poss": "poss"},
"PRP": {"pos": "pron", "prontype": "prs"},
"PRP$": {"pos": "adj", "prontype": "prs", "poss": "poss"},
"RB": {"pos": "adv", "degree": "pos"},
"RBR": {"pos": "adv", "degree": "comp"},
"RBS": {"pos": "adv", "degree": "sup"},
"RP": {"pos": "part"},
"SYM": {"pos": "sym"},
"TO": {"pos": "part", "parttype": "inf", "verbform": "inf"},
"UH": {"pos": "intJ"},
"VB": {"pos": "verb", "verbform": "inf"},
"VBD": {"pos": "verb", "verbform": "fin", "tense": "past"},
"VBG": {"pos": "verb", "verbform": "part", "tense": "pres", "aspect": "prog"},
"VBN": {"pos": "verb", "verbform": "part", "tense": "past", "aspect": "perf"},
"VBP": {"pos": "verb", "verbform": "fin", "tense": "pres"},
"VBZ": {"pos": "verb", "verbform": "fin", "tense": "pres", "number": "sing", "person": 3},
"WDT": {"pos": "adj", "prontype": "int|rel"},
"WP": {"pos": "noun", "prontype": "int|rel"},
"WP$": {"pos": "adj", "poss": "poss", "prontype": "int|rel"},
"WRB": {"pos": "adv", "prontype": "int|rel"},
"SP": {"pos": "space"},
"ADD": {"pos": "x"},
"NFP": {"pos": "punct"},
"GW": {"pos": "x"},
"AFX": {"pos": "x"},
"HYPH": {"pos": "punct"},
"XX": {"pos": "x"},
"BES": {"pos": "verb"},
"HVS": {"pos": "verb"}
}

View File

@ -1,3 +0,0 @@
\.\.\.
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-z])

View File

@ -1 +0,0 @@
{}

View File

@ -1,21 +0,0 @@
,
"
(
[
{
*
<
$
£
'
``
`
#
US$
C$
A$
a-
....
...

View File

@ -1,3 +0,0 @@
Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.

View File

@ -1,149 +0,0 @@
{
"a.m.": [{"F": "a.m."}],
"p.m.": [{"F": "p.m."}],
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
"Jan.": [{"F": "Jan.", "L": "Januar"}],
"Feb.": [{"F": "Feb.", "L": "Februar"}],
"Mär.": [{"F": "Mär.", "L": "März"}],
"Apr.": [{"F": "Apr.", "L": "April"}],
"Mai.": [{"F": "Mai.", "L": "Mai"}],
"Jun.": [{"F": "Jun.", "L": "Juni"}],
"Jul.": [{"F": "Jul.", "L": "Juli"}],
"Aug.": [{"F": "Aug.", "L": "August"}],
"Sep.": [{"F": "Sep.", "L": "September"}],
"Sept.": [{"F": "Sept.", "L": "September"}],
"Okt.": [{"F": "Okt.", "L": "Oktober"}],
"Nov.": [{"F": "Nov.", "L": "November"}],
"Dez.": [{"F": "Dez.", "L": "Dezember"}],
":)": [{"F": ":)"}],
"<3": [{"F": "<3"}],
";)": [{"F": ";)"}],
"(:": [{"F": "(:"}],
":(": [{"F": ":("}],
"-_-": [{"F": "-_-"}],
"=)": [{"F": "=)"}],
":/": [{"F": ":/"}],
":>": [{"F": ":>"}],
";-)": [{"F": ";-)"}],
":Y": [{"F": ":Y"}],
":P": [{"F": ":P"}],
":-P": [{"F": ":-P"}],
":3": [{"F": ":3"}],
"=3": [{"F": "=3"}],
"xD": [{"F": "xD"}],
"^_^": [{"F": "^_^"}],
"=]": [{"F": "=]"}],
"=D": [{"F": "=D"}],
"<333": [{"F": "<333"}],
":))": [{"F": ":))"}],
":0": [{"F": ":0"}],
"-__-": [{"F": "-__-"}],
"xDD": [{"F": "xDD"}],
"o_o": [{"F": "o_o"}],
"o_O": [{"F": "o_O"}],
"V_V": [{"F": "V_V"}],
"=[[": [{"F": "=[["}],
"<33": [{"F": "<33"}],
";p": [{"F": ";p"}],
";D": [{"F": ";D"}],
";-p": [{"F": ";-p"}],
";(": [{"F": ";("}],
":p": [{"F": ":p"}],
":]": [{"F": ":]"}],
":O": [{"F": ":O"}],
":-/": [{"F": ":-/"}],
":-)": [{"F": ":-)"}],
":(((": [{"F": ":((("}],
":((": [{"F": ":(("}],
":')": [{"F": ":')"}],
"(^_^)": [{"F": "(^_^)"}],
"(=": [{"F": "(="}],
"o.O": [{"F": "o.O"}],
"\")": [{"F": "\")"}],
"a.": [{"F": "a."}],
"b.": [{"F": "b."}],
"c.": [{"F": "c."}],
"d.": [{"F": "d."}],
"e.": [{"F": "e."}],
"f.": [{"F": "f."}],
"g.": [{"F": "g."}],
"h.": [{"F": "h."}],
"i.": [{"F": "i."}],
"j.": [{"F": "j."}],
"k.": [{"F": "k."}],
"l.": [{"F": "l."}],
"m.": [{"F": "m."}],
"n.": [{"F": "n."}],
"o.": [{"F": "o."}],
"p.": [{"F": "p."}],
"q.": [{"F": "q."}],
"s.": [{"F": "s."}],
"t.": [{"F": "t."}],
"u.": [{"F": "u."}],
"v.": [{"F": "v."}],
"w.": [{"F": "w."}],
"x.": [{"F": "x."}],
"y.": [{"F": "y."}],
"z.": [{"F": "z."}],
"z.b.": [{"F": "z.b."}],
"e.h.": [{"F": "I.e."}],
"o.ä.": [{"F": "I.E."}],
"bzw.": [{"F": "bzw."}],
"usw.": [{"F": "usw."}],
"\n": [{"F": "\n", "pos": "SP"}],
"\t": [{"F": "\t", "pos": "SP"}],
" ": [{"F": " ", "pos": "SP"}]
}

View File

@ -1,26 +0,0 @@
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
''
's
'S
s
S
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]"'%\)])\.
(?<=[0-9])km

View File

@ -1,19 +0,0 @@
{
"NOUN": {"pos": "NOUN"},
"VERB": {"pos": "VERB"},
"PUNCT": {"pos": "PUNCT"},
"ADV": {"pos": "ADV"},
"ADJ": {"pos": "ADJ"},
"PRON": {"pos": "PRON"},
"PROPN": {"pos": "PROPN"},
"CONJ": {"pos": "CONJ"},
"NUM": {"pos": "NUM"},
"AUX": {"pos": "AUX"},
"SCONJ": {"pos": "SCONJ"},
"ADP": {"pos": "ADP"},
"SYM": {"pos": "SYM"},
"X": {"pos": "X"},
"INTJ": {"pos": "INTJ"},
"DET": {"pos": "DET"},
"PART": {"pos": "PART"}
}

View File

@ -1,3 +0,0 @@
\.\.\.
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-z])

View File

@ -1,21 +0,0 @@
,
"
(
[
{
*
<
$
£
'
``
`
#
US$
C$
A$
a-
....
...

View File

@ -1,149 +0,0 @@
{
"a.m.": [{"F": "a.m."}],
"p.m.": [{"F": "p.m."}],
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
"Jan.": [{"F": "Jan.", "L": "Januar"}],
"Feb.": [{"F": "Feb.", "L": "Februar"}],
"Mär.": [{"F": "Mär.", "L": "März"}],
"Apr.": [{"F": "Apr.", "L": "April"}],
"Mai.": [{"F": "Mai.", "L": "Mai"}],
"Jun.": [{"F": "Jun.", "L": "Juni"}],
"Jul.": [{"F": "Jul.", "L": "Juli"}],
"Aug.": [{"F": "Aug.", "L": "August"}],
"Sep.": [{"F": "Sep.", "L": "September"}],
"Sept.": [{"F": "Sept.", "L": "September"}],
"Okt.": [{"F": "Okt.", "L": "Oktober"}],
"Nov.": [{"F": "Nov.", "L": "November"}],
"Dez.": [{"F": "Dez.", "L": "Dezember"}],
":)": [{"F": ":)"}],
"<3": [{"F": "<3"}],
";)": [{"F": ";)"}],
"(:": [{"F": "(:"}],
":(": [{"F": ":("}],
"-_-": [{"F": "-_-"}],
"=)": [{"F": "=)"}],
":/": [{"F": ":/"}],
":>": [{"F": ":>"}],
";-)": [{"F": ";-)"}],
":Y": [{"F": ":Y"}],
":P": [{"F": ":P"}],
":-P": [{"F": ":-P"}],
":3": [{"F": ":3"}],
"=3": [{"F": "=3"}],
"xD": [{"F": "xD"}],
"^_^": [{"F": "^_^"}],
"=]": [{"F": "=]"}],
"=D": [{"F": "=D"}],
"<333": [{"F": "<333"}],
":))": [{"F": ":))"}],
":0": [{"F": ":0"}],
"-__-": [{"F": "-__-"}],
"xDD": [{"F": "xDD"}],
"o_o": [{"F": "o_o"}],
"o_O": [{"F": "o_O"}],
"V_V": [{"F": "V_V"}],
"=[[": [{"F": "=[["}],
"<33": [{"F": "<33"}],
";p": [{"F": ";p"}],
";D": [{"F": ";D"}],
";-p": [{"F": ";-p"}],
";(": [{"F": ";("}],
":p": [{"F": ":p"}],
":]": [{"F": ":]"}],
":O": [{"F": ":O"}],
":-/": [{"F": ":-/"}],
":-)": [{"F": ":-)"}],
":(((": [{"F": ":((("}],
":((": [{"F": ":(("}],
":')": [{"F": ":')"}],
"(^_^)": [{"F": "(^_^)"}],
"(=": [{"F": "(="}],
"o.O": [{"F": "o.O"}],
"\")": [{"F": "\")"}],
"a.": [{"F": "a."}],
"b.": [{"F": "b."}],
"c.": [{"F": "c."}],
"d.": [{"F": "d."}],
"e.": [{"F": "e."}],
"f.": [{"F": "f."}],
"g.": [{"F": "g."}],
"h.": [{"F": "h."}],
"i.": [{"F": "i."}],
"j.": [{"F": "j."}],
"k.": [{"F": "k."}],
"l.": [{"F": "l."}],
"m.": [{"F": "m."}],
"n.": [{"F": "n."}],
"o.": [{"F": "o."}],
"p.": [{"F": "p."}],
"q.": [{"F": "q."}],
"s.": [{"F": "s."}],
"t.": [{"F": "t."}],
"u.": [{"F": "u."}],
"v.": [{"F": "v."}],
"w.": [{"F": "w."}],
"x.": [{"F": "x."}],
"y.": [{"F": "y."}],
"z.": [{"F": "z."}],
"z.b.": [{"F": "z.b."}],
"e.h.": [{"F": "I.e."}],
"o.ä.": [{"F": "I.E."}],
"bzw.": [{"F": "bzw."}],
"usw.": [{"F": "usw."}],
"\n": [{"F": "\n", "pos": "SP"}],
"\t": [{"F": "\t", "pos": "SP"}],
" ": [{"F": " ", "pos": "SP"}]
}

View File

@ -1,26 +0,0 @@
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
''
's
'S
s
S
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]"'%\)])\.
(?<=[0-9])km

View File

@ -1,44 +0,0 @@
{
"S": {"pos": "NOUN"},
"E": {"pos": "ADP"},
"RD": {"pos": "DET"},
"V": {"pos": "VERB"},
"_": {"pos": "NO_TAG"},
"A": {"pos": "ADJ"},
"SP": {"pos": "PROPN"},
"FF": {"pos": "PUNCT"},
"FS": {"pos": "PUNCT"},
"B": {"pos": "ADV"},
"CC": {"pos": "CONJ"},
"FB": {"pos": "PUNCT"},
"VA": {"pos": "AUX"},
"PC": {"pos": "PRON"},
"N": {"pos": "NUM"},
"RI": {"pos": "DET"},
"PR": {"pos": "PRON"},
"CS": {"pos": "SCONJ"},
"BN": {"pos": "ADV"},
"AP": {"pos": "DET"},
"VM": {"pos": "AUX"},
"DI": {"pos": "DET"},
"FC": {"pos": "PUNCT"},
"PI": {"pos": "PRON"},
"DD": {"pos": "DET"},
"DQ": {"pos": "DET"},
"PQ": {"pos": "PRON"},
"PD": {"pos": "PRON"},
"NO": {"pos": "ADJ"},
"PE": {"pos": "PRON"},
"T": {"pos": "DET"},
"X": {"pos": "SYM"},
"SW": {"pos": "X"},
"NO": {"pos": "PRON"},
"I": {"pos": "INTJ"},
"X": {"pos": "X"},
"DR": {"pos": "DET"},
"EA": {"pos": "ADP"},
"PP": {"pos": "PRON"},
"X": {"pos": "NUM"},
"DE": {"pos": "DET"},
"X": {"pos": "PART"}
}

View File

@ -1,194 +0,0 @@
{
"Reddit": [
"PRODUCT",
{},
[
[{"lower": "reddit"}]
]
],
"SeptemberElevenAttacks": [
"EVENT",
{},
[
[
{"orth": "9/11"}
],
[
{"lower": "september"},
{"orth": "11"}
]
]
],
"Linux": [
"PRODUCT",
{},
[
[{"lower": "linux"}]
]
],
"Haskell": [
"PRODUCT",
{},
[
[{"lower": "haskell"}]
]
],
"HaskellCurry": [
"PERSON",
{},
[
[
{"lower": "haskell"},
{"lower": "curry"}
]
]
],
"Javascript": [
"PRODUCT",
{},
[
[{"lower": "javascript"}]
]
],
"CSS": [
"PRODUCT",
{},
[
[{"lower": "css"}],
[{"lower": "css3"}]
]
],
"displaCy": [
"PRODUCT",
{},
[
[{"lower": "displacy"}]
]
],
"spaCy": [
"PRODUCT",
{},
[
[{"orth": "spaCy"}]
]
],
"HTML": [
"PRODUCT",
{},
[
[{"lower": "html"}],
[{"lower": "html5"}]
]
],
"Python": [
"PRODUCT",
{},
[
[{"orth": "Python"}]
]
],
"Ruby": [
"PRODUCT",
{},
[
[{"orth": "Ruby"}]
]
],
"Digg": [
"PRODUCT",
{},
[
[{"lower": "digg"}]
]
],
"FoxNews": [
"ORG",
{},
[
[{"orth": "Fox"}],
[{"orth": "News"}]
]
],
"Google": [
"ORG",
{},
[
[{"lower": "google"}]
]
],
"Mac": [
"PRODUCT",
{},
[
[{"lower": "mac"}]
]
],
"Wikipedia": [
"PRODUCT",
{},
[
[{"lower": "wikipedia"}]
]
],
"Windows": [
"PRODUCT",
{},
[
[{"orth": "Windows"}]
]
],
"Dell": [
"ORG",
{},
[
[{"lower": "dell"}]
]
],
"Facebook": [
"ORG",
{},
[
[{"lower": "facebook"}]
]
],
"Blizzard": [
"ORG",
{},
[
[{"orth": "Blizzard"}]
]
],
"Ubuntu": [
"ORG",
{},
[
[{"orth": "Ubuntu"}]
]
],
"Youtube": [
"PRODUCT",
{},
[
[{"lower": "youtube"}]
]
],
"false_positives": [
null,
{},
[
[{"orth": "Shit"}],
[{"orth": "Weed"}],
[{"orth": "Cool"}],
[{"orth": "Btw"}],
[{"orth": "Bah"}],
[{"orth": "Bullshit"}],
[{"orth": "Lol"}],
[{"orth": "Yo"}, {"lower": "dawg"}],
[{"orth": "Yay"}],
[{"orth": "Ahh"}],
[{"orth": "Yea"}],
[{"orth": "Bah"}]
]
]
}

View File

@ -1,6 +0,0 @@
\.\.\.
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-z])
(?<=[a-zA-Z])--(?=[a-zA-z])
(?<=[0-9])-(?=[0-9])
(?<=[A-Za-z]),(?=[A-Za-z])

View File

@ -1 +0,0 @@
{}

View File

@ -1,21 +0,0 @@
,
"
(
[
{
*
<
$
£
'
``
`
#
US$
C$
A$
a-
....
...

View File

@ -1 +0,0 @@
{}

View File

@ -1,26 +0,0 @@
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
''
's
'S
s
S
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]"'%\)])\.
(?<=[0-9])km

View File

@ -1,43 +0,0 @@
{
"NR": {"pos": "PROPN"},
"AD": {"pos": "ADV"},
"NN": {"pos": "NOUN"},
"CD": {"pos": "NUM"},
"DEG": {"pos": "PART"},
"PN": {"pos": "PRON"},
"M": {"pos": "PART"},
"JJ": {"pos": "ADJ"},
"DEC": {"pos": "PART"},
"NT": {"pos": "NOUN"},
"DT": {"pos": "DET"},
"LC": {"pos": "PART"},
"CC": {"pos": "CONJ"},
"AS": {"pos": "PART"},
"SP": {"pos": "PART"},
"IJ": {"pos": "INTJ"},
"OD": {"pos": "NUM"},
"MSP": {"pos": "PART"},
"CS": {"pos": "SCONJ"},
"ETC": {"pos": "PART"},
"DEV": {"pos": "PART"},
"BA": {"pos": "AUX"},
"SB": {"pos": "AUX"},
"DER": {"pos": "PART"},
"LB": {"pos": "AUX"},
"P": {"pos": "ADP"},
"URL": {"pos": "SYM"},
"FRAG": {"pos": "X"},
"X": {"pos": "X"},
"ON": {"pos": "X"},
"FW": {"pos": "X"},
"VC": {"pos": "VERB"},
"VV": {"pos": "VERB"},
"VA": {"pos": "VERB"},
"VE": {"pos": "VERB"},
"PU": {"pos": "PUNCT"},
"SP": {"pos": "SPACE"},
"NP": {"pos": "X"},
"_": {"pos": "X"},
"VP": {"pos": "X"},
"CHAR": {"pos": "X"}
}