mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Remove old lang_data directory
This commit is contained in:
parent
79dce0aabe
commit
4a1e206064
|
@ -1,319 +0,0 @@
|
|||
# surface form lemma pos
|
||||
# multiple values are separated by |
|
||||
# empty lines and lines starting with # are being ignored
|
||||
|
||||
'' ''
|
||||
\") \")
|
||||
\n \n <nl> SP
|
||||
\t \t <tab> SP
|
||||
<space> SP
|
||||
|
||||
# example: Wie geht's?
|
||||
's 's es
|
||||
'S 'S es
|
||||
|
||||
# example: Haste mal 'nen Euro?
|
||||
'n 'n ein
|
||||
'ne 'ne eine
|
||||
'nen 'nen einen
|
||||
|
||||
# example: Kommen S’ nur herein!
|
||||
s' s' sie
|
||||
S' S' sie
|
||||
|
||||
# example: Da haben wir's!
|
||||
ich's ich|'s ich|es
|
||||
du's du|'s du|es
|
||||
er's er|'s er|es
|
||||
sie's sie|'s sie|es
|
||||
wir's wir|'s wir|es
|
||||
ihr's ihr|'s ihr|es
|
||||
|
||||
# example: Die katze auf'm dach.
|
||||
auf'm auf|'m auf|dem
|
||||
unter'm unter|'m unter|dem
|
||||
über'm über|'m über|dem
|
||||
vor'm vor|'m vor|dem
|
||||
hinter'm hinter|'m hinter|dem
|
||||
|
||||
# persons
|
||||
B.A. B.A.
|
||||
B.Sc. B.Sc.
|
||||
Dipl. Dipl.
|
||||
Dipl.-Ing. Dipl.-Ing.
|
||||
Dr. Dr.
|
||||
Fr. Fr.
|
||||
Frl. Frl.
|
||||
Hr. Hr.
|
||||
Hrn. Hrn.
|
||||
Frl. Frl.
|
||||
Prof. Prof.
|
||||
St. St.
|
||||
Hrgs. Hrgs.
|
||||
Hg. Hg.
|
||||
a.Z. a.Z.
|
||||
a.D. a.D.
|
||||
h.c. h.c.
|
||||
Jr. Jr.
|
||||
jr. jr.
|
||||
jun. jun.
|
||||
sen. sen.
|
||||
rer. rer.
|
||||
Ing. Ing.
|
||||
M.A. M.A.
|
||||
Mr. Mr.
|
||||
M.Sc. M.Sc.
|
||||
nat. nat.
|
||||
phil. phil.
|
||||
|
||||
# companies
|
||||
Co. Co.
|
||||
co. co.
|
||||
Cie. Cie.
|
||||
A.G. A.G.
|
||||
G.m.b.H. G.m.b.H.
|
||||
i.G. i.G.
|
||||
e.V. e.V.
|
||||
|
||||
# popular german abbreviations
|
||||
Abb. Abb.
|
||||
Abk. Abk.
|
||||
Abs. Abs.
|
||||
Abt. Abt.
|
||||
abzgl. abzgl.
|
||||
allg. allg.
|
||||
a.M. a.M.
|
||||
Bd. Bd.
|
||||
betr. betr.
|
||||
Betr. Betr.
|
||||
Biol. Biol.
|
||||
biol. biol.
|
||||
Bf. Bf.
|
||||
Bhf. Bhf.
|
||||
Bsp. Bsp.
|
||||
bspw. bspw.
|
||||
bzgl. bzgl.
|
||||
bzw. bzw.
|
||||
d.h. d.h.
|
||||
dgl. dgl.
|
||||
ebd. ebd.
|
||||
ehem. ehem.
|
||||
eigtl. eigtl.
|
||||
entspr. entspr.
|
||||
erm. erm.
|
||||
ev. ev.
|
||||
evtl. evtl.
|
||||
Fa. Fa.
|
||||
Fam. Fam.
|
||||
geb. geb.
|
||||
Gebr. Gebr.
|
||||
gem. gem.
|
||||
ggf. ggf.
|
||||
ggü. ggü.
|
||||
ggfs. ggfs.
|
||||
gegr. gegr.
|
||||
Hbf. Hbf.
|
||||
Hrsg. Hrsg.
|
||||
hrsg. hrsg.
|
||||
i.A. i.A.
|
||||
i.d.R. i.d.R.
|
||||
inkl. inkl.
|
||||
insb. insb.
|
||||
i.O. i.O.
|
||||
i.Tr. i.Tr.
|
||||
i.V. i.V.
|
||||
jur. jur.
|
||||
kath. kath.
|
||||
K.O. K.O.
|
||||
lt. lt.
|
||||
max. max.
|
||||
m.E. m.E.
|
||||
m.M. m.M.
|
||||
mtl. mtl.
|
||||
min. min.
|
||||
mind. mind.
|
||||
MwSt. MwSt.
|
||||
Nr. Nr.
|
||||
o.a. o.a.
|
||||
o.ä. o.ä.
|
||||
o.Ä. o.Ä.
|
||||
o.g. o.g.
|
||||
o.k. o.k.
|
||||
O.K. O.K.
|
||||
Orig. Orig.
|
||||
orig. orig.
|
||||
pers. pers.
|
||||
Pkt. Pkt.
|
||||
Red. Red.
|
||||
röm. röm.
|
||||
s.o. s.o.
|
||||
sog. sog.
|
||||
std. std.
|
||||
stellv. stellv.
|
||||
Str. Str.
|
||||
tägl. tägl.
|
||||
Tel. Tel.
|
||||
u.a. u.a.
|
||||
usf. usf.
|
||||
u.s.w. u.s.w.
|
||||
usw. usw.
|
||||
u.U. u.U.
|
||||
u.v.m. u.v.m.
|
||||
uvm. uvm.
|
||||
v.a. v.a.
|
||||
vgl. vgl.
|
||||
vllt. vllt.
|
||||
v.l.n.r. v.l.n.r.
|
||||
vlt. vlt.
|
||||
Vol. Vol.
|
||||
wiss. wiss.
|
||||
Univ. Univ.
|
||||
z.B. z.B.
|
||||
z.b. z.b.
|
||||
z.Bsp. z.Bsp.
|
||||
z.T. z.T.
|
||||
z.Z. z.Z.
|
||||
zzgl. zzgl.
|
||||
z.Zt. z.Zt.
|
||||
|
||||
# popular latin abbreviations
|
||||
vs. vs.
|
||||
adv. adv.
|
||||
Chr. Chr.
|
||||
A.C. A.C.
|
||||
A.D. A.D.
|
||||
e.g. e.g.
|
||||
i.e. i.e.
|
||||
al. al.
|
||||
p.a. p.a.
|
||||
P.S. P.S.
|
||||
q.e.d. q.e.d.
|
||||
R.I.P. R.I.P.
|
||||
etc. etc.
|
||||
incl. incl.
|
||||
ca. ca.
|
||||
n.Chr. n.Chr.
|
||||
p.s. p.s.
|
||||
v.Chr. v.Chr.
|
||||
|
||||
# popular english abbreviations
|
||||
D.C. D.C.
|
||||
N.Y. N.Y.
|
||||
N.Y.C. N.Y.C.
|
||||
U.S. U.S.
|
||||
U.S.A. U.S.A.
|
||||
L.A. L.A.
|
||||
U.S.S. U.S.S.
|
||||
|
||||
# dates & time
|
||||
Jan. Jan.
|
||||
Feb. Feb.
|
||||
Mrz. Mrz.
|
||||
Mär. Mär.
|
||||
Apr. Apr.
|
||||
Jun. Jun.
|
||||
Jul. Jul.
|
||||
Aug. Aug.
|
||||
Sep. Sep.
|
||||
Sept. Sept.
|
||||
Okt. Okt.
|
||||
Nov. Nov.
|
||||
Dez. Dez.
|
||||
Mo. Mo.
|
||||
Di. Di.
|
||||
Mi. Mi.
|
||||
Do. Do.
|
||||
Fr. Fr.
|
||||
Sa. Sa.
|
||||
So. So.
|
||||
Std. Std.
|
||||
Jh. Jh.
|
||||
Jhd. Jhd.
|
||||
|
||||
# numbers
|
||||
Tsd. Tsd.
|
||||
Mio. Mio.
|
||||
Mrd. Mrd.
|
||||
|
||||
# countries & languages
|
||||
engl. engl.
|
||||
frz. frz.
|
||||
lat. lat.
|
||||
österr. österr.
|
||||
|
||||
# smileys
|
||||
:) :)
|
||||
<3 <3
|
||||
;) ;)
|
||||
(: (:
|
||||
:( :(
|
||||
-_- -_-
|
||||
=) =)
|
||||
:/ :/
|
||||
:> :>
|
||||
;-) ;-)
|
||||
:Y :Y
|
||||
:P :P
|
||||
:-P :-P
|
||||
:3 :3
|
||||
=3 =3
|
||||
xD xD
|
||||
^_^ ^_^
|
||||
=] =]
|
||||
=D =D
|
||||
<333 <333
|
||||
:)) :))
|
||||
:0 :0
|
||||
-__- -__-
|
||||
xDD xDD
|
||||
o_o o_o
|
||||
o_O o_O
|
||||
V_V V_V
|
||||
=[[ =[[
|
||||
<33 <33
|
||||
;p ;p
|
||||
;D ;D
|
||||
;-p ;-p
|
||||
;( ;(
|
||||
:p :p
|
||||
:] :]
|
||||
:O :O
|
||||
:-/ :-/
|
||||
:-) :-)
|
||||
:((( :(((
|
||||
:(( :((
|
||||
:') :')
|
||||
(^_^) (^_^)
|
||||
(= (=
|
||||
o.O o.O
|
||||
|
||||
# single letters
|
||||
a. a.
|
||||
b. b.
|
||||
c. c.
|
||||
d. d.
|
||||
e. e.
|
||||
f. f.
|
||||
g. g.
|
||||
h. h.
|
||||
i. i.
|
||||
j. j.
|
||||
k. k.
|
||||
l. l.
|
||||
m. m.
|
||||
n. n.
|
||||
o. o.
|
||||
p. p.
|
||||
q. q.
|
||||
r. r.
|
||||
s. s.
|
||||
t. t.
|
||||
u. u.
|
||||
v. v.
|
||||
w. w.
|
||||
x. x.
|
||||
y. y.
|
||||
z. z.
|
||||
ä. ä.
|
||||
ö. ö.
|
||||
ü. ü.
|
|
@ -1,194 +0,0 @@
|
|||
{
|
||||
"Reddit": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "reddit"}]
|
||||
]
|
||||
],
|
||||
"SeptemberElevenAttacks": [
|
||||
"EVENT",
|
||||
{},
|
||||
[
|
||||
[
|
||||
{"orth": "9/11"}
|
||||
],
|
||||
[
|
||||
{"lower": "september"},
|
||||
{"orth": "11"}
|
||||
]
|
||||
]
|
||||
],
|
||||
"Linux": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "linux"}]
|
||||
]
|
||||
],
|
||||
"Haskell": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "haskell"}]
|
||||
]
|
||||
],
|
||||
"HaskellCurry": [
|
||||
"PERSON",
|
||||
{},
|
||||
[
|
||||
[
|
||||
{"lower": "haskell"},
|
||||
{"lower": "curry"}
|
||||
]
|
||||
]
|
||||
],
|
||||
"Javascript": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "javascript"}]
|
||||
]
|
||||
],
|
||||
"CSS": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "css"}],
|
||||
[{"lower": "css3"}]
|
||||
]
|
||||
],
|
||||
"displaCy": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "displacy"}]
|
||||
]
|
||||
],
|
||||
"spaCy": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "spaCy"}]
|
||||
]
|
||||
],
|
||||
|
||||
"HTML": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "html"}],
|
||||
[{"lower": "html5"}]
|
||||
]
|
||||
],
|
||||
"Python": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Python"}]
|
||||
]
|
||||
],
|
||||
"Ruby": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Ruby"}]
|
||||
]
|
||||
],
|
||||
"Digg": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "digg"}]
|
||||
]
|
||||
],
|
||||
"FoxNews": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Fox"}],
|
||||
[{"orth": "News"}]
|
||||
]
|
||||
],
|
||||
"Google": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "google"}]
|
||||
]
|
||||
],
|
||||
"Mac": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "mac"}]
|
||||
]
|
||||
],
|
||||
"Wikipedia": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "wikipedia"}]
|
||||
]
|
||||
],
|
||||
"Windows": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Windows"}]
|
||||
]
|
||||
],
|
||||
"Dell": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "dell"}]
|
||||
]
|
||||
],
|
||||
"Facebook": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "facebook"}]
|
||||
]
|
||||
],
|
||||
"Blizzard": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Blizzard"}]
|
||||
]
|
||||
],
|
||||
"Ubuntu": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Ubuntu"}]
|
||||
]
|
||||
],
|
||||
"Youtube": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "youtube"}]
|
||||
]
|
||||
],
|
||||
"false_positives": [
|
||||
null,
|
||||
{},
|
||||
[
|
||||
[{"orth": "Shit"}],
|
||||
[{"orth": "Weed"}],
|
||||
[{"orth": "Cool"}],
|
||||
[{"orth": "Btw"}],
|
||||
[{"orth": "Bah"}],
|
||||
[{"orth": "Bullshit"}],
|
||||
[{"orth": "Lol"}],
|
||||
[{"orth": "Yo"}, {"lower": "dawg"}],
|
||||
[{"orth": "Yay"}],
|
||||
[{"orth": "Ahh"}],
|
||||
[{"orth": "Yea"}],
|
||||
[{"orth": "Bah"}]
|
||||
]
|
||||
]
|
||||
}
|
|
@ -1,334 +0,0 @@
|
|||
# coding=utf8
|
||||
import json
|
||||
import io
|
||||
import itertools
|
||||
|
||||
contractions = {}
|
||||
|
||||
# contains the lemmas, parts of speech, number, and tenspect of
|
||||
# potential tokens generated after splitting contractions off
|
||||
token_properties = {}
|
||||
|
||||
# contains starting tokens with their potential contractions
|
||||
# each potential contraction has a list of exceptions
|
||||
# lower - don't generate the lowercase version
|
||||
# upper - don't generate the uppercase version
|
||||
# contrLower - don't generate the lowercase version with apostrophe (') removed
|
||||
# contrUpper - dont' generate the uppercase version with apostrophe (') removed
|
||||
# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so
|
||||
# we add "contrLower" and "contrUpper" to the exceptions list
|
||||
starting_tokens = {}
|
||||
|
||||
# other specials that don't really have contractions
|
||||
# so they are hardcoded
|
||||
hardcoded_specials = {
|
||||
"''": [{"F": "''"}],
|
||||
"\")": [{"F": "\")"}],
|
||||
"\n": [{"F": "\n", "pos": "SP"}],
|
||||
"\t": [{"F": "\t", "pos": "SP"}],
|
||||
" ": [{"F": " ", "pos": "SP"}],
|
||||
|
||||
# example: Wie geht's?
|
||||
"'s": [{"F": "'s", "L": "es"}],
|
||||
"'S": [{"F": "'S", "L": "es"}],
|
||||
|
||||
# example: Haste mal 'nen Euro?
|
||||
"'n": [{"F": "'n", "L": "ein"}],
|
||||
"'ne": [{"F": "'ne", "L": "eine"}],
|
||||
"'nen": [{"F": "'nen", "L": "einen"}],
|
||||
|
||||
# example: Kommen S’ nur herein!
|
||||
"s'": [{"F": "s'", "L": "sie"}],
|
||||
"S'": [{"F": "S'", "L": "sie"}],
|
||||
|
||||
# example: Da haben wir's!
|
||||
"ich's": [{"F": "ich"}, {"F": "'s", "L": "es"}],
|
||||
"du's": [{"F": "du"}, {"F": "'s", "L": "es"}],
|
||||
"er's": [{"F": "er"}, {"F": "'s", "L": "es"}],
|
||||
"sie's": [{"F": "sie"}, {"F": "'s", "L": "es"}],
|
||||
"wir's": [{"F": "wir"}, {"F": "'s", "L": "es"}],
|
||||
"ihr's": [{"F": "ihr"}, {"F": "'s", "L": "es"}],
|
||||
|
||||
# example: Die katze auf'm dach.
|
||||
"auf'm": [{"F": "auf"}, {"F": "'m", "L": "dem"}],
|
||||
"unter'm": [{"F": "unter"}, {"F": "'m", "L": "dem"}],
|
||||
"über'm": [{"F": "über"}, {"F": "'m", "L": "dem"}],
|
||||
"vor'm": [{"F": "vor"}, {"F": "'m", "L": "dem"}],
|
||||
"hinter'm": [{"F": "hinter"}, {"F": "'m", "L": "dem"}],
|
||||
|
||||
# persons
|
||||
"Fr.": [{"F": "Fr."}],
|
||||
"Hr.": [{"F": "Hr."}],
|
||||
"Frl.": [{"F": "Frl."}],
|
||||
"Prof.": [{"F": "Prof."}],
|
||||
"Dr.": [{"F": "Dr."}],
|
||||
"St.": [{"F": "St."}],
|
||||
"Hrgs.": [{"F": "Hrgs."}],
|
||||
"Hg.": [{"F": "Hg."}],
|
||||
"a.Z.": [{"F": "a.Z."}],
|
||||
"a.D.": [{"F": "a.D."}],
|
||||
"A.D.": [{"F": "A.D."}],
|
||||
"h.c.": [{"F": "h.c."}],
|
||||
"jun.": [{"F": "jun."}],
|
||||
"sen.": [{"F": "sen."}],
|
||||
"rer.": [{"F": "rer."}],
|
||||
"Dipl.": [{"F": "Dipl."}],
|
||||
"Ing.": [{"F": "Ing."}],
|
||||
"Dipl.-Ing.": [{"F": "Dipl.-Ing."}],
|
||||
|
||||
# companies
|
||||
"Co.": [{"F": "Co."}],
|
||||
"co.": [{"F": "co."}],
|
||||
"Cie.": [{"F": "Cie."}],
|
||||
"A.G.": [{"F": "A.G."}],
|
||||
"G.m.b.H.": [{"F": "G.m.b.H."}],
|
||||
"i.G.": [{"F": "i.G."}],
|
||||
"e.V.": [{"F": "e.V."}],
|
||||
|
||||
# popular german abbreviations
|
||||
"ggü.": [{"F": "ggü."}],
|
||||
"ggf.": [{"F": "ggf."}],
|
||||
"ggfs.": [{"F": "ggfs."}],
|
||||
"Gebr.": [{"F": "Gebr."}],
|
||||
"geb.": [{"F": "geb."}],
|
||||
"gegr.": [{"F": "gegr."}],
|
||||
"erm.": [{"F": "erm."}],
|
||||
"engl.": [{"F": "engl."}],
|
||||
"ehem.": [{"F": "ehem."}],
|
||||
"Biol.": [{"F": "Biol."}],
|
||||
"biol.": [{"F": "biol."}],
|
||||
"Abk.": [{"F": "Abk."}],
|
||||
"Abb.": [{"F": "Abb."}],
|
||||
"abzgl.": [{"F": "abzgl."}],
|
||||
"Hbf.": [{"F": "Hbf."}],
|
||||
"Bhf.": [{"F": "Bhf."}],
|
||||
"Bf.": [{"F": "Bf."}],
|
||||
"i.V.": [{"F": "i.V."}],
|
||||
"inkl.": [{"F": "inkl."}],
|
||||
"insb.": [{"F": "insb."}],
|
||||
"z.B.": [{"F": "z.B."}],
|
||||
"i.Tr.": [{"F": "i.Tr."}],
|
||||
"Jhd.": [{"F": "Jhd."}],
|
||||
"jur.": [{"F": "jur."}],
|
||||
"lt.": [{"F": "lt."}],
|
||||
"nat.": [{"F": "nat."}],
|
||||
"u.a.": [{"F": "u.a."}],
|
||||
"u.s.w.": [{"F": "u.s.w."}],
|
||||
"Nr.": [{"F": "Nr."}],
|
||||
"Univ.": [{"F": "Univ."}],
|
||||
"vgl.": [{"F": "vgl."}],
|
||||
"zzgl.": [{"F": "zzgl."}],
|
||||
"z.Z.": [{"F": "z.Z."}],
|
||||
"betr.": [{"F": "betr."}],
|
||||
"ehem.": [{"F": "ehem."}],
|
||||
|
||||
# popular latin abbreviations
|
||||
"vs.": [{"F": "vs."}],
|
||||
"adv.": [{"F": "adv."}],
|
||||
"Chr.": [{"F": "Chr."}],
|
||||
"A.C.": [{"F": "A.C."}],
|
||||
"A.D.": [{"F": "A.D."}],
|
||||
"e.g.": [{"F": "e.g."}],
|
||||
"i.e.": [{"F": "i.e."}],
|
||||
"al.": [{"F": "al."}],
|
||||
"p.a.": [{"F": "p.a."}],
|
||||
"P.S.": [{"F": "P.S."}],
|
||||
"q.e.d.": [{"F": "q.e.d."}],
|
||||
"R.I.P.": [{"F": "R.I.P."}],
|
||||
"etc.": [{"F": "etc."}],
|
||||
"incl.": [{"F": "incl."}],
|
||||
|
||||
# popular english abbreviations
|
||||
"D.C.": [{"F": "D.C."}],
|
||||
"N.Y.": [{"F": "N.Y."}],
|
||||
"N.Y.C.": [{"F": "N.Y.C."}],
|
||||
|
||||
# dates
|
||||
"Jan.": [{"F": "Jan."}],
|
||||
"Feb.": [{"F": "Feb."}],
|
||||
"Mrz.": [{"F": "Mrz."}],
|
||||
"Mär.": [{"F": "Mär."}],
|
||||
"Apr.": [{"F": "Apr."}],
|
||||
"Jun.": [{"F": "Jun."}],
|
||||
"Jul.": [{"F": "Jul."}],
|
||||
"Aug.": [{"F": "Aug."}],
|
||||
"Sep.": [{"F": "Sep."}],
|
||||
"Sept.": [{"F": "Sept."}],
|
||||
"Okt.": [{"F": "Okt."}],
|
||||
"Nov.": [{"F": "Nov."}],
|
||||
"Dez.": [{"F": "Dez."}],
|
||||
"Mo.": [{"F": "Mo."}],
|
||||
"Di.": [{"F": "Di."}],
|
||||
"Mi.": [{"F": "Mi."}],
|
||||
"Do.": [{"F": "Do."}],
|
||||
"Fr.": [{"F": "Fr."}],
|
||||
"Sa.": [{"F": "Sa."}],
|
||||
"So.": [{"F": "So."}],
|
||||
|
||||
# smileys
|
||||
":)": [{"F": ":)"}],
|
||||
"<3": [{"F": "<3"}],
|
||||
";)": [{"F": ";)"}],
|
||||
"(:": [{"F": "(:"}],
|
||||
":(": [{"F": ":("}],
|
||||
"-_-": [{"F": "-_-"}],
|
||||
"=)": [{"F": "=)"}],
|
||||
":/": [{"F": ":/"}],
|
||||
":>": [{"F": ":>"}],
|
||||
";-)": [{"F": ";-)"}],
|
||||
":Y": [{"F": ":Y"}],
|
||||
":P": [{"F": ":P"}],
|
||||
":-P": [{"F": ":-P"}],
|
||||
":3": [{"F": ":3"}],
|
||||
"=3": [{"F": "=3"}],
|
||||
"xD": [{"F": "xD"}],
|
||||
"^_^": [{"F": "^_^"}],
|
||||
"=]": [{"F": "=]"}],
|
||||
"=D": [{"F": "=D"}],
|
||||
"<333": [{"F": "<333"}],
|
||||
":))": [{"F": ":))"}],
|
||||
":0": [{"F": ":0"}],
|
||||
"-__-": [{"F": "-__-"}],
|
||||
"xDD": [{"F": "xDD"}],
|
||||
"o_o": [{"F": "o_o"}],
|
||||
"o_O": [{"F": "o_O"}],
|
||||
"V_V": [{"F": "V_V"}],
|
||||
"=[[": [{"F": "=[["}],
|
||||
"<33": [{"F": "<33"}],
|
||||
";p": [{"F": ";p"}],
|
||||
";D": [{"F": ";D"}],
|
||||
";-p": [{"F": ";-p"}],
|
||||
";(": [{"F": ";("}],
|
||||
":p": [{"F": ":p"}],
|
||||
":]": [{"F": ":]"}],
|
||||
":O": [{"F": ":O"}],
|
||||
":-/": [{"F": ":-/"}],
|
||||
":-)": [{"F": ":-)"}],
|
||||
":(((": [{"F": ":((("}],
|
||||
":((": [{"F": ":(("}],
|
||||
":')": [{"F": ":')"}],
|
||||
"(^_^)": [{"F": "(^_^)"}],
|
||||
"(=": [{"F": "(="}],
|
||||
"o.O": [{"F": "o.O"}],
|
||||
|
||||
"a.": [{"F": "a."}],
|
||||
"b.": [{"F": "b."}],
|
||||
"c.": [{"F": "c."}],
|
||||
"d.": [{"F": "d."}],
|
||||
"e.": [{"F": "e."}],
|
||||
"f.": [{"F": "f."}],
|
||||
"g.": [{"F": "g."}],
|
||||
"h.": [{"F": "h."}],
|
||||
"i.": [{"F": "i."}],
|
||||
"j.": [{"F": "j."}],
|
||||
"k.": [{"F": "k."}],
|
||||
"l.": [{"F": "l."}],
|
||||
"m.": [{"F": "m."}],
|
||||
"n.": [{"F": "n."}],
|
||||
"o.": [{"F": "o."}],
|
||||
"p.": [{"F": "p."}],
|
||||
"q.": [{"F": "q."}],
|
||||
"r.": [{"F": "r."}],
|
||||
"s.": [{"F": "s."}],
|
||||
"t.": [{"F": "t."}],
|
||||
"u.": [{"F": "u."}],
|
||||
"v.": [{"F": "v."}],
|
||||
"w.": [{"F": "w."}],
|
||||
"x.": [{"F": "x."}],
|
||||
"y.": [{"F": "y."}],
|
||||
"z.": [{"F": "z."}],
|
||||
}
|
||||
|
||||
def get_double_contractions(ending):
|
||||
endings = []
|
||||
|
||||
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
|
||||
|
||||
while ends_with_contraction:
|
||||
for contraction in contractions:
|
||||
if ending.endswith(contraction):
|
||||
endings.append(contraction)
|
||||
ending = ending.rstrip(contraction)
|
||||
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
|
||||
|
||||
endings.reverse() # reverse because the last ending is put in the list first
|
||||
return endings
|
||||
|
||||
def get_token_properties(token, capitalize=False, remove_contractions=False):
|
||||
props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop
|
||||
if capitalize:
|
||||
token = token.capitalize()
|
||||
if remove_contractions:
|
||||
token = token.replace("'", "")
|
||||
|
||||
props["F"] = token
|
||||
return props
|
||||
|
||||
|
||||
def create_entry(token, endings, capitalize=False, remove_contractions=False):
|
||||
properties = []
|
||||
properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions))
|
||||
for e in endings:
|
||||
properties.append(get_token_properties(e, remove_contractions=remove_contractions))
|
||||
return properties
|
||||
|
||||
|
||||
FIELDNAMES = ['F','L','pos']
|
||||
def read_hardcoded(stream):
|
||||
hc_specials = {}
|
||||
for line in stream:
|
||||
line = line.strip()
|
||||
if line.startswith('#') or not line:
|
||||
continue
|
||||
key,_,rest = line.partition('\t')
|
||||
values = []
|
||||
for annotation in zip(*[ e.split('|') for e in rest.split('\t') ]):
|
||||
values.append({ k:v for k,v in itertools.izip_longest(FIELDNAMES,annotation) if v })
|
||||
hc_specials[key] = values
|
||||
return hc_specials
|
||||
|
||||
|
||||
def generate_specials():
|
||||
|
||||
specials = {}
|
||||
|
||||
for token in starting_tokens:
|
||||
possible_endings = starting_tokens[token]
|
||||
for ending in possible_endings:
|
||||
|
||||
endings = []
|
||||
if ending.count("'") > 1:
|
||||
endings.extend(get_double_contractions(ending))
|
||||
else:
|
||||
endings.append(ending)
|
||||
|
||||
exceptions = possible_endings[ending]
|
||||
|
||||
if "lower" not in exceptions:
|
||||
special = token + ending
|
||||
specials[special] = create_entry(token, endings)
|
||||
|
||||
if "upper" not in exceptions:
|
||||
special = token.capitalize() + ending
|
||||
specials[special] = create_entry(token, endings, capitalize=True)
|
||||
|
||||
if "contrLower" not in exceptions:
|
||||
special = token + ending.replace("'", "")
|
||||
specials[special] = create_entry(token, endings, remove_contractions=True)
|
||||
|
||||
if "contrUpper" not in exceptions:
|
||||
special = token.capitalize() + ending.replace("'", "")
|
||||
specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True)
|
||||
|
||||
# add in hardcoded specials
|
||||
# changed it so it generates them from a file
|
||||
with io.open('abbrev.de.tab','r',encoding='utf8') as abbrev_:
|
||||
hc_specials = read_hardcoded(abbrev_)
|
||||
specials = dict(specials, **hc_specials)
|
||||
|
||||
return specials
|
||||
|
||||
if __name__ == "__main__":
|
||||
specials = generate_specials()
|
||||
with open("specials.json", "w") as f:
|
||||
json.dump(specials, f, sort_keys=True, indent=4, separators=(',', ': '))
|
|
@ -1,6 +0,0 @@
|
|||
\.\.\.
|
||||
(?<=[a-z])\.(?=[A-Z])
|
||||
(?<=[a-zöäüßA-ZÖÄÜ"]):(?=[a-zöäüßA-ZÖÄÜ])
|
||||
(?<=[a-zöäüßA-ZÖÄÜ"])>(?=[a-zöäüßA-ZÖÄÜ])
|
||||
(?<=[a-zöäüßA-ZÖÄÜ"])<(?=[a-zöäüßA-ZÖÄÜ])
|
||||
(?<=[a-zöäüßA-ZÖÄÜ"])=(?=[a-zöäüßA-ZÖÄÜ])
|
|
@ -1 +0,0 @@
|
|||
{}
|
|
@ -1,71 +0,0 @@
|
|||
{
|
||||
"PRP": {
|
||||
"ich": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 1},
|
||||
"meiner": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2},
|
||||
"mir": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3},
|
||||
"mich": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 4},
|
||||
"du": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
|
||||
"deiner": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
|
||||
"dir": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
|
||||
"dich": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
|
||||
"er": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1},
|
||||
"seiner": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2},
|
||||
"ihm": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3},
|
||||
"ihn": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4},
|
||||
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1},
|
||||
"ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2},
|
||||
"ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3},
|
||||
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4},
|
||||
"es": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1},
|
||||
"seiner": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2},
|
||||
"ihm": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3},
|
||||
"es": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4},
|
||||
"wir": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
|
||||
"unser": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
|
||||
"uns": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
|
||||
"uns": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
|
||||
"ihr": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
|
||||
"euer": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
|
||||
"euch": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
|
||||
"euch": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
|
||||
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1},
|
||||
"ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2},
|
||||
"ihnen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3},
|
||||
"sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4}
|
||||
},
|
||||
|
||||
"PRP$": {
|
||||
"mein": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
|
||||
"meines": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
|
||||
"meinem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
|
||||
"meinen": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
|
||||
"dein": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
|
||||
"deines": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
|
||||
"deinem": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
|
||||
"deinen": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
|
||||
"sein": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1},
|
||||
"seines": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2},
|
||||
"seinem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3},
|
||||
"seinen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4},
|
||||
"ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1},
|
||||
"ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2},
|
||||
"ihrem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3},
|
||||
"ihren": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4},
|
||||
"sein": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1},
|
||||
"seines": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2},
|
||||
"seinem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3},
|
||||
"seinen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4},
|
||||
"unser": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1},
|
||||
"unseres": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2},
|
||||
"unserem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3},
|
||||
"unseren": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4},
|
||||
"euer": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1},
|
||||
"eures": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2},
|
||||
"eurem": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3},
|
||||
"euren": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4},
|
||||
"ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1},
|
||||
"ihres": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2},
|
||||
"ihrem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3},
|
||||
"ihren": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4}
|
||||
}
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
,
|
||||
"
|
||||
(
|
||||
[
|
||||
{
|
||||
*
|
||||
<
|
||||
>
|
||||
$
|
||||
£
|
||||
„
|
||||
“
|
||||
'
|
||||
``
|
||||
`
|
||||
#
|
||||
US$
|
||||
C$
|
||||
A$
|
||||
a-
|
||||
‘
|
||||
....
|
||||
...
|
||||
‚
|
||||
»
|
||||
_
|
||||
§
|
|
@ -1,3 +0,0 @@
|
|||
Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
|
||||
|
||||
Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.
|
File diff suppressed because it is too large
Load Diff
|
@ -1,73 +0,0 @@
|
|||
,
|
||||
\"
|
||||
\)
|
||||
\]
|
||||
\}
|
||||
\*
|
||||
\!
|
||||
\?
|
||||
%
|
||||
\$
|
||||
>
|
||||
:
|
||||
;
|
||||
'
|
||||
”
|
||||
“
|
||||
«
|
||||
_
|
||||
''
|
||||
's
|
||||
'S
|
||||
’s
|
||||
’S
|
||||
’
|
||||
‘
|
||||
°
|
||||
€
|
||||
\.\.
|
||||
\.\.\.
|
||||
\.\.\.\.
|
||||
(?<=[a-zäöüßÖÄÜ)\]"'´«‘’%\)²“”])\.
|
||||
\-\-
|
||||
´
|
||||
(?<=[0-9])km²
|
||||
(?<=[0-9])m²
|
||||
(?<=[0-9])cm²
|
||||
(?<=[0-9])mm²
|
||||
(?<=[0-9])km³
|
||||
(?<=[0-9])m³
|
||||
(?<=[0-9])cm³
|
||||
(?<=[0-9])mm³
|
||||
(?<=[0-9])ha
|
||||
(?<=[0-9])km
|
||||
(?<=[0-9])m
|
||||
(?<=[0-9])cm
|
||||
(?<=[0-9])mm
|
||||
(?<=[0-9])µm
|
||||
(?<=[0-9])nm
|
||||
(?<=[0-9])yd
|
||||
(?<=[0-9])in
|
||||
(?<=[0-9])ft
|
||||
(?<=[0-9])kg
|
||||
(?<=[0-9])g
|
||||
(?<=[0-9])mg
|
||||
(?<=[0-9])µg
|
||||
(?<=[0-9])t
|
||||
(?<=[0-9])lb
|
||||
(?<=[0-9])oz
|
||||
(?<=[0-9])m/s
|
||||
(?<=[0-9])km/h
|
||||
(?<=[0-9])mph
|
||||
(?<=[0-9])°C
|
||||
(?<=[0-9])°K
|
||||
(?<=[0-9])°F
|
||||
(?<=[0-9])hPa
|
||||
(?<=[0-9])Pa
|
||||
(?<=[0-9])mbar
|
||||
(?<=[0-9])mb
|
||||
(?<=[0-9])T
|
||||
(?<=[0-9])G
|
||||
(?<=[0-9])M
|
||||
(?<=[0-9])K
|
||||
(?<=[0-9])kb
|
|
@ -1,59 +0,0 @@
|
|||
{
|
||||
"$(": {"pos": "PUNCT", "PunctType": "Brck"},
|
||||
"$,": {"pos": "PUNCT", "PunctType": "Comm"},
|
||||
"$.": {"pos": "PUNCT", "PunctType": "Peri"},
|
||||
"ADJA": {"pos": "ADJ"},
|
||||
"ADJD": {"pos": "ADJ", "Variant": "Short"},
|
||||
"ADV": {"pos": "ADV"},
|
||||
"APPO": {"pos": "ADP", "AdpType": "Post"},
|
||||
"APPR": {"pos": "ADP", "AdpType": "Prep"},
|
||||
"APPRART": {"pos": "ADP", "AdpType": "Prep", "PronType": "Art"},
|
||||
"APZR": {"pos": "ADP", "AdpType": "Circ"},
|
||||
"ART": {"pos": "DET", "PronType": "Art"},
|
||||
"CARD": {"pos": "NUM", "NumType": "Card"},
|
||||
"FM": {"pos": "X", "Foreign": "Yes"},
|
||||
"ITJ": {"pos": "INTJ"},
|
||||
"KOKOM": {"pos": "CONJ", "ConjType": "Comp"},
|
||||
"KON": {"pos": "CONJ"},
|
||||
"KOUI": {"pos": "SCONJ"},
|
||||
"KOUS": {"pos": "SCONJ"},
|
||||
"NE": {"pos": "PROPN"},
|
||||
"NNE": {"pos": "PROPN"},
|
||||
"NN": {"pos": "NOUN"},
|
||||
"PAV": {"pos": "ADV", "PronType": "Dem"},
|
||||
"PROAV": {"pos": "ADV", "PronType": "Dem"},
|
||||
"PDAT": {"pos": "DET", "PronType": "Dem"},
|
||||
"PDS": {"pos": "PRON", "PronType": "Dem"},
|
||||
"PIAT": {"pos": "DET", "PronType": "Ind,Neg,Tot"},
|
||||
"PIDAT": {"pos": "DET", "AdjType": "Pdt", "PronType": "Ind,Neg,Tot"},
|
||||
"PIS": {"pos": "PRON", "PronType": "Ind,Neg,Tot"},
|
||||
"PPER": {"pos": "PRON", "PronType": "Prs"},
|
||||
"PPOSAT": {"pos": "DET", "Poss": "Yes", "PronType": "Prs"},
|
||||
"PPOSS": {"pos": "PRON", "Poss": "Yes", "PronType": "Prs"},
|
||||
"PRELAT": {"pos": "DET", "PronType": "Rel"},
|
||||
"PRELS": {"pos": "PRON", "PronType": "Rel"},
|
||||
"PRF": {"pos": "PRON", "PronType": "Prs", "Reflex": "Yes"},
|
||||
"PTKA": {"pos": "PART"},
|
||||
"PTKANT": {"pos": "PART", "PartType": "Res"},
|
||||
"PTKNEG": {"pos": "PART", "Negative": "Neg"},
|
||||
"PTKVZ": {"pos": "PART", "PartType": "Vbp"},
|
||||
"PTKZU": {"pos": "PART", "PartType": "Inf"},
|
||||
"PWAT": {"pos": "DET", "PronType": "Int"},
|
||||
"PWAV": {"pos": "ADV", "PronType": "Int"},
|
||||
"PWS": {"pos": "PRON", "PronType": "Int"},
|
||||
"TRUNC": {"pos": "X", "Hyph": "Yes"},
|
||||
"VAFIN": {"pos": "AUX", "Mood": "Ind", "VerbForm": "Fin"},
|
||||
"VAIMP": {"pos": "AUX", "Mood": "Imp", "VerbForm": "Fin"},
|
||||
"VAINF": {"pos": "AUX", "VerbForm": "Inf"},
|
||||
"VAPP": {"pos": "AUX", "Aspect": "Perf", "VerbForm": "Part"},
|
||||
"VMFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin", "VerbType": "Mod"},
|
||||
"VMINF": {"pos": "VERB", "VerbForm": "Inf", "VerbType": "Mod"},
|
||||
"VMPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part", "VerbType": "Mod"},
|
||||
"VVFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin"},
|
||||
"VVIMP": {"pos": "VERB", "Mood": "Imp", "VerbForm": "Fin"},
|
||||
"VVINF": {"pos": "VERB", "VerbForm": "Inf"},
|
||||
"VVIZU": {"pos": "VERB", "VerbForm": "Inf"},
|
||||
"VVPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part"},
|
||||
"XY": {"pos": "X"},
|
||||
"SP": {"pos": "SPACE"}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
WordNet Release 3.0 This software and database is being provided to you, the
|
||||
LICENSEE, by Princeton University under the following license. By obtaining,
|
||||
using and/or copying this software and database, you agree that you have read,
|
||||
understood, and will comply with these terms and conditions.: Permission to
|
||||
use, copy, modify and distribute this software and database and its
|
||||
documentation for any purpose and without fee or royalty is hereby granted,
|
||||
provided that you agree to comply with the following copyright notice and
|
||||
statements, including the disclaimer, and that the same appear on ALL copies of
|
||||
the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton
|
||||
University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS"
|
||||
AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
||||
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO
|
||||
REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
|
||||
PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR
|
||||
DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS
|
||||
OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used
|
||||
in advertising or publicity pertaining to distribution of the software and/or
|
||||
database. Title to copyright in this software, database and any associated
|
||||
documentation shall at all times remain with Princeton University and LICENSEE
|
||||
agrees to preserve same.
|
|
@ -1,194 +0,0 @@
|
|||
{
|
||||
"Reddit": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "reddit"}]
|
||||
]
|
||||
],
|
||||
"SeptemberElevenAttacks": [
|
||||
"EVENT",
|
||||
{},
|
||||
[
|
||||
[
|
||||
{"orth": "9/11"}
|
||||
],
|
||||
[
|
||||
{"lower": "september"},
|
||||
{"orth": "11"}
|
||||
]
|
||||
]
|
||||
],
|
||||
"Linux": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "linux"}]
|
||||
]
|
||||
],
|
||||
"Haskell": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "haskell"}]
|
||||
]
|
||||
],
|
||||
"HaskellCurry": [
|
||||
"PERSON",
|
||||
{},
|
||||
[
|
||||
[
|
||||
{"lower": "haskell"},
|
||||
{"lower": "curry"}
|
||||
]
|
||||
]
|
||||
],
|
||||
"Javascript": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "javascript"}]
|
||||
]
|
||||
],
|
||||
"CSS": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "css"}],
|
||||
[{"lower": "css3"}]
|
||||
]
|
||||
],
|
||||
"displaCy": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "displacy"}]
|
||||
]
|
||||
],
|
||||
"spaCy": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "spaCy"}]
|
||||
]
|
||||
],
|
||||
|
||||
"HTML": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "html"}],
|
||||
[{"lower": "html5"}]
|
||||
]
|
||||
],
|
||||
"Python": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Python"}]
|
||||
]
|
||||
],
|
||||
"Ruby": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Ruby"}]
|
||||
]
|
||||
],
|
||||
"Digg": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "digg"}]
|
||||
]
|
||||
],
|
||||
"FoxNews": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Fox"}],
|
||||
[{"orth": "News"}]
|
||||
]
|
||||
],
|
||||
"Google": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "google"}]
|
||||
]
|
||||
],
|
||||
"Mac": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "mac"}]
|
||||
]
|
||||
],
|
||||
"Wikipedia": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "wikipedia"}]
|
||||
]
|
||||
],
|
||||
"Windows": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Windows"}]
|
||||
]
|
||||
],
|
||||
"Dell": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "dell"}]
|
||||
]
|
||||
],
|
||||
"Facebook": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "facebook"}]
|
||||
]
|
||||
],
|
||||
"Blizzard": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Blizzard"}]
|
||||
]
|
||||
],
|
||||
"Ubuntu": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Ubuntu"}]
|
||||
]
|
||||
],
|
||||
"Youtube": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "youtube"}]
|
||||
]
|
||||
],
|
||||
"false_positives": [
|
||||
null,
|
||||
{},
|
||||
[
|
||||
[{"orth": "Shit"}],
|
||||
[{"orth": "Weed"}],
|
||||
[{"orth": "Cool"}],
|
||||
[{"orth": "Btw"}],
|
||||
[{"orth": "Bah"}],
|
||||
[{"orth": "Bullshit"}],
|
||||
[{"orth": "Lol"}],
|
||||
[{"orth": "Yo"}, {"lower": "dawg"}],
|
||||
[{"orth": "Yay"}],
|
||||
[{"orth": "Ahh"}],
|
||||
[{"orth": "Yea"}],
|
||||
[{"orth": "Bah"}]
|
||||
]
|
||||
]
|
||||
}
|
|
@ -1,422 +0,0 @@
|
|||
# -#- coding: utf-8 -*-
|
||||
import json
|
||||
|
||||
contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
|
||||
|
||||
# contains the lemmas, parts of speech, number, and tenspect of
|
||||
# potential tokens generated after splitting contractions off
|
||||
token_properties = {
|
||||
|
||||
"ai": {"L": "be", "pos": "VBP", "number": 2},
|
||||
"are": {"L": "be", "pos": "VBP", "number": 2},
|
||||
"ca": {"L": "can", "pos": "MD"},
|
||||
"can": {"L": "can", "pos": "MD"},
|
||||
"could": {"pos": "MD", "L": "could"},
|
||||
"'d": {"L": "would", "pos": "MD"},
|
||||
"did": {"L": "do", "pos": "VBD"},
|
||||
"do": {"L": "do"},
|
||||
"does": {"L": "do", "pos": "VBZ"},
|
||||
"had": {"L": "have", "pos": "VBD"},
|
||||
"has": {"L": "have", "pos": "VBZ"},
|
||||
"have": {"pos": "VB"},
|
||||
"he": {"L": "-PRON-", "pos": "PRP"},
|
||||
"how": {},
|
||||
"i": {"L": "-PRON-", "pos": "PRP"},
|
||||
"is": {"L": "be", "pos": "VBZ"},
|
||||
"it": {"L": "-PRON-", "pos": "PRP"},
|
||||
"'ll": {"L": "will", "pos": "MD"},
|
||||
"'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1},
|
||||
"'ma": {},
|
||||
"might": {},
|
||||
"must": {},
|
||||
"need": {},
|
||||
"not": {"L": "not", "pos": "RB"},
|
||||
"'nt": {"L": "not", "pos": "RB"},
|
||||
"n't": {"L": "not", "pos": "RB"},
|
||||
"'re": {"L": "be", "pos": "VBZ"},
|
||||
"'s": {}, # no POS or lemma for s?
|
||||
"sha": {"L": "shall", "pos": "MD"},
|
||||
"she": {"L": "-PRON-", "pos": "PRP"},
|
||||
"should": {},
|
||||
"that": {},
|
||||
"there": {},
|
||||
"they": {"L": "-PRON-", "pos": "PRP"},
|
||||
"was": {},
|
||||
"we": {"L": "-PRON-", "pos": "PRP"},
|
||||
"were": {},
|
||||
"what": {},
|
||||
"when": {},
|
||||
"where": {},
|
||||
"who": {},
|
||||
"why": {},
|
||||
"wo": {},
|
||||
"would": {},
|
||||
"you": {"L": "-PRON-", "pos": "PRP"},
|
||||
"'ve": {"L": "have", "pos": "VB"}
|
||||
}
|
||||
|
||||
# contains starting tokens with their potential contractions
|
||||
# each potential contraction has a list of exceptions
|
||||
# lower - don't generate the lowercase version
|
||||
# upper - don't generate the uppercase version
|
||||
# contrLower - don't generate the lowercase version with apostrophe (') removed
|
||||
# contrUpper - dont' generate the uppercase version with apostrophe (') removed
|
||||
# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so
|
||||
# we add "contrLower" and "contrUpper" to the exceptions list
|
||||
starting_tokens = {
|
||||
|
||||
"ai": {"n't": []},
|
||||
"are": {"n't": []},
|
||||
"ca": {"n't": []},
|
||||
"can": {"not": []},
|
||||
"could": {"'ve": [], "n't": [], "n't've": []},
|
||||
"did": {"n't": []},
|
||||
"does": {"n't": []},
|
||||
"do": {"n't": []},
|
||||
"had": {"n't": [], "n't've": []},
|
||||
"has": {"n't": []},
|
||||
"have": {"n't": []},
|
||||
"he": {"'d": [], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []},
|
||||
"how": {"'d": [], "'ll": [], "'s": []},
|
||||
"i": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'m": [], "'ma": [], "'ve": []},
|
||||
"is": {"n't": []},
|
||||
"it": {"'d": [], "'d've": [], "'ll": [], "'s": ["contrLower", "contrUpper"]},
|
||||
"might": {"n't": [], "n't've": [], "'ve": []},
|
||||
"must": {"n't": [], "'ve": []},
|
||||
"need": {"n't": []},
|
||||
"not": {"'ve": []},
|
||||
"sha": {"n't": []},
|
||||
"she": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []},
|
||||
"should": {"'ve": [], "n't": [], "n't've": []},
|
||||
"that": {"'s": []},
|
||||
"there": {"'d": [], "'d've": [], "'s": ["contrLower", "contrUpper"], "'ll": []},
|
||||
"they": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []},
|
||||
"was": {"n't": []},
|
||||
"we": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'re": ["contrLower", "contrUpper"], "'ve": []},
|
||||
"were": {"n't": []},
|
||||
"what": {"'ll": [], "'re": [], "'s": [], "'ve": []},
|
||||
"when": {"'s": []},
|
||||
"where": {"'d": [], "'s": [], "'ve": []},
|
||||
"who": {"'d": [], "'ll": [], "'re": ["contrLower", "contrUpper"], "'s": [], "'ve": []},
|
||||
"why": {"'ll": [], "'re": [], "'s": []},
|
||||
"wo": {"n't": []},
|
||||
"would": {"'ve": [], "n't": [], "n't've": []},
|
||||
"you": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []}
|
||||
|
||||
}
|
||||
|
||||
# other specials that don't really have contractions
|
||||
# so they are hardcoded
|
||||
hardcoded_specials = {
|
||||
"let's": [{"F": "let"}, {"F": "'s", "L": "us"}],
|
||||
"Let's": [{"F": "Let"}, {"F": "'s", "L": "us"}],
|
||||
|
||||
"'s": [{"F": "'s", "L": "'s"}],
|
||||
|
||||
"'S": [{"F": "'S", "L": "'s"}],
|
||||
u"\u2018s": [{"F": u"\u2018s", "L": "'s"}],
|
||||
u"\u2018S": [{"F": u"\u2018S", "L": "'s"}],
|
||||
|
||||
"'em": [{"F": "'em"}],
|
||||
|
||||
"'ol": [{"F": "'ol"}],
|
||||
|
||||
"vs.": [{"F": "vs."}],
|
||||
|
||||
"Ms.": [{"F": "Ms."}],
|
||||
"Mr.": [{"F": "Mr."}],
|
||||
"Dr.": [{"F": "Dr."}],
|
||||
"Mrs.": [{"F": "Mrs."}],
|
||||
"Messrs.": [{"F": "Messrs."}],
|
||||
"Gov.": [{"F": "Gov."}],
|
||||
"Gen.": [{"F": "Gen."}],
|
||||
|
||||
"Mt.": [{"F": "Mt.", "L": "Mount"}],
|
||||
|
||||
"''": [{"F": "''"}],
|
||||
|
||||
"—": [{"F": "—", "L": "--", "pos": ":"}],
|
||||
|
||||
"Corp.": [{"F": "Corp."}],
|
||||
"Inc.": [{"F": "Inc."}],
|
||||
"Co.": [{"F": "Co."}],
|
||||
"co.": [{"F": "co."}],
|
||||
"Ltd.": [{"F": "Ltd."}],
|
||||
"Bros.": [{"F": "Bros."}],
|
||||
|
||||
"Rep.": [{"F": "Rep."}],
|
||||
"Sen.": [{"F": "Sen."}],
|
||||
"Jr.": [{"F": "Jr."}],
|
||||
"Rev.": [{"F": "Rev."}],
|
||||
"Adm.": [{"F": "Adm."}],
|
||||
"St.": [{"F": "St."}],
|
||||
|
||||
"a.m.": [{"F": "a.m."}],
|
||||
"p.m.": [{"F": "p.m."}],
|
||||
|
||||
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
|
||||
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
|
||||
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
|
||||
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
|
||||
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
|
||||
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
|
||||
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
|
||||
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
|
||||
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
|
||||
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
|
||||
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
|
||||
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
|
||||
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
|
||||
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
|
||||
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
|
||||
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
|
||||
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
|
||||
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
|
||||
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
|
||||
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
|
||||
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
|
||||
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
|
||||
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
|
||||
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
|
||||
|
||||
|
||||
"p.m.": [{"F": "p.m."}],
|
||||
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
|
||||
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
|
||||
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
|
||||
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
|
||||
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
|
||||
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
|
||||
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
|
||||
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
|
||||
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
|
||||
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
|
||||
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
|
||||
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
|
||||
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
|
||||
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
|
||||
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
|
||||
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
|
||||
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
|
||||
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
|
||||
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
|
||||
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
|
||||
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
|
||||
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
|
||||
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
|
||||
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
|
||||
|
||||
"Jan.": [{"F": "Jan."}],
|
||||
"Feb.": [{"F": "Feb."}],
|
||||
"Mar.": [{"F": "Mar."}],
|
||||
"Apr.": [{"F": "Apr."}],
|
||||
"May.": [{"F": "May."}],
|
||||
"Jun.": [{"F": "Jun."}],
|
||||
"Jul.": [{"F": "Jul."}],
|
||||
"Aug.": [{"F": "Aug."}],
|
||||
"Sep.": [{"F": "Sep."}],
|
||||
"Sept.": [{"F": "Sept."}],
|
||||
"Oct.": [{"F": "Oct."}],
|
||||
"Nov.": [{"F": "Nov."}],
|
||||
"Dec.": [{"F": "Dec."}],
|
||||
|
||||
"Ala.": [{"F": "Ala."}],
|
||||
"Ariz.": [{"F": "Ariz."}],
|
||||
"Ark.": [{"F": "Ark."}],
|
||||
"Calif.": [{"F": "Calif."}],
|
||||
"Colo.": [{"F": "Colo."}],
|
||||
"Conn.": [{"F": "Conn."}],
|
||||
"Del.": [{"F": "Del."}],
|
||||
"D.C.": [{"F": "D.C."}],
|
||||
"Fla.": [{"F": "Fla."}],
|
||||
"Ga.": [{"F": "Ga."}],
|
||||
"Ill.": [{"F": "Ill."}],
|
||||
"Ind.": [{"F": "Ind."}],
|
||||
"Kans.": [{"F": "Kans."}],
|
||||
"Kan.": [{"F": "Kan."}],
|
||||
"Ky.": [{"F": "Ky."}],
|
||||
"La.": [{"F": "La."}],
|
||||
"Md.": [{"F": "Md."}],
|
||||
"Mass.": [{"F": "Mass."}],
|
||||
"Mich.": [{"F": "Mich."}],
|
||||
"Minn.": [{"F": "Minn."}],
|
||||
"Miss.": [{"F": "Miss."}],
|
||||
"Mo.": [{"F": "Mo."}],
|
||||
"Mont.": [{"F": "Mont."}],
|
||||
"Nebr.": [{"F": "Nebr."}],
|
||||
"Neb.": [{"F": "Neb."}],
|
||||
"Nev.": [{"F": "Nev."}],
|
||||
"N.H.": [{"F": "N.H."}],
|
||||
"N.J.": [{"F": "N.J."}],
|
||||
"N.M.": [{"F": "N.M."}],
|
||||
"N.Y.": [{"F": "N.Y."}],
|
||||
"N.C.": [{"F": "N.C."}],
|
||||
"N.D.": [{"F": "N.D."}],
|
||||
"Okla.": [{"F": "Okla."}],
|
||||
"Ore.": [{"F": "Ore."}],
|
||||
"Pa.": [{"F": "Pa."}],
|
||||
"Tenn.": [{"F": "Tenn."}],
|
||||
"Va.": [{"F": "Va."}],
|
||||
"Wash.": [{"F": "Wash."}],
|
||||
"Wis.": [{"F": "Wis."}],
|
||||
|
||||
":)": [{"F": ":)"}],
|
||||
"<3": [{"F": "<3"}],
|
||||
";)": [{"F": ";)"}],
|
||||
"(:": [{"F": "(:"}],
|
||||
":(": [{"F": ":("}],
|
||||
"-_-": [{"F": "-_-"}],
|
||||
"=)": [{"F": "=)"}],
|
||||
":/": [{"F": ":/"}],
|
||||
":>": [{"F": ":>"}],
|
||||
";-)": [{"F": ";-)"}],
|
||||
":Y": [{"F": ":Y"}],
|
||||
":P": [{"F": ":P"}],
|
||||
":-P": [{"F": ":-P"}],
|
||||
":3": [{"F": ":3"}],
|
||||
"=3": [{"F": "=3"}],
|
||||
"xD": [{"F": "xD"}],
|
||||
"^_^": [{"F": "^_^"}],
|
||||
"=]": [{"F": "=]"}],
|
||||
"=D": [{"F": "=D"}],
|
||||
"<333": [{"F": "<333"}],
|
||||
":))": [{"F": ":))"}],
|
||||
":0": [{"F": ":0"}],
|
||||
"-__-": [{"F": "-__-"}],
|
||||
"xDD": [{"F": "xDD"}],
|
||||
"o_o": [{"F": "o_o"}],
|
||||
"o_O": [{"F": "o_O"}],
|
||||
"V_V": [{"F": "V_V"}],
|
||||
"=[[": [{"F": "=[["}],
|
||||
"<33": [{"F": "<33"}],
|
||||
";p": [{"F": ";p"}],
|
||||
";D": [{"F": ";D"}],
|
||||
";-p": [{"F": ";-p"}],
|
||||
";(": [{"F": ";("}],
|
||||
":p": [{"F": ":p"}],
|
||||
":]": [{"F": ":]"}],
|
||||
":O": [{"F": ":O"}],
|
||||
":-/": [{"F": ":-/"}],
|
||||
":-)": [{"F": ":-)"}],
|
||||
":(((": [{"F": ":((("}],
|
||||
":((": [{"F": ":(("}],
|
||||
":')": [{"F": ":')"}],
|
||||
"(^_^)": [{"F": "(^_^)"}],
|
||||
"(=": [{"F": "(="}],
|
||||
"o.O": [{"F": "o.O"}],
|
||||
"\")": [{"F": "\")"}],
|
||||
"a.": [{"F": "a."}],
|
||||
"b.": [{"F": "b."}],
|
||||
"c.": [{"F": "c."}],
|
||||
"d.": [{"F": "d."}],
|
||||
"e.": [{"F": "e."}],
|
||||
"f.": [{"F": "f."}],
|
||||
"g.": [{"F": "g."}],
|
||||
"h.": [{"F": "h."}],
|
||||
"i.": [{"F": "i."}],
|
||||
"j.": [{"F": "j."}],
|
||||
"k.": [{"F": "k."}],
|
||||
"l.": [{"F": "l."}],
|
||||
"m.": [{"F": "m."}],
|
||||
"n.": [{"F": "n."}],
|
||||
"o.": [{"F": "o."}],
|
||||
"p.": [{"F": "p."}],
|
||||
"q.": [{"F": "q."}],
|
||||
"r.": [{"F": "r."}],
|
||||
"s.": [{"F": "s."}],
|
||||
"t.": [{"F": "t."}],
|
||||
"u.": [{"F": "u."}],
|
||||
"v.": [{"F": "v."}],
|
||||
"w.": [{"F": "w."}],
|
||||
"x.": [{"F": "x."}],
|
||||
"y.": [{"F": "y."}],
|
||||
"z.": [{"F": "z."}],
|
||||
|
||||
"i.e.": [{"F": "i.e."}],
|
||||
"I.e.": [{"F": "I.e."}],
|
||||
"I.E.": [{"F": "I.E."}],
|
||||
"e.g.": [{"F": "e.g."}],
|
||||
"E.g.": [{"F": "E.g."}],
|
||||
"E.G.": [{"F": "E.G."}],
|
||||
"\n": [{"F": "\n", "pos": "SP"}],
|
||||
"\t": [{"F": "\t", "pos": "SP"}],
|
||||
" ": [{"F": " ", "pos": "SP"}],
|
||||
u"\u00a0": [{"F": u"\u00a0", "pos": "SP", "L": " "}]
|
||||
|
||||
}
|
||||
|
||||
def get_double_contractions(ending):
|
||||
endings = []
|
||||
|
||||
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
|
||||
|
||||
while ends_with_contraction:
|
||||
for contraction in contractions:
|
||||
if ending.endswith(contraction):
|
||||
endings.append(contraction)
|
||||
ending = ending.rstrip(contraction)
|
||||
ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions])
|
||||
|
||||
endings.reverse() # reverse because the last ending is put in the list first
|
||||
return endings
|
||||
|
||||
def get_token_properties(token, capitalize=False, remove_contractions=False):
|
||||
props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop
|
||||
if capitalize:
|
||||
token = token.capitalize()
|
||||
if remove_contractions:
|
||||
token = token.replace("'", "")
|
||||
|
||||
props["F"] = token
|
||||
return props
|
||||
|
||||
def create_entry(token, endings, capitalize=False, remove_contractions=False):
|
||||
|
||||
properties = []
|
||||
properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions))
|
||||
for e in endings:
|
||||
properties.append(get_token_properties(e, remove_contractions=remove_contractions))
|
||||
return properties
|
||||
|
||||
def generate_specials():
|
||||
|
||||
specials = {}
|
||||
|
||||
for token in starting_tokens:
|
||||
possible_endings = starting_tokens[token]
|
||||
for ending in possible_endings:
|
||||
|
||||
endings = []
|
||||
if ending.count("'") > 1:
|
||||
endings.extend(get_double_contractions(ending))
|
||||
else:
|
||||
endings.append(ending)
|
||||
|
||||
exceptions = possible_endings[ending]
|
||||
|
||||
if "lower" not in exceptions:
|
||||
special = token + ending
|
||||
specials[special] = create_entry(token, endings)
|
||||
|
||||
if "upper" not in exceptions:
|
||||
special = token.capitalize() + ending
|
||||
specials[special] = create_entry(token, endings, capitalize=True)
|
||||
|
||||
if "contrLower" not in exceptions:
|
||||
special = token + ending.replace("'", "")
|
||||
specials[special] = create_entry(token, endings, remove_contractions=True)
|
||||
|
||||
if "contrUpper" not in exceptions:
|
||||
special = token.capitalize() + ending.replace("'", "")
|
||||
specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True)
|
||||
|
||||
# add in hardcoded specials
|
||||
specials = dict(specials, **hardcoded_specials)
|
||||
|
||||
return specials
|
||||
|
||||
if __name__ == "__main__":
|
||||
specials = generate_specials()
|
||||
with open("specials.json", "w") as file_:
|
||||
file_.write(json.dumps(specials, indent=2))
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
\.\.\.+
|
||||
(?<=[a-z])\.(?=[A-Z])
|
||||
(?<=[a-zA-Z])-(?=[a-zA-z])
|
||||
(?<=[a-zA-Z])--(?=[a-zA-z])
|
||||
(?<=[0-9])-(?=[0-9])
|
||||
(?<=[A-Za-z]),(?=[A-Za-z])
|
|
@ -1,38 +0,0 @@
|
|||
{
|
||||
"noun": [
|
||||
["s", ""],
|
||||
["ses", "s"],
|
||||
["ves", "f"],
|
||||
["xes", "x"],
|
||||
["zes", "z"],
|
||||
["ches", "ch"],
|
||||
["shes", "sh"],
|
||||
["men", "man"],
|
||||
["ies", "y"]
|
||||
],
|
||||
|
||||
"verb": [
|
||||
["s", ""],
|
||||
["ies", "y"],
|
||||
["es", "e"],
|
||||
["es", ""],
|
||||
["ed", "e"],
|
||||
["ed", ""],
|
||||
["ing", "e"],
|
||||
["ing", ""]
|
||||
],
|
||||
|
||||
"adj": [
|
||||
["er", ""],
|
||||
["est", ""],
|
||||
["er", "e"],
|
||||
["est", "e"]
|
||||
],
|
||||
|
||||
"punct": [
|
||||
["“", "\""],
|
||||
["”", "\""],
|
||||
["\u2018", "'"],
|
||||
["\u2019", "'"]
|
||||
]
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
{
|
||||
"PRP": {
|
||||
"I": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Nom"},
|
||||
"me": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc"},
|
||||
"you": {"L": "-PRON-", "PronType": "Prs", "Person": "Two"},
|
||||
"he": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Nom"},
|
||||
"him": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"},
|
||||
"she": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Nom"},
|
||||
"her": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Acc"},
|
||||
"it": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"},
|
||||
"we": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Nom"},
|
||||
"us": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc"},
|
||||
"they": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Nom"},
|
||||
"them": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc"},
|
||||
|
||||
"mine": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"yours": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"his": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"hers": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"its": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"ours": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"yours": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"},
|
||||
"theirs": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"},
|
||||
|
||||
"myself": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc", "Reflex": "Yes"},
|
||||
"yourself": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Case": "Acc", "Reflex": "Yes"},
|
||||
"himself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Masc", "Reflex": "Yes"},
|
||||
"herself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Fem", "Reflex": "Yes"},
|
||||
"itself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Neut", "Reflex": "Yes"},
|
||||
"themself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Reflex": "Yes"},
|
||||
"ourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc", "Reflex": "Yes"},
|
||||
"yourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Case": "Acc", "Reflex": "Yes"},
|
||||
"themselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc", "Reflex": "Yes"}
|
||||
|
||||
},
|
||||
|
||||
"PRP$": {
|
||||
"my": {"L": "-PRON-", "Person": "One", "Number": "Sing", "PronType": "Prs", "Poss": "Yes"},
|
||||
"your": {"L": "-PRON-", "Person": "Two", "PronType": "Prs", "Poss": "Yes"},
|
||||
"his": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Masc", "PronType": "Prs", "Poss": "Yes"},
|
||||
"her": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Fem", "PronType": "Prs", "Poss": "Yes"},
|
||||
"its": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Neut", "PronType": "Prs", "Poss": "Yes"},
|
||||
"our": {"L": "-PRON-", "Person": "One", "Number": "Plur", "PronType": "Prs", "Poss": "Yes"},
|
||||
"their": {"L": "-PRON-", "Person": "Three", "Number": "Plur", "PronType": "Prs", "Poss": "Yes"}
|
||||
},
|
||||
|
||||
"VBZ": {
|
||||
"am": {"L": "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||
"are": {"L": "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||
"is": {"L": "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||
},
|
||||
"VBP": {
|
||||
"are": {"L": "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||
},
|
||||
"VBD": {
|
||||
"was": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||
"were": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
,
|
||||
"
|
||||
(
|
||||
[
|
||||
{
|
||||
*
|
||||
<
|
||||
$
|
||||
£
|
||||
“
|
||||
'
|
||||
``
|
||||
`
|
||||
#
|
||||
US$
|
||||
C$
|
||||
A$
|
||||
a-
|
||||
‘
|
||||
....
|
||||
...
|
File diff suppressed because it is too large
Load Diff
|
@ -1,26 +0,0 @@
|
|||
,
|
||||
\"
|
||||
\)
|
||||
\]
|
||||
\}
|
||||
\*
|
||||
\!
|
||||
\?
|
||||
%
|
||||
\$
|
||||
>
|
||||
:
|
||||
;
|
||||
'
|
||||
”
|
||||
''
|
||||
's
|
||||
'S
|
||||
’s
|
||||
’S
|
||||
’
|
||||
\.\.
|
||||
\.\.\.
|
||||
\.\.\.\.
|
||||
(?<=[a-z0-9)\]"'%\)])\.
|
||||
(?<=[0-9])km
|
|
@ -1,60 +0,0 @@
|
|||
{
|
||||
".": {"pos": "punct", "puncttype": "peri"},
|
||||
",": {"pos": "punct", "puncttype": "comm"},
|
||||
"-LRB-": {"pos": "punct", "puncttype": "brck", "punctside": "ini"},
|
||||
"-RRB-": {"pos": "punct", "puncttype": "brck", "punctside": "fin"},
|
||||
"``": {"pos": "punct", "puncttype": "quot", "punctside": "ini"},
|
||||
"\"\"": {"pos": "punct", "puncttype": "quot", "punctside": "fin"},
|
||||
"''": {"pos": "punct", "puncttype": "quot", "punctside": "fin"},
|
||||
":": {"pos": "punct"},
|
||||
"$": {"pos": "sym", "other": {"symtype": "currency"}},
|
||||
"#": {"pos": "sym", "other": {"symtype": "numbersign"}},
|
||||
"AFX": {"pos": "adj", "hyph": "hyph"},
|
||||
"CC": {"pos": "conj", "conjtype": "coor"},
|
||||
"CD": {"pos": "num", "numtype": "card"},
|
||||
"DT": {"pos": "det"},
|
||||
"EX": {"pos": "adv", "advtype": "ex"},
|
||||
"FW": {"pos": "x", "foreign": "foreign"},
|
||||
"HYPH": {"pos": "punct", "puncttype": "dash"},
|
||||
"IN": {"pos": "adp"},
|
||||
"JJ": {"pos": "adj", "degree": "pos"},
|
||||
"JJR": {"pos": "adj", "degree": "comp"},
|
||||
"JJS": {"pos": "adj", "degree": "sup"},
|
||||
"LS": {"pos": "punct", "numtype": "ord"},
|
||||
"MD": {"pos": "verb", "verbtype": "mod"},
|
||||
"NIL": {"pos": ""},
|
||||
"NN": {"pos": "noun", "number": "sing"},
|
||||
"NNP": {"pos": "propn", "nountype": "prop", "number": "sing"},
|
||||
"NNPS": {"pos": "propn", "nountype": "prop", "number": "plur"},
|
||||
"NNS": {"pos": "noun", "number": "plur"},
|
||||
"PDT": {"pos": "adj", "adjtype": "pdt", "prontype": "prn"},
|
||||
"POS": {"pos": "part", "poss": "poss"},
|
||||
"PRP": {"pos": "pron", "prontype": "prs"},
|
||||
"PRP$": {"pos": "adj", "prontype": "prs", "poss": "poss"},
|
||||
"RB": {"pos": "adv", "degree": "pos"},
|
||||
"RBR": {"pos": "adv", "degree": "comp"},
|
||||
"RBS": {"pos": "adv", "degree": "sup"},
|
||||
"RP": {"pos": "part"},
|
||||
"SYM": {"pos": "sym"},
|
||||
"TO": {"pos": "part", "parttype": "inf", "verbform": "inf"},
|
||||
"UH": {"pos": "intJ"},
|
||||
"VB": {"pos": "verb", "verbform": "inf"},
|
||||
"VBD": {"pos": "verb", "verbform": "fin", "tense": "past"},
|
||||
"VBG": {"pos": "verb", "verbform": "part", "tense": "pres", "aspect": "prog"},
|
||||
"VBN": {"pos": "verb", "verbform": "part", "tense": "past", "aspect": "perf"},
|
||||
"VBP": {"pos": "verb", "verbform": "fin", "tense": "pres"},
|
||||
"VBZ": {"pos": "verb", "verbform": "fin", "tense": "pres", "number": "sing", "person": 3},
|
||||
"WDT": {"pos": "adj", "prontype": "int|rel"},
|
||||
"WP": {"pos": "noun", "prontype": "int|rel"},
|
||||
"WP$": {"pos": "adj", "poss": "poss", "prontype": "int|rel"},
|
||||
"WRB": {"pos": "adv", "prontype": "int|rel"},
|
||||
"SP": {"pos": "space"},
|
||||
"ADD": {"pos": "x"},
|
||||
"NFP": {"pos": "punct"},
|
||||
"GW": {"pos": "x"},
|
||||
"AFX": {"pos": "x"},
|
||||
"HYPH": {"pos": "punct"},
|
||||
"XX": {"pos": "x"},
|
||||
"BES": {"pos": "verb"},
|
||||
"HVS": {"pos": "verb"}
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
\.\.\.
|
||||
(?<=[a-z])\.(?=[A-Z])
|
||||
(?<=[a-zA-Z])-(?=[a-zA-z])
|
|
@ -1 +0,0 @@
|
|||
{}
|
|
@ -1,21 +0,0 @@
|
|||
,
|
||||
"
|
||||
(
|
||||
[
|
||||
{
|
||||
*
|
||||
<
|
||||
$
|
||||
£
|
||||
“
|
||||
'
|
||||
``
|
||||
`
|
||||
#
|
||||
US$
|
||||
C$
|
||||
A$
|
||||
a-
|
||||
‘
|
||||
....
|
||||
...
|
|
@ -1,3 +0,0 @@
|
|||
Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
|
||||
|
||||
Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.
|
|
@ -1,149 +0,0 @@
|
|||
{
|
||||
"a.m.": [{"F": "a.m."}],
|
||||
"p.m.": [{"F": "p.m."}],
|
||||
|
||||
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
|
||||
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
|
||||
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
|
||||
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
|
||||
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
|
||||
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
|
||||
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
|
||||
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
|
||||
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
|
||||
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
|
||||
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
|
||||
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
|
||||
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
|
||||
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
|
||||
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
|
||||
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
|
||||
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
|
||||
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
|
||||
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
|
||||
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
|
||||
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
|
||||
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
|
||||
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
|
||||
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
|
||||
|
||||
|
||||
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
|
||||
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
|
||||
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
|
||||
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
|
||||
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
|
||||
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
|
||||
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
|
||||
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
|
||||
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
|
||||
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
|
||||
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
|
||||
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
|
||||
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
|
||||
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
|
||||
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
|
||||
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
|
||||
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
|
||||
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
|
||||
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
|
||||
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
|
||||
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
|
||||
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
|
||||
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
|
||||
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
|
||||
|
||||
"Jan.": [{"F": "Jan.", "L": "Januar"}],
|
||||
"Feb.": [{"F": "Feb.", "L": "Februar"}],
|
||||
"Mär.": [{"F": "Mär.", "L": "März"}],
|
||||
"Apr.": [{"F": "Apr.", "L": "April"}],
|
||||
"Mai.": [{"F": "Mai.", "L": "Mai"}],
|
||||
"Jun.": [{"F": "Jun.", "L": "Juni"}],
|
||||
"Jul.": [{"F": "Jul.", "L": "Juli"}],
|
||||
"Aug.": [{"F": "Aug.", "L": "August"}],
|
||||
"Sep.": [{"F": "Sep.", "L": "September"}],
|
||||
"Sept.": [{"F": "Sept.", "L": "September"}],
|
||||
"Okt.": [{"F": "Okt.", "L": "Oktober"}],
|
||||
"Nov.": [{"F": "Nov.", "L": "November"}],
|
||||
"Dez.": [{"F": "Dez.", "L": "Dezember"}],
|
||||
|
||||
":)": [{"F": ":)"}],
|
||||
"<3": [{"F": "<3"}],
|
||||
";)": [{"F": ";)"}],
|
||||
"(:": [{"F": "(:"}],
|
||||
":(": [{"F": ":("}],
|
||||
"-_-": [{"F": "-_-"}],
|
||||
"=)": [{"F": "=)"}],
|
||||
":/": [{"F": ":/"}],
|
||||
":>": [{"F": ":>"}],
|
||||
";-)": [{"F": ";-)"}],
|
||||
":Y": [{"F": ":Y"}],
|
||||
":P": [{"F": ":P"}],
|
||||
":-P": [{"F": ":-P"}],
|
||||
":3": [{"F": ":3"}],
|
||||
"=3": [{"F": "=3"}],
|
||||
"xD": [{"F": "xD"}],
|
||||
"^_^": [{"F": "^_^"}],
|
||||
"=]": [{"F": "=]"}],
|
||||
"=D": [{"F": "=D"}],
|
||||
"<333": [{"F": "<333"}],
|
||||
":))": [{"F": ":))"}],
|
||||
":0": [{"F": ":0"}],
|
||||
"-__-": [{"F": "-__-"}],
|
||||
"xDD": [{"F": "xDD"}],
|
||||
"o_o": [{"F": "o_o"}],
|
||||
"o_O": [{"F": "o_O"}],
|
||||
"V_V": [{"F": "V_V"}],
|
||||
"=[[": [{"F": "=[["}],
|
||||
"<33": [{"F": "<33"}],
|
||||
";p": [{"F": ";p"}],
|
||||
";D": [{"F": ";D"}],
|
||||
";-p": [{"F": ";-p"}],
|
||||
";(": [{"F": ";("}],
|
||||
":p": [{"F": ":p"}],
|
||||
":]": [{"F": ":]"}],
|
||||
":O": [{"F": ":O"}],
|
||||
":-/": [{"F": ":-/"}],
|
||||
":-)": [{"F": ":-)"}],
|
||||
":(((": [{"F": ":((("}],
|
||||
":((": [{"F": ":(("}],
|
||||
":')": [{"F": ":')"}],
|
||||
"(^_^)": [{"F": "(^_^)"}],
|
||||
"(=": [{"F": "(="}],
|
||||
"o.O": [{"F": "o.O"}],
|
||||
"\")": [{"F": "\")"}],
|
||||
"a.": [{"F": "a."}],
|
||||
"b.": [{"F": "b."}],
|
||||
"c.": [{"F": "c."}],
|
||||
"d.": [{"F": "d."}],
|
||||
"e.": [{"F": "e."}],
|
||||
"f.": [{"F": "f."}],
|
||||
"g.": [{"F": "g."}],
|
||||
"h.": [{"F": "h."}],
|
||||
"i.": [{"F": "i."}],
|
||||
"j.": [{"F": "j."}],
|
||||
"k.": [{"F": "k."}],
|
||||
"l.": [{"F": "l."}],
|
||||
"m.": [{"F": "m."}],
|
||||
"n.": [{"F": "n."}],
|
||||
"o.": [{"F": "o."}],
|
||||
"p.": [{"F": "p."}],
|
||||
"q.": [{"F": "q."}],
|
||||
"s.": [{"F": "s."}],
|
||||
"t.": [{"F": "t."}],
|
||||
"u.": [{"F": "u."}],
|
||||
"v.": [{"F": "v."}],
|
||||
"w.": [{"F": "w."}],
|
||||
"x.": [{"F": "x."}],
|
||||
"y.": [{"F": "y."}],
|
||||
"z.": [{"F": "z."}],
|
||||
|
||||
"z.b.": [{"F": "z.b."}],
|
||||
"e.h.": [{"F": "I.e."}],
|
||||
"o.ä.": [{"F": "I.E."}],
|
||||
"bzw.": [{"F": "bzw."}],
|
||||
"usw.": [{"F": "usw."}],
|
||||
"\n": [{"F": "\n", "pos": "SP"}],
|
||||
"\t": [{"F": "\t", "pos": "SP"}],
|
||||
" ": [{"F": " ", "pos": "SP"}]
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
,
|
||||
\"
|
||||
\)
|
||||
\]
|
||||
\}
|
||||
\*
|
||||
\!
|
||||
\?
|
||||
%
|
||||
\$
|
||||
>
|
||||
:
|
||||
;
|
||||
'
|
||||
”
|
||||
''
|
||||
's
|
||||
'S
|
||||
’s
|
||||
’S
|
||||
’
|
||||
\.\.
|
||||
\.\.\.
|
||||
\.\.\.\.
|
||||
(?<=[a-z0-9)\]"'%\)])\.
|
||||
(?<=[0-9])km
|
|
@ -1,19 +0,0 @@
|
|||
{
|
||||
"NOUN": {"pos": "NOUN"},
|
||||
"VERB": {"pos": "VERB"},
|
||||
"PUNCT": {"pos": "PUNCT"},
|
||||
"ADV": {"pos": "ADV"},
|
||||
"ADJ": {"pos": "ADJ"},
|
||||
"PRON": {"pos": "PRON"},
|
||||
"PROPN": {"pos": "PROPN"},
|
||||
"CONJ": {"pos": "CONJ"},
|
||||
"NUM": {"pos": "NUM"},
|
||||
"AUX": {"pos": "AUX"},
|
||||
"SCONJ": {"pos": "SCONJ"},
|
||||
"ADP": {"pos": "ADP"},
|
||||
"SYM": {"pos": "SYM"},
|
||||
"X": {"pos": "X"},
|
||||
"INTJ": {"pos": "INTJ"},
|
||||
"DET": {"pos": "DET"},
|
||||
"PART": {"pos": "PART"}
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
\.\.\.
|
||||
(?<=[a-z])\.(?=[A-Z])
|
||||
(?<=[a-zA-Z])-(?=[a-zA-z])
|
|
@ -1,21 +0,0 @@
|
|||
,
|
||||
"
|
||||
(
|
||||
[
|
||||
{
|
||||
*
|
||||
<
|
||||
$
|
||||
£
|
||||
“
|
||||
'
|
||||
``
|
||||
`
|
||||
#
|
||||
US$
|
||||
C$
|
||||
A$
|
||||
a-
|
||||
‘
|
||||
....
|
||||
...
|
|
@ -1,149 +0,0 @@
|
|||
{
|
||||
"a.m.": [{"F": "a.m."}],
|
||||
"p.m.": [{"F": "p.m."}],
|
||||
|
||||
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
|
||||
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
|
||||
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
|
||||
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
|
||||
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
|
||||
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
|
||||
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
|
||||
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
|
||||
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
|
||||
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
|
||||
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
|
||||
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
|
||||
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
|
||||
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
|
||||
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
|
||||
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
|
||||
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
|
||||
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
|
||||
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
|
||||
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
|
||||
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
|
||||
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
|
||||
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
|
||||
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
|
||||
|
||||
|
||||
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
|
||||
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
|
||||
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
|
||||
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
|
||||
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
|
||||
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
|
||||
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
|
||||
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
|
||||
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
|
||||
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
|
||||
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
|
||||
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
|
||||
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
|
||||
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
|
||||
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
|
||||
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
|
||||
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
|
||||
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
|
||||
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
|
||||
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
|
||||
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
|
||||
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
|
||||
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
|
||||
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
|
||||
|
||||
"Jan.": [{"F": "Jan.", "L": "Januar"}],
|
||||
"Feb.": [{"F": "Feb.", "L": "Februar"}],
|
||||
"Mär.": [{"F": "Mär.", "L": "März"}],
|
||||
"Apr.": [{"F": "Apr.", "L": "April"}],
|
||||
"Mai.": [{"F": "Mai.", "L": "Mai"}],
|
||||
"Jun.": [{"F": "Jun.", "L": "Juni"}],
|
||||
"Jul.": [{"F": "Jul.", "L": "Juli"}],
|
||||
"Aug.": [{"F": "Aug.", "L": "August"}],
|
||||
"Sep.": [{"F": "Sep.", "L": "September"}],
|
||||
"Sept.": [{"F": "Sept.", "L": "September"}],
|
||||
"Okt.": [{"F": "Okt.", "L": "Oktober"}],
|
||||
"Nov.": [{"F": "Nov.", "L": "November"}],
|
||||
"Dez.": [{"F": "Dez.", "L": "Dezember"}],
|
||||
|
||||
":)": [{"F": ":)"}],
|
||||
"<3": [{"F": "<3"}],
|
||||
";)": [{"F": ";)"}],
|
||||
"(:": [{"F": "(:"}],
|
||||
":(": [{"F": ":("}],
|
||||
"-_-": [{"F": "-_-"}],
|
||||
"=)": [{"F": "=)"}],
|
||||
":/": [{"F": ":/"}],
|
||||
":>": [{"F": ":>"}],
|
||||
";-)": [{"F": ";-)"}],
|
||||
":Y": [{"F": ":Y"}],
|
||||
":P": [{"F": ":P"}],
|
||||
":-P": [{"F": ":-P"}],
|
||||
":3": [{"F": ":3"}],
|
||||
"=3": [{"F": "=3"}],
|
||||
"xD": [{"F": "xD"}],
|
||||
"^_^": [{"F": "^_^"}],
|
||||
"=]": [{"F": "=]"}],
|
||||
"=D": [{"F": "=D"}],
|
||||
"<333": [{"F": "<333"}],
|
||||
":))": [{"F": ":))"}],
|
||||
":0": [{"F": ":0"}],
|
||||
"-__-": [{"F": "-__-"}],
|
||||
"xDD": [{"F": "xDD"}],
|
||||
"o_o": [{"F": "o_o"}],
|
||||
"o_O": [{"F": "o_O"}],
|
||||
"V_V": [{"F": "V_V"}],
|
||||
"=[[": [{"F": "=[["}],
|
||||
"<33": [{"F": "<33"}],
|
||||
";p": [{"F": ";p"}],
|
||||
";D": [{"F": ";D"}],
|
||||
";-p": [{"F": ";-p"}],
|
||||
";(": [{"F": ";("}],
|
||||
":p": [{"F": ":p"}],
|
||||
":]": [{"F": ":]"}],
|
||||
":O": [{"F": ":O"}],
|
||||
":-/": [{"F": ":-/"}],
|
||||
":-)": [{"F": ":-)"}],
|
||||
":(((": [{"F": ":((("}],
|
||||
":((": [{"F": ":(("}],
|
||||
":')": [{"F": ":')"}],
|
||||
"(^_^)": [{"F": "(^_^)"}],
|
||||
"(=": [{"F": "(="}],
|
||||
"o.O": [{"F": "o.O"}],
|
||||
"\")": [{"F": "\")"}],
|
||||
"a.": [{"F": "a."}],
|
||||
"b.": [{"F": "b."}],
|
||||
"c.": [{"F": "c."}],
|
||||
"d.": [{"F": "d."}],
|
||||
"e.": [{"F": "e."}],
|
||||
"f.": [{"F": "f."}],
|
||||
"g.": [{"F": "g."}],
|
||||
"h.": [{"F": "h."}],
|
||||
"i.": [{"F": "i."}],
|
||||
"j.": [{"F": "j."}],
|
||||
"k.": [{"F": "k."}],
|
||||
"l.": [{"F": "l."}],
|
||||
"m.": [{"F": "m."}],
|
||||
"n.": [{"F": "n."}],
|
||||
"o.": [{"F": "o."}],
|
||||
"p.": [{"F": "p."}],
|
||||
"q.": [{"F": "q."}],
|
||||
"s.": [{"F": "s."}],
|
||||
"t.": [{"F": "t."}],
|
||||
"u.": [{"F": "u."}],
|
||||
"v.": [{"F": "v."}],
|
||||
"w.": [{"F": "w."}],
|
||||
"x.": [{"F": "x."}],
|
||||
"y.": [{"F": "y."}],
|
||||
"z.": [{"F": "z."}],
|
||||
|
||||
"z.b.": [{"F": "z.b."}],
|
||||
"e.h.": [{"F": "I.e."}],
|
||||
"o.ä.": [{"F": "I.E."}],
|
||||
"bzw.": [{"F": "bzw."}],
|
||||
"usw.": [{"F": "usw."}],
|
||||
"\n": [{"F": "\n", "pos": "SP"}],
|
||||
"\t": [{"F": "\t", "pos": "SP"}],
|
||||
" ": [{"F": " ", "pos": "SP"}]
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
,
|
||||
\"
|
||||
\)
|
||||
\]
|
||||
\}
|
||||
\*
|
||||
\!
|
||||
\?
|
||||
%
|
||||
\$
|
||||
>
|
||||
:
|
||||
;
|
||||
'
|
||||
”
|
||||
''
|
||||
's
|
||||
'S
|
||||
’s
|
||||
’S
|
||||
’
|
||||
\.\.
|
||||
\.\.\.
|
||||
\.\.\.\.
|
||||
(?<=[a-z0-9)\]"'%\)])\.
|
||||
(?<=[0-9])km
|
|
@ -1,44 +0,0 @@
|
|||
{
|
||||
"S": {"pos": "NOUN"},
|
||||
"E": {"pos": "ADP"},
|
||||
"RD": {"pos": "DET"},
|
||||
"V": {"pos": "VERB"},
|
||||
"_": {"pos": "NO_TAG"},
|
||||
"A": {"pos": "ADJ"},
|
||||
"SP": {"pos": "PROPN"},
|
||||
"FF": {"pos": "PUNCT"},
|
||||
"FS": {"pos": "PUNCT"},
|
||||
"B": {"pos": "ADV"},
|
||||
"CC": {"pos": "CONJ"},
|
||||
"FB": {"pos": "PUNCT"},
|
||||
"VA": {"pos": "AUX"},
|
||||
"PC": {"pos": "PRON"},
|
||||
"N": {"pos": "NUM"},
|
||||
"RI": {"pos": "DET"},
|
||||
"PR": {"pos": "PRON"},
|
||||
"CS": {"pos": "SCONJ"},
|
||||
"BN": {"pos": "ADV"},
|
||||
"AP": {"pos": "DET"},
|
||||
"VM": {"pos": "AUX"},
|
||||
"DI": {"pos": "DET"},
|
||||
"FC": {"pos": "PUNCT"},
|
||||
"PI": {"pos": "PRON"},
|
||||
"DD": {"pos": "DET"},
|
||||
"DQ": {"pos": "DET"},
|
||||
"PQ": {"pos": "PRON"},
|
||||
"PD": {"pos": "PRON"},
|
||||
"NO": {"pos": "ADJ"},
|
||||
"PE": {"pos": "PRON"},
|
||||
"T": {"pos": "DET"},
|
||||
"X": {"pos": "SYM"},
|
||||
"SW": {"pos": "X"},
|
||||
"NO": {"pos": "PRON"},
|
||||
"I": {"pos": "INTJ"},
|
||||
"X": {"pos": "X"},
|
||||
"DR": {"pos": "DET"},
|
||||
"EA": {"pos": "ADP"},
|
||||
"PP": {"pos": "PRON"},
|
||||
"X": {"pos": "NUM"},
|
||||
"DE": {"pos": "DET"},
|
||||
"X": {"pos": "PART"}
|
||||
}
|
|
@ -1,194 +0,0 @@
|
|||
{
|
||||
"Reddit": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "reddit"}]
|
||||
]
|
||||
],
|
||||
"SeptemberElevenAttacks": [
|
||||
"EVENT",
|
||||
{},
|
||||
[
|
||||
[
|
||||
{"orth": "9/11"}
|
||||
],
|
||||
[
|
||||
{"lower": "september"},
|
||||
{"orth": "11"}
|
||||
]
|
||||
]
|
||||
],
|
||||
"Linux": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "linux"}]
|
||||
]
|
||||
],
|
||||
"Haskell": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "haskell"}]
|
||||
]
|
||||
],
|
||||
"HaskellCurry": [
|
||||
"PERSON",
|
||||
{},
|
||||
[
|
||||
[
|
||||
{"lower": "haskell"},
|
||||
{"lower": "curry"}
|
||||
]
|
||||
]
|
||||
],
|
||||
"Javascript": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "javascript"}]
|
||||
]
|
||||
],
|
||||
"CSS": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "css"}],
|
||||
[{"lower": "css3"}]
|
||||
]
|
||||
],
|
||||
"displaCy": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "displacy"}]
|
||||
]
|
||||
],
|
||||
"spaCy": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "spaCy"}]
|
||||
]
|
||||
],
|
||||
|
||||
"HTML": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "html"}],
|
||||
[{"lower": "html5"}]
|
||||
]
|
||||
],
|
||||
"Python": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Python"}]
|
||||
]
|
||||
],
|
||||
"Ruby": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Ruby"}]
|
||||
]
|
||||
],
|
||||
"Digg": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "digg"}]
|
||||
]
|
||||
],
|
||||
"FoxNews": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Fox"}],
|
||||
[{"orth": "News"}]
|
||||
]
|
||||
],
|
||||
"Google": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "google"}]
|
||||
]
|
||||
],
|
||||
"Mac": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "mac"}]
|
||||
]
|
||||
],
|
||||
"Wikipedia": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "wikipedia"}]
|
||||
]
|
||||
],
|
||||
"Windows": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Windows"}]
|
||||
]
|
||||
],
|
||||
"Dell": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "dell"}]
|
||||
]
|
||||
],
|
||||
"Facebook": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"lower": "facebook"}]
|
||||
]
|
||||
],
|
||||
"Blizzard": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Blizzard"}]
|
||||
]
|
||||
],
|
||||
"Ubuntu": [
|
||||
"ORG",
|
||||
{},
|
||||
[
|
||||
[{"orth": "Ubuntu"}]
|
||||
]
|
||||
],
|
||||
"Youtube": [
|
||||
"PRODUCT",
|
||||
{},
|
||||
[
|
||||
[{"lower": "youtube"}]
|
||||
]
|
||||
],
|
||||
"false_positives": [
|
||||
null,
|
||||
{},
|
||||
[
|
||||
[{"orth": "Shit"}],
|
||||
[{"orth": "Weed"}],
|
||||
[{"orth": "Cool"}],
|
||||
[{"orth": "Btw"}],
|
||||
[{"orth": "Bah"}],
|
||||
[{"orth": "Bullshit"}],
|
||||
[{"orth": "Lol"}],
|
||||
[{"orth": "Yo"}, {"lower": "dawg"}],
|
||||
[{"orth": "Yay"}],
|
||||
[{"orth": "Ahh"}],
|
||||
[{"orth": "Yea"}],
|
||||
[{"orth": "Bah"}]
|
||||
]
|
||||
]
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
\.\.\.
|
||||
(?<=[a-z])\.(?=[A-Z])
|
||||
(?<=[a-zA-Z])-(?=[a-zA-z])
|
||||
(?<=[a-zA-Z])--(?=[a-zA-z])
|
||||
(?<=[0-9])-(?=[0-9])
|
||||
(?<=[A-Za-z]),(?=[A-Za-z])
|
|
@ -1 +0,0 @@
|
|||
{}
|
|
@ -1,21 +0,0 @@
|
|||
,
|
||||
"
|
||||
(
|
||||
[
|
||||
{
|
||||
*
|
||||
<
|
||||
$
|
||||
£
|
||||
“
|
||||
'
|
||||
``
|
||||
`
|
||||
#
|
||||
US$
|
||||
C$
|
||||
A$
|
||||
a-
|
||||
‘
|
||||
....
|
||||
...
|
|
@ -1 +0,0 @@
|
|||
{}
|
|
@ -1,26 +0,0 @@
|
|||
,
|
||||
\"
|
||||
\)
|
||||
\]
|
||||
\}
|
||||
\*
|
||||
\!
|
||||
\?
|
||||
%
|
||||
\$
|
||||
>
|
||||
:
|
||||
;
|
||||
'
|
||||
”
|
||||
''
|
||||
's
|
||||
'S
|
||||
’s
|
||||
’S
|
||||
’
|
||||
\.\.
|
||||
\.\.\.
|
||||
\.\.\.\.
|
||||
(?<=[a-z0-9)\]"'%\)])\.
|
||||
(?<=[0-9])km
|
|
@ -1,43 +0,0 @@
|
|||
{
|
||||
"NR": {"pos": "PROPN"},
|
||||
"AD": {"pos": "ADV"},
|
||||
"NN": {"pos": "NOUN"},
|
||||
"CD": {"pos": "NUM"},
|
||||
"DEG": {"pos": "PART"},
|
||||
"PN": {"pos": "PRON"},
|
||||
"M": {"pos": "PART"},
|
||||
"JJ": {"pos": "ADJ"},
|
||||
"DEC": {"pos": "PART"},
|
||||
"NT": {"pos": "NOUN"},
|
||||
"DT": {"pos": "DET"},
|
||||
"LC": {"pos": "PART"},
|
||||
"CC": {"pos": "CONJ"},
|
||||
"AS": {"pos": "PART"},
|
||||
"SP": {"pos": "PART"},
|
||||
"IJ": {"pos": "INTJ"},
|
||||
"OD": {"pos": "NUM"},
|
||||
"MSP": {"pos": "PART"},
|
||||
"CS": {"pos": "SCONJ"},
|
||||
"ETC": {"pos": "PART"},
|
||||
"DEV": {"pos": "PART"},
|
||||
"BA": {"pos": "AUX"},
|
||||
"SB": {"pos": "AUX"},
|
||||
"DER": {"pos": "PART"},
|
||||
"LB": {"pos": "AUX"},
|
||||
"P": {"pos": "ADP"},
|
||||
"URL": {"pos": "SYM"},
|
||||
"FRAG": {"pos": "X"},
|
||||
"X": {"pos": "X"},
|
||||
"ON": {"pos": "X"},
|
||||
"FW": {"pos": "X"},
|
||||
"VC": {"pos": "VERB"},
|
||||
"VV": {"pos": "VERB"},
|
||||
"VA": {"pos": "VERB"},
|
||||
"VE": {"pos": "VERB"},
|
||||
"PU": {"pos": "PUNCT"},
|
||||
"SP": {"pos": "SPACE"},
|
||||
"NP": {"pos": "X"},
|
||||
"_": {"pos": "X"},
|
||||
"VP": {"pos": "X"},
|
||||
"CHAR": {"pos": "X"}
|
||||
}
|
Loading…
Reference in New Issue
Block a user