From 4a1e206064d18180ce1b1fa045f5a7dbae3b001c Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 7 Dec 2016 20:29:35 +0100 Subject: [PATCH] Remove old lang_data directory --- lang_data/de/abbrev.de.tab | 319 -- lang_data/de/gazetteer.json | 194 -- lang_data/de/generate_specials.py | 334 -- lang_data/de/infix.txt | 6 - lang_data/de/lemma_rules.json | 1 - lang_data/de/morphs.json | 71 - lang_data/de/prefix.txt | 27 - lang_data/de/sample.txt | 3 - lang_data/de/specials.json | 1483 --------- lang_data/de/suffix.txt | 73 - lang_data/de/tag_map.json | 59 - lang_data/en/LICENSE | 20 - lang_data/en/gazetteer.json | 194 -- lang_data/en/generate_specials.py | 422 --- lang_data/en/infix.txt | 6 - lang_data/en/lemma_rules.json | 38 - lang_data/en/morphs.json | 59 - lang_data/en/prefix.txt | 21 - lang_data/en/specials.json | 4924 ----------------------------- lang_data/en/suffix.txt | 26 - lang_data/en/tag_map.json | 60 - lang_data/fi/infix.txt | 3 - lang_data/fi/lemma_rules.json | 1 - lang_data/fi/morphs.json | 0 lang_data/fi/prefix.txt | 21 - lang_data/fi/sample.txt | 3 - lang_data/fi/specials.json | 149 - lang_data/fi/suffix.txt | 26 - lang_data/fi/tag_map.json | 19 - lang_data/it/infix.txt | 3 - lang_data/it/morphs.json | 0 lang_data/it/prefix.txt | 21 - lang_data/it/specials.json | 149 - lang_data/it/suffix.txt | 26 - lang_data/it/tag_map.json | 44 - lang_data/zh/gazetteer.json | 194 -- lang_data/zh/infix.txt | 6 - lang_data/zh/morphs.json | 1 - lang_data/zh/prefix.txt | 21 - lang_data/zh/specials.json | 1 - lang_data/zh/suffix.txt | 26 - lang_data/zh/tag_map.json | 43 - 42 files changed, 9097 deletions(-) delete mode 100644 lang_data/de/abbrev.de.tab delete mode 100644 lang_data/de/gazetteer.json delete mode 100644 lang_data/de/generate_specials.py delete mode 100644 lang_data/de/infix.txt delete mode 100644 lang_data/de/lemma_rules.json delete mode 100644 lang_data/de/morphs.json delete mode 100644 lang_data/de/prefix.txt delete mode 100644 lang_data/de/sample.txt delete mode 100644 lang_data/de/specials.json delete mode 100644 lang_data/de/suffix.txt delete mode 100644 lang_data/de/tag_map.json delete mode 100644 lang_data/en/LICENSE delete mode 100644 lang_data/en/gazetteer.json delete mode 100644 lang_data/en/generate_specials.py delete mode 100644 lang_data/en/infix.txt delete mode 100644 lang_data/en/lemma_rules.json delete mode 100644 lang_data/en/morphs.json delete mode 100644 lang_data/en/prefix.txt delete mode 100644 lang_data/en/specials.json delete mode 100644 lang_data/en/suffix.txt delete mode 100644 lang_data/en/tag_map.json delete mode 100644 lang_data/fi/infix.txt delete mode 100644 lang_data/fi/lemma_rules.json delete mode 100644 lang_data/fi/morphs.json delete mode 100644 lang_data/fi/prefix.txt delete mode 100644 lang_data/fi/sample.txt delete mode 100644 lang_data/fi/specials.json delete mode 100644 lang_data/fi/suffix.txt delete mode 100644 lang_data/fi/tag_map.json delete mode 100644 lang_data/it/infix.txt delete mode 100644 lang_data/it/morphs.json delete mode 100644 lang_data/it/prefix.txt delete mode 100644 lang_data/it/specials.json delete mode 100644 lang_data/it/suffix.txt delete mode 100644 lang_data/it/tag_map.json delete mode 100644 lang_data/zh/gazetteer.json delete mode 100644 lang_data/zh/infix.txt delete mode 100644 lang_data/zh/morphs.json delete mode 100644 lang_data/zh/prefix.txt delete mode 100644 lang_data/zh/specials.json delete mode 100644 lang_data/zh/suffix.txt delete mode 100644 lang_data/zh/tag_map.json diff --git a/lang_data/de/abbrev.de.tab b/lang_data/de/abbrev.de.tab deleted file mode 100644 index 97374c83d..000000000 --- a/lang_data/de/abbrev.de.tab +++ /dev/null @@ -1,319 +0,0 @@ -# surface form lemma pos -# multiple values are separated by | -# empty lines and lines starting with # are being ignored - -'' '' -\") \") -\n \n SP -\t \t SP - SP - -# example: Wie geht's? -'s 's es -'S 'S es - -# example: Haste mal 'nen Euro? -'n 'n ein -'ne 'ne eine -'nen 'nen einen - -# example: Kommen S’ nur herein! -s' s' sie -S' S' sie - -# example: Da haben wir's! -ich's ich|'s ich|es -du's du|'s du|es -er's er|'s er|es -sie's sie|'s sie|es -wir's wir|'s wir|es -ihr's ihr|'s ihr|es - -# example: Die katze auf'm dach. -auf'm auf|'m auf|dem -unter'm unter|'m unter|dem -über'm über|'m über|dem -vor'm vor|'m vor|dem -hinter'm hinter|'m hinter|dem - -# persons -B.A. B.A. -B.Sc. B.Sc. -Dipl. Dipl. -Dipl.-Ing. Dipl.-Ing. -Dr. Dr. -Fr. Fr. -Frl. Frl. -Hr. Hr. -Hrn. Hrn. -Frl. Frl. -Prof. Prof. -St. St. -Hrgs. Hrgs. -Hg. Hg. -a.Z. a.Z. -a.D. a.D. -h.c. h.c. -Jr. Jr. -jr. jr. -jun. jun. -sen. sen. -rer. rer. -Ing. Ing. -M.A. M.A. -Mr. Mr. -M.Sc. M.Sc. -nat. nat. -phil. phil. - -# companies -Co. Co. -co. co. -Cie. Cie. -A.G. A.G. -G.m.b.H. G.m.b.H. -i.G. i.G. -e.V. e.V. - -# popular german abbreviations -Abb. Abb. -Abk. Abk. -Abs. Abs. -Abt. Abt. -abzgl. abzgl. -allg. allg. -a.M. a.M. -Bd. Bd. -betr. betr. -Betr. Betr. -Biol. Biol. -biol. biol. -Bf. Bf. -Bhf. Bhf. -Bsp. Bsp. -bspw. bspw. -bzgl. bzgl. -bzw. bzw. -d.h. d.h. -dgl. dgl. -ebd. ebd. -ehem. ehem. -eigtl. eigtl. -entspr. entspr. -erm. erm. -ev. ev. -evtl. evtl. -Fa. Fa. -Fam. Fam. -geb. geb. -Gebr. Gebr. -gem. gem. -ggf. ggf. -ggü. ggü. -ggfs. ggfs. -gegr. gegr. -Hbf. Hbf. -Hrsg. Hrsg. -hrsg. hrsg. -i.A. i.A. -i.d.R. i.d.R. -inkl. inkl. -insb. insb. -i.O. i.O. -i.Tr. i.Tr. -i.V. i.V. -jur. jur. -kath. kath. -K.O. K.O. -lt. lt. -max. max. -m.E. m.E. -m.M. m.M. -mtl. mtl. -min. min. -mind. mind. -MwSt. MwSt. -Nr. Nr. -o.a. o.a. -o.ä. o.ä. -o.Ä. o.Ä. -o.g. o.g. -o.k. o.k. -O.K. O.K. -Orig. Orig. -orig. orig. -pers. pers. -Pkt. Pkt. -Red. Red. -röm. röm. -s.o. s.o. -sog. sog. -std. std. -stellv. stellv. -Str. Str. -tägl. tägl. -Tel. Tel. -u.a. u.a. -usf. usf. -u.s.w. u.s.w. -usw. usw. -u.U. u.U. -u.v.m. u.v.m. -uvm. uvm. -v.a. v.a. -vgl. vgl. -vllt. vllt. -v.l.n.r. v.l.n.r. -vlt. vlt. -Vol. Vol. -wiss. wiss. -Univ. Univ. -z.B. z.B. -z.b. z.b. -z.Bsp. z.Bsp. -z.T. z.T. -z.Z. z.Z. -zzgl. zzgl. -z.Zt. z.Zt. - -# popular latin abbreviations -vs. vs. -adv. adv. -Chr. Chr. -A.C. A.C. -A.D. A.D. -e.g. e.g. -i.e. i.e. -al. al. -p.a. p.a. -P.S. P.S. -q.e.d. q.e.d. -R.I.P. R.I.P. -etc. etc. -incl. incl. -ca. ca. -n.Chr. n.Chr. -p.s. p.s. -v.Chr. v.Chr. - -# popular english abbreviations -D.C. D.C. -N.Y. N.Y. -N.Y.C. N.Y.C. -U.S. U.S. -U.S.A. U.S.A. -L.A. L.A. -U.S.S. U.S.S. - -# dates & time -Jan. Jan. -Feb. Feb. -Mrz. Mrz. -Mär. Mär. -Apr. Apr. -Jun. Jun. -Jul. Jul. -Aug. Aug. -Sep. Sep. -Sept. Sept. -Okt. Okt. -Nov. Nov. -Dez. Dez. -Mo. Mo. -Di. Di. -Mi. Mi. -Do. Do. -Fr. Fr. -Sa. Sa. -So. So. -Std. Std. -Jh. Jh. -Jhd. Jhd. - -# numbers -Tsd. Tsd. -Mio. Mio. -Mrd. Mrd. - -# countries & languages -engl. engl. -frz. frz. -lat. lat. -österr. österr. - -# smileys -:) :) -<3 <3 -;) ;) -(: (: -:( :( --_- -_- -=) =) -:/ :/ -:> :> -;-) ;-) -:Y :Y -:P :P -:-P :-P -:3 :3 -=3 =3 -xD xD -^_^ ^_^ -=] =] -=D =D -<333 <333 -:)) :)) -:0 :0 --__- -__- -xDD xDD -o_o o_o -o_O o_O -V_V V_V -=[[ =[[ -<33 <33 -;p ;p -;D ;D -;-p ;-p -;( ;( -:p :p -:] :] -:O :O -:-/ :-/ -:-) :-) -:((( :((( -:(( :(( -:') :') -(^_^) (^_^) -(= (= -o.O o.O - -# single letters -a. a. -b. b. -c. c. -d. d. -e. e. -f. f. -g. g. -h. h. -i. i. -j. j. -k. k. -l. l. -m. m. -n. n. -o. o. -p. p. -q. q. -r. r. -s. s. -t. t. -u. u. -v. v. -w. w. -x. x. -y. y. -z. z. -ä. ä. -ö. ö. -ü. ü. diff --git a/lang_data/de/gazetteer.json b/lang_data/de/gazetteer.json deleted file mode 100644 index d52fed839..000000000 --- a/lang_data/de/gazetteer.json +++ /dev/null @@ -1,194 +0,0 @@ -{ - "Reddit": [ - "PRODUCT", - {}, - [ - [{"lower": "reddit"}] - ] - ], - "SeptemberElevenAttacks": [ - "EVENT", - {}, - [ - [ - {"orth": "9/11"} - ], - [ - {"lower": "september"}, - {"orth": "11"} - ] - ] - ], - "Linux": [ - "PRODUCT", - {}, - [ - [{"lower": "linux"}] - ] - ], - "Haskell": [ - "PRODUCT", - {}, - [ - [{"lower": "haskell"}] - ] - ], - "HaskellCurry": [ - "PERSON", - {}, - [ - [ - {"lower": "haskell"}, - {"lower": "curry"} - ] - ] - ], - "Javascript": [ - "PRODUCT", - {}, - [ - [{"lower": "javascript"}] - ] - ], - "CSS": [ - "PRODUCT", - {}, - [ - [{"lower": "css"}], - [{"lower": "css3"}] - ] - ], - "displaCy": [ - "PRODUCT", - {}, - [ - [{"lower": "displacy"}] - ] - ], - "spaCy": [ - "PRODUCT", - {}, - [ - [{"orth": "spaCy"}] - ] - ], - - "HTML": [ - "PRODUCT", - {}, - [ - [{"lower": "html"}], - [{"lower": "html5"}] - ] - ], - "Python": [ - "PRODUCT", - {}, - [ - [{"orth": "Python"}] - ] - ], - "Ruby": [ - "PRODUCT", - {}, - [ - [{"orth": "Ruby"}] - ] - ], - "Digg": [ - "PRODUCT", - {}, - [ - [{"lower": "digg"}] - ] - ], - "FoxNews": [ - "ORG", - {}, - [ - [{"orth": "Fox"}], - [{"orth": "News"}] - ] - ], - "Google": [ - "ORG", - {}, - [ - [{"lower": "google"}] - ] - ], - "Mac": [ - "PRODUCT", - {}, - [ - [{"lower": "mac"}] - ] - ], - "Wikipedia": [ - "PRODUCT", - {}, - [ - [{"lower": "wikipedia"}] - ] - ], - "Windows": [ - "PRODUCT", - {}, - [ - [{"orth": "Windows"}] - ] - ], - "Dell": [ - "ORG", - {}, - [ - [{"lower": "dell"}] - ] - ], - "Facebook": [ - "ORG", - {}, - [ - [{"lower": "facebook"}] - ] - ], - "Blizzard": [ - "ORG", - {}, - [ - [{"orth": "Blizzard"}] - ] - ], - "Ubuntu": [ - "ORG", - {}, - [ - [{"orth": "Ubuntu"}] - ] - ], - "Youtube": [ - "PRODUCT", - {}, - [ - [{"lower": "youtube"}] - ] - ], - "false_positives": [ - null, - {}, - [ - [{"orth": "Shit"}], - [{"orth": "Weed"}], - [{"orth": "Cool"}], - [{"orth": "Btw"}], - [{"orth": "Bah"}], - [{"orth": "Bullshit"}], - [{"orth": "Lol"}], - [{"orth": "Yo"}, {"lower": "dawg"}], - [{"orth": "Yay"}], - [{"orth": "Ahh"}], - [{"orth": "Yea"}], - [{"orth": "Bah"}] - ] - ] -} diff --git a/lang_data/de/generate_specials.py b/lang_data/de/generate_specials.py deleted file mode 100644 index b3dc52e4f..000000000 --- a/lang_data/de/generate_specials.py +++ /dev/null @@ -1,334 +0,0 @@ -# coding=utf8 -import json -import io -import itertools - -contractions = {} - -# contains the lemmas, parts of speech, number, and tenspect of -# potential tokens generated after splitting contractions off -token_properties = {} - -# contains starting tokens with their potential contractions -# each potential contraction has a list of exceptions - # lower - don't generate the lowercase version - # upper - don't generate the uppercase version - # contrLower - don't generate the lowercase version with apostrophe (') removed - # contrUpper - dont' generate the uppercase version with apostrophe (') removed -# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so -# we add "contrLower" and "contrUpper" to the exceptions list -starting_tokens = {} - -# other specials that don't really have contractions -# so they are hardcoded -hardcoded_specials = { - "''": [{"F": "''"}], - "\")": [{"F": "\")"}], - "\n": [{"F": "\n", "pos": "SP"}], - "\t": [{"F": "\t", "pos": "SP"}], - " ": [{"F": " ", "pos": "SP"}], - - # example: Wie geht's? - "'s": [{"F": "'s", "L": "es"}], - "'S": [{"F": "'S", "L": "es"}], - - # example: Haste mal 'nen Euro? - "'n": [{"F": "'n", "L": "ein"}], - "'ne": [{"F": "'ne", "L": "eine"}], - "'nen": [{"F": "'nen", "L": "einen"}], - - # example: Kommen S’ nur herein! - "s'": [{"F": "s'", "L": "sie"}], - "S'": [{"F": "S'", "L": "sie"}], - - # example: Da haben wir's! - "ich's": [{"F": "ich"}, {"F": "'s", "L": "es"}], - "du's": [{"F": "du"}, {"F": "'s", "L": "es"}], - "er's": [{"F": "er"}, {"F": "'s", "L": "es"}], - "sie's": [{"F": "sie"}, {"F": "'s", "L": "es"}], - "wir's": [{"F": "wir"}, {"F": "'s", "L": "es"}], - "ihr's": [{"F": "ihr"}, {"F": "'s", "L": "es"}], - - # example: Die katze auf'm dach. - "auf'm": [{"F": "auf"}, {"F": "'m", "L": "dem"}], - "unter'm": [{"F": "unter"}, {"F": "'m", "L": "dem"}], - "über'm": [{"F": "über"}, {"F": "'m", "L": "dem"}], - "vor'm": [{"F": "vor"}, {"F": "'m", "L": "dem"}], - "hinter'm": [{"F": "hinter"}, {"F": "'m", "L": "dem"}], - - # persons - "Fr.": [{"F": "Fr."}], - "Hr.": [{"F": "Hr."}], - "Frl.": [{"F": "Frl."}], - "Prof.": [{"F": "Prof."}], - "Dr.": [{"F": "Dr."}], - "St.": [{"F": "St."}], - "Hrgs.": [{"F": "Hrgs."}], - "Hg.": [{"F": "Hg."}], - "a.Z.": [{"F": "a.Z."}], - "a.D.": [{"F": "a.D."}], - "A.D.": [{"F": "A.D."}], - "h.c.": [{"F": "h.c."}], - "jun.": [{"F": "jun."}], - "sen.": [{"F": "sen."}], - "rer.": [{"F": "rer."}], - "Dipl.": [{"F": "Dipl."}], - "Ing.": [{"F": "Ing."}], - "Dipl.-Ing.": [{"F": "Dipl.-Ing."}], - - # companies - "Co.": [{"F": "Co."}], - "co.": [{"F": "co."}], - "Cie.": [{"F": "Cie."}], - "A.G.": [{"F": "A.G."}], - "G.m.b.H.": [{"F": "G.m.b.H."}], - "i.G.": [{"F": "i.G."}], - "e.V.": [{"F": "e.V."}], - - # popular german abbreviations - "ggü.": [{"F": "ggü."}], - "ggf.": [{"F": "ggf."}], - "ggfs.": [{"F": "ggfs."}], - "Gebr.": [{"F": "Gebr."}], - "geb.": [{"F": "geb."}], - "gegr.": [{"F": "gegr."}], - "erm.": [{"F": "erm."}], - "engl.": [{"F": "engl."}], - "ehem.": [{"F": "ehem."}], - "Biol.": [{"F": "Biol."}], - "biol.": [{"F": "biol."}], - "Abk.": [{"F": "Abk."}], - "Abb.": [{"F": "Abb."}], - "abzgl.": [{"F": "abzgl."}], - "Hbf.": [{"F": "Hbf."}], - "Bhf.": [{"F": "Bhf."}], - "Bf.": [{"F": "Bf."}], - "i.V.": [{"F": "i.V."}], - "inkl.": [{"F": "inkl."}], - "insb.": [{"F": "insb."}], - "z.B.": [{"F": "z.B."}], - "i.Tr.": [{"F": "i.Tr."}], - "Jhd.": [{"F": "Jhd."}], - "jur.": [{"F": "jur."}], - "lt.": [{"F": "lt."}], - "nat.": [{"F": "nat."}], - "u.a.": [{"F": "u.a."}], - "u.s.w.": [{"F": "u.s.w."}], - "Nr.": [{"F": "Nr."}], - "Univ.": [{"F": "Univ."}], - "vgl.": [{"F": "vgl."}], - "zzgl.": [{"F": "zzgl."}], - "z.Z.": [{"F": "z.Z."}], - "betr.": [{"F": "betr."}], - "ehem.": [{"F": "ehem."}], - - # popular latin abbreviations - "vs.": [{"F": "vs."}], - "adv.": [{"F": "adv."}], - "Chr.": [{"F": "Chr."}], - "A.C.": [{"F": "A.C."}], - "A.D.": [{"F": "A.D."}], - "e.g.": [{"F": "e.g."}], - "i.e.": [{"F": "i.e."}], - "al.": [{"F": "al."}], - "p.a.": [{"F": "p.a."}], - "P.S.": [{"F": "P.S."}], - "q.e.d.": [{"F": "q.e.d."}], - "R.I.P.": [{"F": "R.I.P."}], - "etc.": [{"F": "etc."}], - "incl.": [{"F": "incl."}], - - # popular english abbreviations - "D.C.": [{"F": "D.C."}], - "N.Y.": [{"F": "N.Y."}], - "N.Y.C.": [{"F": "N.Y.C."}], - - # dates - "Jan.": [{"F": "Jan."}], - "Feb.": [{"F": "Feb."}], - "Mrz.": [{"F": "Mrz."}], - "Mär.": [{"F": "Mär."}], - "Apr.": [{"F": "Apr."}], - "Jun.": [{"F": "Jun."}], - "Jul.": [{"F": "Jul."}], - "Aug.": [{"F": "Aug."}], - "Sep.": [{"F": "Sep."}], - "Sept.": [{"F": "Sept."}], - "Okt.": [{"F": "Okt."}], - "Nov.": [{"F": "Nov."}], - "Dez.": [{"F": "Dez."}], - "Mo.": [{"F": "Mo."}], - "Di.": [{"F": "Di."}], - "Mi.": [{"F": "Mi."}], - "Do.": [{"F": "Do."}], - "Fr.": [{"F": "Fr."}], - "Sa.": [{"F": "Sa."}], - "So.": [{"F": "So."}], - - # smileys - ":)": [{"F": ":)"}], - "<3": [{"F": "<3"}], - ";)": [{"F": ";)"}], - "(:": [{"F": "(:"}], - ":(": [{"F": ":("}], - "-_-": [{"F": "-_-"}], - "=)": [{"F": "=)"}], - ":/": [{"F": ":/"}], - ":>": [{"F": ":>"}], - ";-)": [{"F": ";-)"}], - ":Y": [{"F": ":Y"}], - ":P": [{"F": ":P"}], - ":-P": [{"F": ":-P"}], - ":3": [{"F": ":3"}], - "=3": [{"F": "=3"}], - "xD": [{"F": "xD"}], - "^_^": [{"F": "^_^"}], - "=]": [{"F": "=]"}], - "=D": [{"F": "=D"}], - "<333": [{"F": "<333"}], - ":))": [{"F": ":))"}], - ":0": [{"F": ":0"}], - "-__-": [{"F": "-__-"}], - "xDD": [{"F": "xDD"}], - "o_o": [{"F": "o_o"}], - "o_O": [{"F": "o_O"}], - "V_V": [{"F": "V_V"}], - "=[[": [{"F": "=[["}], - "<33": [{"F": "<33"}], - ";p": [{"F": ";p"}], - ";D": [{"F": ";D"}], - ";-p": [{"F": ";-p"}], - ";(": [{"F": ";("}], - ":p": [{"F": ":p"}], - ":]": [{"F": ":]"}], - ":O": [{"F": ":O"}], - ":-/": [{"F": ":-/"}], - ":-)": [{"F": ":-)"}], - ":(((": [{"F": ":((("}], - ":((": [{"F": ":(("}], - ":')": [{"F": ":')"}], - "(^_^)": [{"F": "(^_^)"}], - "(=": [{"F": "(="}], - "o.O": [{"F": "o.O"}], - - "a.": [{"F": "a."}], - "b.": [{"F": "b."}], - "c.": [{"F": "c."}], - "d.": [{"F": "d."}], - "e.": [{"F": "e."}], - "f.": [{"F": "f."}], - "g.": [{"F": "g."}], - "h.": [{"F": "h."}], - "i.": [{"F": "i."}], - "j.": [{"F": "j."}], - "k.": [{"F": "k."}], - "l.": [{"F": "l."}], - "m.": [{"F": "m."}], - "n.": [{"F": "n."}], - "o.": [{"F": "o."}], - "p.": [{"F": "p."}], - "q.": [{"F": "q."}], - "r.": [{"F": "r."}], - "s.": [{"F": "s."}], - "t.": [{"F": "t."}], - "u.": [{"F": "u."}], - "v.": [{"F": "v."}], - "w.": [{"F": "w."}], - "x.": [{"F": "x."}], - "y.": [{"F": "y."}], - "z.": [{"F": "z."}], -} - -def get_double_contractions(ending): - endings = [] - - ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions]) - - while ends_with_contraction: - for contraction in contractions: - if ending.endswith(contraction): - endings.append(contraction) - ending = ending.rstrip(contraction) - ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions]) - - endings.reverse() # reverse because the last ending is put in the list first - return endings - -def get_token_properties(token, capitalize=False, remove_contractions=False): - props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop - if capitalize: - token = token.capitalize() - if remove_contractions: - token = token.replace("'", "") - - props["F"] = token - return props - - -def create_entry(token, endings, capitalize=False, remove_contractions=False): - properties = [] - properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions)) - for e in endings: - properties.append(get_token_properties(e, remove_contractions=remove_contractions)) - return properties - - -FIELDNAMES = ['F','L','pos'] -def read_hardcoded(stream): - hc_specials = {} - for line in stream: - line = line.strip() - if line.startswith('#') or not line: - continue - key,_,rest = line.partition('\t') - values = [] - for annotation in zip(*[ e.split('|') for e in rest.split('\t') ]): - values.append({ k:v for k,v in itertools.izip_longest(FIELDNAMES,annotation) if v }) - hc_specials[key] = values - return hc_specials - - -def generate_specials(): - - specials = {} - - for token in starting_tokens: - possible_endings = starting_tokens[token] - for ending in possible_endings: - - endings = [] - if ending.count("'") > 1: - endings.extend(get_double_contractions(ending)) - else: - endings.append(ending) - - exceptions = possible_endings[ending] - - if "lower" not in exceptions: - special = token + ending - specials[special] = create_entry(token, endings) - - if "upper" not in exceptions: - special = token.capitalize() + ending - specials[special] = create_entry(token, endings, capitalize=True) - - if "contrLower" not in exceptions: - special = token + ending.replace("'", "") - specials[special] = create_entry(token, endings, remove_contractions=True) - - if "contrUpper" not in exceptions: - special = token.capitalize() + ending.replace("'", "") - specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True) - - # add in hardcoded specials - # changed it so it generates them from a file - with io.open('abbrev.de.tab','r',encoding='utf8') as abbrev_: - hc_specials = read_hardcoded(abbrev_) - specials = dict(specials, **hc_specials) - - return specials - -if __name__ == "__main__": - specials = generate_specials() - with open("specials.json", "w") as f: - json.dump(specials, f, sort_keys=True, indent=4, separators=(',', ': ')) diff --git a/lang_data/de/infix.txt b/lang_data/de/infix.txt deleted file mode 100644 index 8398d5d42..000000000 --- a/lang_data/de/infix.txt +++ /dev/null @@ -1,6 +0,0 @@ -\.\.\. -(?<=[a-z])\.(?=[A-Z]) -(?<=[a-zöäüßA-ZÖÄÜ"]):(?=[a-zöäüßA-ZÖÄÜ]) -(?<=[a-zöäüßA-ZÖÄÜ"])>(?=[a-zöäüßA-ZÖÄÜ]) -(?<=[a-zöäüßA-ZÖÄÜ"])<(?=[a-zöäüßA-ZÖÄÜ]) -(?<=[a-zöäüßA-ZÖÄÜ"])=(?=[a-zöäüßA-ZÖÄÜ]) diff --git a/lang_data/de/lemma_rules.json b/lang_data/de/lemma_rules.json deleted file mode 100644 index 0967ef424..000000000 --- a/lang_data/de/lemma_rules.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/lang_data/de/morphs.json b/lang_data/de/morphs.json deleted file mode 100644 index ae024add2..000000000 --- a/lang_data/de/morphs.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "PRP": { - "ich": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 1}, - "meiner": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2}, - "mir": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3}, - "mich": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 4}, - "du": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1}, - "deiner": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2}, - "dir": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3}, - "dich": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4}, - "er": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1}, - "seiner": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2}, - "ihm": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3}, - "ihn": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4}, - "sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1}, - "ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2}, - "ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3}, - "sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4}, - "es": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1}, - "seiner": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2}, - "ihm": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3}, - "es": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4}, - "wir": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1}, - "unser": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2}, - "uns": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3}, - "uns": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4}, - "ihr": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1}, - "euer": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2}, - "euch": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3}, - "euch": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4}, - "sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1}, - "ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2}, - "ihnen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3}, - "sie": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4} - }, - - "PRP$": { - "mein": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1}, - "meines": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2}, - "meinem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3}, - "meinen": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4}, - "dein": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1}, - "deines": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2}, - "deinem": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3}, - "deinen": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4}, - "sein": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 1}, - "seines": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 2}, - "seinem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 3}, - "seinen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 1, "case": 4}, - "ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 1}, - "ihrer": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 2}, - "ihrem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 3}, - "ihren": {"L": "-PRON-", "person": 3, "number": 0, "gender": 2, "case": 4}, - "sein": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 1}, - "seines": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 2}, - "seinem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 3}, - "seinen": {"L": "-PRON-", "person": 3, "number": 0, "gender": 3, "case": 4}, - "unser": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 1}, - "unseres": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 2}, - "unserem": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 3}, - "unseren": {"L": "-PRON-", "person": 1, "number": 0, "gender": 0, "case": 4}, - "euer": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 1}, - "eures": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 2}, - "eurem": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 3}, - "euren": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 4}, - "ihr": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 1}, - "ihres": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 2}, - "ihrem": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 3}, - "ihren": {"L": "-PRON-", "person": 3, "number": 0, "gender": 0, "case": 4} - } -} diff --git a/lang_data/de/prefix.txt b/lang_data/de/prefix.txt deleted file mode 100644 index e37542a9c..000000000 --- a/lang_data/de/prefix.txt +++ /dev/null @@ -1,27 +0,0 @@ -, -" -( -[ -{ -* -< -> -$ -£ -„ -“ -' -`` -` -# -US$ -C$ -A$ -a- -‘ -.... -... -‚ -» -_ -§ diff --git a/lang_data/de/sample.txt b/lang_data/de/sample.txt deleted file mode 100644 index 12c0bb787..000000000 --- a/lang_data/de/sample.txt +++ /dev/null @@ -1,3 +0,0 @@ -Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern. - -Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs. diff --git a/lang_data/de/specials.json b/lang_data/de/specials.json deleted file mode 100644 index 282ec6df4..000000000 --- a/lang_data/de/specials.json +++ /dev/null @@ -1,1483 +0,0 @@ -{ - "''": [ - { - "F": "''" - } - ], - "'S": [ - { - "F": "'S", - "L": "es" - } - ], - "'n": [ - { - "F": "'n", - "L": "ein" - } - ], - "'ne": [ - { - "F": "'ne", - "L": "eine" - } - ], - "'nen": [ - { - "F": "'nen", - "L": "einen" - } - ], - "'s": [ - { - "F": "'s", - "L": "es" - } - ], - "(:": [ - { - "F": "(:" - } - ], - "(=": [ - { - "F": "(=" - } - ], - "(^_^)": [ - { - "F": "(^_^)" - } - ], - "-_-": [ - { - "F": "-_-" - } - ], - "-__-": [ - { - "F": "-__-" - } - ], - ":')": [ - { - "F": ":')" - } - ], - ":(": [ - { - "F": ":(" - } - ], - ":((": [ - { - "F": ":((" - } - ], - ":(((": [ - { - "F": ":(((" - } - ], - ":)": [ - { - "F": ":)" - } - ], - ":))": [ - { - "F": ":))" - } - ], - ":-)": [ - { - "F": ":-)" - } - ], - ":-/": [ - { - "F": ":-/" - } - ], - ":-P": [ - { - "F": ":-P" - } - ], - ":/": [ - { - "F": ":/" - } - ], - ":0": [ - { - "F": ":0" - } - ], - ":3": [ - { - "F": ":3" - } - ], - ":>": [ - { - "F": ":>" - } - ], - ":O": [ - { - "F": ":O" - } - ], - ":P": [ - { - "F": ":P" - } - ], - ":Y": [ - { - "F": ":Y" - } - ], - ":]": [ - { - "F": ":]" - } - ], - ":p": [ - { - "F": ":p" - } - ], - ";(": [ - { - "F": ";(" - } - ], - ";)": [ - { - "F": ";)" - } - ], - ";-)": [ - { - "F": ";-)" - } - ], - ";-p": [ - { - "F": ";-p" - } - ], - ";D": [ - { - "F": ";D" - } - ], - ";p": [ - { - "F": ";p" - } - ], - "<3": [ - { - "F": "<3" - } - ], - "<33": [ - { - "F": "<33" - } - ], - "<333": [ - { - "F": "<333" - } - ], - "": [ - { - "F": "SP" - } - ], - "=)": [ - { - "F": "=)" - } - ], - "=3": [ - { - "F": "=3" - } - ], - "=D": [ - { - "F": "=D" - } - ], - "=[[": [ - { - "F": "=[[" - } - ], - "=]": [ - { - "F": "=]" - } - ], - "A.C.": [ - { - "F": "A.C." - } - ], - "A.D.": [ - { - "F": "A.D." - } - ], - "A.G.": [ - { - "F": "A.G." - } - ], - "Abb.": [ - { - "F": "Abb." - } - ], - "Abk.": [ - { - "F": "Abk." - } - ], - "Abs.": [ - { - "F": "Abs." - } - ], - "Abt.": [ - { - "F": "Abt." - } - ], - "Apr.": [ - { - "F": "Apr." - } - ], - "Aug.": [ - { - "F": "Aug." - } - ], - "B.A.": [ - { - "F": "B.A." - } - ], - "B.Sc.": [ - { - "F": "B.Sc." - } - ], - "Bd.": [ - { - "F": "Bd." - } - ], - "Betr.": [ - { - "F": "Betr." - } - ], - "Bf.": [ - { - "F": "Bf." - } - ], - "Bhf.": [ - { - "F": "Bhf." - } - ], - "Biol.": [ - { - "F": "Biol." - } - ], - "Bsp.": [ - { - "F": "Bsp." - } - ], - "Chr.": [ - { - "F": "Chr." - } - ], - "Cie.": [ - { - "F": "Cie." - } - ], - "Co.": [ - { - "F": "Co." - } - ], - "D.C.": [ - { - "F": "D.C." - } - ], - "Dez.": [ - { - "F": "Dez." - } - ], - "Di.": [ - { - "F": "Di." - } - ], - "Dipl.": [ - { - "F": "Dipl." - } - ], - "Dipl.-Ing.": [ - { - "F": "Dipl.-Ing." - } - ], - "Do.": [ - { - "F": "Do." - } - ], - "Dr.": [ - { - "F": "Dr." - } - ], - "Fa.": [ - { - "F": "Fa." - } - ], - "Fam.": [ - { - "F": "Fam." - } - ], - "Feb.": [ - { - "F": "Feb." - } - ], - "Fr.": [ - { - "F": "Fr." - } - ], - "Frl.": [ - { - "F": "Frl." - } - ], - "G.m.b.H.": [ - { - "F": "G.m.b.H." - } - ], - "Gebr.": [ - { - "F": "Gebr." - } - ], - "Hbf.": [ - { - "F": "Hbf." - } - ], - "Hg.": [ - { - "F": "Hg." - } - ], - "Hr.": [ - { - "F": "Hr." - } - ], - "Hrgs.": [ - { - "F": "Hrgs." - } - ], - "Hrn.": [ - { - "F": "Hrn." - } - ], - "Hrsg.": [ - { - "F": "Hrsg." - } - ], - "Ing.": [ - { - "F": "Ing." - } - ], - "Jan.": [ - { - "F": "Jan." - } - ], - "Jh.": [ - { - "F": "Jh." - } - ], - "Jhd.": [ - { - "F": "Jhd." - } - ], - "Jr.": [ - { - "F": "Jr." - } - ], - "Jul.": [ - { - "F": "Jul." - } - ], - "Jun.": [ - { - "F": "Jun." - } - ], - "K.O.": [ - { - "F": "K.O." - } - ], - "L.A.": [ - { - "F": "L.A." - } - ], - "M.A.": [ - { - "F": "M.A." - } - ], - "M.Sc.": [ - { - "F": "M.Sc." - } - ], - "Mi.": [ - { - "F": "Mi." - } - ], - "Mio.": [ - { - "F": "Mio." - } - ], - "Mo.": [ - { - "F": "Mo." - } - ], - "Mr.": [ - { - "F": "Mr." - } - ], - "Mrd.": [ - { - "F": "Mrd." - } - ], - "Mrz.": [ - { - "F": "Mrz." - } - ], - "MwSt.": [ - { - "F": "MwSt." - } - ], - "M\u00e4r.": [ - { - "F": "M\u00e4r." - } - ], - "N.Y.": [ - { - "F": "N.Y." - } - ], - "N.Y.C.": [ - { - "F": "N.Y.C." - } - ], - "Nov.": [ - { - "F": "Nov." - } - ], - "Nr.": [ - { - "F": "Nr." - } - ], - "O.K.": [ - { - "F": "O.K." - } - ], - "Okt.": [ - { - "F": "Okt." - } - ], - "Orig.": [ - { - "F": "Orig." - } - ], - "P.S.": [ - { - "F": "P.S." - } - ], - "Pkt.": [ - { - "F": "Pkt." - } - ], - "Prof.": [ - { - "F": "Prof." - } - ], - "R.I.P.": [ - { - "F": "R.I.P." - } - ], - "Red.": [ - { - "F": "Red." - } - ], - "S'": [ - { - "F": "S'", - "L": "sie" - } - ], - "Sa.": [ - { - "F": "Sa." - } - ], - "Sep.": [ - { - "F": "Sep." - } - ], - "Sept.": [ - { - "F": "Sept." - } - ], - "So.": [ - { - "F": "So." - } - ], - "St.": [ - { - "F": "St." - } - ], - "Std.": [ - { - "F": "Std." - } - ], - "Str.": [ - { - "F": "Str." - } - ], - "Tel.": [ - { - "F": "Tel." - } - ], - "Tsd.": [ - { - "F": "Tsd." - } - ], - "U.S.": [ - { - "F": "U.S." - } - ], - "U.S.A.": [ - { - "F": "U.S.A." - } - ], - "U.S.S.": [ - { - "F": "U.S.S." - } - ], - "Univ.": [ - { - "F": "Univ." - } - ], - "V_V": [ - { - "F": "V_V" - } - ], - "Vol.": [ - { - "F": "Vol." - } - ], - "\\\")": [ - { - "F": "\\\")" - } - ], - "\\n": [ - { - "F": "\\n", - "L": "", - "pos": "SP" - } - ], - "\\t": [ - { - "F": "\\t", - "L": "", - "pos": "SP" - } - ], - "^_^": [ - { - "F": "^_^" - } - ], - "a.": [ - { - "F": "a." - } - ], - "a.D.": [ - { - "F": "a.D." - } - ], - "a.M.": [ - { - "F": "a.M." - } - ], - "a.Z.": [ - { - "F": "a.Z." - } - ], - "abzgl.": [ - { - "F": "abzgl." - } - ], - "adv.": [ - { - "F": "adv." - } - ], - "al.": [ - { - "F": "al." - } - ], - "allg.": [ - { - "F": "allg." - } - ], - "auf'm": [ - { - "F": "auf", - "L": "auf" - }, - { - "F": "'m", - "L": "dem" - } - ], - "b.": [ - { - "F": "b." - } - ], - "betr.": [ - { - "F": "betr." - } - ], - "biol.": [ - { - "F": "biol." - } - ], - "bspw.": [ - { - "F": "bspw." - } - ], - "bzgl.": [ - { - "F": "bzgl." - } - ], - "bzw.": [ - { - "F": "bzw." - } - ], - "c.": [ - { - "F": "c." - } - ], - "ca.": [ - { - "F": "ca." - } - ], - "co.": [ - { - "F": "co." - } - ], - "d.": [ - { - "F": "d." - } - ], - "d.h.": [ - { - "F": "d.h." - } - ], - "dgl.": [ - { - "F": "dgl." - } - ], - "du's": [ - { - "F": "du", - "L": "du" - }, - { - "F": "'s", - "L": "es" - } - ], - "e.": [ - { - "F": "e." - } - ], - "e.V.": [ - { - "F": "e.V." - } - ], - "e.g.": [ - { - "F": "e.g." - } - ], - "ebd.": [ - { - "F": "ebd." - } - ], - "ehem.": [ - { - "F": "ehem." - } - ], - "eigtl.": [ - { - "F": "eigtl." - } - ], - "engl.": [ - { - "F": "engl." - } - ], - "entspr.": [ - { - "F": "entspr." - } - ], - "er's": [ - { - "F": "er", - "L": "er" - }, - { - "F": "'s", - "L": "es" - } - ], - "erm.": [ - { - "F": "erm." - } - ], - "etc.": [ - { - "F": "etc." - } - ], - "ev.": [ - { - "F": "ev." - } - ], - "evtl.": [ - { - "F": "evtl." - } - ], - "f.": [ - { - "F": "f." - } - ], - "frz.": [ - { - "F": "frz." - } - ], - "g.": [ - { - "F": "g." - } - ], - "geb.": [ - { - "F": "geb." - } - ], - "gegr.": [ - { - "F": "gegr." - } - ], - "gem.": [ - { - "F": "gem." - } - ], - "ggf.": [ - { - "F": "ggf." - } - ], - "ggfs.": [ - { - "F": "ggfs." - } - ], - "gg\u00fc.": [ - { - "F": "gg\u00fc." - } - ], - "h.": [ - { - "F": "h." - } - ], - "h.c.": [ - { - "F": "h.c." - } - ], - "hinter'm": [ - { - "F": "hinter", - "L": "hinter" - }, - { - "F": "'m", - "L": "dem" - } - ], - "hrsg.": [ - { - "F": "hrsg." - } - ], - "i.": [ - { - "F": "i." - } - ], - "i.A.": [ - { - "F": "i.A." - } - ], - "i.G.": [ - { - "F": "i.G." - } - ], - "i.O.": [ - { - "F": "i.O." - } - ], - "i.Tr.": [ - { - "F": "i.Tr." - } - ], - "i.V.": [ - { - "F": "i.V." - } - ], - "i.d.R.": [ - { - "F": "i.d.R." - } - ], - "i.e.": [ - { - "F": "i.e." - } - ], - "ich's": [ - { - "F": "ich", - "L": "ich" - }, - { - "F": "'s", - "L": "es" - } - ], - "ihr's": [ - { - "F": "ihr", - "L": "ihr" - }, - { - "F": "'s", - "L": "es" - } - ], - "incl.": [ - { - "F": "incl." - } - ], - "inkl.": [ - { - "F": "inkl." - } - ], - "insb.": [ - { - "F": "insb." - } - ], - "j.": [ - { - "F": "j." - } - ], - "jr.": [ - { - "F": "jr." - } - ], - "jun.": [ - { - "F": "jun." - } - ], - "jur.": [ - { - "F": "jur." - } - ], - "k.": [ - { - "F": "k." - } - ], - "kath.": [ - { - "F": "kath." - } - ], - "l.": [ - { - "F": "l." - } - ], - "lat.": [ - { - "F": "lat." - } - ], - "lt.": [ - { - "F": "lt." - } - ], - "m.": [ - { - "F": "m." - } - ], - "m.E.": [ - { - "F": "m.E." - } - ], - "m.M.": [ - { - "F": "m.M." - } - ], - "max.": [ - { - "F": "max." - } - ], - "min.": [ - { - "F": "min." - } - ], - "mind.": [ - { - "F": "mind." - } - ], - "mtl.": [ - { - "F": "mtl." - } - ], - "n.": [ - { - "F": "n." - } - ], - "n.Chr.": [ - { - "F": "n.Chr." - } - ], - "nat.": [ - { - "F": "nat." - } - ], - "o.": [ - { - "F": "o." - } - ], - "o.O": [ - { - "F": "o.O" - } - ], - "o.a.": [ - { - "F": "o.a." - } - ], - "o.g.": [ - { - "F": "o.g." - } - ], - "o.k.": [ - { - "F": "o.k." - } - ], - "o.\u00c4.": [ - { - "F": "o.\u00c4." - } - ], - "o.\u00e4.": [ - { - "F": "o.\u00e4." - } - ], - "o_O": [ - { - "F": "o_O" - } - ], - "o_o": [ - { - "F": "o_o" - } - ], - "orig.": [ - { - "F": "orig." - } - ], - "p.": [ - { - "F": "p." - } - ], - "p.a.": [ - { - "F": "p.a." - } - ], - "p.s.": [ - { - "F": "p.s." - } - ], - "pers.": [ - { - "F": "pers." - } - ], - "phil.": [ - { - "F": "phil." - } - ], - "q.": [ - { - "F": "q." - } - ], - "q.e.d.": [ - { - "F": "q.e.d." - } - ], - "r.": [ - { - "F": "r." - } - ], - "rer.": [ - { - "F": "rer." - } - ], - "r\u00f6m.": [ - { - "F": "r\u00f6m." - } - ], - "s'": [ - { - "F": "s'", - "L": "sie" - } - ], - "s.": [ - { - "F": "s." - } - ], - "s.o.": [ - { - "F": "s.o." - } - ], - "sen.": [ - { - "F": "sen." - } - ], - "sie's": [ - { - "F": "sie", - "L": "sie" - }, - { - "F": "'s", - "L": "es" - } - ], - "sog.": [ - { - "F": "sog." - } - ], - "std.": [ - { - "F": "std." - } - ], - "stellv.": [ - { - "F": "stellv." - } - ], - "t.": [ - { - "F": "t." - } - ], - "t\u00e4gl.": [ - { - "F": "t\u00e4gl." - } - ], - "u.": [ - { - "F": "u." - } - ], - "u.U.": [ - { - "F": "u.U." - } - ], - "u.a.": [ - { - "F": "u.a." - } - ], - "u.s.w.": [ - { - "F": "u.s.w." - } - ], - "u.v.m.": [ - { - "F": "u.v.m." - } - ], - "unter'm": [ - { - "F": "unter", - "L": "unter" - }, - { - "F": "'m", - "L": "dem" - } - ], - "usf.": [ - { - "F": "usf." - } - ], - "usw.": [ - { - "F": "usw." - } - ], - "uvm.": [ - { - "F": "uvm." - } - ], - "v.": [ - { - "F": "v." - } - ], - "v.Chr.": [ - { - "F": "v.Chr." - } - ], - "v.a.": [ - { - "F": "v.a." - } - ], - "v.l.n.r.": [ - { - "F": "v.l.n.r." - } - ], - "vgl.": [ - { - "F": "vgl." - } - ], - "vllt.": [ - { - "F": "vllt." - } - ], - "vlt.": [ - { - "F": "vlt." - } - ], - "vor'm": [ - { - "F": "vor", - "L": "vor" - }, - { - "F": "'m", - "L": "dem" - } - ], - "vs.": [ - { - "F": "vs." - } - ], - "w.": [ - { - "F": "w." - } - ], - "wir's": [ - { - "F": "wir", - "L": "wir" - }, - { - "F": "'s", - "L": "es" - } - ], - "wiss.": [ - { - "F": "wiss." - } - ], - "x.": [ - { - "F": "x." - } - ], - "xD": [ - { - "F": "xD" - } - ], - "xDD": [ - { - "F": "xDD" - } - ], - "y.": [ - { - "F": "y." - } - ], - "z.": [ - { - "F": "z." - } - ], - "z.B.": [ - { - "F": "z.B." - } - ], - "z.Bsp.": [ - { - "F": "z.Bsp." - } - ], - "z.T.": [ - { - "F": "z.T." - } - ], - "z.Z.": [ - { - "F": "z.Z." - } - ], - "z.Zt.": [ - { - "F": "z.Zt." - } - ], - "z.b.": [ - { - "F": "z.b." - } - ], - "zzgl.": [ - { - "F": "zzgl." - } - ], - "\u00e4.": [ - { - "F": "\u00e4." - } - ], - "\u00f6.": [ - { - "F": "\u00f6." - } - ], - "\u00f6sterr.": [ - { - "F": "\u00f6sterr." - } - ], - "\u00fc.": [ - { - "F": "\u00fc." - } - ], - "\u00fcber'm": [ - { - "F": "\u00fcber", - "L": "\u00fcber" - }, - { - "F": "'m", - "L": "dem" - } - ] -} \ No newline at end of file diff --git a/lang_data/de/suffix.txt b/lang_data/de/suffix.txt deleted file mode 100644 index aeecb85a2..000000000 --- a/lang_data/de/suffix.txt +++ /dev/null @@ -1,73 +0,0 @@ -, -\" -\) -\] -\} -\* -\! -\? -% -\$ -> -: -; -' -” -“ -« -_ -'' -'s -'S -’s -’S -’ -‘ -° -€ -\.\. -\.\.\. -\.\.\.\. -(?<=[a-zäöüßÖÄÜ)\]"'´«‘’%\)²“”])\. -\-\- -´ -(?<=[0-9])km² -(?<=[0-9])m² -(?<=[0-9])cm² -(?<=[0-9])mm² -(?<=[0-9])km³ -(?<=[0-9])m³ -(?<=[0-9])cm³ -(?<=[0-9])mm³ -(?<=[0-9])ha -(?<=[0-9])km -(?<=[0-9])m -(?<=[0-9])cm -(?<=[0-9])mm -(?<=[0-9])µm -(?<=[0-9])nm -(?<=[0-9])yd -(?<=[0-9])in -(?<=[0-9])ft -(?<=[0-9])kg -(?<=[0-9])g -(?<=[0-9])mg -(?<=[0-9])µg -(?<=[0-9])t -(?<=[0-9])lb -(?<=[0-9])oz -(?<=[0-9])m/s -(?<=[0-9])km/h -(?<=[0-9])mph -(?<=[0-9])°C -(?<=[0-9])°K -(?<=[0-9])°F -(?<=[0-9])hPa -(?<=[0-9])Pa -(?<=[0-9])mbar -(?<=[0-9])mb -(?<=[0-9])T -(?<=[0-9])G -(?<=[0-9])M -(?<=[0-9])K -(?<=[0-9])kb diff --git a/lang_data/de/tag_map.json b/lang_data/de/tag_map.json deleted file mode 100644 index 29da20a39..000000000 --- a/lang_data/de/tag_map.json +++ /dev/null @@ -1,59 +0,0 @@ -{ -"$(": {"pos": "PUNCT", "PunctType": "Brck"}, -"$,": {"pos": "PUNCT", "PunctType": "Comm"}, -"$.": {"pos": "PUNCT", "PunctType": "Peri"}, -"ADJA": {"pos": "ADJ"}, -"ADJD": {"pos": "ADJ", "Variant": "Short"}, -"ADV": {"pos": "ADV"}, -"APPO": {"pos": "ADP", "AdpType": "Post"}, -"APPR": {"pos": "ADP", "AdpType": "Prep"}, -"APPRART": {"pos": "ADP", "AdpType": "Prep", "PronType": "Art"}, -"APZR": {"pos": "ADP", "AdpType": "Circ"}, -"ART": {"pos": "DET", "PronType": "Art"}, -"CARD": {"pos": "NUM", "NumType": "Card"}, -"FM": {"pos": "X", "Foreign": "Yes"}, -"ITJ": {"pos": "INTJ"}, -"KOKOM": {"pos": "CONJ", "ConjType": "Comp"}, -"KON": {"pos": "CONJ"}, -"KOUI": {"pos": "SCONJ"}, -"KOUS": {"pos": "SCONJ"}, -"NE": {"pos": "PROPN"}, -"NNE": {"pos": "PROPN"}, -"NN": {"pos": "NOUN"}, -"PAV": {"pos": "ADV", "PronType": "Dem"}, -"PROAV": {"pos": "ADV", "PronType": "Dem"}, -"PDAT": {"pos": "DET", "PronType": "Dem"}, -"PDS": {"pos": "PRON", "PronType": "Dem"}, -"PIAT": {"pos": "DET", "PronType": "Ind,Neg,Tot"}, -"PIDAT": {"pos": "DET", "AdjType": "Pdt", "PronType": "Ind,Neg,Tot"}, -"PIS": {"pos": "PRON", "PronType": "Ind,Neg,Tot"}, -"PPER": {"pos": "PRON", "PronType": "Prs"}, -"PPOSAT": {"pos": "DET", "Poss": "Yes", "PronType": "Prs"}, -"PPOSS": {"pos": "PRON", "Poss": "Yes", "PronType": "Prs"}, -"PRELAT": {"pos": "DET", "PronType": "Rel"}, -"PRELS": {"pos": "PRON", "PronType": "Rel"}, -"PRF": {"pos": "PRON", "PronType": "Prs", "Reflex": "Yes"}, -"PTKA": {"pos": "PART"}, -"PTKANT": {"pos": "PART", "PartType": "Res"}, -"PTKNEG": {"pos": "PART", "Negative": "Neg"}, -"PTKVZ": {"pos": "PART", "PartType": "Vbp"}, -"PTKZU": {"pos": "PART", "PartType": "Inf"}, -"PWAT": {"pos": "DET", "PronType": "Int"}, -"PWAV": {"pos": "ADV", "PronType": "Int"}, -"PWS": {"pos": "PRON", "PronType": "Int"}, -"TRUNC": {"pos": "X", "Hyph": "Yes"}, -"VAFIN": {"pos": "AUX", "Mood": "Ind", "VerbForm": "Fin"}, -"VAIMP": {"pos": "AUX", "Mood": "Imp", "VerbForm": "Fin"}, -"VAINF": {"pos": "AUX", "VerbForm": "Inf"}, -"VAPP": {"pos": "AUX", "Aspect": "Perf", "VerbForm": "Part"}, -"VMFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin", "VerbType": "Mod"}, -"VMINF": {"pos": "VERB", "VerbForm": "Inf", "VerbType": "Mod"}, -"VMPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part", "VerbType": "Mod"}, -"VVFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin"}, -"VVIMP": {"pos": "VERB", "Mood": "Imp", "VerbForm": "Fin"}, -"VVINF": {"pos": "VERB", "VerbForm": "Inf"}, -"VVIZU": {"pos": "VERB", "VerbForm": "Inf"}, -"VVPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part"}, -"XY": {"pos": "X"}, -"SP": {"pos": "SPACE"} -} diff --git a/lang_data/en/LICENSE b/lang_data/en/LICENSE deleted file mode 100644 index 4f49c2dff..000000000 --- a/lang_data/en/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -WordNet Release 3.0 This software and database is being provided to you, the -LICENSEE, by Princeton University under the following license. By obtaining, -using and/or copying this software and database, you agree that you have read, -understood, and will comply with these terms and conditions.: Permission to -use, copy, modify and distribute this software and database and its -documentation for any purpose and without fee or royalty is hereby granted, -provided that you agree to comply with the following copyright notice and -statements, including the disclaimer, and that the same appear on ALL copies of -the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton -University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" -AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO -REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY -PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR -DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS -OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used -in advertising or publicity pertaining to distribution of the software and/or -database. Title to copyright in this software, database and any associated -documentation shall at all times remain with Princeton University and LICENSEE -agrees to preserve same. diff --git a/lang_data/en/gazetteer.json b/lang_data/en/gazetteer.json deleted file mode 100644 index d52fed839..000000000 --- a/lang_data/en/gazetteer.json +++ /dev/null @@ -1,194 +0,0 @@ -{ - "Reddit": [ - "PRODUCT", - {}, - [ - [{"lower": "reddit"}] - ] - ], - "SeptemberElevenAttacks": [ - "EVENT", - {}, - [ - [ - {"orth": "9/11"} - ], - [ - {"lower": "september"}, - {"orth": "11"} - ] - ] - ], - "Linux": [ - "PRODUCT", - {}, - [ - [{"lower": "linux"}] - ] - ], - "Haskell": [ - "PRODUCT", - {}, - [ - [{"lower": "haskell"}] - ] - ], - "HaskellCurry": [ - "PERSON", - {}, - [ - [ - {"lower": "haskell"}, - {"lower": "curry"} - ] - ] - ], - "Javascript": [ - "PRODUCT", - {}, - [ - [{"lower": "javascript"}] - ] - ], - "CSS": [ - "PRODUCT", - {}, - [ - [{"lower": "css"}], - [{"lower": "css3"}] - ] - ], - "displaCy": [ - "PRODUCT", - {}, - [ - [{"lower": "displacy"}] - ] - ], - "spaCy": [ - "PRODUCT", - {}, - [ - [{"orth": "spaCy"}] - ] - ], - - "HTML": [ - "PRODUCT", - {}, - [ - [{"lower": "html"}], - [{"lower": "html5"}] - ] - ], - "Python": [ - "PRODUCT", - {}, - [ - [{"orth": "Python"}] - ] - ], - "Ruby": [ - "PRODUCT", - {}, - [ - [{"orth": "Ruby"}] - ] - ], - "Digg": [ - "PRODUCT", - {}, - [ - [{"lower": "digg"}] - ] - ], - "FoxNews": [ - "ORG", - {}, - [ - [{"orth": "Fox"}], - [{"orth": "News"}] - ] - ], - "Google": [ - "ORG", - {}, - [ - [{"lower": "google"}] - ] - ], - "Mac": [ - "PRODUCT", - {}, - [ - [{"lower": "mac"}] - ] - ], - "Wikipedia": [ - "PRODUCT", - {}, - [ - [{"lower": "wikipedia"}] - ] - ], - "Windows": [ - "PRODUCT", - {}, - [ - [{"orth": "Windows"}] - ] - ], - "Dell": [ - "ORG", - {}, - [ - [{"lower": "dell"}] - ] - ], - "Facebook": [ - "ORG", - {}, - [ - [{"lower": "facebook"}] - ] - ], - "Blizzard": [ - "ORG", - {}, - [ - [{"orth": "Blizzard"}] - ] - ], - "Ubuntu": [ - "ORG", - {}, - [ - [{"orth": "Ubuntu"}] - ] - ], - "Youtube": [ - "PRODUCT", - {}, - [ - [{"lower": "youtube"}] - ] - ], - "false_positives": [ - null, - {}, - [ - [{"orth": "Shit"}], - [{"orth": "Weed"}], - [{"orth": "Cool"}], - [{"orth": "Btw"}], - [{"orth": "Bah"}], - [{"orth": "Bullshit"}], - [{"orth": "Lol"}], - [{"orth": "Yo"}, {"lower": "dawg"}], - [{"orth": "Yay"}], - [{"orth": "Ahh"}], - [{"orth": "Yea"}], - [{"orth": "Bah"}] - ] - ] -} diff --git a/lang_data/en/generate_specials.py b/lang_data/en/generate_specials.py deleted file mode 100644 index a48f8f69d..000000000 --- a/lang_data/en/generate_specials.py +++ /dev/null @@ -1,422 +0,0 @@ -# -#- coding: utf-8 -*- -import json - -contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"} - -# contains the lemmas, parts of speech, number, and tenspect of -# potential tokens generated after splitting contractions off -token_properties = { - - "ai": {"L": "be", "pos": "VBP", "number": 2}, - "are": {"L": "be", "pos": "VBP", "number": 2}, - "ca": {"L": "can", "pos": "MD"}, - "can": {"L": "can", "pos": "MD"}, - "could": {"pos": "MD", "L": "could"}, - "'d": {"L": "would", "pos": "MD"}, - "did": {"L": "do", "pos": "VBD"}, - "do": {"L": "do"}, - "does": {"L": "do", "pos": "VBZ"}, - "had": {"L": "have", "pos": "VBD"}, - "has": {"L": "have", "pos": "VBZ"}, - "have": {"pos": "VB"}, - "he": {"L": "-PRON-", "pos": "PRP"}, - "how": {}, - "i": {"L": "-PRON-", "pos": "PRP"}, - "is": {"L": "be", "pos": "VBZ"}, - "it": {"L": "-PRON-", "pos": "PRP"}, - "'ll": {"L": "will", "pos": "MD"}, - "'m": {"L": "be", "pos": "VBP", "number": 1, "tenspect": 1}, - "'ma": {}, - "might": {}, - "must": {}, - "need": {}, - "not": {"L": "not", "pos": "RB"}, - "'nt": {"L": "not", "pos": "RB"}, - "n't": {"L": "not", "pos": "RB"}, - "'re": {"L": "be", "pos": "VBZ"}, - "'s": {}, # no POS or lemma for s? - "sha": {"L": "shall", "pos": "MD"}, - "she": {"L": "-PRON-", "pos": "PRP"}, - "should": {}, - "that": {}, - "there": {}, - "they": {"L": "-PRON-", "pos": "PRP"}, - "was": {}, - "we": {"L": "-PRON-", "pos": "PRP"}, - "were": {}, - "what": {}, - "when": {}, - "where": {}, - "who": {}, - "why": {}, - "wo": {}, - "would": {}, - "you": {"L": "-PRON-", "pos": "PRP"}, - "'ve": {"L": "have", "pos": "VB"} -} - -# contains starting tokens with their potential contractions -# each potential contraction has a list of exceptions - # lower - don't generate the lowercase version - # upper - don't generate the uppercase version - # contrLower - don't generate the lowercase version with apostrophe (') removed - # contrUpper - dont' generate the uppercase version with apostrophe (') removed -# for example, we don't want to create the word "hell" or "Hell" from "he" + "'ll" so -# we add "contrLower" and "contrUpper" to the exceptions list -starting_tokens = { - - "ai": {"n't": []}, - "are": {"n't": []}, - "ca": {"n't": []}, - "can": {"not": []}, - "could": {"'ve": [], "n't": [], "n't've": []}, - "did": {"n't": []}, - "does": {"n't": []}, - "do": {"n't": []}, - "had": {"n't": [], "n't've": []}, - "has": {"n't": []}, - "have": {"n't": []}, - "he": {"'d": [], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []}, - "how": {"'d": [], "'ll": [], "'s": []}, - "i": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'m": [], "'ma": [], "'ve": []}, - "is": {"n't": []}, - "it": {"'d": [], "'d've": [], "'ll": [], "'s": ["contrLower", "contrUpper"]}, - "might": {"n't": [], "n't've": [], "'ve": []}, - "must": {"n't": [], "'ve": []}, - "need": {"n't": []}, - "not": {"'ve": []}, - "sha": {"n't": []}, - "she": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'s": []}, - "should": {"'ve": [], "n't": [], "n't've": []}, - "that": {"'s": []}, - "there": {"'d": [], "'d've": [], "'s": ["contrLower", "contrUpper"], "'ll": []}, - "they": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []}, - "was": {"n't": []}, - "we": {"'d": ["contrLower", "contrUpper"], "'d've": [], "'ll": ["contrLower", "contrUpper"], "'re": ["contrLower", "contrUpper"], "'ve": []}, - "were": {"n't": []}, - "what": {"'ll": [], "'re": [], "'s": [], "'ve": []}, - "when": {"'s": []}, - "where": {"'d": [], "'s": [], "'ve": []}, - "who": {"'d": [], "'ll": [], "'re": ["contrLower", "contrUpper"], "'s": [], "'ve": []}, - "why": {"'ll": [], "'re": [], "'s": []}, - "wo": {"n't": []}, - "would": {"'ve": [], "n't": [], "n't've": []}, - "you": {"'d": [], "'d've": [], "'ll": [], "'re": [], "'ve": []} - - } - -# other specials that don't really have contractions -# so they are hardcoded -hardcoded_specials = { - "let's": [{"F": "let"}, {"F": "'s", "L": "us"}], - "Let's": [{"F": "Let"}, {"F": "'s", "L": "us"}], - - "'s": [{"F": "'s", "L": "'s"}], - - "'S": [{"F": "'S", "L": "'s"}], - u"\u2018s": [{"F": u"\u2018s", "L": "'s"}], - u"\u2018S": [{"F": u"\u2018S", "L": "'s"}], - - "'em": [{"F": "'em"}], - - "'ol": [{"F": "'ol"}], - - "vs.": [{"F": "vs."}], - - "Ms.": [{"F": "Ms."}], - "Mr.": [{"F": "Mr."}], - "Dr.": [{"F": "Dr."}], - "Mrs.": [{"F": "Mrs."}], - "Messrs.": [{"F": "Messrs."}], - "Gov.": [{"F": "Gov."}], - "Gen.": [{"F": "Gen."}], - - "Mt.": [{"F": "Mt.", "L": "Mount"}], - - "''": [{"F": "''"}], - - "—": [{"F": "—", "L": "--", "pos": ":"}], - - "Corp.": [{"F": "Corp."}], - "Inc.": [{"F": "Inc."}], - "Co.": [{"F": "Co."}], - "co.": [{"F": "co."}], - "Ltd.": [{"F": "Ltd."}], - "Bros.": [{"F": "Bros."}], - - "Rep.": [{"F": "Rep."}], - "Sen.": [{"F": "Sen."}], - "Jr.": [{"F": "Jr."}], - "Rev.": [{"F": "Rev."}], - "Adm.": [{"F": "Adm."}], - "St.": [{"F": "St."}], - - "a.m.": [{"F": "a.m."}], - "p.m.": [{"F": "p.m."}], - - "1a.m.": [{"F": "1"}, {"F": "a.m."}], - "2a.m.": [{"F": "2"}, {"F": "a.m."}], - "3a.m.": [{"F": "3"}, {"F": "a.m."}], - "4a.m.": [{"F": "4"}, {"F": "a.m."}], - "5a.m.": [{"F": "5"}, {"F": "a.m."}], - "6a.m.": [{"F": "6"}, {"F": "a.m."}], - "7a.m.": [{"F": "7"}, {"F": "a.m."}], - "8a.m.": [{"F": "8"}, {"F": "a.m."}], - "9a.m.": [{"F": "9"}, {"F": "a.m."}], - "10a.m.": [{"F": "10"}, {"F": "a.m."}], - "11a.m.": [{"F": "11"}, {"F": "a.m."}], - "12a.m.": [{"F": "12"}, {"F": "a.m."}], - "1am": [{"F": "1"}, {"F": "am", "L": "a.m."}], - "2am": [{"F": "2"}, {"F": "am", "L": "a.m."}], - "3am": [{"F": "3"}, {"F": "am", "L": "a.m."}], - "4am": [{"F": "4"}, {"F": "am", "L": "a.m."}], - "5am": [{"F": "5"}, {"F": "am", "L": "a.m."}], - "6am": [{"F": "6"}, {"F": "am", "L": "a.m."}], - "7am": [{"F": "7"}, {"F": "am", "L": "a.m."}], - "8am": [{"F": "8"}, {"F": "am", "L": "a.m."}], - "9am": [{"F": "9"}, {"F": "am", "L": "a.m."}], - "10am": [{"F": "10"}, {"F": "am", "L": "a.m."}], - "11am": [{"F": "11"}, {"F": "am", "L": "a.m."}], - "12am": [{"F": "12"}, {"F": "am", "L": "a.m."}], - - - "p.m.": [{"F": "p.m."}], - "1p.m.": [{"F": "1"}, {"F": "p.m."}], - "2p.m.": [{"F": "2"}, {"F": "p.m."}], - "3p.m.": [{"F": "3"}, {"F": "p.m."}], - "4p.m.": [{"F": "4"}, {"F": "p.m."}], - "5p.m.": [{"F": "5"}, {"F": "p.m."}], - "6p.m.": [{"F": "6"}, {"F": "p.m."}], - "7p.m.": [{"F": "7"}, {"F": "p.m."}], - "8p.m.": [{"F": "8"}, {"F": "p.m."}], - "9p.m.": [{"F": "9"}, {"F": "p.m."}], - "10p.m.": [{"F": "10"}, {"F": "p.m."}], - "11p.m.": [{"F": "11"}, {"F": "p.m."}], - "12p.m.": [{"F": "12"}, {"F": "p.m."}], - "1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}], - "2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}], - "3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}], - "4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}], - "5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}], - "6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}], - "7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}], - "8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}], - "9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}], - "10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}], - "11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}], - "12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}], - - "Jan.": [{"F": "Jan."}], - "Feb.": [{"F": "Feb."}], - "Mar.": [{"F": "Mar."}], - "Apr.": [{"F": "Apr."}], - "May.": [{"F": "May."}], - "Jun.": [{"F": "Jun."}], - "Jul.": [{"F": "Jul."}], - "Aug.": [{"F": "Aug."}], - "Sep.": [{"F": "Sep."}], - "Sept.": [{"F": "Sept."}], - "Oct.": [{"F": "Oct."}], - "Nov.": [{"F": "Nov."}], - "Dec.": [{"F": "Dec."}], - - "Ala.": [{"F": "Ala."}], - "Ariz.": [{"F": "Ariz."}], - "Ark.": [{"F": "Ark."}], - "Calif.": [{"F": "Calif."}], - "Colo.": [{"F": "Colo."}], - "Conn.": [{"F": "Conn."}], - "Del.": [{"F": "Del."}], - "D.C.": [{"F": "D.C."}], - "Fla.": [{"F": "Fla."}], - "Ga.": [{"F": "Ga."}], - "Ill.": [{"F": "Ill."}], - "Ind.": [{"F": "Ind."}], - "Kans.": [{"F": "Kans."}], - "Kan.": [{"F": "Kan."}], - "Ky.": [{"F": "Ky."}], - "La.": [{"F": "La."}], - "Md.": [{"F": "Md."}], - "Mass.": [{"F": "Mass."}], - "Mich.": [{"F": "Mich."}], - "Minn.": [{"F": "Minn."}], - "Miss.": [{"F": "Miss."}], - "Mo.": [{"F": "Mo."}], - "Mont.": [{"F": "Mont."}], - "Nebr.": [{"F": "Nebr."}], - "Neb.": [{"F": "Neb."}], - "Nev.": [{"F": "Nev."}], - "N.H.": [{"F": "N.H."}], - "N.J.": [{"F": "N.J."}], - "N.M.": [{"F": "N.M."}], - "N.Y.": [{"F": "N.Y."}], - "N.C.": [{"F": "N.C."}], - "N.D.": [{"F": "N.D."}], - "Okla.": [{"F": "Okla."}], - "Ore.": [{"F": "Ore."}], - "Pa.": [{"F": "Pa."}], - "Tenn.": [{"F": "Tenn."}], - "Va.": [{"F": "Va."}], - "Wash.": [{"F": "Wash."}], - "Wis.": [{"F": "Wis."}], - - ":)": [{"F": ":)"}], - "<3": [{"F": "<3"}], - ";)": [{"F": ";)"}], - "(:": [{"F": "(:"}], - ":(": [{"F": ":("}], - "-_-": [{"F": "-_-"}], - "=)": [{"F": "=)"}], - ":/": [{"F": ":/"}], - ":>": [{"F": ":>"}], - ";-)": [{"F": ";-)"}], - ":Y": [{"F": ":Y"}], - ":P": [{"F": ":P"}], - ":-P": [{"F": ":-P"}], - ":3": [{"F": ":3"}], - "=3": [{"F": "=3"}], - "xD": [{"F": "xD"}], - "^_^": [{"F": "^_^"}], - "=]": [{"F": "=]"}], - "=D": [{"F": "=D"}], - "<333": [{"F": "<333"}], - ":))": [{"F": ":))"}], - ":0": [{"F": ":0"}], - "-__-": [{"F": "-__-"}], - "xDD": [{"F": "xDD"}], - "o_o": [{"F": "o_o"}], - "o_O": [{"F": "o_O"}], - "V_V": [{"F": "V_V"}], - "=[[": [{"F": "=[["}], - "<33": [{"F": "<33"}], - ";p": [{"F": ";p"}], - ";D": [{"F": ";D"}], - ";-p": [{"F": ";-p"}], - ";(": [{"F": ";("}], - ":p": [{"F": ":p"}], - ":]": [{"F": ":]"}], - ":O": [{"F": ":O"}], - ":-/": [{"F": ":-/"}], - ":-)": [{"F": ":-)"}], - ":(((": [{"F": ":((("}], - ":((": [{"F": ":(("}], - ":')": [{"F": ":')"}], - "(^_^)": [{"F": "(^_^)"}], - "(=": [{"F": "(="}], - "o.O": [{"F": "o.O"}], - "\")": [{"F": "\")"}], - "a.": [{"F": "a."}], - "b.": [{"F": "b."}], - "c.": [{"F": "c."}], - "d.": [{"F": "d."}], - "e.": [{"F": "e."}], - "f.": [{"F": "f."}], - "g.": [{"F": "g."}], - "h.": [{"F": "h."}], - "i.": [{"F": "i."}], - "j.": [{"F": "j."}], - "k.": [{"F": "k."}], - "l.": [{"F": "l."}], - "m.": [{"F": "m."}], - "n.": [{"F": "n."}], - "o.": [{"F": "o."}], - "p.": [{"F": "p."}], - "q.": [{"F": "q."}], - "r.": [{"F": "r."}], - "s.": [{"F": "s."}], - "t.": [{"F": "t."}], - "u.": [{"F": "u."}], - "v.": [{"F": "v."}], - "w.": [{"F": "w."}], - "x.": [{"F": "x."}], - "y.": [{"F": "y."}], - "z.": [{"F": "z."}], - - "i.e.": [{"F": "i.e."}], - "I.e.": [{"F": "I.e."}], - "I.E.": [{"F": "I.E."}], - "e.g.": [{"F": "e.g."}], - "E.g.": [{"F": "E.g."}], - "E.G.": [{"F": "E.G."}], - "\n": [{"F": "\n", "pos": "SP"}], - "\t": [{"F": "\t", "pos": "SP"}], - " ": [{"F": " ", "pos": "SP"}], - u"\u00a0": [{"F": u"\u00a0", "pos": "SP", "L": " "}] - -} - -def get_double_contractions(ending): - endings = [] - - ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions]) - - while ends_with_contraction: - for contraction in contractions: - if ending.endswith(contraction): - endings.append(contraction) - ending = ending.rstrip(contraction) - ends_with_contraction = any([ending.endswith(contraction) for contraction in contractions]) - - endings.reverse() # reverse because the last ending is put in the list first - return endings - -def get_token_properties(token, capitalize=False, remove_contractions=False): - props = dict(token_properties.get(token)) # ensure we copy the dict so we can add the "F" prop - if capitalize: - token = token.capitalize() - if remove_contractions: - token = token.replace("'", "") - - props["F"] = token - return props - -def create_entry(token, endings, capitalize=False, remove_contractions=False): - - properties = [] - properties.append(get_token_properties(token, capitalize=capitalize, remove_contractions=remove_contractions)) - for e in endings: - properties.append(get_token_properties(e, remove_contractions=remove_contractions)) - return properties - -def generate_specials(): - - specials = {} - - for token in starting_tokens: - possible_endings = starting_tokens[token] - for ending in possible_endings: - - endings = [] - if ending.count("'") > 1: - endings.extend(get_double_contractions(ending)) - else: - endings.append(ending) - - exceptions = possible_endings[ending] - - if "lower" not in exceptions: - special = token + ending - specials[special] = create_entry(token, endings) - - if "upper" not in exceptions: - special = token.capitalize() + ending - specials[special] = create_entry(token, endings, capitalize=True) - - if "contrLower" not in exceptions: - special = token + ending.replace("'", "") - specials[special] = create_entry(token, endings, remove_contractions=True) - - if "contrUpper" not in exceptions: - special = token.capitalize() + ending.replace("'", "") - specials[special] = create_entry(token, endings, capitalize=True, remove_contractions=True) - - # add in hardcoded specials - specials = dict(specials, **hardcoded_specials) - - return specials - -if __name__ == "__main__": - specials = generate_specials() - with open("specials.json", "w") as file_: - file_.write(json.dumps(specials, indent=2)) - diff --git a/lang_data/en/infix.txt b/lang_data/en/infix.txt deleted file mode 100644 index b9b0230a7..000000000 --- a/lang_data/en/infix.txt +++ /dev/null @@ -1,6 +0,0 @@ -\.\.\.+ -(?<=[a-z])\.(?=[A-Z]) -(?<=[a-zA-Z])-(?=[a-zA-z]) -(?<=[a-zA-Z])--(?=[a-zA-z]) -(?<=[0-9])-(?=[0-9]) -(?<=[A-Za-z]),(?=[A-Za-z]) diff --git a/lang_data/en/lemma_rules.json b/lang_data/en/lemma_rules.json deleted file mode 100644 index 1e76436cd..000000000 --- a/lang_data/en/lemma_rules.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "noun": [ - ["s", ""], - ["ses", "s"], - ["ves", "f"], - ["xes", "x"], - ["zes", "z"], - ["ches", "ch"], - ["shes", "sh"], - ["men", "man"], - ["ies", "y"] - ], - - "verb": [ - ["s", ""], - ["ies", "y"], - ["es", "e"], - ["es", ""], - ["ed", "e"], - ["ed", ""], - ["ing", "e"], - ["ing", ""] - ], - - "adj": [ - ["er", ""], - ["est", ""], - ["er", "e"], - ["est", "e"] - ], - - "punct": [ - ["“", "\""], - ["”", "\""], - ["\u2018", "'"], - ["\u2019", "'"] - ] -} diff --git a/lang_data/en/morphs.json b/lang_data/en/morphs.json deleted file mode 100644 index 059381b27..000000000 --- a/lang_data/en/morphs.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "PRP": { - "I": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Nom"}, - "me": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc"}, - "you": {"L": "-PRON-", "PronType": "Prs", "Person": "Two"}, - "he": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Nom"}, - "him": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Case": "Acc"}, - "she": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Nom"}, - "her": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Case": "Acc"}, - "it": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut"}, - "we": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Nom"}, - "us": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc"}, - "they": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Nom"}, - "them": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc"}, - - "mine": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Reflex": "Yes"}, - "yours": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Poss": "Yes", "Reflex": "Yes"}, - "his": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Poss": "Yes", "Reflex": "Yes"}, - "hers": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem", "Poss": "Yes", "Reflex": "Yes"}, - "its": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut", "Poss": "Yes", "Reflex": "Yes"}, - "ours": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"}, - "yours": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"}, - "theirs": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Poss": "Yes", "Reflex": "Yes"}, - - "myself": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Sing", "Case": "Acc", "Reflex": "Yes"}, - "yourself": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Case": "Acc", "Reflex": "Yes"}, - "himself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Masc", "Reflex": "Yes"}, - "herself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Fem", "Reflex": "Yes"}, - "itself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Gender": "Neut", "Reflex": "Yes"}, - "themself": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Sing", "Case": "Acc", "Reflex": "Yes"}, - "ourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "One", "Number": "Plur", "Case": "Acc", "Reflex": "Yes"}, - "yourselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Two", "Case": "Acc", "Reflex": "Yes"}, - "themselves": {"L": "-PRON-", "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc", "Reflex": "Yes"} - - }, - - "PRP$": { - "my": {"L": "-PRON-", "Person": "One", "Number": "Sing", "PronType": "Prs", "Poss": "Yes"}, - "your": {"L": "-PRON-", "Person": "Two", "PronType": "Prs", "Poss": "Yes"}, - "his": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Masc", "PronType": "Prs", "Poss": "Yes"}, - "her": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Fem", "PronType": "Prs", "Poss": "Yes"}, - "its": {"L": "-PRON-", "Person": "Three", "Number": "Sing", "Gender": "Neut", "PronType": "Prs", "Poss": "Yes"}, - "our": {"L": "-PRON-", "Person": "One", "Number": "Plur", "PronType": "Prs", "Poss": "Yes"}, - "their": {"L": "-PRON-", "Person": "Three", "Number": "Plur", "PronType": "Prs", "Poss": "Yes"} - }, - - "VBZ": { - "am": {"L": "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, - "are": {"L": "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, - "is": {"L": "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, - }, - "VBP": { - "are": {"L": "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} - }, - "VBD": { - "was": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, - "were": {"L": "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} - } -} diff --git a/lang_data/en/prefix.txt b/lang_data/en/prefix.txt deleted file mode 100644 index 48c4fc549..000000000 --- a/lang_data/en/prefix.txt +++ /dev/null @@ -1,21 +0,0 @@ -, -" -( -[ -{ -* -< -$ -£ -“ -' -`` -` -# -US$ -C$ -A$ -a- -‘ -.... -... diff --git a/lang_data/en/specials.json b/lang_data/en/specials.json deleted file mode 100644 index 3600717ad..000000000 --- a/lang_data/en/specials.json +++ /dev/null @@ -1,4924 +0,0 @@ -{ - "d.": [ - { - "F": "d." - } - ], - "Theydve": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - ":/": [ - { - "F": ":/" - } - ], - "shouldn't've": [ - { - "F": "should" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "There'll": [ - { - "F": "There" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "E.G.": [ - { - "F": "E.G." - } - ], - "howll": [ - { - "F": "how" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "6a.m.": [ - { - "F": "6" - }, - { - "F": "a.m." - } - ], - "Ore.": [ - { - "F": "Ore." - } - ], - "Hadn't've": [ - { - "F": "Had", - "L": "have", - "pos": "VBD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - ":>": [ - { - "F": ":>" - } - ], - "3p.m.": [ - { - "F": "3" - }, - { - "F": "p.m." - } - ], - "who'll": [ - { - "F": "who" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "5a.m.": [ - { - "F": "5" - }, - { - "F": "a.m." - } - ], - ":(": [ - { - "F": ":(" - } - ], - ":0": [ - { - "F": ":0" - } - ], - "10a.m.": [ - { - "F": "10" - }, - { - "F": "a.m." - } - ], - "aint": [ - { - "F": "ai", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - " ": [ - { - "pos": "SP", - "F": " " - } - ], - "Dec.": [ - { - "F": "Dec." - } - ], - "Shouldnt": [ - { - "F": "Should" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Ky.": [ - { - "F": "Ky." - } - ], - "when's": [ - { - "F": "when" - }, - { - "F": "'s" - } - ], - "Didnt": [ - { - "F": "Did", - "L": "do", - "pos": "VBD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "itll": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "Who're": [ - { - "F": "Who" - }, - { - "F": "'re" - } - ], - "=D": [ - { - "F": "=D" - } - ], - "Ain't": [ - { - "F": "Ai", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Can't": [ - { - "F": "Ca", - "L": "can", - "pos": "MD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Whyre": [ - { - "F": "Why" - }, - { - "F": "re" - } - ], - "Aren't": [ - { - "F": "Are", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Neednt": [ - { - "F": "Need" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "should've": [ - { - "F": "should" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "shouldn't": [ - { - "F": "should" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Idve": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "weve": [ - { - "F": "we" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Va.": [ - { - "F": "Va." - } - ], - "D.C.": [ - { - "F": "D.C." - } - ], - "3am": [ - { - "F": "3" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "Ive": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Md.": [ - { - "F": "Md." - } - ], - ";D": [ - { - "F": ";D" - } - ], - "Mrs.": [ - { - "F": "Mrs." - } - ], - "Minn.": [ - { - "F": "Minn." - } - ], - "they'd": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Youdve": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "theyve": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Weren't": [ - { - "F": "Were" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "werent": [ - { - "F": "were" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "whyre": [ - { - "F": "why" - }, - { - "F": "re" - } - ], - "g.": [ - { - "F": "g." - } - ], - "I'm": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "pos": "VBP", - "F": "'m", - "tenspect": 1, - "number": 1, - "L": "be" - } - ], - ":p": [ - { - "F": ":p" - } - ], - "She'd've": [ - { - "L": "-PRON-", - "F": "She" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "not've": [ - { - "F": "not", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "we'll": [ - { - "F": "we" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - ":O": [ - { - "F": ":O" - } - ], - "<33": [ - { - "F": "<33" - } - ], - "Don't": [ - { - "L": "do", - "F": "Do" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Whyll": [ - { - "F": "Why" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "''": [ - { - "F": "''" - } - ], - "they've": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "t.": [ - { - "F": "t." - } - ], - "wasn't": [ - { - "F": "was" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "could've": [ - { - "pos": "MD", - "F": "could" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "what've": [ - { - "F": "what" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "havent": [ - { - "pos": "VB", - "F": "have" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Who've": [ - { - "F": "Who" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "11am": [ - { - "F": "11" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "Shan't": [ - { - "F": "Sha" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "i'll": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "i.e.": [ - { - "F": "i.e." - } - ], - "you'd": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "w.": [ - { - "F": "w." - } - ], - "whens": [ - { - "F": "when" - }, - { - "F": "s" - } - ], - "whys": [ - { - "F": "why" - }, - { - "F": "s" - } - ], - "6pm": [ - { - "F": "6" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "4p.m.": [ - { - "F": "4" - }, - { - "F": "p.m." - } - ], - "Whereve": [ - { - "F": "Where" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "o_o": [ - { - "F": "o_o" - } - ], - "Mo.": [ - { - "F": "Mo." - } - ], - "Kan.": [ - { - "F": "Kan." - } - ], - "\u00a0": [ - { - "pos": "SP", - "L": " ", - "F": "\u00a0" - } - ], - "there'd": [ - { - "F": "there" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "N.H.": [ - { - "F": "N.H." - } - ], - "(^_^)": [ - { - "F": "(^_^)" - } - ], - "Mont.": [ - { - "F": "Mont." - } - ], - "hadn't've": [ - { - "F": "had", - "L": "have", - "pos": "VBD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "whatll": [ - { - "F": "what" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "wouldn't've": [ - { - "F": "would" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "there's": [ - { - "F": "there" - }, - { - "F": "'s" - } - ], - "2pm": [ - { - "F": "2" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "Who'll": [ - { - "F": "Who" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "o_O": [ - { - "F": "o_O" - } - ], - "Nev.": [ - { - "F": "Nev." - } - ], - "youll": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "wouldve": [ - { - "F": "would" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Nov.": [ - { - "F": "Nov." - } - ], - "z.": [ - { - "F": "z." - } - ], - "xDD": [ - { - "F": "xDD" - } - ], - "Sen.": [ - { - "F": "Sen." - } - ], - "Wouldnt": [ - { - "F": "Would" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Thered": [ - { - "F": "There" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Youre": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "re" - } - ], - "Couldn't've": [ - { - "pos": "MD", - "F": "Could" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "who're": [ - { - "F": "who" - }, - { - "F": "'re" - } - ], - "Whys": [ - { - "F": "Why" - }, - { - "F": "s" - } - ], - "mightn't've": [ - { - "F": "might" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Wholl": [ - { - "F": "Who" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "hadn't": [ - { - "F": "had", - "L": "have", - "pos": "VBD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Havent": [ - { - "pos": "VB", - "F": "Have" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Whatve": [ - { - "F": "What" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - ":)": [ - { - "F": ":)" - } - ], - "o.O": [ - { - "F": "o.O" - } - ], - "Thats": [ - { - "F": "That" - }, - { - "F": "s" - } - ], - ":((": [ - { - "F": ":((" - } - ], - "Gov.": [ - { - "F": "Gov." - } - ], - "Howll": [ - { - "F": "How" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "p.": [ - { - "F": "p." - } - ], - "wouldn't": [ - { - "F": "would" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "9pm": [ - { - "F": "9" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "You'll": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Ala.": [ - { - "F": "Ala." - } - ], - "12am": [ - { - "F": "12" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "=]": [ - { - "F": "=]" - } - ], - "Cant": [ - { - "F": "Ca", - "L": "can", - "pos": "MD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "i'd": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "a.m.": [ - { - "F": "a.m." - } - ], - "weren't": [ - { - "F": "were" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "would've": [ - { - "F": "would" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "i'm": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "pos": "VBP", - "F": "'m", - "tenspect": 1, - "number": 1, - "L": "be" - } - ], - "why'll": [ - { - "F": "why" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "we'd've": [ - { - "F": "we" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Shouldve": [ - { - "F": "Should" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "can't": [ - { - "F": "ca", - "L": "can", - "pos": "MD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "thats": [ - { - "F": "that" - }, - { - "F": "s" - } - ], - "1p.m.": [ - { - "F": "1" - }, - { - "F": "p.m." - } - ], - "12a.m.": [ - { - "F": "12" - }, - { - "F": "a.m." - } - ], - "Hes": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "s" - } - ], - "Needn't": [ - { - "F": "Need" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "It's": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "'s" - } - ], - "St.": [ - { - "F": "St." - } - ], - "Why're": [ - { - "F": "Why" - }, - { - "F": "'re" - } - ], - ":(((": [ - { - "F": ":(((" - } - ], - "Hed": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Mt.": [ - { - "L": "Mount", - "F": "Mt." - } - ], - "couldn't": [ - { - "pos": "MD", - "F": "could" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "What've": [ - { - "F": "What" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "4a.m.": [ - { - "F": "4" - }, - { - "F": "a.m." - } - ], - "Ind.": [ - { - "F": "Ind." - } - ], - "It'd": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "<3": [ - { - "F": "<3" - } - ], - "theydve": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "aren't": [ - { - "F": "are", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Mightn't": [ - { - "F": "Might" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "'S": [ - { - "L": "'s", - "F": "'S" - } - ], - "I've": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Whered": [ - { - "F": "Where" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Itdve": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "I'ma": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "'ma" - } - ], - "whos": [ - { - "F": "who" - }, - { - "F": "s" - } - ], - "They'd": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "What'll": [ - { - "F": "What" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - ":Y": [ - { - "F": ":Y" - } - ], - "You've": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Mustve": [ - { - "F": "Must" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "whod": [ - { - "F": "who" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "mightntve": [ - { - "F": "might" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "I'd've": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Must've": [ - { - "F": "Must" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "it'd": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Ark.": [ - { - "F": "Ark." - } - ], - "Wis.": [ - { - "F": "Wis." - } - ], - "6p.m.": [ - { - "F": "6" - }, - { - "F": "p.m." - } - ], - "what're": [ - { - "F": "what" - }, - { - "F": "'re" - } - ], - "N.C.": [ - { - "F": "N.C." - } - ], - "Wasn't": [ - { - "F": "Was" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "what's": [ - { - "F": "what" - }, - { - "F": "'s" - } - ], - "he'd've": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Jan.": [ - { - "F": "Jan." - } - ], - "She'd": [ - { - "L": "-PRON-", - "F": "She" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "shedve": [ - { - "L": "-PRON-", - "F": "she" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Tenn.": [ - { - "F": "Tenn." - } - ], - "ain't": [ - { - "F": "ai", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Wash.": [ - { - "F": "Wash." - } - ], - "She's": [ - { - "L": "-PRON-", - "F": "She" - }, - { - "F": "'s" - } - ], - "i'd've": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "2a.m.": [ - { - "F": "2" - }, - { - "F": "a.m." - } - ], - "We'd've": [ - { - "F": "We" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "must've": [ - { - "F": "must" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "That's": [ - { - "F": "That" - }, - { - "F": "'s" - } - ], - "Sept.": [ - { - "F": "Sept." - } - ], - "whatre": [ - { - "F": "what" - }, - { - "F": "re" - } - ], - "you'd've": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Dont": [ - { - "L": "do", - "F": "Do" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "i.": [ - { - "F": "i." - } - ], - "Jun.": [ - { - "F": "Jun." - } - ], - "thered": [ - { - "F": "there" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Youd": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "couldn't've": [ - { - "pos": "MD", - "F": "could" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Whens": [ - { - "F": "When" - }, - { - "F": "s" - } - ], - "8a.m.": [ - { - "F": "8" - }, - { - "F": "a.m." - } - ], - "Isnt": [ - { - "F": "Is", - "L": "be", - "pos": "VBZ" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "mightve": [ - { - "F": "might" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "'ol": [ - { - "F": "'ol" - } - ], - "2p.m.": [ - { - "F": "2" - }, - { - "F": "p.m." - } - ], - "9a.m.": [ - { - "F": "9" - }, - { - "F": "a.m." - } - ], - "q.": [ - { - "F": "q." - } - ], - "didnt": [ - { - "F": "did", - "L": "do", - "pos": "VBD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "ive": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "It'd've": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "e.g.": [ - { - "F": "e.g." - } - ], - "\t": [ - { - "pos": "SP", - "F": "\t" - } - ], - "Mich.": [ - { - "F": "Mich." - } - ], - "Itll": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "didn't": [ - { - "F": "did", - "L": "do", - "pos": "VBD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "3pm": [ - { - "F": "3" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "Jul.": [ - { - "F": "Jul." - } - ], - "7pm": [ - { - "F": "7" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "cant": [ - { - "F": "ca", - "L": "can", - "pos": "MD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Miss.": [ - { - "F": "Miss." - } - ], - "im": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "pos": "VBP", - "F": "m", - "tenspect": 1, - "number": 1, - "L": "be" - } - ], - "Ariz.": [ - { - "F": "Ariz." - } - ], - "they'd've": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "f.": [ - { - "F": "f." - } - ], - "Co.": [ - { - "F": "Co." - } - ], - "Hadntve": [ - { - "F": "Had", - "L": "have", - "pos": "VBD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Weve": [ - { - "F": "We" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "1a.m.": [ - { - "F": "1" - }, - { - "F": "a.m." - } - ], - "=3": [ - { - "F": "=3" - } - ], - "Mightnt": [ - { - "F": "Might" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "1pm": [ - { - "F": "1" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "youdve": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Shedve": [ - { - "L": "-PRON-", - "F": "She" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "theyd": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Ill.": [ - { - "F": "Ill." - } - ], - "N.D.": [ - { - "F": "N.D." - } - ], - "Cannot": [ - { - "F": "Can", - "L": "can", - "pos": "MD" - }, - { - "F": "not", - "L": "not", - "pos": "RB" - } - ], - "s.": [ - { - "F": "s." - } - ], - "Hadn't": [ - { - "F": "Had", - "L": "have", - "pos": "VBD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "What're": [ - { - "F": "What" - }, - { - "F": "'re" - } - ], - "He'll": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "wholl": [ - { - "F": "who" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "They're": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "'re" - } - ], - "Neb.": [ - { - "F": "Neb." - } - ], - "shouldnt": [ - { - "F": "should" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "\n": [ - { - "pos": "SP", - "F": "\n" - } - ], - "whered": [ - { - "F": "where" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "7a.m.": [ - { - "F": "7" - }, - { - "F": "a.m." - } - ], - "youve": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "4am": [ - { - "F": "4" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "v.": [ - { - "F": "v." - } - ], - "notve": [ - { - "F": "not", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "couldve": [ - { - "pos": "MD", - "F": "could" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "mustve": [ - { - "F": "must" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Youve": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "therell": [ - { - "F": "there" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "might've": [ - { - "F": "might" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Mustn't": [ - { - "F": "Must" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "wheres": [ - { - "F": "where" - }, - { - "F": "s" - } - ], - "they're": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "'re" - } - ], - "idve": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "hows": [ - { - "F": "how" - }, - { - "F": "s" - } - ], - "Fla.": [ - { - "F": "Fla." - } - ], - "N.M.": [ - { - "F": "N.M." - } - ], - "youre": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "re" - } - ], - "Didn't": [ - { - "F": "Did", - "L": "do", - "pos": "VBD" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Couldve": [ - { - "pos": "MD", - "F": "Could" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "10p.m.": [ - { - "F": "10" - }, - { - "F": "p.m." - } - ], - "Del.": [ - { - "F": "Del." - } - ], - "Oct.": [ - { - "F": "Oct." - } - ], - "Rep.": [ - { - "F": "Rep." - } - ], - "cannot": [ - { - "F": "can", - "L": "can", - "pos": "MD" - }, - { - "F": "not", - "L": "not", - "pos": "RB" - } - ], - "Im": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "pos": "VBP", - "F": "m", - "tenspect": 1, - "number": 1, - "L": "be" - } - ], - "howd": [ - { - "F": "how" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Okla.": [ - { - "F": "Okla." - } - ], - "Feb.": [ - { - "F": "Feb." - } - ], - "you've": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "You're": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "'re" - } - ], - "she'll": [ - { - "L": "-PRON-", - "F": "she" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Theyll": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "don't": [ - { - "L": "do", - "F": "do" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "itd": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - ":-)": [ - { - "F": ":-)" - } - ], - "Hedve": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "isnt": [ - { - "F": "is", - "L": "be", - "pos": "VBZ" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "won't": [ - { - "F": "wo" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "We're": [ - { - "F": "We" - }, - { - "F": "'re" - } - ], - "3a.m.": [ - { - "F": "3" - }, - { - "F": "a.m." - } - ], - "^_^": [ - { - "F": "^_^" - } - ], - "\u2018S": [ - { - "L": "'s", - "F": "\u2018S" - } - ], - "9p.m.": [ - { - "F": "9" - }, - { - "F": "p.m." - } - ], - "dont": [ - { - "L": "do", - "F": "do" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "ima": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "ma" - } - ], - "Let's": [ - { - "F": "Let" - }, - { - "L": "us", - "F": "'s" - } - ], - "he's": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "'s" - } - ], - "we've": [ - { - "F": "we" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "What's": [ - { - "F": "What" - }, - { - "F": "'s" - } - ], - "Who's": [ - { - "F": "Who" - }, - { - "F": "'s" - } - ], - "-__-": [ - { - "F": "-__-" - } - ], - "hedve": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "he'd": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "When's": [ - { - "F": "When" - }, - { - "F": "'s" - } - ], - "Mightn't've": [ - { - "F": "Might" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "We've": [ - { - "F": "We" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "\u2018s": [ - { - "L": "'s", - "F": "\u2018s" - } - ], - "Couldntve": [ - { - "pos": "MD", - "F": "Could" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Who'd": [ - { - "F": "Who" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - ":-/": [ - { - "F": ":-/" - } - ], - "haven't": [ - { - "pos": "VB", - "F": "have" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Gen.": [ - { - "F": "Gen." - } - ], - "(:": [ - { - "F": "(:" - } - ], - "arent": [ - { - "F": "are", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "You'd've": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "c.": [ - { - "F": "c." - } - ], - "(=": [ - { - "F": "(=" - } - ], - "Wouldn't": [ - { - "F": "Would" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "who's": [ - { - "F": "who" - }, - { - "F": "'s" - } - ], - "12p.m.": [ - { - "F": "12" - }, - { - "F": "p.m." - } - ], - "5am": [ - { - "F": "5" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "Mightve": [ - { - "F": "Might" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Theredve": [ - { - "F": "There" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "theredve": [ - { - "F": "there" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Messrs.": [ - { - "F": "Messrs." - } - ], - "who'd": [ - { - "F": "who" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Where's": [ - { - "F": "Where" - }, - { - "F": "'s" - } - ], - "wont": [ - { - "F": "wo" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "she'd've": [ - { - "L": "-PRON-", - "F": "she" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "10pm": [ - { - "F": "10" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "Corp.": [ - { - "F": "Corp." - } - ], - "Aug.": [ - { - "F": "Aug." - } - ], - "-_-": [ - { - "F": "-_-" - } - ], - "y.": [ - { - "F": "y." - } - ], - "Should've": [ - { - "F": "Should" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "11pm": [ - { - "F": "11" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "8am": [ - { - "F": "8" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "theyre": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "re" - } - ], - "l.": [ - { - "F": "l." - } - ], - "Wouldntve": [ - { - "F": "Would" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Ga.": [ - { - "F": "Ga." - } - ], - "1am": [ - { - "F": "1" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "Where've": [ - { - "F": "Where" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "11a.m.": [ - { - "F": "11" - }, - { - "F": "a.m." - } - ], - "mustn't": [ - { - "F": "must" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "isn't": [ - { - "F": "is", - "L": "be", - "pos": "VBZ" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Bros.": [ - { - "F": "Bros." - } - ], - "Aint": [ - { - "F": "Ai", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "why's": [ - { - "F": "why" - }, - { - "F": "'s" - } - ], - "V_V": [ - { - "F": "V_V" - } - ], - ";p": [ - { - "F": ";p" - } - ], - "There'd": [ - { - "F": "There" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "They'll": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "b.": [ - { - "F": "b." - } - ], - "how'll": [ - { - "F": "how" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Wedve": [ - { - "F": "We" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "couldntve": [ - { - "pos": "MD", - "F": "could" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "12pm": [ - { - "F": "12" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "There's": [ - { - "F": "There" - }, - { - "F": "'s" - } - ], - "we'd": [ - { - "F": "we" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Dr.": [ - { - "F": "Dr." - } - ], - "Whod": [ - { - "F": "Who" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - ":-P": [ - { - "F": ":-P" - } - ], - "whatve": [ - { - "F": "what" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Wouldve": [ - { - "F": "Would" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "o.": [ - { - "F": "o." - } - ], - "there'll": [ - { - "F": "there" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - ":]": [ - { - "F": ":]" - } - ], - "needn't": [ - { - "F": "need" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "shouldntve": [ - { - "F": "should" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "why're": [ - { - "F": "why" - }, - { - "F": "'re" - } - ], - "p.m.": [ - { - "F": "p.m." - } - ], - "Doesnt": [ - { - "F": "Does", - "L": "do", - "pos": "VBZ" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "whereve": [ - { - "F": "where" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "they'll": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "I'd": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Might've": [ - { - "F": "Might" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "mightnt": [ - { - "F": "might" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Kans.": [ - { - "F": "Kans." - } - ], - "Not've": [ - { - "F": "Not", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "e.": [ - { - "F": "e." - } - ], - "mightn't": [ - { - "F": "might" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "you're": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "'re" - } - ], - "Mar.": [ - { - "F": "Mar." - } - ], - "They've": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "\")": [ - { - "F": "\")" - } - ], - "what'll": [ - { - "F": "what" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Calif.": [ - { - "F": "Calif." - } - ], - "Could've": [ - { - "pos": "MD", - "F": "Could" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Would've": [ - { - "F": "Would" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - ";)": [ - { - "F": ";)" - } - ], - ";(": [ - { - "F": ";(" - } - ], - "Isn't": [ - { - "F": "Is", - "L": "be", - "pos": "VBZ" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "let's": [ - { - "F": "let" - }, - { - "L": "us", - "F": "'s" - } - ], - "'em": [ - { - "F": "'em" - } - ], - "She'll": [ - { - "L": "-PRON-", - "F": "She" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "I.E.": [ - { - "F": "I.E." - } - ], - "You'd": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "wouldnt": [ - { - "F": "would" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "6am": [ - { - "F": "6" - }, - { - "L": "a.m.", - "F": "am" - } - ], - ":P": [ - { - "F": ":P" - } - ], - "Why'll": [ - { - "F": "Why" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Where'd": [ - { - "F": "Where" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Theyre": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "re" - } - ], - "11p.m.": [ - { - "F": "11" - }, - { - "F": "p.m." - } - ], - "Won't": [ - { - "F": "Wo" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Couldn't": [ - { - "pos": "MD", - "F": "Could" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "it's": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "'s" - } - ], - "r.": [ - { - "F": "r." - } - ], - "it'll": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "They'd've": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Ima": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "ma" - } - ], - "5pm": [ - { - "F": "5" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "10am": [ - { - "F": "10" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "m.": [ - { - "F": "m." - } - ], - "whats": [ - { - "F": "what" - }, - { - "F": "s" - } - ], - "How's": [ - { - "F": "How" - }, - { - "F": "'s" - } - ], - "Sep.": [ - { - "F": "Sep." - } - ], - "Shouldntve": [ - { - "F": "Should" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "youd": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "Whatll": [ - { - "F": "What" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "Wouldn't've": [ - { - "F": "Would" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "How'd": [ - { - "F": "How" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "doesnt": [ - { - "F": "does", - "L": "do", - "pos": "VBZ" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "h.": [ - { - "F": "h." - } - ], - "Shouldn't": [ - { - "F": "Should" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "He'd've": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Mightntve": [ - { - "F": "Might" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "couldnt": [ - { - "pos": "MD", - "F": "could" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Haven't": [ - { - "pos": "VB", - "F": "Have" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "<333": [ - { - "F": "<333" - } - ], - "doesn't": [ - { - "F": "does", - "L": "do", - "pos": "VBZ" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Hasn't": [ - { - "F": "Has" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "how's": [ - { - "F": "how" - }, - { - "F": "'s" - } - ], - "hes": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "s" - } - ], - "=[[": [ - { - "F": "=[[" - } - ], - "xD": [ - { - "F": "xD" - } - ], - "he'll": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "hed": [ - { - "L": "-PRON-", - "F": "he" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "7p.m.": [ - { - "F": "7" - }, - { - "F": "p.m." - } - ], - "how'd": [ - { - "F": "how" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "u.": [ - { - "F": "u." - } - ], - "we're": [ - { - "F": "we" - }, - { - "F": "'re" - } - ], - "vs.": [ - { - "F": "vs." - } - ], - "Hadnt": [ - { - "F": "Had", - "L": "have", - "pos": "VBD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Shant": [ - { - "F": "Sha" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Theyve": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Hows": [ - { - "F": "How" - }, - { - "F": "s" - } - ], - "We'll": [ - { - "F": "We" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "N.Y.": [ - { - "F": "N.Y." - } - ], - "x.": [ - { - "F": "x." - } - ], - "8p.m.": [ - { - "F": "8" - }, - { - "F": "p.m." - } - ], - "i've": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Whove": [ - { - "F": "Who" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "2am": [ - { - "F": "2" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "La.": [ - { - "F": "La." - } - ], - "i'ma": [ - { - "L": "-PRON-", - "F": "i" - }, - { - "F": "'ma" - } - ], - "N.J.": [ - { - "F": "N.J." - } - ], - "Nebr.": [ - { - "F": "Nebr." - } - ], - "Howd": [ - { - "F": "How" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "hadnt": [ - { - "F": "had", - "L": "have", - "pos": "VBD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "shant": [ - { - "F": "sha" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "There'd've": [ - { - "F": "There" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Inc.": [ - { - "F": "Inc." - } - ], - "I'll": [ - { - "L": "-PRON-", - "F": "I" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Why's": [ - { - "F": "Why" - }, - { - "F": "'s" - } - ], - "Adm.": [ - { - "F": "Adm." - } - ], - "Shouldn't've": [ - { - "F": "Should" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "n.": [ - { - "F": "n." - } - ], - "Wasnt": [ - { - "F": "Was" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "whove": [ - { - "F": "who" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - ";-p": [ - { - "F": ";-p" - } - ], - "hasn't": [ - { - "F": "has" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "wouldntve": [ - { - "F": "would" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Wheres": [ - { - "F": "Where" - }, - { - "F": "s" - } - ], - "How'll": [ - { - "F": "How" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "there'd've": [ - { - "F": "there" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Whos": [ - { - "F": "Who" - }, - { - "F": "s" - } - ], - "shes": [ - { - "L": "-PRON-", - "F": "she" - }, - { - "F": "s" - } - ], - "Doesn't": [ - { - "F": "Does", - "L": "do", - "pos": "VBZ" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - "Arent": [ - { - "F": "Are", - "pos": "VBP", - "number": 2, - "L": "be" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Hasnt": [ - { - "F": "Has" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "j.": [ - { - "F": "j." - } - ], - "He's": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "'s" - } - ], - "wasnt": [ - { - "F": "was" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "whyll": [ - { - "F": "why" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "co.": [ - { - "F": "co." - } - ], - "mustnt": [ - { - "F": "must" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "He'd": [ - { - "L": "-PRON-", - "F": "He" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "I.e.": [ - { - "F": "I.e." - } - ], - "Shes": [ - { - "L": "-PRON-", - "F": "She" - }, - { - "F": "s" - } - ], - "where've": [ - { - "F": "where" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Youll": [ - { - "L": "-PRON-", - "F": "You" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "Apr.": [ - { - "F": "Apr." - } - ], - ":')": [ - { - "F": ":')" - } - ], - "Conn.": [ - { - "F": "Conn." - } - ], - "8pm": [ - { - "F": "8" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - "9am": [ - { - "F": "9" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "hasnt": [ - { - "F": "has" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "theyll": [ - { - "L": "-PRON-", - "F": "they" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "it'd've": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "itdve": [ - { - "L": "-PRON-", - "F": "it" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "Jr.": [ - { - "F": "Jr." - } - ], - "Rev.": [ - { - "F": "Rev." - } - ], - "k.": [ - { - "F": "k." - } - ], - "wedve": [ - { - "F": "we" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "=)": [ - { - "F": "=)" - } - ], - "Colo.": [ - { - "F": "Colo." - } - ], - "Mr.": [ - { - "F": "Mr." - } - ], - "Werent": [ - { - "F": "Were" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Therell": [ - { - "F": "There" - }, - { - "F": "ll", - "L": "will", - "pos": "MD" - } - ], - "shan't": [ - { - "F": "sha" - }, - { - "F": "n't", - "L": "not", - "pos": "RB" - } - ], - ";-)": [ - { - "F": ";-)" - } - ], - "Wont": [ - { - "F": "Wo" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "hadntve": [ - { - "F": "had", - "L": "have", - "pos": "VBD" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "who've": [ - { - "F": "who" - }, - { - "F": "'ve", - "L": "have", - "pos": "VB" - } - ], - "Whatre": [ - { - "F": "What" - }, - { - "F": "re" - } - ], - "'s": [ - { - "L": "'s", - "F": "'s" - } - ], - "where'd": [ - { - "F": "where" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "shouldve": [ - { - "F": "should" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "a.": [ - { - "F": "a." - } - ], - "where's": [ - { - "F": "where" - }, - { - "F": "'s" - } - ], - "Ltd.": [ - { - "F": "Ltd." - } - ], - "Mass.": [ - { - "F": "Mass." - } - ], - "neednt": [ - { - "F": "need" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Pa.": [ - { - "F": "Pa." - } - ], - "It'll": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "7am": [ - { - "F": "7" - }, - { - "L": "a.m.", - "F": "am" - } - ], - "We'd": [ - { - "F": "We" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Whats": [ - { - "F": "What" - }, - { - "F": "s" - } - ], - "\u2014": [ - { - "pos": ":", - "L": "--", - "F": "\u2014" - } - ], - "E.g.": [ - { - "F": "E.g." - } - ], - "Ms.": [ - { - "F": "Ms." - } - ], - ":3": [ - { - "F": ":3" - } - ], - "5p.m.": [ - { - "F": "5" - }, - { - "F": "p.m." - } - ], - "Itd": [ - { - "L": "-PRON-", - "F": "It" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "May.": [ - { - "F": "May." - } - ], - "she'd": [ - { - "L": "-PRON-", - "F": "she" - }, - { - "F": "'d", - "L": "would", - "pos": "MD" - } - ], - "Mustnt": [ - { - "F": "Must" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "Notve": [ - { - "F": "Not", - "L": "not", - "pos": "RB" - }, - { - "F": "ve", - "L": "have", - "pos": "VB" - } - ], - "you'll": [ - { - "L": "-PRON-", - "F": "you" - }, - { - "F": "'ll", - "L": "will", - "pos": "MD" - } - ], - "Theyd": [ - { - "L": "-PRON-", - "F": "They" - }, - { - "F": "d", - "L": "would", - "pos": "MD" - } - ], - "she's": [ - { - "L": "-PRON-", - "F": "she" - }, - { - "F": "'s" - } - ], - "Couldnt": [ - { - "pos": "MD", - "F": "Could" - }, - { - "F": "nt", - "L": "not", - "pos": "RB" - } - ], - "that's": [ - { - "F": "that" - }, - { - "F": "'s" - } - ], - "4pm": [ - { - "F": "4" - }, - { - "L": "p.m.", - "F": "pm" - } - ], - ":))": [ - { - "F": ":))" - } - ] -} \ No newline at end of file diff --git a/lang_data/en/suffix.txt b/lang_data/en/suffix.txt deleted file mode 100644 index d8c6bc2c2..000000000 --- a/lang_data/en/suffix.txt +++ /dev/null @@ -1,26 +0,0 @@ -, -\" -\) -\] -\} -\* -\! -\? -% -\$ -> -: -; -' -” -'' -'s -'S -’s -’S -’ -\.\. -\.\.\. -\.\.\.\. -(?<=[a-z0-9)\]"'%\)])\. -(?<=[0-9])km diff --git a/lang_data/en/tag_map.json b/lang_data/en/tag_map.json deleted file mode 100644 index f913f38fe..000000000 --- a/lang_data/en/tag_map.json +++ /dev/null @@ -1,60 +0,0 @@ -{ -".": {"pos": "punct", "puncttype": "peri"}, -",": {"pos": "punct", "puncttype": "comm"}, -"-LRB-": {"pos": "punct", "puncttype": "brck", "punctside": "ini"}, -"-RRB-": {"pos": "punct", "puncttype": "brck", "punctside": "fin"}, -"``": {"pos": "punct", "puncttype": "quot", "punctside": "ini"}, -"\"\"": {"pos": "punct", "puncttype": "quot", "punctside": "fin"}, -"''": {"pos": "punct", "puncttype": "quot", "punctside": "fin"}, -":": {"pos": "punct"}, -"$": {"pos": "sym", "other": {"symtype": "currency"}}, -"#": {"pos": "sym", "other": {"symtype": "numbersign"}}, -"AFX": {"pos": "adj", "hyph": "hyph"}, -"CC": {"pos": "conj", "conjtype": "coor"}, -"CD": {"pos": "num", "numtype": "card"}, -"DT": {"pos": "det"}, -"EX": {"pos": "adv", "advtype": "ex"}, -"FW": {"pos": "x", "foreign": "foreign"}, -"HYPH": {"pos": "punct", "puncttype": "dash"}, -"IN": {"pos": "adp"}, -"JJ": {"pos": "adj", "degree": "pos"}, -"JJR": {"pos": "adj", "degree": "comp"}, -"JJS": {"pos": "adj", "degree": "sup"}, -"LS": {"pos": "punct", "numtype": "ord"}, -"MD": {"pos": "verb", "verbtype": "mod"}, -"NIL": {"pos": ""}, -"NN": {"pos": "noun", "number": "sing"}, -"NNP": {"pos": "propn", "nountype": "prop", "number": "sing"}, -"NNPS": {"pos": "propn", "nountype": "prop", "number": "plur"}, -"NNS": {"pos": "noun", "number": "plur"}, -"PDT": {"pos": "adj", "adjtype": "pdt", "prontype": "prn"}, -"POS": {"pos": "part", "poss": "poss"}, -"PRP": {"pos": "pron", "prontype": "prs"}, -"PRP$": {"pos": "adj", "prontype": "prs", "poss": "poss"}, -"RB": {"pos": "adv", "degree": "pos"}, -"RBR": {"pos": "adv", "degree": "comp"}, -"RBS": {"pos": "adv", "degree": "sup"}, -"RP": {"pos": "part"}, -"SYM": {"pos": "sym"}, -"TO": {"pos": "part", "parttype": "inf", "verbform": "inf"}, -"UH": {"pos": "intJ"}, -"VB": {"pos": "verb", "verbform": "inf"}, -"VBD": {"pos": "verb", "verbform": "fin", "tense": "past"}, -"VBG": {"pos": "verb", "verbform": "part", "tense": "pres", "aspect": "prog"}, -"VBN": {"pos": "verb", "verbform": "part", "tense": "past", "aspect": "perf"}, -"VBP": {"pos": "verb", "verbform": "fin", "tense": "pres"}, -"VBZ": {"pos": "verb", "verbform": "fin", "tense": "pres", "number": "sing", "person": 3}, -"WDT": {"pos": "adj", "prontype": "int|rel"}, -"WP": {"pos": "noun", "prontype": "int|rel"}, -"WP$": {"pos": "adj", "poss": "poss", "prontype": "int|rel"}, -"WRB": {"pos": "adv", "prontype": "int|rel"}, -"SP": {"pos": "space"}, -"ADD": {"pos": "x"}, -"NFP": {"pos": "punct"}, -"GW": {"pos": "x"}, -"AFX": {"pos": "x"}, -"HYPH": {"pos": "punct"}, -"XX": {"pos": "x"}, -"BES": {"pos": "verb"}, -"HVS": {"pos": "verb"} -} diff --git a/lang_data/fi/infix.txt b/lang_data/fi/infix.txt deleted file mode 100644 index 37eca7350..000000000 --- a/lang_data/fi/infix.txt +++ /dev/null @@ -1,3 +0,0 @@ -\.\.\. -(?<=[a-z])\.(?=[A-Z]) -(?<=[a-zA-Z])-(?=[a-zA-z]) diff --git a/lang_data/fi/lemma_rules.json b/lang_data/fi/lemma_rules.json deleted file mode 100644 index 0967ef424..000000000 --- a/lang_data/fi/lemma_rules.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/lang_data/fi/morphs.json b/lang_data/fi/morphs.json deleted file mode 100644 index e69de29bb..000000000 diff --git a/lang_data/fi/prefix.txt b/lang_data/fi/prefix.txt deleted file mode 100644 index 48c4fc549..000000000 --- a/lang_data/fi/prefix.txt +++ /dev/null @@ -1,21 +0,0 @@ -, -" -( -[ -{ -* -< -$ -£ -“ -' -`` -` -# -US$ -C$ -A$ -a- -‘ -.... -... diff --git a/lang_data/fi/sample.txt b/lang_data/fi/sample.txt deleted file mode 100644 index 12c0bb787..000000000 --- a/lang_data/fi/sample.txt +++ /dev/null @@ -1,3 +0,0 @@ -Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern. - -Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs. diff --git a/lang_data/fi/specials.json b/lang_data/fi/specials.json deleted file mode 100644 index 0e0986339..000000000 --- a/lang_data/fi/specials.json +++ /dev/null @@ -1,149 +0,0 @@ -{ -"a.m.": [{"F": "a.m."}], -"p.m.": [{"F": "p.m."}], - -"1a.m.": [{"F": "1"}, {"F": "a.m."}], -"2a.m.": [{"F": "2"}, {"F": "a.m."}], -"3a.m.": [{"F": "3"}, {"F": "a.m."}], -"4a.m.": [{"F": "4"}, {"F": "a.m."}], -"5a.m.": [{"F": "5"}, {"F": "a.m."}], -"6a.m.": [{"F": "6"}, {"F": "a.m."}], -"7a.m.": [{"F": "7"}, {"F": "a.m."}], -"8a.m.": [{"F": "8"}, {"F": "a.m."}], -"9a.m.": [{"F": "9"}, {"F": "a.m."}], -"10a.m.": [{"F": "10"}, {"F": "a.m."}], -"11a.m.": [{"F": "11"}, {"F": "a.m."}], -"12a.m.": [{"F": "12"}, {"F": "a.m."}], -"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}], -"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}], -"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}], -"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}], -"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}], -"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}], -"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}], -"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}], -"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}], -"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}], -"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}], -"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}], - - -"1p.m.": [{"F": "1"}, {"F": "p.m."}], -"2p.m.": [{"F": "2"}, {"F": "p.m."}], -"3p.m.": [{"F": "3"}, {"F": "p.m."}], -"4p.m.": [{"F": "4"}, {"F": "p.m."}], -"5p.m.": [{"F": "5"}, {"F": "p.m."}], -"6p.m.": [{"F": "6"}, {"F": "p.m."}], -"7p.m.": [{"F": "7"}, {"F": "p.m."}], -"8p.m.": [{"F": "8"}, {"F": "p.m."}], -"9p.m.": [{"F": "9"}, {"F": "p.m."}], -"10p.m.": [{"F": "10"}, {"F": "p.m."}], -"11p.m.": [{"F": "11"}, {"F": "p.m."}], -"12p.m.": [{"F": "12"}, {"F": "p.m."}], -"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}], -"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}], -"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}], -"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}], -"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}], -"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}], -"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}], -"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}], -"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}], -"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}], -"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}], -"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}], - -"Jan.": [{"F": "Jan.", "L": "Januar"}], -"Feb.": [{"F": "Feb.", "L": "Februar"}], -"Mär.": [{"F": "Mär.", "L": "März"}], -"Apr.": [{"F": "Apr.", "L": "April"}], -"Mai.": [{"F": "Mai.", "L": "Mai"}], -"Jun.": [{"F": "Jun.", "L": "Juni"}], -"Jul.": [{"F": "Jul.", "L": "Juli"}], -"Aug.": [{"F": "Aug.", "L": "August"}], -"Sep.": [{"F": "Sep.", "L": "September"}], -"Sept.": [{"F": "Sept.", "L": "September"}], -"Okt.": [{"F": "Okt.", "L": "Oktober"}], -"Nov.": [{"F": "Nov.", "L": "November"}], -"Dez.": [{"F": "Dez.", "L": "Dezember"}], - -":)": [{"F": ":)"}], -"<3": [{"F": "<3"}], -";)": [{"F": ";)"}], -"(:": [{"F": "(:"}], -":(": [{"F": ":("}], -"-_-": [{"F": "-_-"}], -"=)": [{"F": "=)"}], -":/": [{"F": ":/"}], -":>": [{"F": ":>"}], -";-)": [{"F": ";-)"}], -":Y": [{"F": ":Y"}], -":P": [{"F": ":P"}], -":-P": [{"F": ":-P"}], -":3": [{"F": ":3"}], -"=3": [{"F": "=3"}], -"xD": [{"F": "xD"}], -"^_^": [{"F": "^_^"}], -"=]": [{"F": "=]"}], -"=D": [{"F": "=D"}], -"<333": [{"F": "<333"}], -":))": [{"F": ":))"}], -":0": [{"F": ":0"}], -"-__-": [{"F": "-__-"}], -"xDD": [{"F": "xDD"}], -"o_o": [{"F": "o_o"}], -"o_O": [{"F": "o_O"}], -"V_V": [{"F": "V_V"}], -"=[[": [{"F": "=[["}], -"<33": [{"F": "<33"}], -";p": [{"F": ";p"}], -";D": [{"F": ";D"}], -";-p": [{"F": ";-p"}], -";(": [{"F": ";("}], -":p": [{"F": ":p"}], -":]": [{"F": ":]"}], -":O": [{"F": ":O"}], -":-/": [{"F": ":-/"}], -":-)": [{"F": ":-)"}], -":(((": [{"F": ":((("}], -":((": [{"F": ":(("}], -":')": [{"F": ":')"}], -"(^_^)": [{"F": "(^_^)"}], -"(=": [{"F": "(="}], -"o.O": [{"F": "o.O"}], -"\")": [{"F": "\")"}], -"a.": [{"F": "a."}], -"b.": [{"F": "b."}], -"c.": [{"F": "c."}], -"d.": [{"F": "d."}], -"e.": [{"F": "e."}], -"f.": [{"F": "f."}], -"g.": [{"F": "g."}], -"h.": [{"F": "h."}], -"i.": [{"F": "i."}], -"j.": [{"F": "j."}], -"k.": [{"F": "k."}], -"l.": [{"F": "l."}], -"m.": [{"F": "m."}], -"n.": [{"F": "n."}], -"o.": [{"F": "o."}], -"p.": [{"F": "p."}], -"q.": [{"F": "q."}], -"s.": [{"F": "s."}], -"t.": [{"F": "t."}], -"u.": [{"F": "u."}], -"v.": [{"F": "v."}], -"w.": [{"F": "w."}], -"x.": [{"F": "x."}], -"y.": [{"F": "y."}], -"z.": [{"F": "z."}], - -"z.b.": [{"F": "z.b."}], -"e.h.": [{"F": "I.e."}], -"o.ä.": [{"F": "I.E."}], -"bzw.": [{"F": "bzw."}], -"usw.": [{"F": "usw."}], -"\n": [{"F": "\n", "pos": "SP"}], -"\t": [{"F": "\t", "pos": "SP"}], -" ": [{"F": " ", "pos": "SP"}] -} diff --git a/lang_data/fi/suffix.txt b/lang_data/fi/suffix.txt deleted file mode 100644 index d8c6bc2c2..000000000 --- a/lang_data/fi/suffix.txt +++ /dev/null @@ -1,26 +0,0 @@ -, -\" -\) -\] -\} -\* -\! -\? -% -\$ -> -: -; -' -” -'' -'s -'S -’s -’S -’ -\.\. -\.\.\. -\.\.\.\. -(?<=[a-z0-9)\]"'%\)])\. -(?<=[0-9])km diff --git a/lang_data/fi/tag_map.json b/lang_data/fi/tag_map.json deleted file mode 100644 index 4451d0fa0..000000000 --- a/lang_data/fi/tag_map.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "NOUN": {"pos": "NOUN"}, - "VERB": {"pos": "VERB"}, - "PUNCT": {"pos": "PUNCT"}, - "ADV": {"pos": "ADV"}, - "ADJ": {"pos": "ADJ"}, - "PRON": {"pos": "PRON"}, - "PROPN": {"pos": "PROPN"}, - "CONJ": {"pos": "CONJ"}, - "NUM": {"pos": "NUM"}, - "AUX": {"pos": "AUX"}, - "SCONJ": {"pos": "SCONJ"}, - "ADP": {"pos": "ADP"}, - "SYM": {"pos": "SYM"}, - "X": {"pos": "X"}, - "INTJ": {"pos": "INTJ"}, - "DET": {"pos": "DET"}, - "PART": {"pos": "PART"} -} diff --git a/lang_data/it/infix.txt b/lang_data/it/infix.txt deleted file mode 100644 index 37eca7350..000000000 --- a/lang_data/it/infix.txt +++ /dev/null @@ -1,3 +0,0 @@ -\.\.\. -(?<=[a-z])\.(?=[A-Z]) -(?<=[a-zA-Z])-(?=[a-zA-z]) diff --git a/lang_data/it/morphs.json b/lang_data/it/morphs.json deleted file mode 100644 index e69de29bb..000000000 diff --git a/lang_data/it/prefix.txt b/lang_data/it/prefix.txt deleted file mode 100644 index 48c4fc549..000000000 --- a/lang_data/it/prefix.txt +++ /dev/null @@ -1,21 +0,0 @@ -, -" -( -[ -{ -* -< -$ -£ -“ -' -`` -` -# -US$ -C$ -A$ -a- -‘ -.... -... diff --git a/lang_data/it/specials.json b/lang_data/it/specials.json deleted file mode 100644 index 0e0986339..000000000 --- a/lang_data/it/specials.json +++ /dev/null @@ -1,149 +0,0 @@ -{ -"a.m.": [{"F": "a.m."}], -"p.m.": [{"F": "p.m."}], - -"1a.m.": [{"F": "1"}, {"F": "a.m."}], -"2a.m.": [{"F": "2"}, {"F": "a.m."}], -"3a.m.": [{"F": "3"}, {"F": "a.m."}], -"4a.m.": [{"F": "4"}, {"F": "a.m."}], -"5a.m.": [{"F": "5"}, {"F": "a.m."}], -"6a.m.": [{"F": "6"}, {"F": "a.m."}], -"7a.m.": [{"F": "7"}, {"F": "a.m."}], -"8a.m.": [{"F": "8"}, {"F": "a.m."}], -"9a.m.": [{"F": "9"}, {"F": "a.m."}], -"10a.m.": [{"F": "10"}, {"F": "a.m."}], -"11a.m.": [{"F": "11"}, {"F": "a.m."}], -"12a.m.": [{"F": "12"}, {"F": "a.m."}], -"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}], -"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}], -"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}], -"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}], -"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}], -"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}], -"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}], -"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}], -"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}], -"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}], -"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}], -"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}], - - -"1p.m.": [{"F": "1"}, {"F": "p.m."}], -"2p.m.": [{"F": "2"}, {"F": "p.m."}], -"3p.m.": [{"F": "3"}, {"F": "p.m."}], -"4p.m.": [{"F": "4"}, {"F": "p.m."}], -"5p.m.": [{"F": "5"}, {"F": "p.m."}], -"6p.m.": [{"F": "6"}, {"F": "p.m."}], -"7p.m.": [{"F": "7"}, {"F": "p.m."}], -"8p.m.": [{"F": "8"}, {"F": "p.m."}], -"9p.m.": [{"F": "9"}, {"F": "p.m."}], -"10p.m.": [{"F": "10"}, {"F": "p.m."}], -"11p.m.": [{"F": "11"}, {"F": "p.m."}], -"12p.m.": [{"F": "12"}, {"F": "p.m."}], -"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}], -"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}], -"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}], -"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}], -"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}], -"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}], -"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}], -"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}], -"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}], -"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}], -"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}], -"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}], - -"Jan.": [{"F": "Jan.", "L": "Januar"}], -"Feb.": [{"F": "Feb.", "L": "Februar"}], -"Mär.": [{"F": "Mär.", "L": "März"}], -"Apr.": [{"F": "Apr.", "L": "April"}], -"Mai.": [{"F": "Mai.", "L": "Mai"}], -"Jun.": [{"F": "Jun.", "L": "Juni"}], -"Jul.": [{"F": "Jul.", "L": "Juli"}], -"Aug.": [{"F": "Aug.", "L": "August"}], -"Sep.": [{"F": "Sep.", "L": "September"}], -"Sept.": [{"F": "Sept.", "L": "September"}], -"Okt.": [{"F": "Okt.", "L": "Oktober"}], -"Nov.": [{"F": "Nov.", "L": "November"}], -"Dez.": [{"F": "Dez.", "L": "Dezember"}], - -":)": [{"F": ":)"}], -"<3": [{"F": "<3"}], -";)": [{"F": ";)"}], -"(:": [{"F": "(:"}], -":(": [{"F": ":("}], -"-_-": [{"F": "-_-"}], -"=)": [{"F": "=)"}], -":/": [{"F": ":/"}], -":>": [{"F": ":>"}], -";-)": [{"F": ";-)"}], -":Y": [{"F": ":Y"}], -":P": [{"F": ":P"}], -":-P": [{"F": ":-P"}], -":3": [{"F": ":3"}], -"=3": [{"F": "=3"}], -"xD": [{"F": "xD"}], -"^_^": [{"F": "^_^"}], -"=]": [{"F": "=]"}], -"=D": [{"F": "=D"}], -"<333": [{"F": "<333"}], -":))": [{"F": ":))"}], -":0": [{"F": ":0"}], -"-__-": [{"F": "-__-"}], -"xDD": [{"F": "xDD"}], -"o_o": [{"F": "o_o"}], -"o_O": [{"F": "o_O"}], -"V_V": [{"F": "V_V"}], -"=[[": [{"F": "=[["}], -"<33": [{"F": "<33"}], -";p": [{"F": ";p"}], -";D": [{"F": ";D"}], -";-p": [{"F": ";-p"}], -";(": [{"F": ";("}], -":p": [{"F": ":p"}], -":]": [{"F": ":]"}], -":O": [{"F": ":O"}], -":-/": [{"F": ":-/"}], -":-)": [{"F": ":-)"}], -":(((": [{"F": ":((("}], -":((": [{"F": ":(("}], -":')": [{"F": ":')"}], -"(^_^)": [{"F": "(^_^)"}], -"(=": [{"F": "(="}], -"o.O": [{"F": "o.O"}], -"\")": [{"F": "\")"}], -"a.": [{"F": "a."}], -"b.": [{"F": "b."}], -"c.": [{"F": "c."}], -"d.": [{"F": "d."}], -"e.": [{"F": "e."}], -"f.": [{"F": "f."}], -"g.": [{"F": "g."}], -"h.": [{"F": "h."}], -"i.": [{"F": "i."}], -"j.": [{"F": "j."}], -"k.": [{"F": "k."}], -"l.": [{"F": "l."}], -"m.": [{"F": "m."}], -"n.": [{"F": "n."}], -"o.": [{"F": "o."}], -"p.": [{"F": "p."}], -"q.": [{"F": "q."}], -"s.": [{"F": "s."}], -"t.": [{"F": "t."}], -"u.": [{"F": "u."}], -"v.": [{"F": "v."}], -"w.": [{"F": "w."}], -"x.": [{"F": "x."}], -"y.": [{"F": "y."}], -"z.": [{"F": "z."}], - -"z.b.": [{"F": "z.b."}], -"e.h.": [{"F": "I.e."}], -"o.ä.": [{"F": "I.E."}], -"bzw.": [{"F": "bzw."}], -"usw.": [{"F": "usw."}], -"\n": [{"F": "\n", "pos": "SP"}], -"\t": [{"F": "\t", "pos": "SP"}], -" ": [{"F": " ", "pos": "SP"}] -} diff --git a/lang_data/it/suffix.txt b/lang_data/it/suffix.txt deleted file mode 100644 index d8c6bc2c2..000000000 --- a/lang_data/it/suffix.txt +++ /dev/null @@ -1,26 +0,0 @@ -, -\" -\) -\] -\} -\* -\! -\? -% -\$ -> -: -; -' -” -'' -'s -'S -’s -’S -’ -\.\. -\.\.\. -\.\.\.\. -(?<=[a-z0-9)\]"'%\)])\. -(?<=[0-9])km diff --git a/lang_data/it/tag_map.json b/lang_data/it/tag_map.json deleted file mode 100644 index 92f11e457..000000000 --- a/lang_data/it/tag_map.json +++ /dev/null @@ -1,44 +0,0 @@ -{ -"S": {"pos": "NOUN"}, -"E": {"pos": "ADP"}, -"RD": {"pos": "DET"}, -"V": {"pos": "VERB"}, -"_": {"pos": "NO_TAG"}, -"A": {"pos": "ADJ"}, -"SP": {"pos": "PROPN"}, -"FF": {"pos": "PUNCT"}, -"FS": {"pos": "PUNCT"}, -"B": {"pos": "ADV"}, -"CC": {"pos": "CONJ"}, -"FB": {"pos": "PUNCT"}, -"VA": {"pos": "AUX"}, -"PC": {"pos": "PRON"}, -"N": {"pos": "NUM"}, -"RI": {"pos": "DET"}, -"PR": {"pos": "PRON"}, -"CS": {"pos": "SCONJ"}, -"BN": {"pos": "ADV"}, -"AP": {"pos": "DET"}, -"VM": {"pos": "AUX"}, -"DI": {"pos": "DET"}, -"FC": {"pos": "PUNCT"}, -"PI": {"pos": "PRON"}, -"DD": {"pos": "DET"}, -"DQ": {"pos": "DET"}, -"PQ": {"pos": "PRON"}, -"PD": {"pos": "PRON"}, -"NO": {"pos": "ADJ"}, -"PE": {"pos": "PRON"}, -"T": {"pos": "DET"}, -"X": {"pos": "SYM"}, -"SW": {"pos": "X"}, -"NO": {"pos": "PRON"}, -"I": {"pos": "INTJ"}, -"X": {"pos": "X"}, -"DR": {"pos": "DET"}, -"EA": {"pos": "ADP"}, -"PP": {"pos": "PRON"}, -"X": {"pos": "NUM"}, -"DE": {"pos": "DET"}, -"X": {"pos": "PART"} -} diff --git a/lang_data/zh/gazetteer.json b/lang_data/zh/gazetteer.json deleted file mode 100644 index d52fed839..000000000 --- a/lang_data/zh/gazetteer.json +++ /dev/null @@ -1,194 +0,0 @@ -{ - "Reddit": [ - "PRODUCT", - {}, - [ - [{"lower": "reddit"}] - ] - ], - "SeptemberElevenAttacks": [ - "EVENT", - {}, - [ - [ - {"orth": "9/11"} - ], - [ - {"lower": "september"}, - {"orth": "11"} - ] - ] - ], - "Linux": [ - "PRODUCT", - {}, - [ - [{"lower": "linux"}] - ] - ], - "Haskell": [ - "PRODUCT", - {}, - [ - [{"lower": "haskell"}] - ] - ], - "HaskellCurry": [ - "PERSON", - {}, - [ - [ - {"lower": "haskell"}, - {"lower": "curry"} - ] - ] - ], - "Javascript": [ - "PRODUCT", - {}, - [ - [{"lower": "javascript"}] - ] - ], - "CSS": [ - "PRODUCT", - {}, - [ - [{"lower": "css"}], - [{"lower": "css3"}] - ] - ], - "displaCy": [ - "PRODUCT", - {}, - [ - [{"lower": "displacy"}] - ] - ], - "spaCy": [ - "PRODUCT", - {}, - [ - [{"orth": "spaCy"}] - ] - ], - - "HTML": [ - "PRODUCT", - {}, - [ - [{"lower": "html"}], - [{"lower": "html5"}] - ] - ], - "Python": [ - "PRODUCT", - {}, - [ - [{"orth": "Python"}] - ] - ], - "Ruby": [ - "PRODUCT", - {}, - [ - [{"orth": "Ruby"}] - ] - ], - "Digg": [ - "PRODUCT", - {}, - [ - [{"lower": "digg"}] - ] - ], - "FoxNews": [ - "ORG", - {}, - [ - [{"orth": "Fox"}], - [{"orth": "News"}] - ] - ], - "Google": [ - "ORG", - {}, - [ - [{"lower": "google"}] - ] - ], - "Mac": [ - "PRODUCT", - {}, - [ - [{"lower": "mac"}] - ] - ], - "Wikipedia": [ - "PRODUCT", - {}, - [ - [{"lower": "wikipedia"}] - ] - ], - "Windows": [ - "PRODUCT", - {}, - [ - [{"orth": "Windows"}] - ] - ], - "Dell": [ - "ORG", - {}, - [ - [{"lower": "dell"}] - ] - ], - "Facebook": [ - "ORG", - {}, - [ - [{"lower": "facebook"}] - ] - ], - "Blizzard": [ - "ORG", - {}, - [ - [{"orth": "Blizzard"}] - ] - ], - "Ubuntu": [ - "ORG", - {}, - [ - [{"orth": "Ubuntu"}] - ] - ], - "Youtube": [ - "PRODUCT", - {}, - [ - [{"lower": "youtube"}] - ] - ], - "false_positives": [ - null, - {}, - [ - [{"orth": "Shit"}], - [{"orth": "Weed"}], - [{"orth": "Cool"}], - [{"orth": "Btw"}], - [{"orth": "Bah"}], - [{"orth": "Bullshit"}], - [{"orth": "Lol"}], - [{"orth": "Yo"}, {"lower": "dawg"}], - [{"orth": "Yay"}], - [{"orth": "Ahh"}], - [{"orth": "Yea"}], - [{"orth": "Bah"}] - ] - ] -} diff --git a/lang_data/zh/infix.txt b/lang_data/zh/infix.txt deleted file mode 100644 index aa36da8e9..000000000 --- a/lang_data/zh/infix.txt +++ /dev/null @@ -1,6 +0,0 @@ -\.\.\. -(?<=[a-z])\.(?=[A-Z]) -(?<=[a-zA-Z])-(?=[a-zA-z]) -(?<=[a-zA-Z])--(?=[a-zA-z]) -(?<=[0-9])-(?=[0-9]) -(?<=[A-Za-z]),(?=[A-Za-z]) diff --git a/lang_data/zh/morphs.json b/lang_data/zh/morphs.json deleted file mode 100644 index 0967ef424..000000000 --- a/lang_data/zh/morphs.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/lang_data/zh/prefix.txt b/lang_data/zh/prefix.txt deleted file mode 100644 index 48c4fc549..000000000 --- a/lang_data/zh/prefix.txt +++ /dev/null @@ -1,21 +0,0 @@ -, -" -( -[ -{ -* -< -$ -£ -“ -' -`` -` -# -US$ -C$ -A$ -a- -‘ -.... -... diff --git a/lang_data/zh/specials.json b/lang_data/zh/specials.json deleted file mode 100644 index 0967ef424..000000000 --- a/lang_data/zh/specials.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/lang_data/zh/suffix.txt b/lang_data/zh/suffix.txt deleted file mode 100644 index d8c6bc2c2..000000000 --- a/lang_data/zh/suffix.txt +++ /dev/null @@ -1,26 +0,0 @@ -, -\" -\) -\] -\} -\* -\! -\? -% -\$ -> -: -; -' -” -'' -'s -'S -’s -’S -’ -\.\. -\.\.\. -\.\.\.\. -(?<=[a-z0-9)\]"'%\)])\. -(?<=[0-9])km diff --git a/lang_data/zh/tag_map.json b/lang_data/zh/tag_map.json deleted file mode 100644 index afc0c722c..000000000 --- a/lang_data/zh/tag_map.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "NR": {"pos": "PROPN"}, - "AD": {"pos": "ADV"}, - "NN": {"pos": "NOUN"}, - "CD": {"pos": "NUM"}, - "DEG": {"pos": "PART"}, - "PN": {"pos": "PRON"}, - "M": {"pos": "PART"}, - "JJ": {"pos": "ADJ"}, - "DEC": {"pos": "PART"}, - "NT": {"pos": "NOUN"}, - "DT": {"pos": "DET"}, - "LC": {"pos": "PART"}, - "CC": {"pos": "CONJ"}, - "AS": {"pos": "PART"}, - "SP": {"pos": "PART"}, - "IJ": {"pos": "INTJ"}, - "OD": {"pos": "NUM"}, - "MSP": {"pos": "PART"}, - "CS": {"pos": "SCONJ"}, - "ETC": {"pos": "PART"}, - "DEV": {"pos": "PART"}, - "BA": {"pos": "AUX"}, - "SB": {"pos": "AUX"}, - "DER": {"pos": "PART"}, - "LB": {"pos": "AUX"}, - "P": {"pos": "ADP"}, - "URL": {"pos": "SYM"}, - "FRAG": {"pos": "X"}, - "X": {"pos": "X"}, - "ON": {"pos": "X"}, - "FW": {"pos": "X"}, - "VC": {"pos": "VERB"}, - "VV": {"pos": "VERB"}, - "VA": {"pos": "VERB"}, - "VE": {"pos": "VERB"}, - "PU": {"pos": "PUNCT"}, - "SP": {"pos": "SPACE"}, - "NP": {"pos": "X"}, - "_": {"pos": "X"}, - "VP": {"pos": "X"}, - "CHAR": {"pos": "X"} -}