mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
* Refactor for more universal spacy
This commit is contained in:
parent
c5a27d1821
commit
494da25872
3
lang_data/de/infix.txt
Normal file
3
lang_data/de/infix.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
\.\.\.
|
||||||
|
(?<=[a-z])\.(?=[A-Z])
|
||||||
|
(?<=[a-zA-Z])-(?=[a-zA-Z])
|
0
lang_data/de/morphs.json
Normal file
0
lang_data/de/morphs.json
Normal file
21
lang_data/de/prefix.txt
Normal file
21
lang_data/de/prefix.txt
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
,
|
||||||
|
"
|
||||||
|
(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
*
|
||||||
|
<
|
||||||
|
$
|
||||||
|
£
|
||||||
|
“
|
||||||
|
'
|
||||||
|
``
|
||||||
|
`
|
||||||
|
#
|
||||||
|
US$
|
||||||
|
C$
|
||||||
|
A$
|
||||||
|
a-
|
||||||
|
‘
|
||||||
|
....
|
||||||
|
...
|
3
lang_data/de/sample.txt
Normal file
3
lang_data/de/sample.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.
|
||||||
|
|
||||||
|
Mit Biografie: Ein Spiel wandte sich Frisch von der Parabelform seiner Erfolgsstücke Biedermann und die Brandstifter und Andorra ab und postulierte eine „Dramaturgie der Permutation“. Darin sollte nicht, wie im klassischen Theater, Sinn und Schicksal im Mittelpunkt stehen, sondern die Zufälligkeit von Ereignissen und die Möglichkeit ihrer Variation. Dennoch handelt Biografie: Ein Spiel gerade von der Unmöglichkeit seines Protagonisten, seinen Lebenslauf grundlegend zu verändern. Frisch empfand die Wirkung des Stücks im Nachhinein als zu fatalistisch und die Umsetzung seiner theoretischen Absichten als nicht geglückt. Obwohl das Stück 1968 als unpolitisch und nicht zeitgemäß kritisiert wurde und auch später eine geteilte Rezeption erfuhr, gehört es an deutschsprachigen Bühnen zu den häufiger aufgeführten Stücken Frischs.
|
149
lang_data/de/specials.json
Normal file
149
lang_data/de/specials.json
Normal file
|
@ -0,0 +1,149 @@
|
||||||
|
{
|
||||||
|
"a.m.": [{"F": "a.m."}],
|
||||||
|
"p.m.": [{"F": "p.m."}],
|
||||||
|
|
||||||
|
"1a.m.": [{"F": "1"}, {"F": "a.m."}],
|
||||||
|
"2a.m.": [{"F": "2"}, {"F": "a.m."}],
|
||||||
|
"3a.m.": [{"F": "3"}, {"F": "a.m."}],
|
||||||
|
"4a.m.": [{"F": "4"}, {"F": "a.m."}],
|
||||||
|
"5a.m.": [{"F": "5"}, {"F": "a.m."}],
|
||||||
|
"6a.m.": [{"F": "6"}, {"F": "a.m."}],
|
||||||
|
"7a.m.": [{"F": "7"}, {"F": "a.m."}],
|
||||||
|
"8a.m.": [{"F": "8"}, {"F": "a.m."}],
|
||||||
|
"9a.m.": [{"F": "9"}, {"F": "a.m."}],
|
||||||
|
"10a.m.": [{"F": "10"}, {"F": "a.m."}],
|
||||||
|
"11a.m.": [{"F": "11"}, {"F": "a.m."}],
|
||||||
|
"12a.m.": [{"F": "12"}, {"F": "a.m."}],
|
||||||
|
"1am": [{"F": "1"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"2am": [{"F": "2"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"3am": [{"F": "3"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"4am": [{"F": "4"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"5am": [{"F": "5"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"6am": [{"F": "6"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"7am": [{"F": "7"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"8am": [{"F": "8"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"9am": [{"F": "9"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"10am": [{"F": "10"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"11am": [{"F": "11"}, {"F": "am", "L": "a.m."}],
|
||||||
|
"12am": [{"F": "12"}, {"F": "am", "L": "a.m."}],
|
||||||
|
|
||||||
|
|
||||||
|
"1p.m.": [{"F": "1"}, {"F": "p.m."}],
|
||||||
|
"2p.m.": [{"F": "2"}, {"F": "p.m."}],
|
||||||
|
"3p.m.": [{"F": "3"}, {"F": "p.m."}],
|
||||||
|
"4p.m.": [{"F": "4"}, {"F": "p.m."}],
|
||||||
|
"5p.m.": [{"F": "5"}, {"F": "p.m."}],
|
||||||
|
"6p.m.": [{"F": "6"}, {"F": "p.m."}],
|
||||||
|
"7p.m.": [{"F": "7"}, {"F": "p.m."}],
|
||||||
|
"8p.m.": [{"F": "8"}, {"F": "p.m."}],
|
||||||
|
"9p.m.": [{"F": "9"}, {"F": "p.m."}],
|
||||||
|
"10p.m.": [{"F": "10"}, {"F": "p.m."}],
|
||||||
|
"11p.m.": [{"F": "11"}, {"F": "p.m."}],
|
||||||
|
"12p.m.": [{"F": "12"}, {"F": "p.m."}],
|
||||||
|
"1pm": [{"F": "1"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"2pm": [{"F": "2"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"3pm": [{"F": "3"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"4pm": [{"F": "4"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"5pm": [{"F": "5"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"6pm": [{"F": "6"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"7pm": [{"F": "7"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"8pm": [{"F": "8"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"9pm": [{"F": "9"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"10pm": [{"F": "10"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"11pm": [{"F": "11"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
"12pm": [{"F": "12"}, {"F": "pm", "L": "p.m."}],
|
||||||
|
|
||||||
|
"Jan.": [{"F": "Jan.", "L": "Januar"}],
|
||||||
|
"Feb.": [{"F": "Feb.", "L": "Februar"}],
|
||||||
|
"Mär.": [{"F": "Mär.", "L": "März"}],
|
||||||
|
"Apr.": [{"F": "Apr.", "L": "April"}],
|
||||||
|
"Mai.": [{"F": "Mai.", "L": "Mai"}],
|
||||||
|
"Jun.": [{"F": "Jun.", "L": "Juni"}],
|
||||||
|
"Jul.": [{"F": "Jul.", "L": "Juli"}],
|
||||||
|
"Aug.": [{"F": "Aug.", "L": "August"}],
|
||||||
|
"Sep.": [{"F": "Sep.", "L": "September"}],
|
||||||
|
"Sept.": [{"F": "Sept.", "L": "September"}],
|
||||||
|
"Okt.": [{"F": "Okt.", "L": "Oktober"}],
|
||||||
|
"Nov.": [{"F": "Nov.", "L": "November"}],
|
||||||
|
"Dez.": [{"F": "Dez.", "L": "Dezember"}],
|
||||||
|
|
||||||
|
":)": [{"F": ":)"}],
|
||||||
|
"<3": [{"F": "<3"}],
|
||||||
|
";)": [{"F": ";)"}],
|
||||||
|
"(:": [{"F": "(:"}],
|
||||||
|
":(": [{"F": ":("}],
|
||||||
|
"-_-": [{"F": "-_-"}],
|
||||||
|
"=)": [{"F": "=)"}],
|
||||||
|
":/": [{"F": ":/"}],
|
||||||
|
":>": [{"F": ":>"}],
|
||||||
|
";-)": [{"F": ";-)"}],
|
||||||
|
":Y": [{"F": ":Y"}],
|
||||||
|
":P": [{"F": ":P"}],
|
||||||
|
":-P": [{"F": ":-P"}],
|
||||||
|
":3": [{"F": ":3"}],
|
||||||
|
"=3": [{"F": "=3"}],
|
||||||
|
"xD": [{"F": "xD"}],
|
||||||
|
"^_^": [{"F": "^_^"}],
|
||||||
|
"=]": [{"F": "=]"}],
|
||||||
|
"=D": [{"F": "=D"}],
|
||||||
|
"<333": [{"F": "<333"}],
|
||||||
|
":))": [{"F": ":))"}],
|
||||||
|
":0": [{"F": ":0"}],
|
||||||
|
"-__-": [{"F": "-__-"}],
|
||||||
|
"xDD": [{"F": "xDD"}],
|
||||||
|
"o_o": [{"F": "o_o"}],
|
||||||
|
"o_O": [{"F": "o_O"}],
|
||||||
|
"V_V": [{"F": "V_V"}],
|
||||||
|
"=[[": [{"F": "=[["}],
|
||||||
|
"<33": [{"F": "<33"}],
|
||||||
|
";p": [{"F": ";p"}],
|
||||||
|
";D": [{"F": ";D"}],
|
||||||
|
";-p": [{"F": ";-p"}],
|
||||||
|
";(": [{"F": ";("}],
|
||||||
|
":p": [{"F": ":p"}],
|
||||||
|
":]": [{"F": ":]"}],
|
||||||
|
":O": [{"F": ":O"}],
|
||||||
|
":-/": [{"F": ":-/"}],
|
||||||
|
":-)": [{"F": ":-)"}],
|
||||||
|
":(((": [{"F": ":((("}],
|
||||||
|
":((": [{"F": ":(("}],
|
||||||
|
":')": [{"F": ":')"}],
|
||||||
|
"(^_^)": [{"F": "(^_^)"}],
|
||||||
|
"(=": [{"F": "(="}],
|
||||||
|
"o.O": [{"F": "o.O"}],
|
||||||
|
"\")": [{"F": "\")"}],
|
||||||
|
"a.": [{"F": "a."}],
|
||||||
|
"b.": [{"F": "b."}],
|
||||||
|
"c.": [{"F": "c."}],
|
||||||
|
"d.": [{"F": "d."}],
|
||||||
|
"e.": [{"F": "e."}],
|
||||||
|
"f.": [{"F": "f."}],
|
||||||
|
"g.": [{"F": "g."}],
|
||||||
|
"h.": [{"F": "h."}],
|
||||||
|
"i.": [{"F": "i."}],
|
||||||
|
"j.": [{"F": "j."}],
|
||||||
|
"k.": [{"F": "k."}],
|
||||||
|
"l.": [{"F": "l."}],
|
||||||
|
"m.": [{"F": "m."}],
|
||||||
|
"n.": [{"F": "n."}],
|
||||||
|
"o.": [{"F": "o."}],
|
||||||
|
"p.": [{"F": "p."}],
|
||||||
|
"q.": [{"F": "q."}],
|
||||||
|
"s.": [{"F": "s."}],
|
||||||
|
"t.": [{"F": "t."}],
|
||||||
|
"u.": [{"F": "u."}],
|
||||||
|
"v.": [{"F": "v."}],
|
||||||
|
"w.": [{"F": "w."}],
|
||||||
|
"x.": [{"F": "x."}],
|
||||||
|
"y.": [{"F": "y."}],
|
||||||
|
"z.": [{"F": "z."}],
|
||||||
|
|
||||||
|
"z.b.": [{"F": "z.b."}],
|
||||||
|
"e.h.": [{"F": "e.h."}],
|
||||||
|
"o.ä.": [{"F": "o.ä."}],
|
||||||
|
"bzw.": [{"F": "bzw."}],
|
||||||
|
"usw.": [{"F": "usw."}],
|
||||||
|
"\n": [{"F": "\n", "pos": "SP"}],
|
||||||
|
"\t": [{"F": "\t", "pos": "SP"}],
|
||||||
|
" ": [{"F": " ", "pos": "SP"}]
|
||||||
|
}
|
26
lang_data/de/suffix.txt
Normal file
26
lang_data/de/suffix.txt
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
,
|
||||||
|
\"
|
||||||
|
\)
|
||||||
|
\]
|
||||||
|
\}
|
||||||
|
\*
|
||||||
|
\!
|
||||||
|
\?
|
||||||
|
%
|
||||||
|
\$
|
||||||
|
>
|
||||||
|
:
|
||||||
|
;
|
||||||
|
'
|
||||||
|
”
|
||||||
|
''
|
||||||
|
's
|
||||||
|
'S
|
||||||
|
’s
|
||||||
|
’S
|
||||||
|
’
|
||||||
|
\.\.
|
||||||
|
\.\.\.
|
||||||
|
\.\.\.\.
|
||||||
|
(?<=[a-z0-9)\]"'%\)])\.
|
||||||
|
(?<=[0-9])km
|
56
lang_data/de/tag_map.json
Normal file
56
lang_data/de/tag_map.json
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
{
|
||||||
|
"$(": {"pos": "PUNCT", "PunctType": "Brck"},
|
||||||
|
"$,": {"pos": "PUNCT", "PunctType": "Comm"},
|
||||||
|
"$.": {"pos": "PUNCT", "PunctType": "Peri"},
|
||||||
|
"ADJA": {"pos": "ADJ"},
|
||||||
|
"ADJD": {"pos": "ADJ", "Variant": "Short"},
|
||||||
|
"ADV": {"pos": "ADV"},
|
||||||
|
"APPO": {"pos": "ADP", "AdpType": "Post"},
|
||||||
|
"APPR": {"pos": "ADP", "AdpType": "Prep"},
|
||||||
|
"APPRART": {"pos": "ADP", "AdpType": "Prep", "PronType": "Art"},
|
||||||
|
"APZR": {"pos": "ADP", "AdpType": "Circ"},
|
||||||
|
"ART": {"pos": "DET", "PronType": "Art"},
|
||||||
|
"CARD": {"pos": "NUM", "NumType": "Card"},
|
||||||
|
"FM": {"pos": "X", "Foreign": "Yes"},
|
||||||
|
"ITJ": {"pos": "INTJ"},
|
||||||
|
"KOKOM": {"pos": "CONJ", "ConjType": "Comp"},
|
||||||
|
"KON": {"pos": "CONJ"},
|
||||||
|
"KOUI": {"pos": "SCONJ"},
|
||||||
|
"KOUS": {"pos": "SCONJ"},
|
||||||
|
"NE": {"pos": "PROPN"},
|
||||||
|
"NN": {"pos": "NOUN"},
|
||||||
|
"PAV": {"pos": "ADV", "PronType": "Dem"},
|
||||||
|
"PDAT": {"pos": "DET", "PronType": "Dem"},
|
||||||
|
"PDS": {"pos": "PRON", "PronType": "Dem"},
|
||||||
|
"PIAT": {"pos": "DET", "PronType": "Ind,Neg,Tot"},
|
||||||
|
"PIDAT": {"pos": "DET", "AdjType": "Pdt", "PronType": "Ind,Neg,Tot"},
|
||||||
|
"PIS": {"pos": "PRON", "PronType": "Ind,Neg,Tot"},
|
||||||
|
"PPER": {"pos": "PRON", "PronType": "Prs"},
|
||||||
|
"PPOSAT": {"pos": "DET", "Poss": "Yes", "PronType": "Prs"},
|
||||||
|
"PPOSS": {"pos": "PRON", "Poss": "Yes", "PronType": "Prs"},
|
||||||
|
"PRELAT": {"pos": "DET", "PronType": "Rel"},
|
||||||
|
"PRELS": {"pos": "PRON", "PronType": "Rel"},
|
||||||
|
"PRF": {"pos": "PRON", "PronType": "Prs", "Reflex": "Yes"},
|
||||||
|
"PTKA": {"pos": "PART"},
|
||||||
|
"PTKANT": {"pos": "PART", "PartType": "Res"},
|
||||||
|
"PTKNEG": {"pos": "PART", "Negative": "Neg"},
|
||||||
|
"PTKVZ": {"pos": "PART", "PartType": "Vbp"},
|
||||||
|
"PTKZU": {"pos": "PART", "PartType": "Inf"},
|
||||||
|
"PWAT": {"pos": "DET", "PronType": "Int"},
|
||||||
|
"PWAV": {"pos": "ADV", "PronType": "Int"},
|
||||||
|
"PWS": {"pos": "PRON", "PronType": "Int"},
|
||||||
|
"TRUNC": {"pos": "X", "Hyph": "Yes"},
|
||||||
|
"VAFIN": {"pos": "AUX", "Mood": "Ind", "VerbForm": "Fin"},
|
||||||
|
"VAIMP": {"pos": "AUX", "Mood": "Imp", "VerbForm": "Fin"},
|
||||||
|
"VAINF": {"pos": "AUX", "VerbForm": "Inf"},
|
||||||
|
"VAPP": {"pos": "AUX", "Aspect": "Perf", "VerbForm": "Part"},
|
||||||
|
"VMFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin", "VerbType": "Mod"},
|
||||||
|
"VMINF": {"pos": "VERB", "VerbForm": "Inf", "VerbType": "Mod"},
|
||||||
|
"VMPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part", "VerbType": "Mod"},
|
||||||
|
"VVFIN": {"pos": "VERB", "Mood": "Ind", "VerbForm": "Fin"},
|
||||||
|
"VVIMP": {"pos": "VERB", "Mood": "Imp", "VerbForm": "Fin"},
|
||||||
|
"VVINF": {"pos": "VERB", "VerbForm": "Inf"},
|
||||||
|
"VVIZU": {"pos": "VERB", "VerbForm": "Inf"},
|
||||||
|
"VVPP": {"pos": "VERB", "Aspect": "Perf", "VerbForm": "Part"},
|
||||||
|
"XY": {"pos": "X"}
|
||||||
|
}
|
51
lang_data/en/tag_map.json
Normal file
51
lang_data/en/tag_map.json
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
{
|
||||||
|
".": {"pos": "punc", "punctype": "peri"},
|
||||||
|
",": {"pos": "punc", "punctype": "comm"},
|
||||||
|
"-LRB-": {"pos": "punc", "punctype": "brck", "puncside": "ini"},
|
||||||
|
"-RRB-": {"pos": "punc", "punctype": "brck", "puncside": "fin"},
|
||||||
|
"``": {"pos": "punc", "punctype": "quot", "puncside": "ini"},
|
||||||
|
"\"\"": {"pos": "punc", "punctype": "quot", "puncside": "fin"},
|
||||||
|
":": {"pos": "punc"},
|
||||||
|
"$": {"pos": "sym", "other": {"symtype": "currency"}},
|
||||||
|
"#": {"pos": "sym", "other": {"symtype": "numbersign"}},
|
||||||
|
"AFX": {"pos": "adj", "hyph": "hyph"},
|
||||||
|
"CC": {"pos": "conj", "conjtype": "coor"},
|
||||||
|
"CD": {"pos": "num", "numtype": "card"},
|
||||||
|
"DT": {"pos": "adj", "prontype": "prn"},
|
||||||
|
"EX": {"pos": "adv", "advtype": "ex"},
|
||||||
|
"FW": {"foreign": "foreign"},
|
||||||
|
"HYPH": {"pos": "punc", "punctype": "dash"},
|
||||||
|
"IN": {"pos": "adp"},
|
||||||
|
"JJ": {"pos": "adj", "degree": "pos"},
|
||||||
|
"JJR": {"pos": "adj", "degree": "comp"},
|
||||||
|
"JJS": {"pos": "adj", "degree": "sup"},
|
||||||
|
"LS": {"pos": "punc", "numtype": "ord"},
|
||||||
|
"MD": {"pos": "verb", "verbtype": "mod"},
|
||||||
|
"NIL": {},
|
||||||
|
"NN": {"pos": "noun", "number": "sing"},
|
||||||
|
"NNP": {"pos": "noun", "nountype": "prop", "number": "sing"},
|
||||||
|
"NNPS": {"pos": "noun", "nountype": "prop", "number": "plur"},
|
||||||
|
"NNS": {"pos": "noun", "number": "plur"},
|
||||||
|
"PDT": {"pos": "adj", "adjtype": "pdt", "prontype": "prn"},
|
||||||
|
"POS": {"pos": "part", "poss": "poss"},
|
||||||
|
"PRP": {"pos": "noun", "prontype": "prs"},
|
||||||
|
"PRP$": {"pos": "adj", "prontype": "prs", "poss": "poss"},
|
||||||
|
"RB": {"pos": "adv", "degree": "pos"},
|
||||||
|
"RBR": {"pos": "adv", "degree": "comp"},
|
||||||
|
"RBS": {"pos": "adv", "degree": "sup"},
|
||||||
|
"RP": {"pos": "part"},
|
||||||
|
"SYM": {"pos": "sym"},
|
||||||
|
"TO": {"pos": "part", "parttype": "inf", "verbform": "inf"},
|
||||||
|
"UH": {"pos": "int"},
|
||||||
|
"VB": {"pos": "verb", "verbform": "inf"},
|
||||||
|
"VBD": {"pos": "verb", "verbform": "fin", "tense": "past"},
|
||||||
|
"VBG": {"pos": "verb", "verbform": "part", "tense": "pres", "aspect": "prog"},
|
||||||
|
"VBN": {"pos": "verb", "verbform": "part", "tense": "past", "aspect": "perf"},
|
||||||
|
"VBP": {"pos": "verb", "verbform": "fin", "tense": "pres"},
|
||||||
|
"VBZ": {"pos": "verb", "verbform": "fin", "tense": "pres", "number": "sing", "person": 3},
|
||||||
|
"WDT": {"pos": "adj", "prontype": "int|rel"},
|
||||||
|
"WP": {"pos": "noun", "prontype": "int|rel"},
|
||||||
|
"WP$": {"pos": "adj", "poss": "poss", "prontype": "int|rel"},
|
||||||
|
"WRB": {"pos": "adv", "prontype": "int|rel"},
|
||||||
|
"SP": {"pos": "space"}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user