Fix LEMMA in exceptions and morph rules

This commit is contained in:
ines 2017-05-08 19:57:36 +02:00
parent 2edc0aee12
commit 41a322c733
5 changed files with 25 additions and 28 deletions

View File

@ -2,13 +2,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..symbols import ORTH, LEMMA, TAG, NORM from ..symbols import ORTH, LEMMA, TAG, NORM
from ..deprecated import PRON_LEMMA, DET_LEMMA from ..deprecated import PRON_LEMMA
_exc = { _exc = {
"auf'm": [ "auf'm": [
{ORTH: "auf", LEMMA: "auf"}, {ORTH: "auf", LEMMA: "auf"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem" }], {ORTH: "'m", LEMMA: "der", NORM: "dem" }],
"du's": [ "du's": [
{ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"},
@ -20,7 +20,7 @@ _exc = {
"hinter'm": [ "hinter'm": [
{ORTH: "hinter", LEMMA: "hinter"}, {ORTH: "hinter", LEMMA: "hinter"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}], {ORTH: "'m", LEMMA: "der", NORM: "dem"}],
"ich's": [ "ich's": [
{ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"},
@ -36,11 +36,11 @@ _exc = {
"unter'm": [ "unter'm": [
{ORTH: "unter", LEMMA: "unter"}, {ORTH: "unter", LEMMA: "unter"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}], {ORTH: "'m", LEMMA: "der", NORM: "dem"}],
"vor'm": [ "vor'm": [
{ORTH: "vor", LEMMA: "vor"}, {ORTH: "vor", LEMMA: "vor"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}], {ORTH: "'m", LEMMA: "der", NORM: "dem"}],
"wir's": [ "wir's": [
{ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"},
@ -48,7 +48,7 @@ _exc = {
"über'm": [ "über'm": [
{ORTH: "über", LEMMA: "über"}, {ORTH: "über", LEMMA: "über"},
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}] {ORTH: "'m", LEMMA: "der", NORM: "dem"}]
} }
@ -57,12 +57,10 @@ for exc_data in [
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"}, {ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "'n", LEMMA: DET_LEMMA, NORM: "ein"}, {ORTH: "'n", LEMMA: "ein", NORM: "ein"},
{ORTH: "'ne", LEMMA: DET_LEMMA, NORM: "eine"}, {ORTH: "'ne", LEMMA: "eine", NORM: "eine"},
{ORTH: "'nen", LEMMA: DET_LEMMA, NORM: "einen"}, {ORTH: "'nen", LEMMA: "ein", NORM: "einen"},
{ORTH: "'nem", LEMMA: DET_LEMMA, NORM: "einem"}, {ORTH: "'nem", LEMMA: "ein", NORM: "einem"},
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
{ORTH: "Abb.", LEMMA: "Abbildung"}, {ORTH: "Abb.", LEMMA: "Abbildung"},
{ORTH: "Abk.", LEMMA: "Abkürzung"}, {ORTH: "Abk.", LEMMA: "Abkürzung"},
{ORTH: "Abt.", LEMMA: "Abteilung"}, {ORTH: "Abt.", LEMMA: "Abteilung"},

View File

@ -7,7 +7,6 @@ from .cli import download
PRON_LEMMA = "-PRON-" PRON_LEMMA = "-PRON-"
DET_LEMMA = "-DET-"
def depr_model_download(lang): def depr_model_download(lang):

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
from ..deprecated import PRON_LEMMA, DET_LEMMA from ..deprecated import PRON_LEMMA
_exc = { _exc = {
@ -32,11 +32,11 @@ _exc = {
"pal": [ "pal": [
{ORTH: "pa", LEMMA: "para"}, {ORTH: "pa", LEMMA: "para"},
{ORTH: "l", LEMMA: DET_LEMMA, NORM: "el"}], {ORTH: "l", LEMMA: "el"}],
"pala": [ "pala": [
{ORTH: "pa", LEMMA: "para"}, {ORTH: "pa", LEMMA: "para"},
{ORTH: "la", LEMMA: DET_LEMMA}] {ORTH: "la"}]
} }

View File

@ -51,17 +51,17 @@ MORPH_RULES = {
}, },
"VBZ": { "VBZ": {
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, "er": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, "er": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, "er": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
}, },
"VBP": { "VBP": {
"er": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} "er": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
}, },
"VBD": { "VBD": {
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, "var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vært": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} "vært": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
} }
} }

View File

@ -53,17 +53,17 @@ MORPH_RULES = {
}, },
"VBZ": { "VBZ": {
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"}, "är": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"}, "är": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"}, "är": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
}, },
"VBP": { "VBP": {
"är": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"} "är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
}, },
"VBD": { "VBD": {
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"}, "var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
"vart": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"} "vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
} }
} }