mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 08:12:24 +03:00
Fix LEMMA in exceptions and morph rules
This commit is contained in:
parent
2edc0aee12
commit
41a322c733
|
@ -2,13 +2,13 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..symbols import ORTH, LEMMA, TAG, NORM
|
from ..symbols import ORTH, LEMMA, TAG, NORM
|
||||||
from ..deprecated import PRON_LEMMA, DET_LEMMA
|
from ..deprecated import PRON_LEMMA
|
||||||
|
|
||||||
|
|
||||||
_exc = {
|
_exc = {
|
||||||
"auf'm": [
|
"auf'm": [
|
||||||
{ORTH: "auf", LEMMA: "auf"},
|
{ORTH: "auf", LEMMA: "auf"},
|
||||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem" }],
|
{ORTH: "'m", LEMMA: "der", NORM: "dem" }],
|
||||||
|
|
||||||
"du's": [
|
"du's": [
|
||||||
{ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
{ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||||
|
@ -20,7 +20,7 @@ _exc = {
|
||||||
|
|
||||||
"hinter'm": [
|
"hinter'm": [
|
||||||
{ORTH: "hinter", LEMMA: "hinter"},
|
{ORTH: "hinter", LEMMA: "hinter"},
|
||||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
|
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
|
||||||
|
|
||||||
"ich's": [
|
"ich's": [
|
||||||
{ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
{ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||||
|
@ -36,11 +36,11 @@ _exc = {
|
||||||
|
|
||||||
"unter'm": [
|
"unter'm": [
|
||||||
{ORTH: "unter", LEMMA: "unter"},
|
{ORTH: "unter", LEMMA: "unter"},
|
||||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
|
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
|
||||||
|
|
||||||
"vor'm": [
|
"vor'm": [
|
||||||
{ORTH: "vor", LEMMA: "vor"},
|
{ORTH: "vor", LEMMA: "vor"},
|
||||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}],
|
{ORTH: "'m", LEMMA: "der", NORM: "dem"}],
|
||||||
|
|
||||||
"wir's": [
|
"wir's": [
|
||||||
{ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
{ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||||
|
@ -48,7 +48,7 @@ _exc = {
|
||||||
|
|
||||||
"über'm": [
|
"über'm": [
|
||||||
{ORTH: "über", LEMMA: "über"},
|
{ORTH: "über", LEMMA: "über"},
|
||||||
{ORTH: "'m", LEMMA: DET_LEMMA, NORM: "dem"}]
|
{ORTH: "'m", LEMMA: "der", NORM: "dem"}]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,12 +57,10 @@ for exc_data in [
|
||||||
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||||
{ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
{ORTH: "S'", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||||
{ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
{ORTH: "s'", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
||||||
{ORTH: "'n", LEMMA: DET_LEMMA, NORM: "ein"},
|
{ORTH: "'n", LEMMA: "ein", NORM: "ein"},
|
||||||
{ORTH: "'ne", LEMMA: DET_LEMMA, NORM: "eine"},
|
{ORTH: "'ne", LEMMA: "eine", NORM: "eine"},
|
||||||
{ORTH: "'nen", LEMMA: DET_LEMMA, NORM: "einen"},
|
{ORTH: "'nen", LEMMA: "ein", NORM: "einen"},
|
||||||
{ORTH: "'nem", LEMMA: DET_LEMMA, NORM: "einem"},
|
{ORTH: "'nem", LEMMA: "ein", NORM: "einem"},
|
||||||
{ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER"},
|
|
||||||
|
|
||||||
{ORTH: "Abb.", LEMMA: "Abbildung"},
|
{ORTH: "Abb.", LEMMA: "Abbildung"},
|
||||||
{ORTH: "Abk.", LEMMA: "Abkürzung"},
|
{ORTH: "Abk.", LEMMA: "Abkürzung"},
|
||||||
{ORTH: "Abt.", LEMMA: "Abteilung"},
|
{ORTH: "Abt.", LEMMA: "Abteilung"},
|
||||||
|
|
|
@ -7,7 +7,6 @@ from .cli import download
|
||||||
|
|
||||||
|
|
||||||
PRON_LEMMA = "-PRON-"
|
PRON_LEMMA = "-PRON-"
|
||||||
DET_LEMMA = "-DET-"
|
|
||||||
|
|
||||||
|
|
||||||
def depr_model_download(lang):
|
def depr_model_download(lang):
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
|
from ..symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
|
||||||
from ..deprecated import PRON_LEMMA, DET_LEMMA
|
from ..deprecated import PRON_LEMMA
|
||||||
|
|
||||||
|
|
||||||
_exc = {
|
_exc = {
|
||||||
|
@ -32,11 +32,11 @@ _exc = {
|
||||||
|
|
||||||
"pal": [
|
"pal": [
|
||||||
{ORTH: "pa", LEMMA: "para"},
|
{ORTH: "pa", LEMMA: "para"},
|
||||||
{ORTH: "l", LEMMA: DET_LEMMA, NORM: "el"}],
|
{ORTH: "l", LEMMA: "el"}],
|
||||||
|
|
||||||
"pala": [
|
"pala": [
|
||||||
{ORTH: "pa", LEMMA: "para"},
|
{ORTH: "pa", LEMMA: "para"},
|
||||||
{ORTH: "la", LEMMA: DET_LEMMA}]
|
{ORTH: "la"}]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -51,17 +51,17 @@ MORPH_RULES = {
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBZ": {
|
"VBZ": {
|
||||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
"er": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
"er": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
"er": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBP": {
|
"VBP": {
|
||||||
"er": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
"er": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBD": {
|
"VBD": {
|
||||||
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||||
"vært": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
"vært": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,17 +53,17 @@ MORPH_RULES = {
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBZ": {
|
"VBZ": {
|
||||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
"är": {"VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
"är": {"VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
"är": {"VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBP": {
|
"VBP": {
|
||||||
"är": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
"är": {"VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBD": {
|
"VBD": {
|
||||||
"var": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
"var": {"VerbForm": "Fin", "Tense": "Past", "Number": "Sing"},
|
||||||
"vart": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
"vart": {"VerbForm": "Fin", "Tense": "Past", "Number": "Plur"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user