mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-22 02:44:15 +03:00
Fix #717: Set correct lemma for contracted verbs
This commit is contained in:
parent
413138de79
commit
fe442cac53
|
@ -54,10 +54,13 @@ MORPH_RULES = {
|
||||||
"am": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
"am": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
|
||||||
"are": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
"are": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||||
"is": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
"is": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||||
|
"'re": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
|
||||||
|
"'s": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBP": {
|
"VBP": {
|
||||||
"are": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
"are": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"},
|
||||||
|
"'re": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
|
||||||
},
|
},
|
||||||
|
|
||||||
"VBD": {
|
"VBD": {
|
||||||
|
|
|
@ -107,7 +107,7 @@ for pron in ["you", "we", "they"]:
|
||||||
|
|
||||||
EXC[orth + "re"] = [
|
EXC[orth + "re"] = [
|
||||||
{ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
|
{ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
|
||||||
{ORTH: "re", LEMMA: "be", NORM: "are"}
|
{ORTH: "re", LEMMA: "be", NORM: "are", TAG: "VBZ"}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
@pytest.mark.parametrize('text1,text2', [("You're happy", "You are happy")])
|
@pytest.mark.parametrize('text1,text2',
|
||||||
|
[("You're happy", "You are happy"),
|
||||||
|
("I'm happy", "I am happy"),
|
||||||
|
("he's happy", "he's happy")])
|
||||||
def test_issue717(EN, text1, text2):
|
def test_issue717(EN, text1, text2):
|
||||||
"""Test that contractions are assigned the correct lemma."""
|
"""Test that contractions are assigned the correct lemma."""
|
||||||
doc1 = EN(text1)
|
doc1 = EN(text1)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user