Fix #717: Set correct lemma for contracted verbs

This commit is contained in:
Matthew Honnibal 2017-03-18 16:16:10 +01:00
parent 413138de79
commit fe442cac53
3 changed files with 9 additions and 4 deletions

View File

@ -54,10 +54,13 @@ MORPH_RULES = {
"am": {LEMMA: "be", "VerbForm": "Fin", "Person": "One", "Tense": "Pres", "Mood": "Ind"},
"are": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"is": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
"'re": {LEMMA: "be", "VerbForm": "Fin", "Person": "Two", "Tense": "Pres", "Mood": "Ind"},
"'s": {LEMMA: "be", "VerbForm": "Fin", "Person": "Three", "Tense": "Pres", "Mood": "Ind"},
},
"VBP": {
"are": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
"are": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"},
"'re": {LEMMA: "be", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
},
"VBD": {

View File

@ -107,7 +107,7 @@ for pron in ["you", "we", "they"]:
EXC[orth + "re"] = [
{ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
{ORTH: "re", LEMMA: "be", NORM: "are"}
{ORTH: "re", LEMMA: "be", NORM: "are", TAG: "VBZ"}
]

View File

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import pytest
@pytest.mark.xfail
@pytest.mark.models
@pytest.mark.parametrize('text1,text2', [("You're happy", "You are happy")])
@pytest.mark.parametrize('text1,text2',
[("You're happy", "You are happy"),
("I'm happy", "I am happy"),
("he's happy", "he's happy")])
def test_issue717(EN, text1, text2):
"""Test that contractions are assigned the correct lemma."""
doc1 = EN(text1)