mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Improve exceptions for 'd (would/had) in English (#5379)
Instead of treating `'d` in contractions like `I'd` as `would` in all cases in the tokenizer exceptions, leave the tagging and lemmatization up to later components.
This commit is contained in:
parent
d4cc18b746
commit
440b81bddc
|
@ -77,12 +77,12 @@ for pron in ["i", "you", "he", "she", "it", "we", "they"]:
|
|||
|
||||
_exc[orth + "'d"] = [
|
||||
{ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
|
||||
{ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"},
|
||||
{ORTH: "'d", NORM: "'d"},
|
||||
]
|
||||
|
||||
_exc[orth + "d"] = [
|
||||
{ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
|
||||
{ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"},
|
||||
{ORTH: "d", NORM: "'d"},
|
||||
]
|
||||
|
||||
_exc[orth + "'d've"] = [
|
||||
|
@ -195,7 +195,10 @@ for word in ["who", "what", "when", "where", "why", "how", "there", "that"]:
|
|||
{ORTH: "'d", NORM: "'d"},
|
||||
]
|
||||
|
||||
_exc[orth + "d"] = [{ORTH: orth, LEMMA: word, NORM: word}, {ORTH: "d"}]
|
||||
_exc[orth + "d"] = [
|
||||
{ORTH: orth, LEMMA: word, NORM: word},
|
||||
{ORTH: "d", NORM: "'d"}
|
||||
]
|
||||
|
||||
_exc[orth + "'d've"] = [
|
||||
{ORTH: orth, LEMMA: word, NORM: word},
|
||||
|
|
Loading…
Reference in New Issue
Block a user