From 440b81bddc24669ffe89ef7501fb8c75f98b60d2 Mon Sep 17 00:00:00 2001
From: adrianeboyd
Date: Fri, 8 May 2020 15:10:57 +0200
Subject: [PATCH] Improve exceptions for 'd (would/had) in English (#5379)

Instead of treating `'d` in contractions like `I'd` as `would` in all
cases in the tokenizer exceptions, leave the tagging and lemmatization
up to later components.
---
 spacy/lang/en/tokenizer_exceptions.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index c45197771..62de81912 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -77,12 +77,12 @@ for pron in ["i", "you", "he", "she", "it", "we", "they"]:
 
         _exc[orth + "'d"] = [
             {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
-            {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"},
+            {ORTH: "'d", NORM: "'d"},
         ]
 
         _exc[orth + "d"] = [
             {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
-            {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"},
+            {ORTH: "d", NORM: "'d"},
         ]
 
         _exc[orth + "'d've"] = [
@@ -195,7 +195,10 @@ for word in ["who", "what", "when", "where", "why", "how", "there", "that"]:
             {ORTH: "'d", NORM: "'d"},
         ]
 
-        _exc[orth + "d"] = [{ORTH: orth, LEMMA: word, NORM: word}, {ORTH: "d"}]
+        _exc[orth + "d"] = [
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "d", NORM: "'d"}
+        ]
 
         _exc[orth + "'d've"] = [
             {ORTH: orth, LEMMA: word, NORM: word},
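
Note: below is a minimal sketch of the observable effect, assuming only
that spaCy is installed (the sample sentence and the script itself are
illustrative, not part of the patch). With this change, the `'d` token
keeps the norm `'d` instead of being forced to `would`:

    import spacy

    # spacy.blank("en") builds a tokenizer-only pipeline, so the output
    # below reflects the tokenizer exceptions alone, with no tagger or
    # lemmatizer involved.
    nlp = spacy.blank("en")
    doc = nlp("I'd like that.")

    for token in doc:
        # After this patch the second token prints:  'd 'd
        # (previously its norm was hard-coded to "would")
        print(token.text, token.norm_)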