From edc596d9a77cf0281b3641297fd5abd62a74edf2 Mon Sep 17 00:00:00 2001
From: ines
Date: Mon, 21 Aug 2017 16:11:36 +0200
Subject: [PATCH] Add missing tokenizer exceptions (resolves #1281)

---
 spacy/en/tokenizer_exceptions.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/en/tokenizer_exceptions.py b/spacy/en/tokenizer_exceptions.py
index d9aa01734..29447314a 100644
--- a/spacy/en/tokenizer_exceptions.py
+++ b/spacy/en/tokenizer_exceptions.py
@@ -276,7 +276,10 @@ for verb_data in [
     {ORTH: "are", LEMMA: "be", TAG: "VBP", "number": 2},
     {ORTH: "is", LEMMA: "be", TAG: "VBZ"},
     {ORTH: "was", LEMMA: "be"},
-    {ORTH: "were", LEMMA: "be"}
+    {ORTH: "were", LEMMA: "be"},
+    {ORTH: "have"},
+    {ORTH: "has", LEMMA: "have"},
+    {ORTH: "dare"}
 ]:
     verb_data_tc = dict(verb_data)
     verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
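
Note (not part of the patch): a minimal smoke test for the new entries, assuming spaCy 1.x, where spacy.load('en') loads the English data under spacy/en and the loop above generates contraction exceptions (e.g. verb + "n't") for each verb in the list. The exact sentence and expected split are illustrative, not taken from the issue:

    import spacy  # spaCy 1.x assumed; patched module is spacy/en/tokenizer_exceptions.py

    nlp = spacy.load('en')
    doc = nlp(u"They haven't left, she hasn't called, and I daren't ask.")
    print([t.text for t in doc])
    # With "have", "has" and "dare" in the exception list, the contractions
    # should now split into verb + "n't": ['have', "n't"], ['has', "n't"],
    # ['dare', "n't"], instead of being left as single unsplit tokens.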