From a68dc891eac74eaa276ba7ac221b9be929716002 Mon Sep 17 00:00:00 2001
From: ines
Date: Mon, 21 Aug 2017 23:19:18 +0200
Subject: [PATCH] Port over changes from #1281

---
 spacy/lang/en/tokenizer_exceptions.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 392532619..b9fde7882 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -232,7 +232,10 @@ for verb_data in [
     {ORTH: "are", LEMMA: "be", NORM: "are", TAG: "VBP", "number": 2},
     {ORTH: "is", LEMMA: "be", NORM: "is", TAG: "VBZ"},
     {ORTH: "was", LEMMA: "be", NORM: "was"},
-    {ORTH: "were", LEMMA: "be", NORM: "were"}]:
+    {ORTH: "were", LEMMA: "be", NORM: "were"},
+    {ORTH: "have", NORM: "have"},
+    {ORTH: "has", LEMMA: "have", NORM: "has"},
+    {ORTH: "dare", NORM: "dare"}]:
     verb_data_tc = dict(verb_data)
     verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
     for data in [verb_data, verb_data_tc]:
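
Note: the three new entries extend the irregular-verb exception table, and the
loop below the list duplicates each entry in title case, so "Have"/"Has"/"Dare"
are covered alongside the lowercase forms. The following is a minimal,
self-contained sketch of that expansion, not the actual spaCy source: the
"n't"-contraction construction inside the inner loop is an assumption based on
the surrounding file, and ORTH/LEMMA/NORM are stood in by plain strings rather
than the attribute constants the real module imports.

    # Standalone stand-ins for the attribute constants used in the patch
    # (the real module imports these from spaCy, not plain strings).
    ORTH, LEMMA, NORM = "orth", "lemma", "norm"
    _exc = {}

    for verb_data in [
            {ORTH: "have", NORM: "have"},
            {ORTH: "has", LEMMA: "have", NORM: "has"},
            {ORTH: "dare", NORM: "dare"}]:
        # Duplicate each entry in title case, as the patched loop does.
        verb_data_tc = dict(verb_data)
        verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
        for data in [verb_data, verb_data_tc]:
            # Assumed: each variant becomes an "n't" contraction exception,
            # e.g. "hasn't" splits into the verb token plus an "n't" token.
            _exc[data[ORTH] + "n't"] = [
                dict(data),
                {ORTH: "n't", LEMMA: "not", NORM: "not"}]

    print(sorted(_exc))
    # ["Daren't", "Hasn't", "Haven't", "daren't", "hasn't", "haven't"]

Under those assumptions, the patch therefore teaches the tokenizer six new
surface forms from three list entries.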