From 808cd6cf7f184e20d9b8e42364f7e10f045028dc Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 3 Apr 2017 18:12:52 +0200 Subject: [PATCH] Add missing tags to verbs (resolves #948) --- spacy/en/tokenizer_exceptions.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/spacy/en/tokenizer_exceptions.py b/spacy/en/tokenizer_exceptions.py index 07b01c4fb..3d009241b 100644 --- a/spacy/en/tokenizer_exceptions.py +++ b/spacy/en/tokenizer_exceptions.py @@ -213,15 +213,15 @@ for verb_data in [ {ORTH: "does", LEMMA: "do"}, {ORTH: "did", LEMMA: "do", TAG: "VBD"}, {ORTH: "had", LEMMA: "have", TAG: "VBD"}, - {ORTH: "may"}, - {ORTH: "might"}, - {ORTH: "must"}, + {ORTH: "may", TAG: "MD"}, + {ORTH: "might", TAG: "MD"}, + {ORTH: "must", TAG: "MD"}, {ORTH: "need"}, {ORTH: "ought"}, - {ORTH: "sha", LEMMA: "shall"}, - {ORTH: "should"}, - {ORTH: "wo", LEMMA: "will"}, - {ORTH: "would"} + {ORTH: "sha", LEMMA: "shall", TAG: "MD"}, + {ORTH: "should", TAG: "MD"}, + {ORTH: "wo", LEMMA: "will", TAG: "MD"}, + {ORTH: "would", TAG: "MD"} ]: verb_data_tc = dict(verb_data) verb_data_tc[ORTH] = verb_data_tc[ORTH].title()