From 50878ef598e7978f280026cb939a0c76ba97a766 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Mon, 16 Jan 2017 13:10:38 +0100
Subject: [PATCH] Exclude "were" and "Were" from tokenizer exceptions and add
 regression test (resolves #744)

---
 spacy/en/tokenizer_exceptions.py        |  2 +-
 spacy/tests/regression/test_issue744.py | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 spacy/tests/regression/test_issue744.py

diff --git a/spacy/en/tokenizer_exceptions.py b/spacy/en/tokenizer_exceptions.py
index 798f9ec53..36bb0d7f0 100644
--- a/spacy/en/tokenizer_exceptions.py
+++ b/spacy/en/tokenizer_exceptions.py
@@ -7,7 +7,7 @@ from ..language_data import PRON_LEMMA
 
 EXC = {}
 
-EXCLUDE_EXC = ["Ill", "ill", "Its", "its", "Hell", "hell", "Well", "well", "Whore", "whore"]
+EXCLUDE_EXC = ["Ill", "ill", "Its", "its", "Hell", "hell", "were", "Were", "Well", "well", "Whore", "whore"]
 
 
 # Pronouns
diff --git a/spacy/tests/regression/test_issue744.py b/spacy/tests/regression/test_issue744.py
new file mode 100644
index 000000000..4e5eb2e10
--- /dev/null
+++ b/spacy/tests/regression/test_issue744.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.parametrize('text', ["We were scared", "We Were Scared"])
+def test_issue744(en_tokenizer, text):
+    """Test that 'were' and 'Were' are excluded from the contractions
+    generated by the English tokenizer exceptions."""
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 3
+    assert tokens[1].text.lower() == "were"
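
For reviewers who want to check the fix by hand outside the test suite, a minimal sketch follows; it is not part of the patch. It assumes a spaCy 1.x installation with the English data downloaded. spacy.load() and the token attributes are standard spaCy API; everything else is purely illustrative of what the regression test above asserts.

# Illustrative sketch only, not part of the patch. Assumes spaCy 1.x with the
# English data available (e.g. via `python -m spacy.en.download`).
import spacy

nlp = spacy.load('en')

doc = nlp(u"We were scared")
print([t.text for t in doc])
# With this patch applied the expected output is ['We', 'were', 'scared'],
# i.e. 3 tokens. Without the exclusion, "were" was apparently caught by the
# apostrophe-free "we" + "'re" contraction exception and split in two,
# which is the behaviour reported in #744.
assert len(doc) == 3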