From ee3a71862ecbf67193edbdcfa83947cc7fffdb32 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 5 Jan 2015 17:54:54 +1100
Subject: [PATCH] * Fix unicode bugs in tests

---
 tests/test_canon_case.py    | 20 --------------------
 tests/test_contractions.py  | 22 +++++++++++-----------
 tests/test_flag_features.py |  1 +
 tests/test_number.py        |  2 ++
 tests/test_urlish.py        |  2 ++
 5 files changed, 16 insertions(+), 31 deletions(-)
 delete mode 100644 tests/test_canon_case.py

diff --git a/tests/test_canon_case.py b/tests/test_canon_case.py
deleted file mode 100644
index 2c8dd255b..000000000
--- a/tests/test_canon_case.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from __future__ import unicode_literals
-
-import py.test
-
-from spacy.orth import canon_case as cc
-
-def test_nasa():
-    assert cc('Nasa', 0.6, 0.3, 0.1) == 'NASA'
-
-
-def test_john():
-    assert cc('john', 0.3, 0.6, 0.1) == 'John'
-
-
-def test_apple():
-    assert cc('apple', 0.1, 0.3, 0.6) == 'apple'
-
-
-def test_tie():
-    assert cc('I', 0.0, 0.0, 0.0) == 'I'
diff --git a/tests/test_contractions.py b/tests/test_contractions.py
index aeaccaaf2..2d6cdac03 100644
--- a/tests/test_contractions.py
+++ b/tests/test_contractions.py
@@ -18,25 +18,25 @@ def test_possess(EN):
 def test_apostrophe(EN):
     tokens = EN("schools'")
     assert len(tokens) == 2
-    assert tokens[1].string == "'"
-    assert tokens[0].string == "schools"
+    assert tokens[1].string == b"'"
+    assert tokens[0].string == b"schools"
 
 
 def test_LL(EN):
     tokens = EN("we'll")
     assert len(tokens) == 2
-    assert tokens[1].string == "'ll"
-    assert tokens[1].lemma == "will"
-    assert tokens[0].string == "we"
+    assert tokens[1].string == b"'ll"
+    assert tokens[1].lemma == b"will"
+    assert tokens[0].string == b"we"
 
 
 def test_aint(EN):
     tokens = EN("ain't")
     assert len(tokens) == 2
-    assert tokens[0].string == "ai"
-    assert tokens[0].lemma == "be"
-    assert tokens[1].string == "n't"
-    assert tokens[1].lemma == "not"
+    assert tokens[0].string == b"ai"
+    assert tokens[0].lemma == b"be"
+    assert tokens[1].string == b"n't"
+    assert tokens[1].lemma == b"not"
 
 
 def test_capitalized(EN):
@@ -46,8 +46,8 @@ def test_capitalized(EN):
     assert len(tokens) == 2
     tokens = EN("Ain't")
     assert len(tokens) == 2
-    assert tokens[0].string == "Ai"
-    assert tokens[0].lemma == "be"
+    assert tokens[0].string == b"Ai"
+    assert tokens[0].lemma == b"be"
 
 
 def test_punct(EN):
diff --git a/tests/test_flag_features.py b/tests/test_flag_features.py
index 2e20ea0e5..27b53d6e5 100644
--- a/tests/test_flag_features.py
+++ b/tests/test_flag_features.py
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import pytest
 
 from spacy.orth import is_alpha
diff --git a/tests/test_number.py b/tests/test_number.py
index a9bd6fa74..f305c981c 100644
--- a/tests/test_number.py
+++ b/tests/test_number.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from spacy.orth import like_number
 
 
diff --git a/tests/test_urlish.py b/tests/test_urlish.py
index d50d7f333..f10659dc1 100644
--- a/tests/test_urlish.py
+++ b/tests/test_urlish.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from spacy.orth import like_url
 
 def test_basic_url():