From ee3a71862ecbf67193edbdcfa83947cc7fffdb32 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 5 Jan 2015 17:54:54 +1100
Subject: [PATCH] * Fix unicode bugs in tests

---
 tests/test_canon_case.py    | 20 --------------------
 tests/test_contractions.py  | 22 +++++++++++-----------
 tests/test_flag_features.py |  1 +
 tests/test_number.py        |  2 ++
 tests/test_urlish.py        |  2 ++
 5 files changed, 16 insertions(+), 31 deletions(-)
 delete mode 100644 tests/test_canon_case.py

diff --git a/tests/test_canon_case.py b/tests/test_canon_case.py
deleted file mode 100644
index 2c8dd255b..000000000
--- a/tests/test_canon_case.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from __future__ import unicode_literals
-
-import py.test
-
-from spacy.orth import canon_case as cc
-
-def test_nasa():
-    assert cc('Nasa', 0.6, 0.3, 0.1) == 'NASA'
-
-
-def test_john():
-    assert cc('john', 0.3, 0.6, 0.1) == 'John'
-
-
-def test_apple():
-    assert cc('apple', 0.1, 0.3, 0.6) == 'apple'
-
-
-def test_tie():
-    assert cc('I', 0.0, 0.0, 0.0) == 'I'
diff --git a/tests/test_contractions.py b/tests/test_contractions.py
index aeaccaaf2..2d6cdac03 100644
--- a/tests/test_contractions.py
+++ b/tests/test_contractions.py
@@ -18,25 +18,25 @@ def test_possess(EN):
 def test_apostrophe(EN):
     tokens = EN("schools'")
     assert len(tokens) == 2
-    assert tokens[1].string == "'"
-    assert tokens[0].string == "schools"
+    assert tokens[1].string == b"'"
+    assert tokens[0].string == b"schools"
 
 
 def test_LL(EN):
     tokens = EN("we'll")
     assert len(tokens) == 2
-    assert tokens[1].string == "'ll"
-    assert tokens[1].lemma == "will"
-    assert tokens[0].string == "we"
+    assert tokens[1].string == b"'ll"
+    assert tokens[1].lemma == b"will"
+    assert tokens[0].string == b"we"
 
 
 def test_aint(EN):
     tokens = EN("ain't")
     assert len(tokens) == 2
-    assert tokens[0].string == "ai"
-    assert tokens[0].lemma == "be"
-    assert tokens[1].string == "n't"
-    assert tokens[1].lemma == "not"
+    assert tokens[0].string == b"ai"
+    assert tokens[0].lemma == b"be"
+    assert tokens[1].string == b"n't"
+    assert tokens[1].lemma == b"not"
 
 
 def test_capitalized(EN):
@@ -46,8 +46,8 @@ def test_capitalized(EN):
     assert len(tokens) == 2
     tokens = EN("Ain't")
     assert len(tokens) == 2
-    assert tokens[0].string == "Ai"
-    assert tokens[0].lemma == "be"
+    assert tokens[0].string == b"Ai"
+    assert tokens[0].lemma == b"be"
 
 
 def test_punct(EN):
diff --git a/tests/test_flag_features.py b/tests/test_flag_features.py
index 2e20ea0e5..27b53d6e5 100644
--- a/tests/test_flag_features.py
+++ b/tests/test_flag_features.py
@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import pytest
 
 from spacy.orth import is_alpha
diff --git a/tests/test_number.py b/tests/test_number.py
index a9bd6fa74..f305c981c 100644
--- a/tests/test_number.py
+++ b/tests/test_number.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from spacy.orth import like_number
 
 
diff --git a/tests/test_urlish.py b/tests/test_urlish.py
index d50d7f333..f10659dc1 100644
--- a/tests/test_urlish.py
+++ b/tests/test_urlish.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from spacy.orth import like_url
 
 def test_basic_url():