* Fix unicode bugs in tests

This commit is contained in:
Matthew Honnibal 2015-01-05 17:54:54 +11:00
parent 3f1944d688
commit ee3a71862e
5 changed files with 16 additions and 31 deletions

View File

@ -1,20 +0,0 @@
from __future__ import unicode_literals
import py.test
from spacy.orth import canon_case as cc
def test_nasa():
assert cc('Nasa', 0.6, 0.3, 0.1) == 'NASA'
def test_john():
assert cc('john', 0.3, 0.6, 0.1) == 'John'
def test_apple():
assert cc('apple', 0.1, 0.3, 0.6) == 'apple'
def test_tie():
assert cc('I', 0.0, 0.0, 0.0) == 'I'

View File

@ -18,25 +18,25 @@ def test_possess(EN):
def test_apostrophe(EN): def test_apostrophe(EN):
tokens = EN("schools'") tokens = EN("schools'")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[1].string == "'" assert tokens[1].string == b"'"
assert tokens[0].string == "schools" assert tokens[0].string == b"schools"
def test_LL(EN): def test_LL(EN):
tokens = EN("we'll") tokens = EN("we'll")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[1].string == "'ll" assert tokens[1].string == b"'ll"
assert tokens[1].lemma == "will" assert tokens[1].lemma == b"will"
assert tokens[0].string == "we" assert tokens[0].string == b"we"
def test_aint(EN): def test_aint(EN):
tokens = EN("ain't") tokens = EN("ain't")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string == "ai" assert tokens[0].string == b"ai"
assert tokens[0].lemma == "be" assert tokens[0].lemma == b"be"
assert tokens[1].string == "n't" assert tokens[1].string == b"n't"
assert tokens[1].lemma == "not" assert tokens[1].lemma == b"not"
def test_capitalized(EN): def test_capitalized(EN):
@ -46,8 +46,8 @@ def test_capitalized(EN):
assert len(tokens) == 2 assert len(tokens) == 2
tokens = EN("Ain't") tokens = EN("Ain't")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string == "Ai" assert tokens[0].string == b"Ai"
assert tokens[0].lemma == "be" assert tokens[0].lemma == b"be"
def test_punct(EN): def test_punct(EN):

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import pytest import pytest
from spacy.orth import is_alpha from spacy.orth import is_alpha

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
from spacy.orth import like_number from spacy.orth import like_number

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
from spacy.orth import like_url from spacy.orth import like_url
def test_basic_url(): def test_basic_url():