Merge pull request #161 from andreasgrv/master

Fixed an error in Python 2 when printing Unicode text
This commit is contained in:
Matthew Honnibal 2015-11-03 13:36:13 +11:00
commit 9dd38a80e4
4 changed files with 118 additions and 8 deletions

View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
import pytest
def test_print_doc(EN):
    """Printing the doc built by ``EN`` from ASCII-only text must not raise."""
    # Build the doc outside the ``try`` so a failure while creating it shows
    # up with its own traceback instead of being reported as a print failure.
    doc = EN(u'I sat down for coffee at the coffee store')
    try:
        print(doc)
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_repr_doc(EN):
    """Printing ``repr()`` of the doc built from ASCII-only text must not raise."""
    # Build the doc outside the ``try`` so a failure while creating it shows
    # up with its own traceback instead of being reported as a print failure.
    doc = EN(u'I sat down for coffee at the coffee store')
    try:
        print(repr(doc))
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_print_doc_unicode(EN):
    """Printing the doc built from text containing a non-ASCII character must not raise."""
    # Build the doc outside the ``try`` so a failure while creating it shows
    # up with its own traceback instead of being reported as a print failure.
    doc = EN(u'I sat down for coffee at the café')
    try:
        print(doc)
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_repr_doc_unicode(EN):
    """Printing ``repr()`` of a doc containing a non-ASCII character must not raise."""
    # Build the doc outside the ``try`` so a failure while creating it shows
    # up with its own traceback instead of being reported as a print failure.
    doc = EN(u'I sat down for coffee at the café')
    try:
        print(repr(doc))
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_print_span(EN):
    """Printing the last-three-item slice of an ASCII-only doc must not raise."""
    # Build and slice outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    span = EN(u'I sat down for coffee at the coffee store')[-3:]
    try:
        print(span)
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_repr_span(EN):
    """Printing ``repr()`` of the last-three-item slice of an ASCII-only doc must not raise."""
    # Build and slice outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    span = EN(u'I sat down for coffee at the coffee store')[-3:]
    try:
        print(repr(span))
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_print_span_unicode(EN):
    """Printing a slice whose text contains a non-ASCII character must not raise."""
    # Build and slice outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    span = EN(u'I sat down for coffee at the café')[-3:]
    try:
        print(span)
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_repr_span_unicode(EN):
    """Printing ``repr()`` of a slice containing a non-ASCII character must not raise."""
    # Build and slice outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    span = EN(u'I sat down for coffee at the café')[-3:]
    try:
        print(repr(span))
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_print_token(EN):
    """Printing the last item of an ASCII-only doc must not raise."""
    # Build and index outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    token = EN(u'I sat down for coffee at the coffee store')[-1]
    try:
        print(token)
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_repr_token(EN):
    """Printing ``repr()`` of the last item of an ASCII-only doc must not raise."""
    # Build and index outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    token = EN(u'I sat down for coffee at the coffee store')[-1]
    try:
        print(repr(token))
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_print_token_unicode(EN):
    """Printing the last item of a doc ending in non-ASCII text must not raise."""
    # Build and index outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    token = EN(u'I sat down for coffee at the café')[-1]
    try:
        print(token)
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))
def test_repr_token_unicode(EN):
    """Printing ``repr()`` of the last item of a doc ending in non-ASCII text must not raise."""
    # Build and index outside the ``try`` so a failure there shows up with
    # its own traceback instead of being reported as a print failure.
    token = EN(u'I sat down for coffee at the café')[-1]
    try:
        print(repr(token))
    except Exception as err:
        # Python 2 has no exception chaining, so carry the cause in the message.
        pytest.fail("Printing failed: %r" % (err,))

View File

@ -7,6 +7,7 @@ import numpy.linalg
import struct
cimport numpy as np
import math
import six
from ..lexeme cimport Lexeme
from ..lexeme cimport EMPTY_LEXEME
@ -117,11 +118,16 @@ cdef class Doc:
def __unicode__(self):
    # Unicode text of the document: the ``string`` of every token yielded by
    # iterating ``self``, concatenated in iteration order.
    pieces = [tok.string for tok in self]
    return u''.join(pieces)
def __bytes__(self):
    # UTF-8 encoded form of the concatenated token strings.
    text = u''.join([tok.string for tok in self])
    return text.encode('utf-8')
def __str__(self):
    # Return the interpreter's native ``str`` type: unicode text on
    # Python 3, UTF-8 encoded bytes on Python 2. Python 2 implicitly
    # ASCII-encodes a unicode value returned from ``__str__``, which raises
    # for non-ASCII text, so unicode must not be returned unconditionally.
    if six.PY3:
        return self.__unicode__()
    return self.__bytes__()
def __repr__(self):
    # Delegate to ``__str__`` so ``repr()`` also receives the native string
    # type of the running interpreter rather than unicode, which Python 2
    # cannot return from ``__repr__`` when the text is non-ASCII.
    return self.__str__()
def similarity(self, other):
if self.vector_norm == 0 or other.vector_norm == 0:

View File

@ -4,6 +4,7 @@ import numpy
import numpy.linalg
cimport numpy as np
import math
import six
from ..structs cimport TokenC, LexemeC
from ..typedefs cimport flags_t, attr_t
@ -47,10 +48,9 @@ cdef class Span:
return self.end - self.start
def __repr__(self):
    # Return the span's ``text`` as the interpreter's native string type:
    # unicode on Python 3, UTF-8 encoded bytes on Python 2 (where a unicode
    # return value would be implicitly ASCII-encoded and fail on non-ASCII
    # characters).
    if six.PY3:
        return self.text
    return self.text.encode('utf-8')
def __getitem__(self, object i):
if isinstance(i, slice):

View File

@ -6,6 +6,7 @@ cimport numpy as np
np.import_array()
import numpy
import six
from ..lexeme cimport Lexeme
@ -40,11 +41,16 @@ cdef class Token:
def __unicode__(self):
    # The unicode form of a token is its ``string`` attribute.
    text = self.string
    return text
def __bytes__(self):
    # UTF-8 encoded form of the token's ``string`` attribute.
    text = self.string
    return text.encode('utf-8')
def __str__(self):
    # Return the interpreter's native ``str`` type: unicode text on
    # Python 3, UTF-8 encoded bytes on Python 2. Python 2 implicitly
    # ASCII-encodes a unicode value returned from ``__str__``, which raises
    # for non-ASCII text, so unicode must not be returned unconditionally.
    if six.PY3:
        return self.__unicode__()
    return self.__bytes__()
def __repr__(self):
    # Delegate to ``__str__`` so ``repr()`` also receives the native string
    # type of the running interpreter rather than unicode, which Python 2
    # cannot return from ``__repr__`` when the text is non-ASCII.
    return self.__str__()
cpdef bint check_flag(self, attr_id_t flag_id) except -1:
return Lexeme.c_check_flag(self.c.lex, flag_id)