fixed error when printing unicode

This commit is contained in:
Andreas Grivas 2015-11-02 20:22:18 +02:00
parent f56209ef2e
commit d418f00eb1
4 changed files with 118 additions and 8 deletions

View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
import pytest
def test_print_doc(EN):
    """Check that printing the object built from an ASCII sentence works.

    Args:
        EN: fixture called with a unicode string; presumably the English
            pipeline returning a document -- confirm against conftest.
    """
    # Built outside the ``try`` so a parsing error is not misreported as a
    # printing error.
    doc = EN(u'I sat down for coffee at the coffee store')
    try:
        print(doc)
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_repr_doc(EN):
    """Check that printing ``repr()`` of an ASCII document works.

    Args:
        EN: fixture called with a unicode string; presumably the English
            pipeline returning a document -- confirm against conftest.
    """
    # Built outside the ``try`` so a parsing error is not misreported as a
    # printing error.
    doc = EN(u'I sat down for coffee at the coffee store')
    try:
        print(repr(doc))
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_print_doc_unicode(EN):
    """Check that printing the object built from non-ASCII text works.

    Args:
        EN: fixture called with a unicode string; presumably the English
            pipeline returning a document -- confirm against conftest.
    """
    # Built outside the ``try`` so a parsing error is not misreported as a
    # printing error.
    doc = EN(u'I sat down for coffee at the café')
    try:
        print(doc)
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_repr_doc_unicode(EN):
    """Check that printing ``repr()`` of a non-ASCII document works.

    Args:
        EN: fixture called with a unicode string; presumably the English
            pipeline returning a document -- confirm against conftest.
    """
    # Built outside the ``try`` so a parsing error is not misreported as a
    # printing error.
    doc = EN(u'I sat down for coffee at the café')
    try:
        print(repr(doc))
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_print_span(EN):
    """Check that printing a trailing ``[-3:]`` slice of ASCII text works.

    Args:
        EN: fixture called with a unicode string; its result is sliced
            with ``[-3:]`` (presumably yielding a span -- confirm).
    """
    # Built outside the ``try`` so a parsing or slicing error is not
    # misreported as a printing error.
    span = EN(u'I sat down for coffee at the coffee store')[-3:]
    try:
        print(span)
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_repr_span(EN):
    """Check that printing ``repr()`` of a ``[-3:]`` ASCII slice works.

    Args:
        EN: fixture called with a unicode string; its result is sliced
            with ``[-3:]`` (presumably yielding a span -- confirm).
    """
    # Built outside the ``try`` so a parsing or slicing error is not
    # misreported as a printing error.
    span = EN(u'I sat down for coffee at the coffee store')[-3:]
    try:
        print(repr(span))
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_print_span_unicode(EN):
    """Check that printing a ``[-3:]`` slice ending in non-ASCII text works.

    Args:
        EN: fixture called with a unicode string; its result is sliced
            with ``[-3:]`` (presumably yielding a span -- confirm).
    """
    # Built outside the ``try`` so a parsing or slicing error is not
    # misreported as a printing error.
    span = EN(u'I sat down for coffee at the café')[-3:]
    try:
        print(span)
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_repr_span_unicode(EN):
    """Check that printing ``repr()`` of a non-ASCII ``[-3:]`` slice works.

    Args:
        EN: fixture called with a unicode string; its result is sliced
            with ``[-3:]`` (presumably yielding a span -- confirm).
    """
    # Built outside the ``try`` so a parsing or slicing error is not
    # misreported as a printing error.
    span = EN(u'I sat down for coffee at the café')[-3:]
    try:
        print(repr(span))
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_print_token(EN):
    """Check that printing the last element of an ASCII sentence works.

    Args:
        EN: fixture called with a unicode string; its result is indexed
            with ``[-1]`` (presumably yielding a token -- confirm).
    """
    # Built outside the ``try`` so a parsing or indexing error is not
    # misreported as a printing error.
    token = EN(u'I sat down for coffee at the coffee store')[-1]
    try:
        print(token)
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_repr_token(EN):
    """Check that printing ``repr()`` of the last ASCII element works.

    Args:
        EN: fixture called with a unicode string; its result is indexed
            with ``[-1]`` (presumably yielding a token -- confirm).
    """
    # Built outside the ``try`` so a parsing or indexing error is not
    # misreported as a printing error.
    token = EN(u'I sat down for coffee at the coffee store')[-1]
    try:
        print(repr(token))
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_print_token_unicode(EN):
    """Check that printing a non-ASCII last element works.

    Args:
        EN: fixture called with a unicode string; its result is indexed
            with ``[-1]`` (presumably yielding a token -- confirm).
    """
    # Built outside the ``try`` so a parsing or indexing error is not
    # misreported as a printing error.
    token = EN(u'I sat down for coffee at the café')[-1]
    try:
        print(token)
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))
def test_repr_token_unicode(EN):
    """Check that printing ``repr()`` of a non-ASCII last element works.

    Args:
        EN: fixture called with a unicode string; its result is indexed
            with ``[-1]`` (presumably yielding a token -- confirm).
    """
    # Built outside the ``try`` so a parsing or indexing error is not
    # misreported as a printing error.
    token = EN(u'I sat down for coffee at the café')[-1]
    try:
        print(repr(token))
    except Exception as exc:
        # Surface the real cause; repr() avoids re-encoding the error text
        # while building the failure message.
        pytest.fail("Printing failed: {0!r}".format(exc))

View File

@ -7,6 +7,7 @@ import numpy.linalg
import struct import struct
cimport numpy as np cimport numpy as np
import math import math
import six
from ..lexeme cimport Lexeme from ..lexeme cimport Lexeme
from ..lexeme cimport EMPTY_LEXEME from ..lexeme cimport EMPTY_LEXEME
@ -117,11 +118,16 @@ cdef class Doc:
def __unicode__(self): def __unicode__(self):
return u''.join([t.string for t in self]) return u''.join([t.string for t in self])
def __bytes__(self):
return u''.join([t.string for t in self]).encode('utf-8')
def __str__(self): def __str__(self):
return u''.join([t.string for t in self]) if six.PY3:
return self.__unicode__()
return self.__bytes__()
def __repr__(self): def __repr__(self):
return u''.join([t.string for t in self]) return self.__str__()
def similarity(self, other): def similarity(self, other):
if self.vector_norm == 0 or other.vector_norm == 0: if self.vector_norm == 0 or other.vector_norm == 0:

View File

@ -4,6 +4,7 @@ import numpy
import numpy.linalg import numpy.linalg
cimport numpy as np cimport numpy as np
import math import math
import six
from ..structs cimport TokenC, LexemeC from ..structs cimport TokenC, LexemeC
from ..typedefs cimport flags_t, attr_t from ..typedefs cimport flags_t, attr_t
@ -47,10 +48,9 @@ cdef class Span:
return self.end - self.start return self.end - self.start
def __repr__(self): def __repr__(self):
text = self.text_with_ws if six.PY3:
if self[-1].whitespace_: return self.text
text = text[:-1] return self.text.encode('utf-8')
return text
def __getitem__(self, object i): def __getitem__(self, object i):
if isinstance(i, slice): if isinstance(i, slice):

View File

@ -6,6 +6,7 @@ cimport numpy as np
np.import_array() np.import_array()
import numpy import numpy
import six
from ..lexeme cimport Lexeme from ..lexeme cimport Lexeme
@ -40,11 +41,16 @@ cdef class Token:
def __unicode__(self): def __unicode__(self):
return self.string return self.string
def __bytes__(self):
return self.string.encode('utf-8')
def __str__(self): def __str__(self):
return self.string if six.PY3:
return self.__unicode__()
return self.__bytes__()
def __repr__(self): def __repr__(self):
return self.string return self.__str__()
cpdef bint check_flag(self, attr_id_t flag_id) except -1: cpdef bint check_flag(self, attr_id_t flag_id) except -1:
return Lexeme.c_check_flag(self.c.lex, flag_id) return Lexeme.c_check_flag(self.c.lex, flag_id)