diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 957bc59e6..01ccb4fd9 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -7,6 +7,7 @@ import numpy.linalg
 import struct
 cimport numpy as np
 import math
+import six
 
 from ..lexeme cimport Lexeme
 from ..lexeme cimport EMPTY_LEXEME
@@ -117,11 +118,16 @@ cdef class Doc:
     def __unicode__(self):
         return u''.join([t.string for t in self])
 
-    def __str__(self):
+    def __bytes__(self):
         return u''.join([t.string for t in self]).encode('utf-8')
 
+    def __str__(self):
+        if six.PY3:
+            return self.__unicode__()
+        return self.__bytes__()
+
     def __repr__(self):
-        return u''.join([t.string for t in self]).encode('utf-8')
+        return self.__str__()
 
     def similarity(self, other):
         if self.vector_norm == 0 or other.vector_norm == 0:
diff --git a/spacy/tokens/spans.pyx b/spacy/tokens/spans.pyx
index 1f6b07636..95b8e0de1 100644
--- a/spacy/tokens/spans.pyx
+++ b/spacy/tokens/spans.pyx
@@ -4,6 +4,7 @@ import numpy
 import numpy.linalg
 cimport numpy as np
 import math
+import six
 
 from ..structs cimport TokenC, LexemeC
 from ..typedefs cimport flags_t, attr_t
@@ -47,10 +48,9 @@ cdef class Span:
         return self.end - self.start
 
     def __repr__(self):
-        text = self.text_with_ws
-        if self[-1].whitespace_:
-            text = text[:-1]
-        return text.encode('utf-8')
+        if six.PY3:
+            return self.text
+        return self.text.encode('utf-8')
 
     def __getitem__(self, object i):
         if isinstance(i, slice):
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 02ef52d0c..81b850285 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -6,6 +6,7 @@ cimport numpy as np
 np.import_array()
 
 import numpy
+import six
 
 
 from ..lexeme cimport Lexeme
@@ -40,11 +41,16 @@ cdef class Token:
     def __unicode__(self):
         return self.string
 
-    def __str__(self):
+    def __bytes__(self):
         return self.string.encode('utf-8')
 
+    def __str__(self):
+        if six.PY3:
+            return self.__unicode__()
+        return self.__bytes__()
+
     def __repr__(self):
-        return self.string.encode('utf-8')
+        return self.__str__()
 
     cpdef bint check_flag(self, attr_id_t flag_id) except -1:
         return Lexeme.c_check_flag(self.c.lex, flag_id)