From 8edd58492e43088212dc9c32c7ed9883f0abad18 Mon Sep 17 00:00:00 2001
From: Andreas Grivas
Date: Mon, 2 Nov 2015 13:41:39 +0200
Subject: [PATCH] fixed unicode error in printing - added tests

Printing a Doc, Span or Token that contains non-ASCII text could fail on
Python 2 because the string conversion methods returned unicode objects.
Return UTF-8 encoded bytes on Python 2 only; on Python 3 keep returning
unicode text, since __str__/__repr__ must return str there and returning
bytes raises TypeError.

The tests no longer wrap their bodies in try/except + pytest.fail(), so the
original exception and traceback are reported when printing breaks.
---
 spacy/tests/print/test_print.py | 67 +++++++++++++++++++++++++++++++++
 spacy/tokens/doc.pyx            | 10 ++++--
 spacy/tokens/spans.pyx          |  7 +++-
 spacy/tokens/token.pyx          |  9 ++++--
 4 files changed, 88 insertions(+), 5 deletions(-)
 create mode 100644 spacy/tests/print/test_print.py

diff --git a/spacy/tests/print/test_print.py b/spacy/tests/print/test_print.py
new file mode 100644
index 000000000..744a813d6
--- /dev/null
+++ b/spacy/tests/print/test_print.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+"""Smoke tests: printing Doc, Span and Token objects must not raise.
+
+Exceptions are deliberately left to propagate so that pytest reports the
+original traceback (e.g. a UnicodeEncodeError) rather than a generic failure.
+"""
+import pytest
+
+
+def test_print_doc(EN):
+    doc = EN(u'I sat down for coffee at the coffee store')
+    print(doc)
+
+
+def test_repr_doc(EN):
+    doc = EN(u'I sat down for coffee at the coffee store')
+    print(repr(doc))
+
+
+def test_print_doc_unicode(EN):
+    doc = EN(u'I sat down for coffee at the café')
+    print(doc)
+
+
+def test_repr_doc_unicode(EN):
+    doc = EN(u'I sat down for coffee at the café')
+    print(repr(doc))
+
+
+def test_print_span(EN):
+    span = EN(u'I sat down for coffee at the coffee store')[-3:]
+    print(span)
+
+
+def test_repr_span(EN):
+    span = EN(u'I sat down for coffee at the coffee store')[-3:]
+    print(repr(span))
+
+
+def test_print_span_unicode(EN):
+    span = EN(u'I sat down for coffee at the café')[-3:]
+    print(span)
+
+
+def test_repr_span_unicode(EN):
+    span = EN(u'I sat down for coffee at the café')[-3:]
+    print(repr(span))
+
+
+def test_print_token(EN):
+    token = EN(u'I sat down for coffee at the coffee store')[-1]
+    print(token)
+
+
+def test_repr_token(EN):
+    token = EN(u'I sat down for coffee at the coffee store')[-1]
+    print(repr(token))
+
+
+def test_print_token_unicode(EN):
+    token = EN(u'I sat down for coffee at the café')[-1]
+    print(token)
+
+
+def test_repr_token_unicode(EN):
+    token = EN(u'I sat down for coffee at the café')[-1]
+    print(repr(token))
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 1626ebfc6..957bc59e6 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -118,10 +118,16 @@ cdef class Doc:
         return u''.join([t.string for t in self])
 
     def __str__(self):
-        return u''.join([t.string for t in self])
+        # str() must return the native str type: UTF-8 bytes on Python 2,
+        # unicode text on Python 3 (returning bytes there raises TypeError).
+        import sys
+        text = u''.join([t.string for t in self])
+        if sys.version_info[0] < 3:
+            return text.encode('utf-8')
+        return text
 
     def __repr__(self):
-        return u''.join([t.string for t in self])
+        return self.__str__()
 
     def similarity(self, other):
         if self.vector_norm == 0 or other.vector_norm == 0:
diff --git a/spacy/tokens/spans.pyx b/spacy/tokens/spans.pyx
index e1b881f79..1f6b07636 100644
--- a/spacy/tokens/spans.pyx
+++ b/spacy/tokens/spans.pyx
@@ -50,7 +50,12 @@ cdef class Span:
         text = self.text_with_ws
         if self[-1].whitespace_:
             text = text[:-1]
-        return text
+        # Encode only on Python 2, where printing needs a byte string; on
+        # Python 3 the text must stay unicode (bytes would raise TypeError).
+        import sys
+        if sys.version_info[0] < 3:
+            return text.encode('utf-8')
+        return text
 
     def __getitem__(self, object i):
         if isinstance(i, slice):
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index cce8eeeb4..02ef52d0c 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -41,10 +41,15 @@ cdef class Token:
         return self.string
 
     def __str__(self):
-        return self.string
+        # str() must return the native str type: UTF-8 bytes on Python 2,
+        # unicode text on Python 3 (returning bytes there raises TypeError).
+        import sys
+        if sys.version_info[0] < 3:
+            return self.string.encode('utf-8')
+        return self.string
 
     def __repr__(self):
-        return self.string
+        return self.__str__()
 
     cpdef bint check_flag(self, attr_id_t flag_id) except -1:
         return Lexeme.c_check_flag(self.c.lex, flag_id)