From 45e045a87b0170395efc6fc25ed34c5603f0ec8f Mon Sep 17 00:00:00 2001 From: Gyorgy Orosz Date: Sat, 24 Dec 2016 00:21:00 +0100 Subject: [PATCH] Unicode/UTF8 compatibility for Python2 --- spacy/hu/__init__.py | 1 + spacy/hu/punctuations.py | 3 +++ spacy/hu/tokenizer_exceptions.py | 1 + spacy/tests/hu/tokenizer/test_tokenizer.py | 4 +++- 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/spacy/hu/__init__.py b/spacy/hu/__init__.py index c529251df..2343b4606 100644 --- a/spacy/hu/__init__.py +++ b/spacy/hu/__init__.py @@ -1,3 +1,4 @@ +# encoding: utf8 from __future__ import unicode_literals, print_function from .language_data import * diff --git a/spacy/hu/punctuations.py b/spacy/hu/punctuations.py index 284f65fd7..3681a2fbe 100644 --- a/spacy/hu/punctuations.py +++ b/spacy/hu/punctuations.py @@ -1,3 +1,6 @@ +# encoding: utf8 +from __future__ import unicode_literals + TOKENIZER_PREFIXES = r''' + '''.strip().split('\n') diff --git a/spacy/hu/tokenizer_exceptions.py b/spacy/hu/tokenizer_exceptions.py index 43a958c0f..627035bb8 100644 --- a/spacy/hu/tokenizer_exceptions.py +++ b/spacy/hu/tokenizer_exceptions.py @@ -1,3 +1,4 @@ +# encoding: utf8 from __future__ import unicode_literals ABBREVIATIONS = """ diff --git a/spacy/tests/hu/tokenizer/test_tokenizer.py b/spacy/tests/hu/tokenizer/test_tokenizer.py index 2b240fd40..2bfbfdf36 100644 --- a/spacy/tests/hu/tokenizer/test_tokenizer.py +++ b/spacy/tests/hu/tokenizer/test_tokenizer.py @@ -1,5 +1,7 @@ -import pytest +# encoding: utf8 +from __future__ import unicode_literals +import pytest from spacy.hu import Hungarian _DEFAULT_TESTS = [('N. kormányzósági\nszékhely.', ['N.', 'kormányzósági', 'székhely', '.']),