Unicode/UTF-8 compatibility for Python 2

This commit is contained in:
Gyorgy Orosz 2016-12-24 00:21:00 +01:00
parent 72b61b6d03
commit 45e045a87b
4 changed files with 8 additions and 1 deletion

View File

@@ -1,3 +1,4 @@
# encoding: utf8
from __future__ import unicode_literals, print_function
from .language_data import *

View File

@@ -1,3 +1,6 @@
# encoding: utf8
from __future__ import unicode_literals
TOKENIZER_PREFIXES = r'''
+
'''.strip().split('\n')

View File

@@ -1,3 +1,4 @@
# encoding: utf8
from __future__ import unicode_literals
ABBREVIATIONS = """

View File

@@ -1,5 +1,7 @@
import pytest
# encoding: utf8
from __future__ import unicode_literals
import pytest
from spacy.hu import Hungarian
_DEFAULT_TESTS = [('N. kormányzósági\nszékhely.', ['N.', 'kormányzósági', 'székhely', '.']),