Try to fix tests on Travis (2.7)

This commit is contained in:
Ines Montani 2020-05-21 14:04:57 +02:00
parent f2a131bd9a
commit 56de520afd
5 changed files with 21 additions and 21 deletions

View File

@ -1,3 +1,4 @@
# coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals

View File

@ -1,3 +1,4 @@
# coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
from ...attrs import LIKE_NUM from ...attrs import LIKE_NUM

View File

@ -1,3 +1,4 @@
# coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
@ -105,6 +106,6 @@ STOP_WORDS = set(
յուրաքանչյուր յուրաքանչյուր
այս այս
մեջ մեջ
թ թ
""".split() """.split()
) )

View File

@ -109,6 +109,7 @@ class ChineseTokenizer(DummyTokenizer):
if reset: if reset:
try: try:
import pkuseg import pkuseg
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(None) self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(None)
except ImportError: except ImportError:
if self.use_pkuseg: if self.use_pkuseg:
@ -118,7 +119,7 @@ class ChineseTokenizer(DummyTokenizer):
) )
raise ImportError(msg) raise ImportError(msg)
for word in words: for word in words:
self.pkuseg_seg.preprocesser.insert(word.strip(), '') self.pkuseg_seg.preprocesser.insert(word.strip(), "")
def _get_config(self): def _get_config(self):
config = OrderedDict( config = OrderedDict(
@ -168,21 +169,19 @@ class ChineseTokenizer(DummyTokenizer):
return util.to_bytes(serializers, []) return util.to_bytes(serializers, [])
def from_bytes(self, data, **kwargs): def from_bytes(self, data, **kwargs):
pkuseg_features_b = b"" data = {"features_b": b"", "weights_b": b"", "processors_data": None}
pkuseg_weights_b = b"" # pkuseg_features_b = b""
pkuseg_processors_data = None # pkuseg_weights_b = b""
# pkuseg_processors_data = None
def deserialize_pkuseg_features(b): def deserialize_pkuseg_features(b):
nonlocal pkuseg_features_b data["features_b"] = b
pkuseg_features_b = b
def deserialize_pkuseg_weights(b): def deserialize_pkuseg_weights(b):
nonlocal pkuseg_weights_b data["weights_b"] = b
pkuseg_weights_b = b
def deserialize_pkuseg_processors(b): def deserialize_pkuseg_processors(b):
nonlocal pkuseg_processors_data data["processors_data"] = srsly.msgpack_loads(b)
pkuseg_processors_data = srsly.msgpack_loads(b)
deserializers = OrderedDict( deserializers = OrderedDict(
( (
@ -194,13 +193,13 @@ class ChineseTokenizer(DummyTokenizer):
) )
util.from_bytes(data, deserializers, []) util.from_bytes(data, deserializers, [])
if pkuseg_features_b and pkuseg_weights_b: if data["features_b"] and data["weights_b"]:
with tempfile.TemporaryDirectory() as tempdir: with tempfile.TemporaryDirectory() as tempdir:
tempdir = Path(tempdir) tempdir = Path(tempdir)
with open(tempdir / "features.pkl", "wb") as fileh: with open(tempdir / "features.pkl", "wb") as fileh:
fileh.write(pkuseg_features_b) fileh.write(data["features_b"])
with open(tempdir / "weights.npz", "wb") as fileh: with open(tempdir / "weights.npz", "wb") as fileh:
fileh.write(pkuseg_weights_b) fileh.write(data["weights_b"])
try: try:
import pkuseg import pkuseg
except ImportError: except ImportError:
@ -209,13 +208,10 @@ class ChineseTokenizer(DummyTokenizer):
+ _PKUSEG_INSTALL_MSG + _PKUSEG_INSTALL_MSG
) )
self.pkuseg_seg = pkuseg.pkuseg(str(tempdir)) self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
if pkuseg_processors_data: if data["processors_data"]:
( (user_dict, do_process, common_words, other_words) = data[
user_dict, "processors_data"
do_process, ]
common_words,
other_words,
) = pkuseg_processors_data
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict) self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
self.pkuseg_seg.postprocesser.do_process = do_process self.pkuseg_seg.postprocesser.do_process = do_process
self.pkuseg_seg.postprocesser.common_words = set(common_words) self.pkuseg_seg.postprocesser.common_words = set(common_words)

View File

@ -1,3 +1,4 @@
# coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest import pytest