mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Try to fix tests on Travis (2.7)
This commit is contained in:
parent
f2a131bd9a
commit
56de520afd
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ...attrs import LIKE_NUM
|
from ...attrs import LIKE_NUM
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
|
@ -105,6 +106,6 @@ STOP_WORDS = set(
|
||||||
յուրաքանչյուր
|
յուրաքանչյուր
|
||||||
այս
|
այս
|
||||||
մեջ
|
մեջ
|
||||||
թ
|
թ
|
||||||
""".split()
|
""".split()
|
||||||
)
|
)
|
||||||
|
|
|
@ -109,6 +109,7 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
if reset:
|
if reset:
|
||||||
try:
|
try:
|
||||||
import pkuseg
|
import pkuseg
|
||||||
|
|
||||||
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(None)
|
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(None)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
if self.use_pkuseg:
|
if self.use_pkuseg:
|
||||||
|
@ -118,7 +119,7 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
)
|
)
|
||||||
raise ImportError(msg)
|
raise ImportError(msg)
|
||||||
for word in words:
|
for word in words:
|
||||||
self.pkuseg_seg.preprocesser.insert(word.strip(), '')
|
self.pkuseg_seg.preprocesser.insert(word.strip(), "")
|
||||||
|
|
||||||
def _get_config(self):
|
def _get_config(self):
|
||||||
config = OrderedDict(
|
config = OrderedDict(
|
||||||
|
@ -168,21 +169,19 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
return util.to_bytes(serializers, [])
|
return util.to_bytes(serializers, [])
|
||||||
|
|
||||||
def from_bytes(self, data, **kwargs):
|
def from_bytes(self, data, **kwargs):
|
||||||
pkuseg_features_b = b""
|
data = {"features_b": b"", "weights_b": b"", "processors_data": None}
|
||||||
pkuseg_weights_b = b""
|
# pkuseg_features_b = b""
|
||||||
pkuseg_processors_data = None
|
# pkuseg_weights_b = b""
|
||||||
|
# pkuseg_processors_data = None
|
||||||
|
|
||||||
def deserialize_pkuseg_features(b):
|
def deserialize_pkuseg_features(b):
|
||||||
nonlocal pkuseg_features_b
|
data["features_b"] = b
|
||||||
pkuseg_features_b = b
|
|
||||||
|
|
||||||
def deserialize_pkuseg_weights(b):
|
def deserialize_pkuseg_weights(b):
|
||||||
nonlocal pkuseg_weights_b
|
data["weights_b"] = b
|
||||||
pkuseg_weights_b = b
|
|
||||||
|
|
||||||
def deserialize_pkuseg_processors(b):
|
def deserialize_pkuseg_processors(b):
|
||||||
nonlocal pkuseg_processors_data
|
data["processors_data"] = srsly.msgpack_loads(b)
|
||||||
pkuseg_processors_data = srsly.msgpack_loads(b)
|
|
||||||
|
|
||||||
deserializers = OrderedDict(
|
deserializers = OrderedDict(
|
||||||
(
|
(
|
||||||
|
@ -194,13 +193,13 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
)
|
)
|
||||||
util.from_bytes(data, deserializers, [])
|
util.from_bytes(data, deserializers, [])
|
||||||
|
|
||||||
if pkuseg_features_b and pkuseg_weights_b:
|
if data["features_b"] and data["weights_b"]:
|
||||||
with tempfile.TemporaryDirectory() as tempdir:
|
with tempfile.TemporaryDirectory() as tempdir:
|
||||||
tempdir = Path(tempdir)
|
tempdir = Path(tempdir)
|
||||||
with open(tempdir / "features.pkl", "wb") as fileh:
|
with open(tempdir / "features.pkl", "wb") as fileh:
|
||||||
fileh.write(pkuseg_features_b)
|
fileh.write(data["features_b"])
|
||||||
with open(tempdir / "weights.npz", "wb") as fileh:
|
with open(tempdir / "weights.npz", "wb") as fileh:
|
||||||
fileh.write(pkuseg_weights_b)
|
fileh.write(data["weights_b"])
|
||||||
try:
|
try:
|
||||||
import pkuseg
|
import pkuseg
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -209,13 +208,10 @@ class ChineseTokenizer(DummyTokenizer):
|
||||||
+ _PKUSEG_INSTALL_MSG
|
+ _PKUSEG_INSTALL_MSG
|
||||||
)
|
)
|
||||||
self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
|
self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
|
||||||
if pkuseg_processors_data:
|
if data["processors_data"]:
|
||||||
(
|
(user_dict, do_process, common_words, other_words) = data[
|
||||||
user_dict,
|
"processors_data"
|
||||||
do_process,
|
]
|
||||||
common_words,
|
|
||||||
other_words,
|
|
||||||
) = pkuseg_processors_data
|
|
||||||
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
|
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
|
||||||
self.pkuseg_seg.postprocesser.do_process = do_process
|
self.pkuseg_seg.postprocesser.do_process = do_process
|
||||||
self.pkuseg_seg.postprocesser.common_words = set(common_words)
|
self.pkuseg_seg.postprocesser.common_words = set(common_words)
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
Loading…
Reference in New Issue
Block a user