spaCy/spacy/compat.py

97 lines
2.2 KiB
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
import six
import ftfy
import sys
2017-04-15 14:05:15 +03:00
import ujson
2017-07-25 19:57:59 +03:00
import itertools
2017-05-31 16:25:21 +03:00
from thinc.neural.util import copy_array
# Bind `pickle` to cPickle when it can be imported, otherwise to the
# standard pickle module.
try:
    import cPickle as pickle
except ImportError:
    import pickle
# Bind `copy_reg` to the module of that name, or to `copyreg` when
# `copy_reg` cannot be imported.
try:
    import copy_reg
except ImportError:
    import copyreg as copy_reg
2017-05-18 15:12:45 +03:00
# cupy imports are attempted independently; whichever one fails with
# ImportError leaves its name bound to None so callers can test for it.
try:
    from cupy.cuda.stream import Stream as CudaStream
except ImportError:
    CudaStream = None
try:
    import cupy
except ImportError:
    cupy = None
# Rebind the imported names to themselves at module level. These are
# no-ops at runtime.
# NOTE(review): presumably this marks the imports as intentional
# re-exports of this module -- confirm.
pickle = pickle
copy_reg = copy_reg
CudaStream = CudaStream
cupy = cupy
# Expose ftfy's fix_text under this module's namespace.
fix_text = ftfy.fix_text
2017-05-31 16:25:21 +03:00
# No-op rebinding of the imported copy_array at module level.
# NOTE(review): presumably kept so the name reads as a deliberate
# re-export -- confirm.
copy_array = copy_array
2017-07-25 19:57:59 +03:00
# itertools.izip when the attribute exists, otherwise the builtin zip.
izip = getattr(itertools, 'izip', zip)
# Interpreter major-version flags, taken from six.
is_python2 = six.PY2
is_python3 = six.PY3
# Platform flags derived from sys.platform.
is_windows = sys.platform.startswith('win')
is_linux = sys.platform.startswith('linux')
is_osx = sys.platform == 'darwin'
# Version-specific aliases. Each branch binds the same set of names
# (bytes_, unicode_, basestring_, input_, json_dumps, path2str) so the
# rest of the code can use them without checking the interpreter version.
if is_python2:
    bytes_ = str
    unicode_ = unicode
    basestring_ = basestring
    input_ = raw_input

    def json_dumps(data):
        """Serialize `data` with ujson (indent=2) and decode the result
        from UTF-8.

        data: The object to serialize.
        RETURNS: The decoded JSON string.
        """
        return ujson.dumps(data, indent=2).decode('utf8')

    def path2str(path):
        """Convert `path` with str() and decode the result from UTF-8.

        path: The path object to convert.
        RETURNS: The decoded string form of the path.
        """
        return str(path).decode('utf8')

elif is_python3:
    bytes_ = bytes
    unicode_ = str
    basestring_ = str
    input_ = input

    def json_dumps(data):
        """Serialize `data` with ujson (indent=2).

        data: The object to serialize.
        RETURNS: The JSON string produced by ujson.
        """
        return ujson.dumps(data, indent=2)

    def path2str(path):
        """Convert `path` to its str() form.

        path: The path object to convert.
        RETURNS: The string form of the path.
        """
        return str(path)
2017-05-31 23:21:44 +03:00
def getattr_(obj, name, *default):
    """Look up an attribute, accepting the attribute name as bytes on
    Python 3.

    obj: The object to read the attribute from.
    name: The attribute name. On Python 3, a bytes name is decoded as
        UTF-8 before the lookup.
    *default: Optional fallback, forwarded unchanged to the builtin getattr.
    RETURNS: Whatever the builtin getattr returns for the resolved name.
    """
    needs_decoding = is_python3 and isinstance(name, bytes)
    attr_name = name.decode('utf8') if needs_decoding else name
    return getattr(obj, attr_name, *default)
def symlink_to(orig, dest):
    """Create a symbolic link via `orig.symlink_to(dest)`, with a
    shell-based workaround on Python 2 under Windows.

    orig: Object providing a `symlink_to` method (presumably a
        pathlib-style path -- confirm against callers).
    dest: The argument handed to `orig.symlink_to`.

    On Python 2 + Windows the `mklink /d` shell command is invoked with
    the string forms of both paths instead; its exit status is not
    inspected.
    """
    if not (is_python2 and is_windows):
        orig.symlink_to(dest)
        return
    # Imported lazily: only this one platform combination needs it.
    import subprocess
    command = ['mklink', '/d', path2str(orig), path2str(dest)]
    subprocess.call(command, shell=True)
def is_config(python2=None, python3=None, windows=None, linux=None, osx=None):
    """Check whether the running interpreter/platform matches the given
    constraints.

    Each argument left as None is ignored. Every other argument must
    equal the corresponding module-level flag (is_python2, is_python3,
    is_windows, is_linux, is_osx).

    python2, python3, windows, linux, osx: Expected flag value, or None
        for "don't care".
    RETURNS (bool): True if all supplied constraints hold.
    """
    constraints = (
        (python2, is_python2),
        (python3, is_python3),
        (windows, is_windows),
        (linux, is_linux),
        (osx, is_osx),
    )
    # `is None` (identity) rather than `== None`: None is a singleton and
    # identity cannot be affected by a custom __eq__.
    return all(wanted is None or wanted == actual
               for wanted, actual in constraints)
def normalize_string_keys(old):
    """Return a new dict in which byte-string keys are decoded to unicode.

    old (dict): The mapping to copy. It is not modified.
    RETURNS (dict): A dict with the same values; keys that are instances
        of `bytes_` are decoded as UTF-8, all other keys are kept as-is.
    """
    return {
        (key.decode('utf8') if isinstance(key, bytes_) else key): value
        for key, value in old.items()
    }