spaCy/spacy/compat.py

# coding: utf8
from __future__ import unicode_literals

import six
import ftfy
import sys
import ujson
import itertools
import locale

from thinc.neural.util import copy_array

try:
    import cPickle as pickle
except ImportError:
    import pickle

try:
    import copy_reg
except ImportError:
    import copyreg as copy_reg

try:
    from cupy.cuda.stream import Stream as CudaStream
except ImportError:
    CudaStream = None

try:
    import cupy
except ImportError:
    cupy = None

try:
    from thinc.neural.optimizers import Optimizer
except ImportError:
    from thinc.neural.optimizers import Adam as Optimizer

# Explicit re-exports: binding each imported name to itself spells out which
# names are meant to be importable from this module.
pickle = pickle
copy_reg = copy_reg
CudaStream = CudaStream
cupy = cupy
copy_array = copy_array
fix_text = ftfy.fix_text

# Use itertools.izip where the interpreter provides it, otherwise fall back
# to the built-in zip.
try:
    izip = itertools.izip
except AttributeError:
    izip = zip

# Interpreter version flags, as reported by six.
is_python2 = six.PY2
is_python3 = six.PY3

# Operating system flags, derived from sys.platform.
is_windows = sys.platform.startswith('win')
is_linux = sys.platform.startswith('linux')
is_osx = sys.platform == 'darwin'


if is_python2:
    import imp

    # Python 2: str is the byte string type, unicode is the text type.
    bytes_ = str
    unicode_ = unicode
    basestring_ = basestring
    input_ = raw_input

    def json_dumps(data):
        """Serialize data to indented JSON and return it as unicode."""
        encoded = ujson.dumps(data, indent=2, escape_forward_slashes=False)
        # Decode the ujson output so callers get unicode on both versions.
        return encoded.decode('utf8')

    def path2str(path):
        """Convert a path to a unicode string."""
        return str(path).decode('utf8')

elif is_python3:
    import importlib.util

    # Python 3: bytes is the byte string type, str is the text type.
    bytes_ = bytes
    unicode_ = str
    basestring_ = str
    input_ = input

    def json_dumps(data):
        """Serialize data to an indented JSON string."""
        return ujson.dumps(data, indent=2, escape_forward_slashes=False)

    def path2str(path):
        """Convert a path to a string."""
        return str(path)


def b_to_str(b_str):
    """Convert a byte string to the interpreter's native str type.

    b_str: The byte string to convert.
    RETURNS: b_str unchanged on Python 2; on Python 3, b_str decoded as UTF-8.
    """
    if not is_python2:
        # The encoding must be passed explicitly: without it, str() returns
        # the repr of the bytes object ("b'...'") instead of the text.
        return str(b_str, encoding='utf8')
    return b_str


def getattr_(obj, name, *default):
    """Wrapper around getattr() that accepts a bytes attribute name on Python 3.

    obj: The object to read the attribute from.
    name: Attribute name; a bytes name is decoded as UTF-8 on Python 3.
    *default: Optional fallback, passed through to getattr() unchanged.
    RETURNS: Whatever getattr() returns for the (possibly decoded) name.
    """
    needs_decoding = is_python3 and isinstance(name, bytes)
    attr_name = name.decode('utf8') if needs_decoding else name
    return getattr(obj, attr_name, *default)


def symlink_to(orig, dest):
    """Create a symbolic link at `orig` that points to `dest`.

    Note the argument order: despite the names, `orig` is the link that gets
    created and `dest` is the existing target. Both branches follow this
    order (`mklink /d <link> <target>` and `orig.symlink_to(dest)`).

    orig: Location of the link to create. Outside of Python 2 on Windows it
        must provide a `symlink_to` method (presumably a pathlib-style Path).
    dest: The target the link should point to.
    RAISES: subprocess.CalledProcessError if `mklink` exits with a non-zero
        status (Python 2 on Windows); otherwise whatever `orig.symlink_to`
        raises.
    """
    if is_python2 and is_windows:
        import subprocess
        # mklink is a cmd.exe built-in, so it has to be run through the shell.
        # check_call is used so that a failed link creation raises instead of
        # being silently ignored, matching the behaviour of the other branch.
        subprocess.check_call(['mklink', '/d', path2str(orig), path2str(dest)],
                              shell=True)
    else:
        orig.symlink_to(dest)


def is_config(python2=None, python3=None, windows=None, linux=None, osx=None):
    """Check whether the running environment matches the requested flags.

    Each argument is compared against the module-level flag of the same
    meaning (is_python2, is_python3, is_windows, is_linux, is_osx). Arguments
    left as None are not checked.

    RETURNS (bool): True if every flag that was given equals the
        corresponding environment flag.
    """
    requested_and_actual = (
        (python2, is_python2),
        (python3, is_python3),
        (windows, is_windows),
        (linux, is_linux),
        (osx, is_osx),
    )
    return all(requested is None or requested == actual
               for requested, actual in requested_and_actual)


def normalize_string_keys(old):
    """Return a copy of a dictionary whose byte string keys are decoded.

    old (dict): The dictionary to normalize. It is not modified.
    RETURNS (dict): New dictionary with the same values, in which every key
        that is an instance of bytes_ has been decoded as UTF-8. Other keys
        are kept as they are.
    """
    return {
        (key.decode('utf8') if isinstance(key, bytes_) else key): value
        for key, value in old.items()
    }


def import_file(name, loc):
    """Import a module from a file path.

    name: Name to give the imported module.
    loc: Location of the source file; converted with str() before use.
    RETURNS: The loaded module.
    """
    path = str(loc)
    if not is_python2:
        spec = importlib.util.spec_from_file_location(name, path)
        loaded = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(loaded)
        return loaded
    return imp.load_source(name, path)


def locale_escape(string, errors='replace'):
    """Mangle characters that the locale's preferred encoding can't represent.

    The text is round-tripped through locale.getpreferredencoding(), so that
    it can be printed safely on terminals with a limited encoding (e.g.
    ASCII).

    string (unicode): The text to escape.
    errors (unicode): Codec error handler used when encoding, e.g. 'replace'
        (the default) or 'ignore'.
    RETURNS (unicode): The text with unsupported characters handled according
        to `errors`.
    """
    encoding = locale.getpreferredencoding()
    # Decode with the same codec that was used for encoding. Decoding as
    # UTF-8 regardless of the locale raises UnicodeDecodeError (or garbles
    # the text) when the preferred encoding is a non-UTF-8 one like cp1252.
    return string.encode(encoding, errors).decode(encoding)
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00			`# coding: utf8`
			`from __future__ import unicode_literals`

			`import six`
Move fix_text to spacy.compat (see #1002) 2017-04-20 16:47:17 +03:00			`import ftfy`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00			`import sys`
Tidy up and fix formatting and imports 2017-04-15 14:05:15 +03:00			`import ujson`
Update text classification model 2017-07-25 19:57:59 +03:00			`import itertools`
Add locale_escape compat function 2017-10-12 23:22:04 +03:00			`import locale`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00
Fix reference to thinc copy_array util 2017-05-31 16:25:21 +03:00			`from thinc.neural.util import copy_array`
Add compat function to normalize dict keys 2017-05-31 15:14:29 +03:00
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00			`try:`
			`import cPickle as pickle`
			`except ImportError:`
			`import pickle`

			`try:`
			`import copy_reg`
			`except ImportError:`
			`import copyreg as copy_reg`

Move cupy and CudaStream to compat 2017-05-18 15:12:45 +03:00			`try:`
			`from cupy.cuda.stream import Stream as CudaStream`
			`except ImportError:`
			`CudaStream = None`

			`try:`
			`import cupy`
			`except ImportError:`
			`cupy = None`

Add compat for thinc.neural.optimizers.Optimizer 2017-10-27 13:23:49 +03:00			`try:`
Fix import of Optimizer 2017-10-27 15:33:42 +03:00			`from thinc.neural.optimizers import Optimizer`
Add compat for thinc.neural.optimizers.Optimizer 2017-10-27 13:23:49 +03:00			`except ImportError:`
Fix import of Optimizer 2017-10-27 15:33:42 +03:00			`from thinc.neural.optimizers import Adam as Optimizer`
Move cupy and CudaStream to compat 2017-05-18 15:12:45 +03:00
Reorganise and explicitly state what's importable 2017-05-18 15:12:31 +03:00			`pickle = pickle`
			`copy_reg = copy_reg`
			`CudaStream = CudaStream`
			`cupy = cupy`
			`fix_text = ftfy.fix_text`
Fix reference to thinc copy_array util 2017-05-31 16:25:21 +03:00			`copy_array = copy_array`
Update text classification model 2017-07-25 19:57:59 +03:00			`izip = getattr(itertools, 'izip', zip)`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00
			`is_python2 = six.PY2`
			`is_python3 = six.PY3`
			`is_windows = sys.platform.startswith('win')`
			`is_linux = sys.platform.startswith('linux')`
			`is_osx = sys.platform == 'darwin'`


			`if is_python2:`
Add compat function for importlib.util 2017-08-18 22:56:47 +03:00			`import imp`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00			`bytes_ = str`
			`unicode_ = unicode`
			`basestring_ = basestring`
			`input_ = raw_input`
Don't escape forward slashes on ujson.dumps 2017-08-19 23:32:16 +03:00			`json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False).decode('utf8')`
Add path2str compat function 2017-05-08 00:24:56 +03:00			`path2str = lambda path: str(path).decode('utf8')`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00
			`elif is_python3:`
Add compat function for importlib.util 2017-08-18 22:56:47 +03:00			`import importlib.util`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00			`bytes_ = bytes`
			`unicode_ = str`
			`basestring_ = str`
			`input_ = input`
Don't escape forward slashes on ujson.dumps 2017-08-19 23:32:16 +03:00			`json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False)`
Add path2str compat function 2017-05-08 00:24:56 +03:00			`path2str = lambda path: str(path)`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00
Add workaround for displaCy server on Python 2/3 (resolves #1227) Make sure status and headers are bytes on Python 2 and strings on Python 3 2017-08-01 02:11:35 +03:00
			`def b_to_str(b_str):`
			`if is_python2:`
			`return b_str`
			`# important: if no encoding is set, string becomes "b'...'"`
			`return str(b_str, encoding='utf8')`


Fix Python2/3 load bug 2017-05-31 23:21:44 +03:00			`def getattr_(obj, name, *default):`
			`if is_python3 and isinstance(name, bytes):`
			`name = name.decode('utf8')`
			`return getattr(obj, name, *default)`

Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00
			`def symlink_to(orig, dest):`
Fix symlink function to check for Windows 2017-04-15 13:17:27 +03:00			`if is_python2 and is_windows:`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00			`import subprocess`
Add path2str compat function 2017-05-08 00:24:56 +03:00			`subprocess.call(['mklink', '/d', path2str(orig), path2str(dest)], shell=True)`
Fix symlink function to check for Windows 2017-04-15 13:17:27 +03:00			`else:`
			`orig.symlink_to(dest)`
Add compat module for Python2/3 and platform compatibility 2017-04-15 13:07:02 +03:00

			`def is_config(python2=None, python3=None, windows=None, linux=None, osx=None):`
Tidy up util and helpers 2017-10-27 15:39:09 +03:00			`return ((python2 is None or python2 == is_python2) and`
			`(python3 is None or python3 == is_python3) and`
			`(windows is None or windows == is_windows) and`
			`(linux is None or linux == is_linux) and`
			`(osx is None or osx == is_osx))`
Add compat function to normalize dict keys 2017-05-31 15:14:29 +03:00

			`def normalize_string_keys(old):`
Tidy up util and helpers 2017-10-27 15:39:09 +03:00			`"""Given a dictionary, make sure keys are unicode strings, not bytes."""`
Add compat function to normalize dict keys 2017-05-31 15:14:29 +03:00			`new = {}`
Fix normalize_string_keys function' 2017-05-31 22:08:16 +03:00			`for key, value in old.items():`
Add compat function to normalize dict keys 2017-05-31 15:14:29 +03:00			`if isinstance(key, bytes_):`
			`new[key.decode('utf8')] = value`
			`else:`
			`new[key] = value`
			`return new`


Add compat function for importlib.util 2017-08-18 22:56:47 +03:00			`def import_file(name, loc):`
			`loc = str(loc)`
			`if is_python2:`
			`return imp.load_source(name, loc)`
			`else:`
Fix typo 2017-08-19 23:32:07 +03:00			`spec = importlib.util.spec_from_file_location(name, str(loc))`
Add compat function for importlib.util 2017-08-18 22:56:47 +03:00			`module = importlib.util.module_from_spec(spec)`
			`spec.loader.exec_module(module)`
			`return module`
Add locale_escape compat function 2017-10-12 23:22:04 +03:00

			`def locale_escape(string, errors='replace'):`
			`'''`
			`Mangle non-supported characters, for savages with ascii terminals.`
			`'''`
			`encoding = locale.getpreferredencoding()`
			`string = string.encode(encoding, errors).decode('utf8')`
			`return string`