spaCy/spacy/compat.py
Ines Montani eddeb36c96
💫 Tidy up and auto-format .py files (#2983)
<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
2018-11-30 17:03:03 +01:00

148 lines
3.6 KiB
Python

# coding: utf8
from __future__ import unicode_literals
import os
import sys
import ujson
import itertools
import locale
from thinc.neural.util import copy_array
try:
import cPickle as pickle
except ImportError:
import pickle
try:
import copy_reg
except ImportError:
import copyreg as copy_reg
try:
from cupy.cuda.stream import Stream as CudaStream
except ImportError:
CudaStream = None
try:
import cupy
except ImportError:
cupy = None
try:
from thinc.neural.optimizers import Optimizer # noqa: F401
except ImportError:
from thinc.neural.optimizers import Adam as Optimizer # noqa: F401
pickle = pickle
copy_reg = copy_reg
CudaStream = CudaStream
cupy = cupy
copy_array = copy_array
izip = getattr(itertools, "izip", zip)
is_windows = sys.platform.startswith("win")
is_linux = sys.platform.startswith("linux")
is_osx = sys.platform == "darwin"
# See: https://github.com/benjaminp/six/blob/master/six.py
is_python2 = sys.version_info[0] == 2
is_python3 = sys.version_info[0] == 3
is_python_pre_3_5 = is_python2 or (is_python3 and sys.version_info[1] < 5)
if is_python2:
bytes_ = str
unicode_ = unicode # noqa: F821
basestring_ = basestring # noqa: F821
input_ = raw_input # noqa: F821
json_dumps = lambda data, indent=2: ujson.dumps(
data, indent=indent, escape_forward_slashes=False
).decode("utf8")
path2str = lambda path: str(path).decode("utf8")
elif is_python3:
bytes_ = bytes
unicode_ = str
basestring_ = str
input_ = input
json_dumps = lambda data, indent=2: ujson.dumps(
data, indent=indent, escape_forward_slashes=False
)
path2str = lambda path: str(path)
def b_to_str(b_str):
if is_python2:
return b_str
# important: if no encoding is set, string becomes "b'...'"
return str(b_str, encoding="utf8")
def getattr_(obj, name, *default):
if is_python3 and isinstance(name, bytes):
name = name.decode("utf8")
return getattr(obj, name, *default)
def symlink_to(orig, dest):
if is_windows:
import subprocess
subprocess.call(["mklink", "/d", path2str(orig), path2str(dest)], shell=True)
else:
orig.symlink_to(dest)
def symlink_remove(link):
# https://stackoverflow.com/q/26554135/6400719
if os.path.isdir(path2str(link)) and is_windows:
# this should only be on Py2.7 and windows
os.rmdir(path2str(link))
else:
os.unlink(path2str(link))
def is_config(python2=None, python3=None, windows=None, linux=None, osx=None):
return (
python2 in (None, is_python2)
and python3 in (None, is_python3)
and windows in (None, is_windows)
and linux in (None, is_linux)
and osx in (None, is_osx)
)
def normalize_string_keys(old):
"""Given a dictionary, make sure keys are unicode strings, not bytes."""
new = {}
for key, value in old.items():
if isinstance(key, bytes_):
new[key.decode("utf8")] = value
else:
new[key] = value
return new
def import_file(name, loc):
loc = str(loc)
if is_python_pre_3_5:
import imp
return imp.load_source(name, loc)
else:
import importlib.util
spec = importlib.util.spec_from_file_location(name, str(loc))
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module
def locale_escape(string, errors="replace"):
"""
Mangle non-supported characters, for savages with ascii terminals.
"""
encoding = locale.getpreferredencoding()
string = string.encode(encoding, errors).decode("utf8")
return string