From da1f20036209af7950588ccb0877299ccf946f0e Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 28 Mar 2018 12:45:53 +0200 Subject: [PATCH 1/7] Add compat helpers for urllib --- spacy/compat.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/spacy/compat.py b/spacy/compat.py index dc0883542..5359edefc 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -33,11 +33,23 @@ try: except ImportError: from thinc.neural.optimizers import Adam as Optimizer +try: + import urllib +except ImportError: + import urllib2 as urllib + +try: + from urllib.error import HTTPError as url_error +except ImportError: + from urllib2 import HTTPError as url_error + pickle = pickle copy_reg = copy_reg CudaStream = CudaStream cupy = cupy copy_array = copy_array +urllib = urllib +url_error = url_error izip = getattr(itertools, 'izip', zip) is_windows = sys.platform.startswith('win') @@ -56,6 +68,7 @@ if is_python2: input_ = raw_input # noqa: F821 json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False).decode('utf8') path2str = lambda path: str(path).decode('utf8') + url_open = lambda url: urllib.urlopen(url) elif is_python3: bytes_ = bytes @@ -64,6 +77,7 @@ elif is_python3: input_ = input json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False) path2str = lambda path: str(path) + url_open = lambda url: urllib.request.urlopen(url) def b_to_str(b_str): From 7fbc9e587439bc63feb30e2ff1852fbed7f89d83 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 28 Mar 2018 12:46:07 +0200 Subject: [PATCH 2/7] Replace requests with urllib --- spacy/cli/download.py | 12 +++++++----- spacy/cli/validate.py | 13 +++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 656635f79..977545dfe 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -2,13 +2,14 @@ from __future__ import unicode_literals import plac -import requests import os import subprocess import sys 
+import ujson from .link import link from ..util import prints, get_package_path +from ..compat import url_open, url_error from .. import about @@ -56,13 +57,14 @@ def download(model, direct=False): def get_json(url, desc): - r = requests.get(url) - if r.status_code != 200: + try: + r = url_open(url) + except url_error as e: msg = ("Couldn't fetch %s. Please find a model for your spaCy " "installation (v%s), and download it manually.") prints(msg % (desc, about.__version__), about.__docs_models__, - title="Server error (%d)" % r.status_code, exits=1) - return r.json() + title="Server error (%d: %s)" % (e.code, e.reason), exits=1) + return ujson.load(r) def get_compatibility(): diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index b83753509..2b7388988 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -1,12 +1,12 @@ # coding: utf8 from __future__ import unicode_literals, print_function -import requests import pkg_resources from pathlib import Path import sys +import ujson -from ..compat import path2str, locale_escape +from ..compat import path2str, locale_escape, url_open, url_error from ..util import prints, get_data_path, read_json from .. import about @@ -15,11 +15,12 @@ def validate(): """Validate that the currently installed version of spaCy is compatible with the installed models. Should be run after `pip install -U spacy`. 
""" - r = requests.get(about.__compatibility__) - if r.status_code != 200: + try: + r = url_open(about.__compatibility__) + except url_error as e: prints("Couldn't fetch compatibility table.", - title="Server error (%d)" % r.status_code, exits=1) - compat = r.json()['spacy'] + title="Server error (%d: %s)" % (e.code, e.reason), exits=1) + compat = ujson.load(r)['spacy'] current_compat = compat.get(about.__version__) if not current_compat: prints(about.__compatibility__, exits=1, From 366c98a94be75865586825ea150f3ea32e8665ed Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 28 Mar 2018 12:46:18 +0200 Subject: [PATCH 3/7] Remove requests dependency --- requirements.txt | 1 - setup.py | 1 - 2 files changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 66b469798..dfc08447b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,6 @@ murmurhash>=0.28,<0.29 plac<1.0.0,>=0.9.6 ujson>=1.35 dill>=0.2,<0.3 -requests>=2.13.0,<3.0.0 regex==2017.4.5 pytest>=3.0.6,<4.0.0 mock>=2.0.0,<3.0.0 diff --git a/setup.py b/setup.py index 4bb2c7954..a1d32584f 100755 --- a/setup.py +++ b/setup.py @@ -194,7 +194,6 @@ def setup_package(): 'pathlib', 'ujson>=1.35', 'dill>=0.2,<0.3', - 'requests>=2.13.0,<3.0.0', 'regex==2017.4.5', 'msgpack-python==0.5.4', 'msgpack-numpy==0.4.1'], From 07b8c255a5106ecf9a567677034fd67dd4b4f3a6 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 28 Mar 2018 12:46:27 +0200 Subject: [PATCH 4/7] Update example with note to install requests --- examples/pipeline/custom_component_countries_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pipeline/custom_component_countries_api.py b/examples/pipeline/custom_component_countries_api.py index da88bbcad..1a6dc3e42 100644 --- a/examples/pipeline/custom_component_countries_api.py +++ b/examples/pipeline/custom_component_countries_api.py @@ -9,6 +9,7 @@ coordinates. Can be extended with more details from the API. 
* Custom pipeline components: https://spacy.io//usage/processing-pipelines#custom-components Compatible with: spaCy v2.0.0+ +Prerequisites: pip install requests """ from __future__ import unicode_literals, print_function From b5098079d84a2472d4d8552d8c7867c39b46da88 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 29 Mar 2018 00:08:16 +0200 Subject: [PATCH 5/7] Fix error on urllib --- spacy/cli/download.py | 8 ++++---- spacy/cli/validate.py | 8 ++++---- spacy/compat.py | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 977545dfe..71c1a65dd 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -9,7 +9,7 @@ import ujson from .link import link from ..util import prints, get_package_path -from ..compat import url_open, url_error +from ..compat import url_read, HTTPError from .. import about @@ -58,13 +58,13 @@ def download(model, direct=False): def get_json(url, desc): try: - r = url_open(url) - except url_error as e: + data = url_read(url) + except HTTPError as e: msg = ("Couldn't fetch %s. Please find a model for your spaCy " "installation (v%s), and download it manually.") prints(msg % (desc, about.__version__), about.__docs_models__, title="Server error (%d: %s)" % (e.code, e.reason), exits=1) - return ujson.load(r) + return ujson.loads(data) def get_compatibility(): diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index 2b7388988..c140f6bff 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -6,7 +6,7 @@ from pathlib import Path import sys import ujson -from ..compat import path2str, locale_escape, url_open, url_error +from ..compat import path2str, locale_escape, url_read, HTTPError from ..util import prints, get_data_path, read_json from .. import about @@ -16,11 +16,11 @@ def validate(): with the installed models. Should be run after `pip install -U spacy`. 
""" try: - r = url_open(about.__compatibility__) - except url_error as e: + data = url_read(about.__compatibility__) + except HTTPError as e: prints("Couldn't fetch compatibility table.", title="Server error (%d: %s)" % (e.code, e.reason), exits=1) - compat = ujson.load(r)['spacy'] + compat = ujson.loads(data)['spacy'] current_compat = compat.get(about.__version__) if not current_compat: prints(about.__compatibility__, exits=1, diff --git a/spacy/compat.py b/spacy/compat.py index 5359edefc..d6260a850 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -39,9 +39,9 @@ except ImportError: import urllib2 as urllib try: - from urllib.error import HTTPError as url_error + from urllib.error import HTTPError except ImportError: - from urllib2 import HTTPError as url_error + from urllib2 import HTTPError pickle = pickle copy_reg = copy_reg @@ -49,7 +49,6 @@ CudaStream = CudaStream cupy = cupy copy_array = copy_array urllib = urllib -url_error = url_error izip = getattr(itertools, 'izip', zip) is_windows = sys.platform.startswith('win') @@ -68,7 +67,7 @@ if is_python2: input_ = raw_input # noqa: F821 json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False).decode('utf8') path2str = lambda path: str(path).decode('utf8') - url_open = lambda url: urllib.urlopen(url) + url_open = urllib.urlopen elif is_python3: bytes_ = bytes @@ -77,7 +76,16 @@ elif is_python3: input_ = input json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False) path2str = lambda path: str(path) - url_open = lambda url: urllib.request.urlopen(url) + url_open = urllib.request.urlopen + + +def url_read(url): + file_ = url_open(url) + code = file_.getcode() + if code != 200: + raise HTTPError(url, code, "Cannot GET url", [], file_) + data = file_.read() + return data def b_to_str(b_str): From 8308bbc61759c23566bf727bc592aca1dee80b45 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 29 Mar 2018 00:14:55 +0200 Subject: [PATCH 6/7] Get msgpack and 
msgpack_numpy via Thinc, to avoid potential version conflicts --- requirements.txt | 2 -- setup.py | 4 +--- spacy/pipeline.pyx | 4 +++- spacy/util.py | 7 ++++--- spacy/vectors.pyx | 7 ++++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index dfc08447b..4fda2da37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,5 +11,3 @@ dill>=0.2,<0.3 regex==2017.4.5 pytest>=3.0.6,<4.0.0 mock>=2.0.0,<3.0.0 -msgpack-python==0.5.4 -msgpack-numpy==0.4.1 diff --git a/setup.py b/setup.py index a1d32584f..b453ec1f4 100755 --- a/setup.py +++ b/setup.py @@ -194,9 +194,7 @@ def setup_package(): 'pathlib', 'ujson>=1.35', 'dill>=0.2,<0.3', - 'regex==2017.4.5', - 'msgpack-python==0.5.4', - 'msgpack-numpy==0.4.1'], + 'regex==2017.4.5'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index 743f6ac85..6d14c4ca3 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -8,7 +8,9 @@ cimport numpy as np import cytoolz from collections import OrderedDict import ujson -import msgpack + +from .util import msgpack +from .util import msgpack_numpy from thinc.api import chain from thinc.v2v import Affine, SELU, Softmax diff --git a/spacy/util.py b/spacy/util.py index dc51e467d..04cc3c7aa 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -23,9 +23,10 @@ from .symbols import ORTH from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_ from .compat import import_file -import msgpack -import msgpack_numpy -msgpack_numpy.patch() +# Import these directly from Thinc, so that we're sure we always have the +# same version. 
+from thinc.neural._classes.model import msgpack +from thinc.neural._classes.model import msgpack_numpy LANGUAGES = {} diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 1b265e189..30fce439c 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -3,9 +3,10 @@ from __future__ import unicode_literals import numpy from collections import OrderedDict -import msgpack -import msgpack_numpy -msgpack_numpy.patch() + +from .util import msgpack +from .util import msgpack_numpy + cimport numpy as np from thinc.neural.util import get_array_module from thinc.neural._classes.model import Model From 83146458a27bda596283bf80ecf5af4b8e621951 Mon Sep 17 00:00:00 2001 From: ines Date: Thu, 29 Mar 2018 00:19:33 +0200 Subject: [PATCH 7/7] Fix urllib for Python 3 --- spacy/compat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/spacy/compat.py b/spacy/compat.py index d6260a850..c5ddae0ce 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -34,7 +34,7 @@ except ImportError: from thinc.neural.optimizers import Adam as Optimizer try: - import urllib + import urllib.request except ImportError: import urllib2 as urllib @@ -48,7 +48,6 @@ copy_reg = copy_reg CudaStream = CudaStream cupy = cupy copy_array = copy_array -urllib = urllib izip = getattr(itertools, 'izip', zip) is_windows = sys.platform.startswith('win') @@ -83,7 +82,7 @@ def url_read(url): file_ = url_open(url) code = file_.getcode() if code != 200: - raise HTTPError(url, code, "Cannot GET url", [], file_) + raise HTTPError(url, code, "Cannot GET url", [], file_) data = file_.read() return data