From 6e6f995a3cbaeea00ebf453c5cf99ad55bcfdc62 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 20 May 2018 15:48:59 +0200 Subject: [PATCH] Go back to using requests instead of urllib (closes #2320) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fewer dependencies are good, but this one was simply causing too many other problems around SSL verification and Python 2/3 compatibility. requests is a popular enough package that it's okay for spaCy to depend on it – and this will hopefully make model downloads less flakey. --- requirements.txt | 1 + setup.py | 3 ++- spacy/cli/_messages.py | 2 +- spacy/cli/download.py | 50 ++++++++++++------------------------------ spacy/cli/validate.py | 14 ++++++------ spacy/compat.py | 21 ------------------ 6 files changed, 25 insertions(+), 66 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4fda2da37..b2e245e36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,6 @@ plac<1.0.0,>=0.9.6 ujson>=1.35 dill>=0.2,<0.3 regex==2017.4.5 +requests>=2.13.0,<3.0.0 pytest>=3.0.6,<4.0.0 mock>=2.0.0,<3.0.0 diff --git a/setup.py b/setup.py index c295e05cf..f52f1ad0f 100755 --- a/setup.py +++ b/setup.py @@ -195,7 +195,8 @@ def setup_package(): 'pathlib', 'ujson>=1.35', 'dill>=0.2,<0.3', - 'regex==2017.4.5'], + 'regex==2017.4.5', + 'requests>=2.13.0,<3.0.0'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', diff --git a/spacy/cli/_messages.py b/spacy/cli/_messages.py index c3c9e496f..88dcb1b35 100644 --- a/spacy/cli/_messages.py +++ b/spacy/cli/_messages.py @@ -7,7 +7,7 @@ class Messages(object): M002 = ("Creating a shortcut link for 'en' didn't work (maybe you " "don't have admin permissions?), but you can still load the " "model via its full package name: nlp = spacy.load('{name}')") - M003 = ("Server error ({code}: {desc})") + M003 = ("Server error ({code})") M004 = ("Couldn't fetch {desc}. Please find a model for your spaCy " "installation (v{version}), and download it manually. For more " "details, see the documentation: https://spacy.io/usage/models") diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 558f84c7e..ec39cd014 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -2,54 +2,33 @@ from __future__ import unicode_literals import plac +import requests import os import subprocess import sys -import ujson -from .link import link from ._messages import Messages +from .link import link from ..util import prints, get_package_path -from ..compat import url_read, HTTPError from .. import about @plac.annotations( model=("model to download, shortcut or name)", "positional", None, str), direct=("force direct download. Needs model name with version and won't " - "perform compatibility check", "flag", "d", bool), - insecure=("insecure mode - disables the verification of certificates", - "flag", "i", bool), - ca_file=("specify a certificate authority file to use for certificates " - "validation. Ignored if --insecure is used", "option", "c")) -def download(model, direct=False, insecure=False, ca_file=None): + "perform compatibility check", "flag", "d", bool)) +def download(model, direct=False): """ Download compatible model from default download path using pip. Model can be shortcut, model name or, if --direct flag is set, full model name with version. - The --insecure optional flag can be used to disable ssl verification - The --ca-file option can be used to provide a local CA file - used for certificate verification. """ - - # ssl_verify is the argument handled to the 'verify' parameter - # of requests package. It must be either None, a boolean, - # or a string containing the path to CA file - ssl_verify = None - if insecure: - ca_file = None - ssl_verify = False - else: - if ca_file is not None: - ssl_verify = ca_file - - # Download the model if direct: dl = download_model('{m}/{m}.tar.gz'.format(m=model)) else: - shortcuts = get_json(about.__shortcuts__, "available shortcuts", ssl_verify) + shortcuts = get_json(about.__shortcuts__, "available shortcuts") model_name = shortcuts.get(model, model) - compatibility = get_compatibility(ssl_verify) + compatibility = get_compatibility() version = get_version(model_name, compatibility) dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) @@ -69,19 +48,18 @@ def download(model, direct=False, insecure=False, ca_file=None): prints(Messages.M001.format(name=model_name), title=Messages.M002) -def get_json(url, desc, ssl_verify): - try: - data = url_read(url, verify=ssl_verify) - except HTTPError as e: - prints(Messages.M004.format(desc, about.__version__), - title=Messages.M003.format(e.code, e.reason), exits=1) - return ujson.loads(data) +def get_json(url, desc): + r = requests.get(url) + if r.status_code != 200: + prints(Messages.M004.format(desc=desc, version=about.__version__), + title=Messages.M003.format(code=r.status_code), exits=1) + return r.json() -def get_compatibility(ssl_verify): +def get_compatibility(): version = about.__version__ version = version.rsplit('.dev', 1)[0] - comp_table = get_json(about.__compatibility__, "compatibility table", ssl_verify) + comp_table = get_json(about.__compatibility__, "compatibility table") comp = comp_table['spacy'] if version not in comp: prints(Messages.M006.format(version=version), title=Messages.M005, diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index 2ef78beb5..6b0765c3e 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -5,9 +5,10 @@ import pkg_resources from pathlib import Path import sys import ujson +import requests from ._messages import Messages -from ..compat import path2str, locale_escape, url_read, HTTPError +from ..compat import path2str, locale_escape from ..util import prints, get_data_path, read_json from .. import about @@ -16,12 +17,11 @@ def validate(): """Validate that the currently installed version of spaCy is compatible with the installed models. Should be run after `pip install -U spacy`. """ - try: - data = url_read(about.__compatibility__) - except HTTPError as e: - title = Messages.M003.format(code=e.code, desc=e.reason) - prints(Messages.M021, title=title, exits=1) - compat = ujson.loads(data)['spacy'] + r = requests.get(about.__compatibility__) + if r.status_code != 200: + prints(Messages.M021, title=Messages.M003.format(code=r.status_code), + exits=1) + compat = r.json()['spacy'] current_compat = compat.get(about.__version__) if not current_compat: prints(about.__compatibility__, exits=1, diff --git a/spacy/compat.py b/spacy/compat.py index c5ddae0ce..dc0883542 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -33,16 +33,6 @@ try: except ImportError: from thinc.neural.optimizers import Adam as Optimizer -try: - import urllib.request -except ImportError: - import urllib2 as urllib - -try: - from urllib.error import HTTPError -except ImportError: - from urllib2 import HTTPError - pickle = pickle copy_reg = copy_reg CudaStream = CudaStream @@ -66,7 +56,6 @@ if is_python2: input_ = raw_input # noqa: F821 json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False).decode('utf8') path2str = lambda path: str(path).decode('utf8') - url_open = urllib.urlopen elif is_python3: bytes_ = bytes @@ -75,16 +64,6 @@ elif is_python3: input_ = input json_dumps = lambda data: ujson.dumps(data, indent=2, escape_forward_slashes=False) path2str = lambda path: str(path) - url_open = urllib.request.urlopen - - -def url_read(url): - file_ = url_open(url) - code = file_.getcode() - if code != 200: - raise HTTPError(url, code, "Cannot GET url", [], file_) - data = file_.read() - return data def b_to_str(b_str):