From abd30d82ad5d6019399ac7f57d735c20048f7e3a Mon Sep 17 00:00:00 2001 From: K0lb3 Date: Wed, 9 Oct 2019 23:36:46 +0200 Subject: [PATCH] web crawler protection bypass The ijg server seems to have web crawler protection that rejects requests carrying urllib's default User-Agent header with HTTP 403. This pull request addresses the problem by retrying the download with an empty User-Agent header. ``` PS D:\Projects\python_c\Pillow-master\winbuild> python build_dep.py Fetching http://www.ijg.org/files/jpegsr9c.zip Traceback (most recent call last): File "D:\Projects\python_c\Pillow-master\winbuild\fetch.py", line 13, in fetch r = urllib.request.urlopen(url) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 222, in urlopen return opener.open(url, data, timeout) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open response = meth(req, response) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response 'http', request, response, code, msg, hdrs) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error return self._call_chain(*args) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain result = func(*args) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_default raise HTTPError(req.full_url, code, msg, hdrs, fp) urllib.error.HTTPError: HTTP Error 403: Forbidden During handling of the above exception, another exception occurred: Traceback (most recent call last): File "build_dep.py", line 343, in extract_libs() File "build_dep.py", line 50, in extract_libs filename = fetch(lib["url"]) File "D:\Projects\python_c\Pillow-master\winbuild\fetch.py", line 15, in fetch r = urllib.request.urlopen(url) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 222, in urlopen return opener.open(url, data, timeout) File 
"C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open response = meth(req, response) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response 'http', request, response, code, msg, hdrs) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error return self._call_chain(*args) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain result = func(*args) File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_default raise HTTPError(req.full_url, code, msg, hdrs, fp) urllib.error.HTTPError: HTTP Error 403: Forbidden ``` --- winbuild/fetch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/winbuild/fetch.py b/winbuild/fetch.py index 804e4ef0c..5becfbd45 100644 --- a/winbuild/fetch.py +++ b/winbuild/fetch.py @@ -13,6 +13,8 @@ def fetch(url): r = urllib.request.urlopen(url) except urllib.error.URLError: r = urllib.request.urlopen(url) + except urllib.error.HTTPError: + r = urllib.request.urlopen(urllib.request.Request(url, None, {'User-Agent': ''})) content = r.read() with open(name, "wb") as fd: fd.write(content)