web crawler protection bypass

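The IJG server (www.ijg.org) appears to have web crawler protection that rejects requests that do not send an acceptable User-Agent header, answering with HTTP 403 Forbidden.
This pull request works around the problem by retrying the download with an explicitly set User-Agent header; the failure it addresses is shown below.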
```
PS D:\Projects\python_c\Pillow-master\winbuild> python build_dep.py
Fetching http://www.ijg.org/files/jpegsr9c.zip
Traceback (most recent call last):
  File "D:\Projects\python_c\Pillow-master\winbuild\fetch.py", line 13, in fetch
    r = urllib.request.urlopen(url)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
    response = meth(req, response)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "build_dep.py", line 343, in <module>
    extract_libs()
  File "build_dep.py", line 50, in extract_libs
    filename = fetch(lib["url"])
  File "D:\Projects\python_c\Pillow-master\winbuild\fetch.py", line 15, in fetch
    r = urllib.request.urlopen(url)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
    response = meth(req, response)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)
  File "C:\Users\W0lf\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
```
This commit is contained in:
K0lb3 2019-10-09 23:36:46 +02:00 committed by GitHub
parent 8c94f01842
commit abd30d82ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -13,6 +13,8 @@ def fetch(url):
r = urllib.request.urlopen(url)
except urllib.error.URLError:
r = urllib.request.urlopen(url)
except urllib.error.HTTPError:
r = urllib.request.urlopen(urllib.request.Request(url, None, {'User-Agent': ''}))
content = r.read()
with open(name, "wb") as fd:
fd.write(content)