sebpiq's parser improvements

This commit is contained in:
Tom Christie 2011-03-13 17:15:50 +00:00
commit 80e98de933
6 changed files with 226 additions and 86 deletions

View File

@ -27,29 +27,31 @@ class StandardContentMixin(ContentMixin):
class OverloadedContentMixin(ContentMixin):
"""HTTP request content behaviour that also allows arbitrary content to be tunneled in form data."""
"""The name to use for the content override field in the POST form."""
"""The name to use for the content override field in the POST form.
Set this to *None* to deactivate content overloading."""
CONTENT_PARAM = '_content'
"""The name to use for the content-type override field in the POST form."""
"""The name to use for the content-type override field in the POST form.
Taken into account only if content overloading is activated."""
CONTENTTYPE_PARAM = '_contenttype'
def determine_content(self, request):
"""If the request contains content return a tuple of (content_type, content) otherwise return None.
"""If the request contains content, returns a tuple of (content_type, content) otherwise returns None.
Note that content_type may be None if it is unset."""
if not request.META.get('CONTENT_LENGTH', None) and not request.META.get('TRANSFER_ENCODING', None):
return None
content_type = request.META.get('CONTENT_TYPE', None)
if (request.method == 'POST' and self.CONTENT_PARAM and
request.POST.get(self.CONTENT_PARAM, None) is not None):
# Set content type if form contains a none empty FORM_PARAM_CONTENTTYPE field
# Set content type if form contains a non-empty CONTENTTYPE_PARAM field
content_type = None
if self.CONTENTTYPE_PARAM and request.POST.get(self.CONTENTTYPE_PARAM, None):
content_type = request.POST.get(self.CONTENTTYPE_PARAM, None)
request.META['CONTENT_TYPE'] = content_type # TODO : VERY BAD, avoid modifying original request.
return (content_type, request.POST[self.CONTENT_PARAM])
return (content_type, request.raw_post_data)
else:
return (content_type, request.raw_post_data)

View File

@ -1,3 +1,7 @@
from StringIO import StringIO
from django.http.multipartparser import MultiPartParser as DjangoMPParser
from djangorestframework.response import ResponseException
from djangorestframework import status
@ -6,6 +10,10 @@ try:
except ImportError:
import simplejson as json
try:
from urlparse import parse_qs
except ImportError:
from cgi import parse_qs
class ParserMixin(object):
parsers = ()
@ -70,55 +78,90 @@ class JSONParser(BaseParser):
class XMLParser(BaseParser):
media_type = 'application/xml'
class DataFlatener(object):
"""Utility object for flattening dictionaries of lists. Useful for "urlencoded" decoded data."""
class FormParser(BaseParser):
def flatten_data(self, data):
    """Flatten a ``{<key>: <value_list>}`` dictionary into a new dictionary.

    For every key, ``self.is_a_list(key, val_list)`` decides how the value
    is emitted:

    * if it returns a true value, the whole list is kept unchanged;
    * otherwise only the first element of the list is kept, and a key whose
      list is empty is left out of the result altogether.

    Returns the new flattened dictionary. *data* itself is never modified.
    """
    flatdata = dict()
    for key, val_list in data.items():
        if self.is_a_list(key, val_list):
            flatdata[key] = val_list
        elif val_list:
            flatdata[key] = val_list[0]
        # Otherwise the parameter is not a list and no value was received:
        # it is stripped simply by not copying it into the result. Popping
        # it from *data* would mutate the caller's dictionary while it is
        # being iterated over.
    return flatdata
def is_a_list(self, key, val_list):
    """Tell whether the parameter named *key* should be kept as a list.

    *val_list* is the list of values received for *key*; an overriding
    implementation may inspect it to decide. This default implementation
    always answers False.
    """
    return False
class FormParser(BaseParser, DataFlatener):
"""The default parser for form data.
Return a dict containing a single value for each non-reserved parameter.
"""
In order to handle select multiple (and having possibly more than a single value for each parameter),
you can customize the output by subclassing the method 'is_a_list'."""
media_type = 'application/x-www-form-urlencoded'
def parse(self, input):
# The FormParser doesn't parse the input as other parsers would, since Django's already done the
# form parsing for us. We build the content object from the request directly.
request = self.resource.request
"""The value of the parameter when the select multiple is empty.
Browsers usually omit a select multiple that has no option selected from the parameters they send.
A common hack to avoid this is to send the parameter with a value specifying that the list is empty.
This value will always be stripped before the data is returned."""
EMPTY_VALUE = '_empty'
if request.method == 'PUT':
# Fix from piston to force Django to give PUT requests the same
# form processing that POST requests get...
#
# Bug fix: if _load_post_and_files has already been called, for
# example by middleware accessing request.POST, the below code to
# pretend the request is a POST instead of a PUT will be too late
# to make a difference. Also calling _load_post_and_files will result
# in the following exception:
# AttributeError: You cannot set the upload handlers after the upload has been processed.
# The fix is to check for the presence of the _post field which is set
# the first time _load_post_and_files is called (both by wsgi.py and
# modpython.py). If it's set, the request has to be 'reset' to redo
# the query value parsing in POST mode.
if hasattr(request, '_post'):
del request._post
del request._files
try:
request.method = "POST"
request._load_post_and_files()
request.method = "PUT"
except AttributeError:
request.META['REQUEST_METHOD'] = 'POST'
request._load_post_and_files()
request.META['REQUEST_METHOD'] = 'PUT'
def parse(self, input):
data = parse_qs(input, keep_blank_values=True)
# removing EMPTY_VALUEs from the lists and flattening the data
for key, val_list in data.items():
self.remove_empty_val(val_list)
data = self.flatten_data(data)
# Strip any parameters that we are treating as reserved
data = {}
for (key, val) in request.POST.items():
if key not in self.resource.RESERVED_FORM_PARAMS:
data[key] = val
for key in data.keys():
if key in self.resource.RESERVED_FORM_PARAMS:
data.pop(key)
return data
def remove_empty_val(self, val_list):
    """Remove every occurrence of ``self.EMPTY_VALUE`` from *val_list*, in place.

    The list object that was passed in is the one that gets modified, so
    callers holding a reference to it see the change. Returns None.
    """
    # Slice assignment rewrites the contents of the existing list object in a
    # single pass, instead of repeatedly searching for and popping one
    # occurrence at a time.
    val_list[:] = [val for val in val_list if val != self.EMPTY_VALUE]
# TODO: Allow parsers to specify multiple media_types
class MultipartParser(FormParser):
class MultipartParser(BaseParser, DataFlatener):
    """Parser for ``multipart/form-data`` content.

    The raw body is handed to Django's multipart parser; the resulting fields
    and files are flattened with ``flatten_data`` and merged into one dict.
    """
    media_type = 'multipart/form-data'

    def parse(self, input):
        """Parse the multipart body *input* and return a flat dictionary.

        Fields and files are flattened separately, then the files are merged
        into the fields (a file replaces a field that has the same name).
        Keys listed in ``self.resource.RESERVED_FORM_PARAMS`` are removed
        before the dictionary is returned.
        """
        current_request = self.resource.request

        # TODO: files go through the upload handlers, yet the whole request
        # body has already been read into *input*, which rather defeats the
        # purpose of the handlers. Consider accepting a stream instead.
        handlers = current_request._get_upload_handlers()
        mp_parser = DjangoMPParser(current_request.META, StringIO(input), handlers)
        fields, uploads = mp_parser.parse()

        # Flatten the fields first, then the files, and combine them.
        flat = self.flatten_data(dict(fields.iterlists()))
        flat.update(self.flatten_data(dict(uploads.iterlists())))

        # Drop any parameter that is treated as reserved.
        for name in [k for k in flat.keys() if k in self.resource.RESERVED_FORM_PARAMS]:
            flat.pop(name)
        return flat

View File

@ -5,5 +5,10 @@ modules = [filename.rsplit('.', 1)[0]
for filename in os.listdir(os.path.dirname(__file__))
if filename.endswith('.py') and not filename.startswith('_')]
__test__ = dict()
for module in modules:
exec("from djangorestframework.tests.%s import *" % module)
exec("from djangorestframework.tests.%s import __doc__ as module_doc" % module)
exec("from djangorestframework.tests.%s import *" % module)
__test__[module] = module_doc or ""

View File

@ -111,6 +111,7 @@ class TestContentMixins(TestCase):
OverloadedContentMixin.CONTENTTYPE_PARAM: content_type}
request = self.req.post('/', form_data)
self.assertEqual(OverloadedContentMixin().determine_content(request), (content_type, content))
self.assertEqual(request.META['CONTENT_TYPE'], content_type)
def test_overloaded_behaviour_allows_content_tunnelling_content_type_not_set(self):
"""Ensure determine_content(request) returns (None, content) for overloaded POST request with content type not set"""

View File

@ -0,0 +1,127 @@
"""
..
>>> from djangorestframework.parsers import FormParser
>>> from djangorestframework.resource import Resource
>>> from djangorestframework.compat import RequestFactory
>>> from urllib import urlencode
>>> req = RequestFactory().get('/')
>>> some_resource = Resource()
>>> trash = some_resource.dispatch(req)# Some variables are set only when calling dispatch
FormParser
============
Data flattening
----------------
Here is some example data, which would eventually be sent along with a post request :
>>> inpt = urlencode([
... ('key1', 'bla1'),
... ('key2', 'blo1'), ('key2', 'blo2'),
... ])
The default behaviour of :class:`parsers.FormParser` is to return a single value for each parameter :
>>> FormParser(some_resource).parse(inpt) == {'key1': 'bla1', 'key2': 'blo1'}
True
However, you can customize this behaviour by subclassing :class:`parsers.FormParser`, and overriding :meth:`parsers.FormParser.is_a_list` :
>>> class MyFormParser(FormParser):
...
... def is_a_list(self, key, val_list):
... return len(val_list) > 1
This new parser only flattens the lists of parameters that contain a single value.
>>> MyFormParser(some_resource).parse(inpt) == {'key1': 'bla1', 'key2': ['blo1', 'blo2']}
True
.. note:: The same functionality is available for :class:`parsers.MultipartParser`.
Submitting an empty list
--------------------------
When submitting an empty select multiple, like this one ::
<select multiple="multiple" name="key2"></select>
Browsers usually strip the parameter completely. A hack to avoid this, and thus to be able to submit an empty select multiple, is to submit a value that tells the server that the list is empty ::
<select multiple="multiple" name="key2"><option value="_empty"></select>
:class:`parsers.FormParser` provides the server-side implementation for this hack. Considering the following posted data :
>>> inpt = urlencode([
... ('key1', 'blo1'), ('key1', '_empty'),
... ('key2', '_empty'),
... ])
:class:`parsers.FormParser` strips the values ``_empty`` from all the lists.
>>> MyFormParser(some_resource).parse(inpt) == {'key1': 'blo1'}
True
Oh ... but wait a second, the parameter ``key2`` isn't even supposed to be a list, so the parser just stripped it.
>>> class MyFormParser(FormParser):
...
... def is_a_list(self, key, val_list):
... return key == 'key2'
...
>>> MyFormParser(some_resource).parse(inpt) == {'key1': 'blo1', 'key2': []}
True
That's better. Note that you can configure a value other than ``_empty`` for the empty value by setting :attr:`parsers.FormParser.EMPTY_VALUE`.
"""
import httplib, mimetypes
from tempfile import TemporaryFile
from django.test import TestCase
from djangorestframework.compat import RequestFactory
from djangorestframework.parsers import MultipartParser
from djangorestframework.resource import Resource
def encode_multipart_formdata(fields, files):
    """Build a multipart/form-data payload for exercising the multipart parser.

    *fields* is a sequence of (name, value) pairs for regular form fields.
    *files* is a sequence of (name, filename, value) triples for data to be
    uploaded as files.
    Returns a (content_type, body) tuple.
    """
    boundary = '----------ThIs_Is_tHe_bouNdaRY_$'
    delimiter = '--' + boundary
    parts = []
    for name, value in fields:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"' % name,
            '',
            value,
        ])
    for name, filename, value in files:
        parts.extend([
            delimiter,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (name, filename),
            'Content-Type: %s' % get_content_type(filename),
            '',
            value,
        ])
    # Closing delimiter, followed by an empty part so the body ends with CRLF.
    parts.extend([delimiter + '--', ''])
    return 'multipart/form-data; boundary=%s' % boundary, '\r\n'.join(parts)
def get_content_type(filename):
    """Return the MIME type guessed from *filename*.

    Falls back to 'application/octet-stream' when no type can be guessed.
    """
    guessed_type, _encoding = mimetypes.guess_type(filename)
    return guessed_type or 'application/octet-stream'
class TestMultipartParser(TestCase):
    """Tests for MultipartParser using a hand-built multipart/form-data body."""

    def setUp(self):
        # Two values for the same field name and two files under the same
        # name, so the parser has repeated keys to flatten.
        self.req = RequestFactory()
        form_fields = [('key1', 'val1'), ('key1', 'val2')]
        uploaded_files = [('file1', 'pic.jpg', 'blablabla'), ('file1', 't.txt', 'blobloblo')]
        self.content_type, self.body = encode_multipart_formdata(form_fields, uploaded_files)

    def test_multipartparser(self):
        """Ensure that MultipartParser can parse multipart/form-data that contains a mix of several files and parameters."""
        request = RequestFactory().post('/', self.body, content_type=self.content_type)
        resource = Resource()
        # The parser reads the request from the resource, so dispatch first.
        resource.dispatch(request)
        result = MultipartParser(resource).parse(self.body)
        self.assertEqual(result['key1'], 'val1')
        self.assertEqual(result['file1'].read(), 'blablabla')

View File

@ -34,44 +34,6 @@ def url_resolves(url):
return False
return True
# From piston
def coerce_put_post(request):
"""
Make Django load the form data of a PUT request.

Django doesn't particularly understand REST.
In case we send data over PUT, Django won't
actually look at the data and load it. We need
to twist its arm here.
The try/except abomination here is due to a bug
in mod_python. This should fix it.

Does nothing unless request.method is 'PUT'. Otherwise the request is
temporarily presented as a POST while _load_post_and_files() runs, then
restored to PUT, and the parsed data is also exposed as request.PUT.
"""
if request.method != 'PUT':
return
# Bug fix: if _load_post_and_files has already been called, for
# example by middleware accessing request.POST, the below code to
# pretend the request is a POST instead of a PUT will be too late
# to make a difference. Also calling _load_post_and_files will result
# in the following exception:
# AttributeError: You cannot set the upload handlers after the upload has been processed.
# The fix is to check for the presence of the _post field which is set
# the first time _load_post_and_files is called (both by wsgi.py and
# modpython.py). If it's set, the request has to be 'reset' to redo
# the query value parsing in POST mode.
if hasattr(request, '_post'):
del request._post
del request._files
try:
# Pretend to be a POST just long enough for the body to be parsed.
request.method = "POST"
request._load_post_and_files()
request.method = "PUT"
except AttributeError:
# Fallback: switch the method through META['REQUEST_METHOD'] instead
# (presumably the mod_python case mentioned in the docstring -- confirm).
request.META['REQUEST_METHOD'] = 'POST'
request._load_post_and_files()
request.META['REQUEST_METHOD'] = 'PUT'
# Expose the parsed form data under a PUT attribute as well.
request.PUT = request.POST
# From http://www.koders.com/python/fidB6E125C586A6F49EAC38992CF3AFDAAE35651975.aspx?s=mdef:xml
#class object_dict(dict):