""" Parsers are used to parse the content of incoming HTTP requests. They give us a generic way of being able to handle various media types on the request, such as form content or json encoded data. """ from __future__ import unicode_literals from django.conf import settings from django.core.files.uploadhandler import StopFutureHandlers from django.http import QueryDict from django.http.multipartparser import MultiPartParser as DjangoMultiPartParser from django.http.multipartparser import MultiPartParserError, parse_header, ChunkIter from django.utils import six from django.utils.six.moves.urllib import parse as urlparse from django.utils.encoding import force_text from rest_framework.compat import etree, yaml from rest_framework.exceptions import ParseError from rest_framework import renderers import json import datetime import decimal class DataAndFiles(object): def __init__(self, data, files): self.data = data self.files = files class BaseParser(object): """ All parsers should extend `BaseParser`, specifying a `media_type` attribute, and overriding the `.parse()` method. """ media_type = None def parse(self, stream, media_type=None, parser_context=None): """ Given a stream to read from, return the parsed representation. Should return parsed data, or a `DataAndFiles` object consisting of the parsed data and files. """ raise NotImplementedError(".parse() must be overridden.") class JSONParser(BaseParser): """ Parses JSON-serialized data. """ media_type = 'application/json' renderer_class = renderers.JSONRenderer def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as JSON and returns the resulting data. """ parser_context = parser_context or {} encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) try: data = stream.read().decode(encoding) return json.loads(data) except ValueError as exc: raise ParseError('JSON parse error - %s' % six.text_type(exc)) class YAMLParser(BaseParser): """ Parses YAML-serialized data. """ media_type = 'application/yaml' def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as YAML and returns the resulting data. """ assert yaml, 'YAMLParser requires pyyaml to be installed' parser_context = parser_context or {} encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) try: data = stream.read().decode(encoding) return yaml.safe_load(data) except (ValueError, yaml.parser.ParserError) as exc: raise ParseError('YAML parse error - %s' % six.text_type(exc)) class FormParser(BaseParser): """ Parser for form data. """ media_type = 'application/x-www-form-urlencoded' def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as a URL encoded form, and returns the resulting QueryDict. """ parser_context = parser_context or {} encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) data = QueryDict(stream.read(), encoding=encoding) return data class MultiPartParser(BaseParser): """ Parser for multipart form data, which may include file data. """ media_type = 'multipart/form-data' def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as a multipart encoded form, and returns a DataAndFiles object. `.data` will be a `QueryDict` containing all the form parameters. `.files` will be a `QueryDict` containing all the form files. """ parser_context = parser_context or {} request = parser_context['request'] encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) meta = request.META.copy() meta['CONTENT_TYPE'] = media_type upload_handlers = request.upload_handlers try: parser = DjangoMultiPartParser(meta, stream, upload_handlers, encoding) data, files = parser.parse() return DataAndFiles(data, files) except MultiPartParserError as exc: raise ParseError('Multipart form parse error - %s' % six.text_type(exc)) class XMLParser(BaseParser): """ XML parser. """ media_type = 'application/xml' def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as XML and returns the resulting data. """ assert etree, 'XMLParser requires defusedxml to be installed' parser_context = parser_context or {} encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) parser = etree.DefusedXMLParser(encoding=encoding) try: tree = etree.parse(stream, parser=parser, forbid_dtd=True) except (etree.ParseError, ValueError) as exc: raise ParseError('XML parse error - %s' % six.text_type(exc)) data = self._xml_convert(tree.getroot()) return data def _xml_convert(self, element): """ convert the xml `element` into the corresponding python object """ children = list(element) if len(children) == 0: return self._type_convert(element.text) else: # if the fist child tag is list-item means all children are list-item if children[0].tag == "list-item": data = [] for child in children: data.append(self._xml_convert(child)) else: data = {} for child in children: data[child.tag] = self._xml_convert(child) return data def _type_convert(self, value): """ Converts the value returned by the XMl parse into the equivalent Python type """ if value is None: return value try: return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') except ValueError: pass try: return int(value) except ValueError: pass try: return decimal.Decimal(value) except decimal.InvalidOperation: pass return value class FileUploadParser(BaseParser): """ Parser for file upload data. """ media_type = '*/*' def parse(self, stream, media_type=None, parser_context=None): """ Treats the incoming bytestream as a raw file upload and returns a `DateAndFiles` object. `.data` will be None (we expect request body to be a file content). `.files` will be a `QueryDict` containing one 'file' element. """ parser_context = parser_context or {} request = parser_context['request'] encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) meta = request.META upload_handlers = request.upload_handlers filename = self.get_filename(stream, media_type, parser_context) # Note that this code is extracted from Django's handling of # file uploads in MultiPartParser. content_type = meta.get('HTTP_CONTENT_TYPE', meta.get('CONTENT_TYPE', '')) try: content_length = int(meta.get('HTTP_CONTENT_LENGTH', meta.get('CONTENT_LENGTH', 0))) except (ValueError, TypeError): content_length = None # See if the handler will want to take care of the parsing. for handler in upload_handlers: result = handler.handle_raw_input(None, meta, content_length, None, encoding) if result is not None: return DataAndFiles(None, {'file': result[1]}) # This is the standard case. possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size] chunk_size = min([2 ** 31 - 4] + possible_sizes) chunks = ChunkIter(stream, chunk_size) counters = [0] * len(upload_handlers) for index, handler in enumerate(upload_handlers): try: handler.new_file(None, filename, content_type, content_length, encoding) except StopFutureHandlers: upload_handlers = upload_handlers[:index + 1] break for chunk in chunks: for index, handler in enumerate(upload_handlers): chunk_length = len(chunk) chunk = handler.receive_data_chunk(chunk, counters[index]) counters[index] += chunk_length if chunk is None: break for index, handler in enumerate(upload_handlers): file_obj = handler.file_complete(counters[index]) if file_obj: return DataAndFiles({}, {'file': file_obj}) raise ParseError("FileUpload parse error - " "none of upload handlers can handle the stream") def get_filename(self, stream, media_type, parser_context): """ Detects the uploaded file name. First searches a 'filename' url kwarg. Then tries to parse Content-Disposition header. """ try: return parser_context['kwargs']['filename'] except KeyError: pass try: meta = parser_context['request'].META disposition = parse_header(meta['HTTP_CONTENT_DISPOSITION'].encode('utf-8')) filename_parm = disposition[1] if 'filename*' in filename_parm: return self.get_encoded_filename(filename_parm) return force_text(filename_parm['filename']) except (AttributeError, KeyError): pass def get_encoded_filename(self, filename_parm): """ Handle encoded filenames per RFC6266. See also: http://tools.ietf.org/html/rfc2231#section-4 """ encoded_filename = force_text(filename_parm['filename*']) try: charset, lang, filename = encoded_filename.split('\'', 2) filename = urlparse.unquote(filename) except (ValueError, LookupError): filename = force_text(filename_parm['filename']) return filename