From 7a93fdbe0a89089d45b2509e943d0d49ef155a2c Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 22 May 2013 16:46:15 +0100 Subject: [PATCH] Last bits of charset sorting-out-ness --- docs/api-guide/renderers.md | 29 +++++++++++++++++++++++++---- rest_framework/renderers.py | 25 ++++++++++++++++++------- rest_framework/tests/renderers.py | 14 +++++++------- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/docs/api-guide/renderers.md b/docs/api-guide/renderers.md index a5eba9fc5..0161b54d1 100644 --- a/docs/api-guide/renderers.md +++ b/docs/api-guide/renderers.md @@ -67,22 +67,40 @@ If your API includes views that can serve both regular webpages and API response ## JSONRenderer -Renders the request data into `JSON`, using ASCII encoding. +Renders the request data into `JSON`, using utf-8 encoding. + +Note that non-ascii characters will be rendered using JSON's `\uXXXX` character escape. For example: + + {"unicode black star": "\u2605"} The client may additionally include an `'indent'` media type parameter, in which case the returned `JSON` will be indented. For example `Accept: application/json; indent=4`. + { + "unicode black star": "\u2605" + } + **.media_type**: `application/json` **.format**: `'.json'` -**.charset**: `iso-8859-1` +**.charset**: `utf-8` ## UnicodeJSONRenderer Renders the request data into `JSON`, using utf-8 encoding. +Note that non-ascii characters will not be character escaped. For example: + + {"unicode black star": "★"} + The client may additionally include an `'indent'` media type parameter, in which case the returned `JSON` will be indented. For example `Accept: application/json; indent=4`. + { + "unicode black star": "★" + } + +Both the `JSONRenderer` and `UnicodeJSONRenderer` styles conform to [RFC 4627][rfc4627], and are syntactically valid JSON. 
+ **.media_type**: `application/json` **.format**: `'.json'` @@ -101,7 +119,7 @@ The javascript callback function must be set by the client including a `callback **.format**: `'.jsonp'` -**.charset**: `iso-8859-1` +**.charset**: `utf-8` ## YAMLRenderer @@ -252,7 +270,9 @@ By default renderer classes are assumed to be using the `UTF-8` encoding. To us def render(self, data, media_type=None, renderer_context=None): return data.encode(self.charset) -If the renderer returns a raw bytestring, you should set a charset value of `None`, which will ensure the `Content-Type` header of the response will not have a `charset` value set. Doing so will also ensure that the browsable API will not attempt to display the binary content as a string. +Note that if a renderer class returns a unicode string, then the response content will be coerced into a bytestring by the `Response` class, with the `charset` attribute set on the renderer used to determine the encoding. + +If the renderer returns a bytestring representing raw binary content, you should set a charset value of `None`, which will ensure the `Content-Type` header of the response will not have a `charset` value set. Doing so will also ensure that the browsable API will not attempt to display the binary content as a string. 
class JPEGRenderer(renderers.BaseRenderer): media_type = 'image/jpeg' @@ -350,6 +370,7 @@ Comma-separated values are a plain-text tabular data format, that can be easily [cite]: https://docs.djangoproject.com/en/dev/ref/template-response/#the-rendering-process [conneg]: content-negotiation.md [browser-accept-headers]: http://www.gethifi.com/blog/browser-rest-http-accept-headers +[rfc4627]: http://www.ietf.org/rfc/rfc4627.txt [cors]: http://www.w3.org/TR/cors/ [cors-docs]: ../topics/ajax-csrf-cors.md [HATEOAS]: http://timelessrepo.com/haters-gonna-hateoas diff --git a/rest_framework/renderers.py b/rest_framework/renderers.py index b4fa55bd2..c42b086fd 100644 --- a/rest_framework/renderers.py +++ b/rest_framework/renderers.py @@ -43,18 +43,21 @@ class BaseRenderer(object): class JSONRenderer(BaseRenderer): """ - Renderer which serializes to json. + Renderer which serializes to JSON. + Applies JSON's backslash-u character escaping for non-ascii characters. """ media_type = 'application/json' format = 'json' encoder_class = encoders.JSONEncoder ensure_ascii = True - charset = 'iso-8859-1' + charset = 'utf-8' + # Note that JSON encodings must be utf-8, utf-16 or utf-32. + # See: http://www.ietf.org/rfc/rfc4627.txt def render(self, data, accepted_media_type=None, renderer_context=None): """ - Render `obj` into json. + Render `data` into JSON. """ if data is None: return '' @@ -77,7 +80,11 @@ class JSONRenderer(BaseRenderer): ret = json.dumps(data, cls=self.encoder_class, indent=indent, ensure_ascii=self.ensure_ascii) - if not self.ensure_ascii: + # On python 2.x json.dumps() returns bytestrings if ensure_ascii=True, + # but if ensure_ascii=False, the return type is underspecified, + # and may (or may not) be unicode. + # On python 3.x json.dumps() returns unicode strings. 
+ if isinstance(ret, six.text_type): return bytes(ret.encode(self.charset)) return ret @@ -85,6 +92,10 @@ class JSONRenderer(BaseRenderer): class UnicodeJSONRenderer(JSONRenderer): ensure_ascii = False charset = 'utf-8' + """ + Renderer which serializes to JSON. + Does *not* apply JSON's character escaping for non-ascii characters. + """ class JSONPRenderer(JSONRenderer): @@ -117,7 +128,7 @@ class JSONPRenderer(JSONRenderer): callback = self.get_callback(renderer_context) json = super(JSONPRenderer, self).render(data, accepted_media_type, renderer_context) - return "%s(%s);" % (callback, json) + return callback.encode(self.charset) + b'(' + json + b');' class XMLRenderer(BaseRenderer): @@ -138,7 +149,7 @@ class XMLRenderer(BaseRenderer): stream = StringIO() - xml = SimplerXMLGenerator(stream, "utf-8") + xml = SimplerXMLGenerator(stream, self.charset) xml.startDocument() xml.startElement("root", {}) @@ -188,7 +199,7 @@ class YAMLRenderer(BaseRenderer): if data is None: return '' - return yaml.dump(data, stream=None, Dumper=self.encoder) + return yaml.dump(data, stream=None, encoding=self.charset, Dumper=self.encoder) class TemplateHTMLRenderer(BaseRenderer): diff --git a/rest_framework/tests/renderers.py b/rest_framework/tests/renderers.py index 1b2b92791..9096c82df 100644 --- a/rest_framework/tests/renderers.py +++ b/rest_framework/tests/renderers.py @@ -29,7 +29,7 @@ RENDERER_B_SERIALIZER = lambda x: ('Renderer B: %s' % x).encode('ascii') expected_results = [ - ((elem for elem in [1, 2, 3]), JSONRenderer, '[1, 2, 3]') # Generator + ((elem for elem in [1, 2, 3]), JSONRenderer, b'[1, 2, 3]') # Generator ] @@ -246,7 +246,7 @@ class JSONRendererTests(TestCase): renderer = JSONRenderer() content = renderer.render(obj, 'application/json') # Fix failing test case which depends on version of JSON library. 
- self.assertEqual(content, _flat_repr) + self.assertEqual(content.decode('utf-8'), _flat_repr) def test_with_content_type_args(self): """ @@ -255,13 +255,13 @@ class JSONRendererTests(TestCase): obj = {'foo': ['bar', 'baz']} renderer = JSONRenderer() content = renderer.render(obj, 'application/json; indent=2') - self.assertEqual(strip_trailing_whitespace(content), _indented_repr) + self.assertEqual(strip_trailing_whitespace(content.decode('utf-8')), _indented_repr) def test_check_ascii(self): obj = {'countries': ['United Kingdom', 'France', 'España']} renderer = JSONRenderer() content = renderer.render(obj, 'application/json') - self.assertEqual(content, '{"countries": ["United Kingdom", "France", "Espa\\u00f1a"]}') + self.assertEqual(content, '{"countries": ["United Kingdom", "France", "Espa\\u00f1a"]}'.encode('utf-8')) class UnicodeJSONRendererTests(TestCase): @@ -289,7 +289,7 @@ class JSONPRendererTests(TestCase): resp = self.client.get('/jsonp/jsonrenderer', HTTP_ACCEPT='application/javascript') self.assertEqual(resp.status_code, status.HTTP_200_OK) - self.assertEqual(resp['Content-Type'], 'application/javascript; charset=iso-8859-1') + self.assertEqual(resp['Content-Type'], 'application/javascript; charset=utf-8') self.assertEqual(resp.content, ('callback(%s);' % _flat_repr).encode('ascii')) @@ -300,7 +300,7 @@ class JSONPRendererTests(TestCase): resp = self.client.get('/jsonp/nojsonrenderer', HTTP_ACCEPT='application/javascript') self.assertEqual(resp.status_code, status.HTTP_200_OK) - self.assertEqual(resp['Content-Type'], 'application/javascript; charset=iso-8859-1') + self.assertEqual(resp['Content-Type'], 'application/javascript; charset=utf-8') self.assertEqual(resp.content, ('callback(%s);' % _flat_repr).encode('ascii')) @@ -312,7 +312,7 @@ class JSONPRendererTests(TestCase): resp = self.client.get('/jsonp/nojsonrenderer?callback=' + callback_func, HTTP_ACCEPT='application/javascript') self.assertEqual(resp.status_code, status.HTTP_200_OK) - 
self.assertEqual(resp['Content-Type'], 'application/javascript; charset=iso-8859-1') + self.assertEqual(resp['Content-Type'], 'application/javascript; charset=utf-8') self.assertEqual(resp.content, ('%s(%s);' % (callback_func, _flat_repr)).encode('ascii'))