From eb2b50f65acb1b04d2a3f3d74c5de6ae919c7fe1 Mon Sep 17 00:00:00 2001 From: Jonathan Demirgian Date: Wed, 28 Apr 2021 11:15:12 -0400 Subject: [PATCH] Use Content-Encoding to identify if data is binary When using _whitenoise_ for caching, which provides compression, binary types may include mimetypes, "text/", "application/json": - response.mimetype.startswith("text/") - response.mimetype == "application/json" Assuming that Content-Encoding will be set (as whitenoise apparently does) this allows compression to be applied by the application for "text/" and "application/json". About Content-Encoding: developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding --- The above commit message and functional code change to zappa/handler.py was written by GH user @monkut and a PR was provided with https://github.com/Miserlou/Zappa/pull/2170. That PR was not merged in before the fork from Miserlou/zappa to Zappa/zappa. This commit copies the code from that PR, adds a comment line referencing the new migrated issue, and additionally adds tests to confirm behavior. Co-authored-by: monkut --- tests/test_handler.py | 104 +++++++++++++++++++++----- tests/test_wsgi_binary_support_app.py | 34 ++++++++- zappa/handler.py | 18 ++++- 3 files changed, 133 insertions(+), 23 deletions(-) diff --git a/tests/test_handler.py b/tests/test_handler.py index ce38278b3..4d184b153 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -226,10 +226,10 @@ def test_exception_handler_on_web_request(self): self.assertEqual(response["statusCode"], 500) mocked_exception_handler.assert_called() - def test_wsgi_script_binary_support_base64_behavior(self): - """ - With Binary Support enabled, response mimetypes that are not text/* or application/json will be base64 encoded + def test_wsgi_script_binary_support_with_content_encoding(self): """ + Ensure that response body is base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding header is present. + """ # don't linebreak so that whole line is shown during nosetest readout lh = LambdaHandler("tests.test_binary_support_settings") text_plain_event = { @@ -243,48 +243,112 @@ def test_wsgi_script_binary_support_base64_behavior(self): "pathParameters": {"proxy": "return/request/url"}, "httpMethod": "GET", "stageVariables": {}, - "path": "/textplain_mimetype_response1", + "path": "/content_encoding_header_json1", } + # A likely scenario is that the application would be gzip compressing some json response. That's checked first. response = lh.handler(text_plain_event, None) self.assertEqual(response["statusCode"], 200) - self.assertNotIn("isBase64Encoded", response) - self.assertFalse(is_base64(response["body"])) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # We also verify that some unknown mimetype with a Content-Encoding also encodes to b64. This route serves + # bytes in the response. text_arbitrary_event = { **text_plain_event, - **{"path": "/textarbitrary_mimetype_response1"}, + **{"path": "/content_encoding_header_textarbitrary1"}, } response = lh.handler(text_arbitrary_event, None) self.assertEqual(response["statusCode"], 200) - self.assertNotIn("isBase64Encoded", response) - self.assertFalse(is_base64(response["body"])) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # This route is similar to the above, but it serves its response as text and not bytes. That the response + # isn't bytes shouldn't matter because it still has a Content-Encoding header. application_json_event = { **text_plain_event, - **{"path": "/json_mimetype_response1"}, + **{"path": "/content_encoding_header_textarbitrary2"}, } response = lh.handler(application_json_event, None) self.assertEqual(response["statusCode"], 200) - self.assertNotIn("isBase64Encoded", response) - self.assertFalse(is_base64(response["body"])) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) - arbitrary_binary_event = { - **text_plain_event, - **{"path": "/arbitrarybinary_mimetype_response1"}, + def test_wsgi_script_binary_support_without_content_encoding_edgecases( + self, + ): + """ + Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is enabled and the mimetype is "application/json" or starts with "text/". + """ # don't linebreak so that whole line is shown during nosetest readout + + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", } - response = lh.handler(arbitrary_binary_event, None) + for path in [ + "/textplain_mimetype_response1", # text/plain mimetype should not be turned to base64 + "/textarbitrary_mimetype_response1", # text/arbitrary mimetype should not be turned to base64 + "/json_mimetype_response1", # application/json mimetype should not be turned to base64 + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + def test_wsgi_script_binary_support_without_content_encoding( + self, + ): + """ + Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding is absent. + """ # don't linebreak so that whole line is shown during nosetest readout - self.assertEqual(response["statusCode"], 200) - self.assertIn("isBase64Encoded", response) - self.assertTrue(response["isBase64Encoded"]) - self.assertTrue(is_base64(response["body"])) + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + for path in [ + "/arbitrarybinary_mimetype_response1", + "/arbitrarybinary_mimetype_response2", + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) def test_wsgi_script_on_cognito_event_request(self): """ diff --git a/tests/test_wsgi_binary_support_app.py b/tests/test_wsgi_binary_support_app.py index ef0f62287..d1d2e6638 100644 --- a/tests/test_wsgi_binary_support_app.py +++ b/tests/test_wsgi_binary_support_app.py @@ -3,7 +3,7 @@ # _responses_ when Binary Support is enabled. ### -import io +import gzip import json from flask import Flask, Response, send_file @@ -29,3 +29,35 @@ def json_mimetype_response_1(): @app.route("/arbitrarybinary_mimetype_response1", methods=["GET"]) def arbitrary_mimetype_response_1(): return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype") + + +@app.route("/arbitrarybinary_mimetype_response2", methods=["GET"]) +def arbitrary_mimetype_response_3(): + return Response(response="doesnt_matter", mimetype="definitely_not_text") + + +@app.route("/content_encoding_header_json1", methods=["GET"]) +def response_with_content_encoding_1(): + return Response( + response=gzip.compress(json.dumps({"some": "data"}).encode()), + mimetype="application/json", + headers={"Content-Encoding": "gzip"}, + ) + + +@app.route("/content_encoding_header_textarbitrary1", methods=["GET"]) +def response_with_content_encoding_2(): + return Response( + response=b"OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "something_arbitrarily_binary"}, + ) + + +@app.route("/content_encoding_header_textarbitrary2", methods=["GET"]) +def response_with_content_encoding_3(): + return Response( + response="OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "with_content_type_but_not_bytes_response"}, + ) diff --git a/zappa/handler.py b/zappa/handler.py index 41336dc15..50163c212 100644 --- a/zappa/handler.py +++ b/zappa/handler.py @@ -583,14 +583,28 @@ def handler(self, event, context): ) if response.data: - if ( + # We base64 encode for two reasons when BINARY_SUPPORT is enabled: + # - Content-Encoding is present, which is commonly used by compression mechanisms to indicate + # that the content is in br/gzip/deflate/etc encoding + # (Related: https://github.com/zappa/Zappa/issues/908). Content like this must be + # transmitted as b64. + # - The response is assumed to be some binary format (since BINARY_SUPPORT is enabled and it + # isn't application/json or text/) + if settings.BINARY_SUPPORT and response.headers.get( + "Content-Encoding" + ): + zappa_returndict["body"] = base64.b64encode( + response.data + ).decode() + zappa_returndict["isBase64Encoded"] = True + elif ( settings.BINARY_SUPPORT and not response.mimetype.startswith("text/") and response.mimetype != "application/json" ): zappa_returndict["body"] = base64.b64encode( response.data - ).decode("utf-8") + ).decode() zappa_returndict["isBase64Encoded"] = True else: zappa_returndict["body"] = response.get_data(as_text=True)