From 95c2eecbc790a94a83bc61e821a444306d01228e Mon Sep 17 00:00:00 2001 From: longze chen Date: Wed, 6 Dec 2017 13:50:09 -0500 Subject: [PATCH] Encode download url, fix tests and style updates. --- mfr/core/exceptions.py | 5 +- mfr/core/provider.py | 15 +++--- mfr/core/utils.py | 9 ++-- mfr/extensions/office365/render.py | 26 +++++----- mfr/providers/osf/provider.py | 55 ++++++++++----------- tests/extensions/office365/test_renderer.py | 21 +++++--- tests/server/handlers/test_query_params.py | 2 +- 7 files changed, 72 insertions(+), 61 deletions(-) diff --git a/mfr/core/exceptions.py b/mfr/core/exceptions.py index 9f7d5c8dc..317399568 100644 --- a/mfr/core/exceptions.py +++ b/mfr/core/exceptions.py @@ -147,9 +147,8 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa class QueryParameterError(ProviderError): - """The MFR related errors raised from a :class:`mfr.core.provider` and relating to query parameters - should inherit from MetadataError - This error is thrown when a query parameter is used missused + """The MFR related errors raised from a :class:`mfr.core.provider`and relating to query + parameters. This error is thrown when the query has an invalid value. """ __TYPE = 'query_parameter' diff --git a/mfr/core/provider.py b/mfr/core/provider.py index 00054c313..32ddbaea5 100644 --- a/mfr/core/provider.py +++ b/mfr/core/provider.py @@ -1,11 +1,12 @@ import abc -import markupsafe +from http import HTTPStatus import furl +import markupsafe -from mfr.core import exceptions -from mfr.server import settings +from mfr.core import exceptions as mfr_exceptions from mfr.core.metrics import MetricsRecord +from mfr.server import settings as mfr_settings class BaseProvider(metaclass=abc.ABCMeta): @@ -17,12 +18,12 @@ class BaseProvider(metaclass=abc.ABCMeta): def __init__(self, request, url): self.request = request url_netloc = furl.furl(url).netloc - if url_netloc not in settings.ALLOWED_PROVIDER_NETLOCS: - raise exceptions.ProviderError( + if url_netloc not in mfr_settings.ALLOWED_PROVIDER_NETLOCS: + raise mfr_exceptions.ProviderError( message="{} is not a permitted provider domain.".format( markupsafe.escape(url_netloc) ), - code=400 + code=HTTPStatus.BAD_REQUEST ) self.url = url self.provider_metrics = MetricsRecord('provider') @@ -60,8 +61,8 @@ def serialize(self): return { 'name': self.name, 'ext': self.ext, - 'is_public': self.is_public, 'content_type': self.content_type, 'unique_key': str(self.unique_key), 'download_url': str(self.download_url), + 'is_public': self.is_public, } diff --git a/mfr/core/utils.py b/mfr/core/utils.py index c5f7713cc..1abea18a9 100644 --- a/mfr/core/utils.py +++ b/mfr/core/utils.py @@ -78,6 +78,7 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): normalized_name = (name and name.lower()) or 'none' if metadata.is_public: try: + # Use the public renderer if exist return driver.DriverManager( namespace='mfr.public_renderers', name=normalized_name, @@ -85,18 +86,19 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): invoke_args=(metadata, file_path, url, assets_url, export_url), ).driver except: - # Check for a public renderer, if one doesn't exist, use a regular one - # Real exceptions handled by main driver.DriverManager + # If public render does not exist, use default renderer by MFR + # If public render exists but exceptions occurs, delay the exception handling pass try: + # Use the default MFR handler return driver.DriverManager( namespace='mfr.renderers', name=normalized_name, invoke_on_load=True, invoke_args=(metadata, file_path, url, assets_url, export_url), ).driver - except RuntimeError: + except: raise exceptions.MakeRendererError( namespace='mfr.renderers', name=normalized_name, @@ -110,6 +112,7 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url): } ) + def sizeof_fmt(num, suffix='B'): if abs(num) < 1000: return '%3.0f%s' % (num, suffix) diff --git a/mfr/extensions/office365/render.py b/mfr/extensions/office365/render.py index 2760ce761..2c03cb3e1 100644 --- a/mfr/extensions/office365/render.py +++ b/mfr/extensions/office365/render.py @@ -1,19 +1,22 @@ import os +from urllib import parse + import furl +from mako.lookup import TemplateLookup from mfr.core import extension -from mako.lookup import TemplateLookup -from mfr.extensions.office365 import settings +from mfr.extensions.office365 import settings as office365_settings class Office365Renderer(extension.BaseRenderer): - """A renderer for use with public .docx files. + """A renderer for .docx files that are publicly available. + + Office online can render `.docx` files to `.pdf` for us. This renderer will only be made + if a query param with `public_file=true` is present. It then generates and embeds an + office online URL into an `iframe` and returns the template. The file it is trying to + render MUST be public. - Office online can render .docx files to pdf for us. - This renderer will only ever be made if a query param with `public_file=1` is sent. - It then generates and embeds an office online url into an - iframe and returns the template. The file it is trying to render MUST - be available publically online. This renderer will not work if testing locally. + Note: this renderer DOES NOT work locally. """ @@ -23,9 +26,10 @@ class Office365Renderer(extension.BaseRenderer): ]).get_template('viewer.mako') def render(self): - download_url = furl.furl(self.metadata.download_url).set(query='') - url = settings.OFFICE_BASE_URL + download_url.url - return self.TEMPLATE.render(base=self.assets_url, url=url) + download_url = furl.furl(self.metadata.download_url).set(query='').url + encoded_download_url = parse.quote(download_url) + office_render_url = office365_settings.OFFICE_BASE_URL + encoded_download_url + return self.TEMPLATE.render(base=self.assets_url, url=office_render_url) @property def file_required(self): diff --git a/mfr/providers/osf/provider.py b/mfr/providers/osf/provider.py index 52bf13d36..0480ba2a5 100644 --- a/mfr/providers/osf/provider.py +++ b/mfr/providers/osf/provider.py @@ -1,9 +1,10 @@ -import os import json import hashlib +from http import HTTPStatus import logging -from urllib.parse import urlparse import mimetypes +import os +from urllib.parse import urlparse import furl import aiohttp @@ -11,12 +12,10 @@ from waterbutler.core import streams -from mfr.core import exceptions -from mfr.core import provider -from mfr.core.utils import sizeof_fmt -from mfr.providers.osf import settings -from mfr.settings import MAX_FILE_SIZE_TO_RENDER -from mfr.core.exceptions import TooBigToRenderError +from mfr import settings as mfr_settings +from mfr.core import exceptions as mfr_exceptions +from mfr.core import provider, utils +from mfr.providers.osf import settings as provider_settings logger = logging.getLogger(__name__) @@ -74,14 +73,14 @@ async def metadata(self): response_reason = metadata_response.reason response_headers = metadata_response.headers await metadata_response.release() - if response_code != 200: - raise exceptions.MetadataError( + if response_code != HTTPStatus.OK: + raise mfr_exceptions.MetadataError( 'Failed to fetch file metadata from WaterButler. Received response: ', 'code {} {}'.format(str(response_code), str(response_reason)), metadata_url=download_url, response=response_reason, provider=self.NAME, - code=400 + code=HTTPStatus.BAD_REQUEST ) try: @@ -104,12 +103,12 @@ async def metadata(self): name, ext = os.path.splitext(metadata['data']['name']) size = metadata['data']['size'] - max_file_size = MAX_FILE_SIZE_TO_RENDER.get(ext) + max_file_size = mfr_settings.MAX_FILE_SIZE_TO_RENDER.get(ext) if max_file_size and size and int(size) > max_file_size: - raise TooBigToRenderError( + raise mfr_exceptions.TooBigToRenderError( "This file with extension '{ext}' exceeds the size limit of {max_size} and will not " "be rendered. To view this file download it and view it " - "offline.".format(ext=ext, max_size=sizeof_fmt(max_file_size)), + "offline.".format(ext=ext, max_size=utils.sizeof_fmt(max_file_size)), requested_size=int(size), maximum_size=max_file_size, ) @@ -121,18 +120,16 @@ async def metadata(self): unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest() is_public = False - - if 'public_file' in cleaned_url.args: - if cleaned_url.args['public_file'] not in ['0', '1']: - raise exceptions.QueryParameterError( - 'The `public_file` query paramter should either `0`, `1`, or unused. Instead ' - 'got {}'.format(cleaned_url.args['public_file']), + public_file = cleaned_url.args.get('public_file', None) + if public_file: + if public_file not in ['True', 'False']: + raise mfr_exceptions.QueryParameterError( + 'Invalid value for query parameter `public_file`: {}'.format(cleaned_url.args['public_file']), url=download_url, provider=self.NAME, - code=400, + code=HTTPStatus.BAD_REQUEST, ) - - is_public = cleaned_url.args['public_file'] == '1' + is_public = public_file == 'True' return provider.ProviderMetadata(name, ext, content_type, unique_key, download_url, is_public=is_public) @@ -140,13 +137,13 @@ async def metadata(self): async def download(self): """Download file from WaterButler, returning stream.""" download_url = await self._fetch_download_url() - headers = {settings.MFR_IDENTIFYING_HEADER: '1'} + headers = {provider_settings.MFR_IDENTIFYING_HEADER: '1'} response = await self._make_request('GET', download_url, allow_redirects=False, headers=headers) - if response.status >= 400: + if response.status >= HTTPStatus.BAD_REQUEST: resp_text = await response.text() logger.error('Unable to download file: ({}) {}'.format(response.status, resp_text)) - raise exceptions.DownloadError( + raise mfr_exceptions.DownloadError( 'Unable to download the requested file, please try again later.', download_url=download_url, response=resp_text, @@ -154,7 +151,7 @@ async def download(self): ) self.metrics.add('download.saw_redirect', False) - if response.status in (302, 301): + if response.status in (HTTPStatus.MOVED_PERMANENTLY, HTTPStatus.FOUND): await response.release() response = await aiohttp.request('GET', response.headers['location']) self.metrics.add('download.saw_redirect', True) @@ -186,8 +183,8 @@ async def _fetch_download_url(self): ) await request.release() - if request.status != 302: - raise exceptions.MetadataError( + if request.status != HTTPStatus.FOUND: + raise mfr_exceptions.MetadataError( request.reason, metadata_url=self.url, provider=self.NAME, diff --git a/tests/extensions/office365/test_renderer.py b/tests/extensions/office365/test_renderer.py index ae485a125..5d90d3990 100644 --- a/tests/extensions/office365/test_renderer.py +++ b/tests/extensions/office365/test_renderer.py @@ -1,16 +1,23 @@ +from urllib import parse + import furl import pytest -from mfr.extensions.office365 import settings from mfr.core.provider import ProviderMetadata from mfr.extensions.office365 import Office365Renderer +from mfr.extensions.office365 import settings as office365_settings @pytest.fixture def metadata(): - return ProviderMetadata('test', '.pdf', 'text/plain', '1234', + return ProviderMetadata( + 'test', + '.pdf', + 'text/plain', + '1234', 'http://wb.osf.io/file/test.pdf?token=1234&public_file=1', - is_public=True) + is_public=True + ) @pytest.fixture @@ -20,7 +27,7 @@ def file_path(): @pytest.fixture def url(): - return 'http://osf.io/file/test.pdf' + return parse.quote('http://osf.io/file/test.pdf') @pytest.fixture @@ -40,8 +47,8 @@ def renderer(metadata, file_path, url, assets_url, export_url): class TestOffice365Renderer: - def test_render_pdf(self, renderer, metadata, assets_url): + def test_render_pdf(self, renderer, metadata): download_url = furl.furl(metadata.download_url).set(query='') - body_url = settings.OFFICE_BASE_URL + download_url.url + office_render_url = office365_settings.OFFICE_BASE_URL + parse.quote(download_url.url) body = renderer.render() - assert ''.format(body_url) in body + assert ''.format(office_render_url) in body diff --git a/tests/server/handlers/test_query_params.py b/tests/server/handlers/test_query_params.py index b707035dc..001c42b27 100644 --- a/tests/server/handlers/test_query_params.py +++ b/tests/server/handlers/test_query_params.py @@ -7,7 +7,7 @@ from tests import utils -class TestRenderHandler(utils.HandlerTestCase): +class TestQueryParamsHandler(utils.HandlerTestCase): @testing.gen_test def test_format_url(self):