Skip to content

Commit

Permalink
Encode download url, fix tests and style updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
cslzchen committed Dec 7, 2017
1 parent 04740f1 commit 95c2eec
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 61 deletions.
5 changes: 2 additions & 3 deletions mfr/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,8 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa


class QueryParameterError(ProviderError):
"""The MFR related errors raised from a :class:`mfr.core.provider` and relating to query parameters
should inherit from MetadataError
This error is thrown when a query parameter is used missused
"""The MFR related errors raised from a :class:`mfr.core.provider`and relating to query
parameters. This error is thrown when the query has an invalid value.
"""

__TYPE = 'query_parameter'
Expand Down
15 changes: 8 additions & 7 deletions mfr/core/provider.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import abc
import markupsafe
from http import HTTPStatus

import furl
import markupsafe

from mfr.core import exceptions
from mfr.server import settings
from mfr.core import exceptions as mfr_exceptions
from mfr.core.metrics import MetricsRecord
from mfr.server import settings as mfr_settings


class BaseProvider(metaclass=abc.ABCMeta):
Expand All @@ -17,12 +18,12 @@ class BaseProvider(metaclass=abc.ABCMeta):
def __init__(self, request, url):
self.request = request
url_netloc = furl.furl(url).netloc
if url_netloc not in settings.ALLOWED_PROVIDER_NETLOCS:
raise exceptions.ProviderError(
if url_netloc not in mfr_settings.ALLOWED_PROVIDER_NETLOCS:
raise mfr_exceptions.ProviderError(
message="{} is not a permitted provider domain.".format(
markupsafe.escape(url_netloc)
),
code=400
code=HTTPStatus.BAD_REQUEST
)
self.url = url
self.provider_metrics = MetricsRecord('provider')
Expand Down Expand Up @@ -60,8 +61,8 @@ def serialize(self):
return {
'name': self.name,
'ext': self.ext,
'is_public': self.is_public,
'content_type': self.content_type,
'unique_key': str(self.unique_key),
'download_url': str(self.download_url),
'is_public': self.is_public,
}
9 changes: 6 additions & 3 deletions mfr/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,27 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
normalized_name = (name and name.lower()) or 'none'
if metadata.is_public:
try:
# Use the public renderer if exist
return driver.DriverManager(
namespace='mfr.public_renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except:
# Check for a public renderer, if one doesn't exist, use a regular one
# Real exceptions handled by main driver.DriverManager
# If public render does not exist, use default renderer by MFR
# If public render exists but exceptions occurs, delay the exception handling
pass

try:
# Use the default MFR handler
return driver.DriverManager(
namespace='mfr.renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except RuntimeError:
except:
raise exceptions.MakeRendererError(
namespace='mfr.renderers',
name=normalized_name,
Expand All @@ -110,6 +112,7 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
}
)


def sizeof_fmt(num, suffix='B'):
if abs(num) < 1000:
return '%3.0f%s' % (num, suffix)
Expand Down
26 changes: 15 additions & 11 deletions mfr/extensions/office365/render.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import os
from urllib import parse

import furl
from mako.lookup import TemplateLookup

from mfr.core import extension
from mako.lookup import TemplateLookup
from mfr.extensions.office365 import settings
from mfr.extensions.office365 import settings as office365_settings


class Office365Renderer(extension.BaseRenderer):
"""A renderer for use with public .docx files.
"""A renderer for .docx files that are publicly available.
Office online can render `.docx` files to `.pdf` for us. This renderer will only be made
if a query param with `public_file=true` is present. It then generates and embeds an
office online URL into an `iframe` and returns the template. The file it is trying to
render MUST be public.
Office online can render .docx files to pdf for us.
This renderer will only ever be made if a query param with `public_file=1` is sent.
It then generates and embeds an office online url into an
iframe and returns the template. The file it is trying to render MUST
be available publically online. This renderer will not work if testing locally.
Note: this renderer DOES NOT work locally.
"""

Expand All @@ -23,9 +26,10 @@ class Office365Renderer(extension.BaseRenderer):
]).get_template('viewer.mako')

def render(self):
download_url = furl.furl(self.metadata.download_url).set(query='')
url = settings.OFFICE_BASE_URL + download_url.url
return self.TEMPLATE.render(base=self.assets_url, url=url)
download_url = furl.furl(self.metadata.download_url).set(query='').url
encoded_download_url = parse.quote(download_url)
office_render_url = office365_settings.OFFICE_BASE_URL + encoded_download_url
return self.TEMPLATE.render(base=self.assets_url, url=office_render_url)

@property
def file_required(self):
Expand Down
55 changes: 26 additions & 29 deletions mfr/providers/osf/provider.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
import os
import json
import hashlib
from http import HTTPStatus
import logging
from urllib.parse import urlparse
import mimetypes
import os
from urllib.parse import urlparse

import furl
import aiohttp
from aiohttp.errors import ContentEncodingError

from waterbutler.core import streams

from mfr.core import exceptions
from mfr.core import provider
from mfr.core.utils import sizeof_fmt
from mfr.providers.osf import settings
from mfr.settings import MAX_FILE_SIZE_TO_RENDER
from mfr.core.exceptions import TooBigToRenderError
from mfr import settings as mfr_settings
from mfr.core import exceptions as mfr_exceptions
from mfr.core import provider, utils
from mfr.providers.osf import settings as provider_settings

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -74,14 +73,14 @@ async def metadata(self):
response_reason = metadata_response.reason
response_headers = metadata_response.headers
await metadata_response.release()
if response_code != 200:
raise exceptions.MetadataError(
if response_code != HTTPStatus.OK:
raise mfr_exceptions.MetadataError(
'Failed to fetch file metadata from WaterButler. Received response: ',
'code {} {}'.format(str(response_code), str(response_reason)),
metadata_url=download_url,
response=response_reason,
provider=self.NAME,
code=400
code=HTTPStatus.BAD_REQUEST
)

try:
Expand All @@ -104,12 +103,12 @@ async def metadata(self):
name, ext = os.path.splitext(metadata['data']['name'])
size = metadata['data']['size']

max_file_size = MAX_FILE_SIZE_TO_RENDER.get(ext)
max_file_size = mfr_settings.MAX_FILE_SIZE_TO_RENDER.get(ext)
if max_file_size and size and int(size) > max_file_size:
raise TooBigToRenderError(
raise mfr_exceptions.TooBigToRenderError(
"This file with extension '{ext}' exceeds the size limit of {max_size} and will not "
"be rendered. To view this file download it and view it "
"offline.".format(ext=ext, max_size=sizeof_fmt(max_file_size)),
"offline.".format(ext=ext, max_size=utils.sizeof_fmt(max_file_size)),
requested_size=int(size), maximum_size=max_file_size,
)

Expand All @@ -121,40 +120,38 @@ async def metadata(self):
unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest()

is_public = False

if 'public_file' in cleaned_url.args:
if cleaned_url.args['public_file'] not in ['0', '1']:
raise exceptions.QueryParameterError(
'The `public_file` query paramter should either `0`, `1`, or unused. Instead '
'got {}'.format(cleaned_url.args['public_file']),
public_file = cleaned_url.args.get('public_file', None)
if public_file:
if public_file not in ['True', 'False']:
raise mfr_exceptions.QueryParameterError(
'Invalid value for query parameter `public_file`: {}'.format(cleaned_url.args['public_file']),
url=download_url,
provider=self.NAME,
code=400,
code=HTTPStatus.BAD_REQUEST,
)

is_public = cleaned_url.args['public_file'] == '1'
is_public = public_file == 'True'

return provider.ProviderMetadata(name, ext, content_type,
unique_key, download_url, is_public=is_public)

async def download(self):
"""Download file from WaterButler, returning stream."""
download_url = await self._fetch_download_url()
headers = {settings.MFR_IDENTIFYING_HEADER: '1'}
headers = {provider_settings.MFR_IDENTIFYING_HEADER: '1'}
response = await self._make_request('GET', download_url, allow_redirects=False, headers=headers)

if response.status >= 400:
if response.status >= HTTPStatus.BAD_REQUEST:
resp_text = await response.text()
logger.error('Unable to download file: ({}) {}'.format(response.status, resp_text))
raise exceptions.DownloadError(
raise mfr_exceptions.DownloadError(
'Unable to download the requested file, please try again later.',
download_url=download_url,
response=resp_text,
provider=self.NAME,
)

self.metrics.add('download.saw_redirect', False)
if response.status in (302, 301):
if response.status in (HTTPStatus.MOVED_PERMANENTLY, HTTPStatus.FOUND):
await response.release()
response = await aiohttp.request('GET', response.headers['location'])
self.metrics.add('download.saw_redirect', True)
Expand Down Expand Up @@ -186,8 +183,8 @@ async def _fetch_download_url(self):
)
await request.release()

if request.status != 302:
raise exceptions.MetadataError(
if request.status != HTTPStatus.FOUND:
raise mfr_exceptions.MetadataError(
request.reason,
metadata_url=self.url,
provider=self.NAME,
Expand Down
21 changes: 14 additions & 7 deletions tests/extensions/office365/test_renderer.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
from urllib import parse

import furl
import pytest

from mfr.extensions.office365 import settings
from mfr.core.provider import ProviderMetadata
from mfr.extensions.office365 import Office365Renderer
from mfr.extensions.office365 import settings as office365_settings


@pytest.fixture
def metadata():
return ProviderMetadata('test', '.pdf', 'text/plain', '1234',
return ProviderMetadata(
'test',
'.pdf',
'text/plain',
'1234',
'http://wb.osf.io/file/test.pdf?token=1234&public_file=1',
is_public=True)
is_public=True
)


@pytest.fixture
Expand All @@ -20,7 +27,7 @@ def file_path():

@pytest.fixture
def url():
return 'http://osf.io/file/test.pdf'
return parse.quote('http://osf.io/file/test.pdf')


@pytest.fixture
Expand All @@ -40,8 +47,8 @@ def renderer(metadata, file_path, url, assets_url, export_url):

class TestOffice365Renderer:

def test_render_pdf(self, renderer, metadata, assets_url):
def test_render_pdf(self, renderer, metadata):
download_url = furl.furl(metadata.download_url).set(query='')
body_url = settings.OFFICE_BASE_URL + download_url.url
office_render_url = office365_settings.OFFICE_BASE_URL + parse.quote(download_url.url)
body = renderer.render()
assert '<iframe src={} frameborder=\'0\'></iframe>'.format(body_url) in body
assert '<iframe src={} frameborder=\'0\'></iframe>'.format(office_render_url) in body
2 changes: 1 addition & 1 deletion tests/server/handlers/test_query_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from tests import utils


class TestRenderHandler(utils.HandlerTestCase):
class TestQueryParamsHandler(utils.HandlerTestCase):

@testing.gen_test
def test_format_url(self):
Expand Down

0 comments on commit 95c2eec

Please sign in to comment.