Skip to content

Commit

Permalink
Public_file query param and office365 renderer
Browse files Browse the repository at this point in the history
Adding support for a `public_file` query param so the OSF can request a
public renderer. Added office365 which is a public renderer. This uses office
online to do .docx file conversions.
  • Loading branch information
AddisonSchiller authored and cslzchen committed Dec 6, 2017
1 parent 8bb2dd4 commit 04740f1
Show file tree
Hide file tree
Showing 12 changed files with 176 additions and 2 deletions.
19 changes: 19 additions & 0 deletions mfr/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,25 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa
'response': self.response
}])


class QueryParameterError(ProviderError):
    """Provider error raised when a query parameter is misused.

    The offending ``url`` and the ``code`` passed in (default 400) are kept on
    the instance as ``url`` and ``return_code``, and are also appended to
    ``attr_stack`` under the ``query_parameter`` type.
    """

    __TYPE = 'query_parameter'

    def __init__(self, message, *args, url: str='', code: int=400, **kwargs):
        # Positional extras are forwarded first, then ``code`` by keyword.
        super().__init__(message, *args, code=code, **kwargs)
        self.url, self.return_code = url, code
        details = {
            'url': self.url,
            'returncode': self.return_code,
        }
        self.attr_stack.append([self.__TYPE, details])


class TooBigToRenderError(ProviderError):
"""If the user tries to render a file larger than a server specified maximum, throw a
TooBigToRenderError.
Expand Down
4 changes: 3 additions & 1 deletion mfr/core/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,19 @@ def download(self):

class ProviderMetadata:

def __init__(self, name, ext, content_type, unique_key, download_url):
def __init__(self, name, ext, content_type, unique_key, download_url, is_public=False):
self.name = name
self.ext = ext
self.content_type = content_type
self.unique_key = unique_key
self.download_url = download_url
self.is_public = is_public

def serialize(self):
return {
'name': self.name,
'ext': self.ext,
'is_public': self.is_public,
'content_type': self.content_type,
'unique_key': str(self.unique_key),
'download_url': str(self.download_url),
Expand Down
13 changes: 13 additions & 0 deletions mfr/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,19 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
:rtype: :class:`mfr.core.extension.BaseRenderer`
"""
normalized_name = (name and name.lower()) or 'none'
if metadata.is_public:
try:
return driver.DriverManager(
namespace='mfr.public_renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except:
# Check for a public renderer, if one doesn't exist, use a regular one
# Real exceptions handled by main driver.DriverManager
pass

try:
return driver.DriverManager(
namespace='mfr.renderers',
Expand Down
20 changes: 20 additions & 0 deletions mfr/extensions/office365/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

# Office 365 Renderer


This renderer uses Office Online to render .docx files for us. If the Office Online URL ever changes, it will also need to be changed here in settings.

Currently there is no OSF side component for these changes. Once there is, this specific note can be removed. In the meantime in order to test this renderer, you need to go to your local OSF copy of this file: https://github.com/CenterForOpenScience/osf.io/blob/develop/addons/base/views.py#L728-L736
and add 'public_file' : 1, to the dict. This will send all files as public files.

Testing this renderer locally is hard. Since Office Online needs access to the files, it will not work with private files or with files hosted locally. To see how .docx files will render, replace the `render` function with something like this:

```
def render(self):
static_url = 'https://files.osf.io/v1/resources/<fake_project_id>/providers/osfstorage/<fake_file_id>'
url = settings.OFFICE_BASE_URL + static_url
return self.TEMPLATE.render(base=self.assets_url, url=url)
```

The file at `static_url` must be publicly available.
1 change: 1 addition & 0 deletions mfr/extensions/office365/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .render import Office365Renderer # noqa
36 changes: 36 additions & 0 deletions mfr/extensions/office365/render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import furl

from mfr.core import extension
from mako.lookup import TemplateLookup
from mfr.extensions.office365 import settings


class Office365Renderer(extension.BaseRenderer):
    """Renderer that hands public .docx files to Office Online.

    Office Online does the document conversion; this class only builds the
    Office Online URL for the file and returns the viewer template with that
    URL embedded in an iframe. It is only meant to be created when the request
    carries ``public_file=1``. The file MUST be publicly reachable online, so
    the renderer will not work when testing locally.
    """

    TEMPLATE = TemplateLookup(
        directories=[os.path.join(os.path.dirname(__file__), 'templates')],
    ).get_template('viewer.mako')

    def render(self):
        """Return the viewer markup pointing Office Online at the file.

        The query string is removed from the download URL before it is
        appended to ``settings.OFFICE_BASE_URL``.
        """
        source = furl.furl(self.metadata.download_url)
        source.set(query='')
        # NOTE(review): the download URL is appended without percent-encoding.
        return self.TEMPLATE.render(
            base=self.assets_url,
            url=settings.OFFICE_BASE_URL + source.url,
        )

    @property
    def file_required(self):
        """Always ``False``; ``render`` never reads ``file_path``."""
        return False

    @property
    def cache_result(self):
        """Always ``False``."""
        return False
6 changes: 6 additions & 0 deletions mfr/extensions/office365/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mfr import settings


config = settings.child('OFFICE365_EXTENSION_CONFIG')

# Office Online embed endpoint; the renderer appends the file's download URL
# directly after `src=`. Looked up through `config` so a deployment can
# override it, falling back to the public Office Online address.
# NOTE(review): relies on `settings.child()` returning a dict-like object
# with `.get(key, default)` -- confirm against mfr.settings.
OFFICE_BASE_URL = config.get(
    'OFFICE_BASE_URL',
    'https://view.officeapps.live.com/op/embed.aspx?src=',
)
11 changes: 11 additions & 0 deletions mfr/extensions/office365/templates/viewer.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
## Viewer template for the Office 365 renderer: shows the Office Online
## page for `url` inside a full-width iframe.
<style>
    iframe {
        width: 100%;
        /* A non-zero CSS length needs a unit; a bare `800` is dropped in standards mode. */
        height: 800px;
    }
</style>

<iframe src=${url} frameborder='0'></iframe>

<script src="/static/js/mfr.js"></script>
<script src="/static/js/mfr.child.js"></script>
18 changes: 17 additions & 1 deletion mfr/providers/osf/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,23 @@ async def metadata(self):
cleaned_url.args.pop(unneeded, None)
self.metrics.add('metadata.clean_url_args', str(cleaned_url))
unique_key = hashlib.sha256((metadata['data']['etag'] + cleaned_url.url).encode('utf-8')).hexdigest()
return provider.ProviderMetadata(name, ext, content_type, unique_key, download_url)

is_public = False

if 'public_file' in cleaned_url.args:
if cleaned_url.args['public_file'] not in ['0', '1']:
raise exceptions.QueryParameterError(
'The `public_file` query paramter should either `0`, `1`, or unused. Instead '
'got {}'.format(cleaned_url.args['public_file']),
url=download_url,
provider=self.NAME,
code=400,
)

is_public = cleaned_url.args['public_file'] == '1'

return provider.ProviderMetadata(name, ext, content_type,
unique_key, download_url, is_public=is_public)

async def download(self):
"""Download file from WaterButler, returning stream."""
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ def parse_requirements(requirements):
'http = mfr.providers.http:HttpProvider',
'osf = mfr.providers.osf:OsfProvider',
],
'mfr.public_renderers': [
'.docx = mfr.extensions.office365:Office365Renderer',
],
'mfr.exporters': [
# google docs
'.gdraw = mfr.extensions.image:ImageExporter',
Expand Down
Empty file.
47 changes: 47 additions & 0 deletions tests/extensions/office365/test_renderer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import furl
import pytest

from mfr.extensions.office365 import settings
from mfr.core.provider import ProviderMetadata
from mfr.extensions.office365 import Office365Renderer


@pytest.fixture
def metadata():
    """Public-file provider metadata whose download URL carries a query string."""
    download_url = 'http://wb.osf.io/file/test.pdf?token=1234&public_file=1'
    return ProviderMetadata(
        'test', '.pdf', 'text/plain', '1234', download_url, is_public=True,
    )


@pytest.fixture
def file_path():
    """Local path handed to the renderer under test."""
    local_path = '/tmp/test.docx'
    return local_path


@pytest.fixture
def url():
    """File URL handed to the renderer under test."""
    file_url = 'http://osf.io/file/test.pdf'
    return file_url


@pytest.fixture
def assets_url():
    """Assets base URL handed to the renderer under test."""
    base = 'http://mfr.osf.io/assets'
    return base


@pytest.fixture
def export_url(url):
    """Export endpoint URL with the ``url`` fixture's value as its ``url`` parameter.

    ``url`` is requested as a fixture argument rather than called as a plain
    function: pytest (4.0+) raises an error when a fixture function is
    called directly.
    """
    return 'http://mfr.osf.io/export?url=' + url


@pytest.fixture
def renderer(metadata, file_path, url, assets_url, export_url):
    """An ``Office365Renderer`` built from the other fixtures in this module."""
    init_args = (metadata, file_path, url, assets_url, export_url)
    return Office365Renderer(*init_args)


class TestOffice365Renderer:
    """Checks the markup produced by ``Office365Renderer.render``."""

    def test_render_pdf(self, renderer, metadata, assets_url):
        # Build the expected iframe src the same way the renderer does:
        # the download URL without its query string, appended to the base URL.
        stripped = furl.furl(metadata.download_url)
        stripped.set(query='')
        expected_iframe = '<iframe src={} frameborder=\'0\'></iframe>'.format(
            settings.OFFICE_BASE_URL + stripped.url
        )
        assert expected_iframe in renderer.render()

0 comments on commit 04740f1

Please sign in to comment.