class QueryParameterError(ProviderError):
    """Raised by a provider when a query parameter on the requested URL is misused.

    Subclass of :class:`ProviderError`; ``message``, ``code`` and any extra positional or
    keyword arguments are forwarded to the parent constructor unchanged.

    :param message: human-readable description of the problem
    :param url: the URL carrying the offending query parameter
    :param code: status code forwarded to the parent and stored as ``return_code``
        (defaults to 400)
    """

    __TYPE = 'query_parameter'

    def __init__(self, message, *args, url: str='', code: int=400, **kwargs):
        super().__init__(message, *args, code=code, **kwargs)
        # ``url`` is annotated as ``str`` but callers may hand over a URL object; store
        # plain text so the entry recorded in ``attr_stack`` is always a string.
        self.url = str(url)
        self.return_code = code
        self.attr_stack.append([self.__TYPE, {
            'url': self.url,
            'returncode': self.return_code,
        }])
:class:`mfr.core.extension.BaseRenderer` """ normalized_name = (name and name.lower()) or 'none' + if metadata.is_public: + try: + return driver.DriverManager( + namespace='mfr.public_renderers', + name=normalized_name, + invoke_on_load=True, + invoke_args=(metadata, file_path, url, assets_url, export_url), + ).driver + except: + # Check for a public renderer, if one doesn't exist, use a regular one + # Real exceptions handled by main driver.DriverManager + pass + try: return driver.DriverManager( namespace='mfr.renderers', diff --git a/mfr/extensions/office365/README.md b/mfr/extensions/office365/README.md new file mode 100644 index 000000000..a43ca9cc4 --- /dev/null +++ b/mfr/extensions/office365/README.md @@ -0,0 +1,20 @@ + +# Office 365 Renderer + + +This renderer uses Office Online to render .docx files for us. If the Office Online URL ever changes, it must also be updated in this extension's settings. + +Currently there is no OSF-side component for these changes. Once there is, this note can be removed. In the meantime, to test this renderer, go to your local OSF copy of this file: https://github.com/CenterForOpenScience/osf.io/blob/develop/addons/base/views.py#L728-L736 +and add `'public_file': 1,` to the dict. This will send all files as public files. + +Testing this renderer locally is hard. Since Office Online needs access to the files, it will not work with private files or ones hosted locally. To see how the .docx files will render, replace the render function with something that looks like this: + +``` + def render(self): + static_url = 'https://files.osf.io/v1/resources//providers/osfstorage/' + url = settings.OFFICE_BASE_URL + static_url + return self.TEMPLATE.render(base=self.assets_url, url=url) + +``` + +The file at `static_url` must be publicly available. 
class Office365Renderer(extension.BaseRenderer):
    """Renderer for public .docx files that delegates the work to Office Online.

    Rather than converting the file itself, ``render`` builds an Office Online URL that
    points at the file's download URL and passes it to the ``viewer.mako`` template.
    This renderer is only meant to be created when the request carries the query param
    ``public_file=1``. The file MUST be publicly available online, so the renderer will
    not work when testing locally.
    """

    TEMPLATE = TemplateLookup(
        directories=[os.path.join(os.path.dirname(__file__), 'templates')],
    ).get_template('viewer.mako')

    @property
    def file_required(self):
        # NOTE(review): presumably signals that no local copy of the file is needed,
        # since only its URL is used below -- confirm against BaseRenderer.
        return False

    @property
    def cache_result(self):
        # NOTE(review): presumably disables caching of the rendered output -- confirm
        # against BaseRenderer.
        return False

    def render(self):
        """Return the viewer template rendered with the Office Online URL for this file."""
        # Drop the whole query string from the download URL before handing it over.
        bare_download = furl.furl(self.metadata.download_url).set(query='')
        viewer_src = settings.OFFICE_BASE_URL + bare_download.url
        return self.TEMPLATE.render(base=self.assets_url, url=viewer_src)
@pytest.fixture
def metadata():
    """Metadata for a public file whose download URL carries query parameters."""
    return ProviderMetadata('test', '.pdf', 'text/plain', '1234',
                            'http://wb.osf.io/file/test.pdf?token=1234&public_file=1',
                            is_public=True)


@pytest.fixture
def file_path():
    return '/tmp/test.docx'


@pytest.fixture
def url():
    return 'http://osf.io/file/test.pdf'


@pytest.fixture
def assets_url():
    return 'http://mfr.osf.io/assets'


@pytest.fixture
def export_url(url):
    # Request the ``url`` fixture as an argument; calling a fixture function directly
    # is rejected by pytest 4 and later.
    return 'http://mfr.osf.io/export?url=' + url


@pytest.fixture
def renderer(metadata, file_path, url, assets_url, export_url):
    return Office365Renderer(metadata, file_path, url, assets_url, export_url)


class TestOffice365Renderer:

    def test_render_pdf(self, renderer, metadata, assets_url):
        """The rendered body must contain the Office Online URL for the query-less download URL."""
        download_url = furl.furl(metadata.download_url).set(query='')
        body_url = settings.OFFICE_BASE_URL + download_url.url
        body = renderer.render()
        # Check for the URL itself: ``''.format(body_url)`` is the empty string, which is
        # contained in every body and therefore verified nothing.
        assert body_url in body