Skip to content

Commit

Permalink
Response to CR
Browse files Browse the repository at this point in the history
  • Loading branch information
AddisonSchiller committed Nov 21, 2017
1 parent 8aaafb2 commit c803684
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 46 deletions.
6 changes: 4 additions & 2 deletions tests/providers/dataverse/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_file_metadata(self, file_metadata_object):
assert not file_metadata_object.created_utc
assert file_metadata_object.content_type == 'text/plain; charset=US-ASCII'
assert file_metadata_object.etag == 'latest::20'
assert file_metadata_object.original_name == 'thefile.txt'
assert file_metadata_object.original_names == ['thefile.txt']
assert file_metadata_object.extra == {
'fileId': '20',
'datasetVersion': 'latest',
Expand All @@ -71,7 +71,9 @@ def test_csv_file_metadata(self, csv_file_metadata_object):
assert not csv_file_metadata_object.created_utc
assert csv_file_metadata_object.content_type == 'text/tab-separated-values'
assert csv_file_metadata_object.etag == 'latest::20'
assert csv_file_metadata_object.original_name == 'thefile.csv'
names = csv_file_metadata_object.original_names
assert 'thefile.csv' in names
assert 'thefile.CSV' in names
assert csv_file_metadata_object.extra == {
'fileId': '20',
'datasetVersion': 'latest',
Expand Down
27 changes: 27 additions & 0 deletions tests/providers/dataverse/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from waterbutler.core.path import WaterButlerPath
from waterbutler.providers.dataverse import settings as dvs
from waterbutler.providers.dataverse import DataverseProvider
from waterbutler.providers.dataverse.exceptions import DataverseIngestionLockError
from waterbutler.providers.dataverse.metadata import DataverseFileMetadata, DataverseRevision

from tests.providers.dataverse.fixtures import (
Expand Down Expand Up @@ -235,6 +236,32 @@ async def test_upload_create(self, provider, file_stream, native_file_metadata,
assert aiohttpretty.has_call(method='GET', uri=latest_url)
assert aiohttpretty.has_call(method='GET', uri=latest_published_url)

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_upload_ingestion_exception(self, provider, file_stream, native_file_metadata,
empty_native_dataset_metadata, native_dataset_metadata):
path = WaterButlerPath('/thefile.txt')
url = provider.build_url(dvs.EDIT_MEDIA_BASE_URL, 'study', provider.doi)
aiohttpretty.register_uri('POST', url, status=400, body=b'something dataset lock: Ingest')

with pytest.raises(DataverseIngestionLockError):
await provider.upload(file_stream, path)

assert aiohttpretty.has_call(method='POST', uri=url)

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_upload_random_exception(self, provider, file_stream, native_file_metadata,
empty_native_dataset_metadata, native_dataset_metadata):
path = WaterButlerPath('/thefile.txt')
url = provider.build_url(dvs.EDIT_MEDIA_BASE_URL, 'study', provider.doi)
aiohttpretty.register_uri('POST', url, status=400, body=b'something something error')

with pytest.raises(exceptions.UploadError):
await provider.upload(file_stream, path)

assert aiohttpretty.has_call(method='POST', uri=url)

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_upload_updates(self, provider,
Expand Down
6 changes: 1 addition & 5 deletions tests/providers/dataverse/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ def format_dict():
'originalFileFormat': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'originalFormatLabel': 'MS Excel (XLSX)',
'contentType': 'text/tab-separated-values',

},
'RData': {
'originalFileFormat': 'application/x-rlang-transport',
'originalFormatLabel': 'R Data',
'contentType': 'text/tab-separated-values'

},
'sav': {
'originalFileFormat': 'application/x-spss-sav',
Expand All @@ -27,13 +25,11 @@ def format_dict():
'originalFileFormat': 'application/x-stata',
'originalFormatLabel': 'Stata Binary',
'contentType': 'text/tab-separated-values'

},
'por': {
'originalFileFormat': 'application/x-spss-por',
'originalFormatLabel': 'SPSS Portable',
'contentType': 'text/tab-separated-values'

},
'csv': {
'originalFileFormat': 'text/csv',
Expand All @@ -47,7 +43,7 @@ class TestUtils:

def test_original_ext_from_raw_metadata(self, format_dict):
for key in format_dict:
assert key == dv_utils.original_ext_from_raw_metadata(format_dict[key])
assert key in dv_utils.original_ext_from_raw_metadata(format_dict[key])

def test_original_ext_from_raw_metadata_none_case(self, format_dict):
for key in format_dict:
Expand Down
15 changes: 15 additions & 0 deletions waterbutler/providers/dataverse/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from http import HTTPStatus

from waterbutler.core.exceptions import UploadError


class DataverseIngestionLockError(UploadError):
def __init__(self, message, code=HTTPStatus.BAD_REQUEST):
"""``dummy`` argument is because children of ``WaterButlerError`` must be instantiable with
a single integer argument. See :class:`waterbutler.core.exceptions.WaterButlerError`
for details.
"""
super().__init__(
'Some uploads to Dataverse will lock uploading for a time. Please wait'
' a few seconds and try again.',
code=code)
19 changes: 11 additions & 8 deletions waterbutler/providers/dataverse/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,21 @@ def name(self):
return self.raw.get('name', None) or self.raw.get('filename', None)

@property
def original_name(self):
def original_names(self):
""" Dataverse 'ingests' some files types. This changes their extension.
This property will look through the metadata to try to determine the original
name of the file.
This property will look through the metadata to try to determine possible
original names of the file.
"""

ext = dv_utils.original_ext_from_raw_metadata(self.raw)
if ext is None:
return self.name
extensions = dv_utils.original_ext_from_raw_metadata(self.raw)
if extensions is None:
return [self.name]
else:
name = self.name[:self.name.rfind('.')]
return name + '.{}'.format(ext)
names = []
for ext in extensions:
name = self.name[:self.name.rfind('.')]
names.append(name + '.{}'.format(ext))
return names

@property
def path(self):
Expand Down
17 changes: 14 additions & 3 deletions waterbutler/providers/dataverse/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from waterbutler.providers.dataverse import settings
from waterbutler.providers.dataverse.metadata import DataverseRevision
from waterbutler.providers.dataverse.metadata import DataverseDatasetMetadata
from waterbutler.providers.dataverse.exceptions import DataverseIngestionLockError


class DataverseProvider(provider.BaseProvider):
Expand Down Expand Up @@ -170,16 +171,26 @@ async def upload(self, stream, path, **kwargs):
headers=dv_headers,
auth=(self.token, ),
data=file_stream,
expects=(201, ),
expects=(201, 400,),
throws=exceptions.UploadError
)

if resp.status == 400:
data = await resp.read()
data = data.decode('utf-8')

if 'dataset lock: Ingest' in data:
raise DataverseIngestionLockError({'response': data})
else:
raise (await exceptions.exception_from_response(resp,
error=exceptions.UploadError))
await resp.release()

# Find appropriate version of file
metadata = await self._get_data('latest')
files = metadata if isinstance(metadata, list) else []
file_metadata = next(file for file in files if file.name == path.name or
file.original_name == path.name)
file_metadata = next(file for file in files if (file.name == path.name or
path.name in file.original_names))

if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
raise exceptions.UploadChecksumMismatchError()
Expand Down
46 changes: 18 additions & 28 deletions waterbutler/providers/dataverse/utils.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,46 @@
ORIGINAL_FORMATS = {
'xlsx': {
'original_format': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'original_label': 'MS Excel (XLSX)',
'content_type': 'text/tab-separated-values',

},
# Rdata can come in a few different forms, so just list all of them here
'RData': {
'original_format': 'application/x-rlang-transport',
'original_label': 'R Data',
'content_type': 'text/tab-separated-values'

},
'rdata': {
'original_format': 'application/x-rlang-transport',
'original_label': 'R Data',
'content_type': 'text/tab-separated-values'

},
'Rdata': {
'original_format': 'application/x-rlang-transport',
'original_label': 'R Data',
'content_type': 'text/tab-separated-values'

'content_type': 'text/tab-separated-values',
'all_extensions': ['rdata', 'Rdata', 'RData']
},
'sav': {
'original_format': 'application/x-spss-sav',
'original_label': 'SPSS SAV',
'content_type': 'text/tab-separated-values'
'content_type': 'text/tab-separated-values',
'all_extensions': ['sav']
},
'dta': {
'original_format': 'application/x-stata',
'original_label': 'Stata Binary',
'content_type': 'text/tab-separated-values'

'content_type': 'text/tab-separated-values',
'all_extensions': ['dta']
},
'por': {
'original_format': 'application/x-spss-por',
'original_label': 'SPSS Portable',
'content_type': 'text/tab-separated-values'

'content_type': 'text/tab-separated-values',
'all_extensions': ['por']
},
'csv': {
'original_format': 'text/csv',
'original_label': 'Comma Separated Values',
'content_type': 'text/tab-separated-values'
'content_type': 'text/tab-separated-values',
'all_extensions': ['csv', 'CSV']
},
'xlsx': {
'original_format': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'original_label': 'MS Excel (XLSX)',
'content_type': 'text/tab-separated-values',
'all_extensions': ['xlsx']
}
}


def original_ext_from_raw_metadata(data):
"""Use the raw metadata to figure out the original extension."""
"""Use the raw metadata to figure out possible original extensions."""
label = data.get('originalFormatLabel', None)
file_format = data.get('originalFileFormat', None)
content_type = data.get('contentType', None)
Expand All @@ -63,6 +53,6 @@ def original_ext_from_raw_metadata(data):
file_format == ORIGINAL_FORMATS[key]['original_format'] and
content_type == ORIGINAL_FORMATS[key]['content_type']):

return key
return ORIGINAL_FORMATS[key]['all_extensions']

return None

0 comments on commit c803684

Please sign in to comment.