-
Notifications
You must be signed in to change notification settings - Fork 0
/
upload_bundles.py
59 lines (48 loc) · 2 KB
/
upload_bundles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json
import logging
from _pathlib import Path
from uuid import UUID
from hca import HCAConfig
from hca.dss import DSSClient
from util import (
generate_file_uuid,
get_target_project_dirs,
)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def main():
    """Upload the ``bundle`` directory of every target project to the DSS.

    Each directory returned by ``get_target_project_dirs`` is processed in
    turn. The directory name is used as the bundle UUID. If the directory
    contains a ``bundle`` subdirectory, it is uploaded with
    ``DSSClient.upload`` and the response is printed; otherwise the project
    is skipped with a warning.

    Raises:
        ValueError: If a project directory name is not a UUID in canonical
            string form, or if a metadata file's ``provenance.document_id``
            differs from the UUID expected for that file. The latter is
            raised inside the file UUID callback, i.e. while the upload is
            running (presumably propagated by ``dss.upload`` -- TODO confirm).
    """
    logging.basicConfig(level=logging.INFO)
    hca_config = HCAConfig()
    hca_config["DSSClient"].swagger_url = "https://dss.dev.data.humancellatlas.org/v1/swagger.json"
    dss = DSSClient(config=hca_config)
    projects = get_target_project_dirs(follow_links=True)
    for project in projects:
        log.info('Uploading %s', project)
        bundle_uuid = project.name
        # UUID() itself raises ValueError for malformed input; the round-trip
        # comparison additionally rejects non-canonical spellings (e.g. upper
        # case or missing dashes). An explicit raise is used instead of
        # ``assert`` so the check survives ``python -O``.
        if str(UUID(bundle_uuid)) != bundle_uuid:
            raise ValueError(
                f'Project directory name {bundle_uuid!r} is not a canonical UUID'
            )
        bundle = project / 'bundle'

        def file_uuid_callback(file_path: str):
            """Return the UUID allocated to ``file_path``.

            For ``.json`` files the document is parsed and, except for
            ``links.json``, its ``provenance.document_id`` must equal the
            expected UUID: the bundle UUID for ``project_0.json`` and the
            allocated file UUID for every other document.
            """
            file_path = Path(file_path)
            file_name = file_path.name
            file_uuid = generate_file_uuid(bundle_uuid, file_name)
            log.info('Allocated UUID %s for file %s', file_uuid, file_path)
            if file_name.endswith('.json'):
                # JSON is UTF-8 by specification; do not depend on the
                # locale's default encoding.
                with file_path.open('rt', encoding='utf-8') as f:
                    document = json.load(f)
                # links.json is still parsed above (so invalid JSON is
                # caught) but has no document_id to verify.
                if file_name != 'links.json':
                    if file_name == 'project_0.json':
                        expected_id = bundle_uuid
                    else:
                        expected_id = file_uuid
                    actual_id = document['provenance']['document_id']
                    if actual_id != expected_id:
                        raise ValueError(
                            f'{file_path}: provenance.document_id is {actual_id!r}, '
                            f'expected {expected_id!r}'
                        )
            return file_uuid

        # The callback closes over this iteration's bundle_uuid; it is only
        # handed to the upload call made in the same iteration.
        if bundle.is_dir():
            response = dss.upload(src_dir=str(bundle),
                                  replica='aws',
                                  staging_bucket='lon-test-data',
                                  bundle_uuid=bundle_uuid,
                                  file_uuid_callback=file_uuid_callback)
            print(f'Successful upload. Bundle information is:\n{json.dumps(response, indent=4)}')
        else:
            log.warning('Skipping %s because metadata is missing', project)
# Run the upload only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()