forked from ICGC-TCGA-PanCancer/vcf-uploader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsynapse_upload_vcf
executable file
·90 lines (72 loc) · 3.22 KB
/
synapse_upload_vcf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/python
import argparse
import sys
import os
import json
import synapseclient
from synapseclient import File, Activity, Wiki
def build_parser():
    """Create the command-line argument parser for the upload script.

    The parser defines a required ``--parentId`` option, an optional
    ``--url`` option (``None`` unless supplied) and a single positional
    ``json_file`` argument that argparse opens for reading.

    Returns the configured ``argparse.ArgumentParser``.
    """
    parent_help = 'Id for Folder where to store files (example DKFZ: syn2898426, Sanger: syn2897245)'
    url_help = ('Overides the url/path where file is uploaded. '
                'Ex: sftp://tcgaftps.nci.nih.gov/tcgapancan/pancan/variants/')

    cli = argparse.ArgumentParser(
        description='Uploads files to Synapse by using a json document for meta data.')
    cli.add_argument('--parentId', dest='parentId', required=True, help=parent_help)
    cli.add_argument('--url', dest='url', default=None, help=url_help)
    cli.add_argument('json_file', type=argparse.FileType('r'),
                     help='json file with parameters')
    return cli
def parse_vcf_filename(path):
    """Split a result file name into its dot-separated annotation fields.

    Only the last component of *path* is examined.  It is expected to look
    like ``<sample_id>.<pipeline_version>.<date>.<call_type>.<data_type>``
    followed by one or more extensions, e.g. ``.vcf.gz`` or ``.vcf.gz.tbi``.

    Returns a tuple ``(sample_id, pipeline_version, date, call_type,
    data_sub_type, file_type)`` where ``file_type`` is the last extension
    that is not ``gz``.

    Raises ValueError if the name has fewer than five leading fields or has
    no extension other than ``gz``.
    """
    filename = os.path.split(path)[-1]
    fields = filename.split('.')
    # Everything after the five leading fields is an extension; 'gz' only
    # marks compression and is never reported as the file type.
    extensions = [ext for ext in fields[5:] if ext != 'gz']
    if not extensions:
        raise ValueError(
            'Cannot derive annotations from file name %r: expected '
            '<sample_id>.<pipeline_version>.<date>.<call_type>.<data_type>.<file_type>[.gz]'
            % filename)
    sample_id, pipeline_version, date, call_type, data_sub_type = fields[:5]
    return (sample_id, pipeline_version, date, call_type, data_sub_type,
            extensions[-1])


def main():
    """Upload the files listed in the JSON document to Synapse.

    Reads the command line via ``build_parser()``, loads the JSON parameter
    document, logs in to Synapse and then, for every entry of
    ``params['files']``: creates a File entity under ``--parentId``
    (uploading through sftp first when ``--url`` is given), annotates it,
    stores it, attaches the provenance activity and creates or updates the
    description wiki from ``params['wiki_content']``.

    Raises ValueError (from ``parse_vcf_filename``) when a file name does not
    follow the expected naming scheme; errors from the Synapse client other
    than a 404 on the wiki lookup propagate unchanged.
    """
    args = build_parser().parse_args()
    # argparse opened the file for us; close it as soon as it is parsed.
    with args.json_file as json_handle:
        params = json.load(json_handle)

    # Relies on cached credentials.  To cache them, log in once with:
    #   syn.login('username', 'secret_password', rememberMe=True)
    syn = synapseclient.login()

    # Build up the provenance record shared by the uploaded files.
    provenance = Activity(name=params['annotations']['workflow_name'],
                          description='Variant calling for indels, SNVs, and copy numbers.',
                          used=params['used_urls'],
                          executed=params['executed_urls'])

    # Step through the list of files and store each one in Synapse.
    for path in params['files']:
        # Parse the name first so a badly named file fails before anything
        # is transferred for it.
        (sample_id, pipeline_version, date, call_type,
         data_sub_type, file_type) = parse_vcf_filename(path)

        if args.url is not None:
            # NOTE(review): _sftpUploadFile is a private synapseclient
            # method; confirm it exists in the installed client version.
            url = syn._sftpUploadFile(path, args.url)
            f = File(url, parentId=args.parentId)
        else:
            f = File(path, parentId=args.parentId)
        # Parenthesised form prints the same on Python 2 and Python 3.
        print(f.path)

        # Set storage option if the entity path is a URL.
        if synapseclient.utils.is_url(f.path):
            f['synapseStore'] = False

        # Annotations shared by all files come from the JSON document.
        f.annotations = params['annotations']

        # File specific annotations; the md5 is computed from the local path.
        f.sample_id = sample_id
        f.pipeline_version = pipeline_version
        f.date = date
        f.call_type = call_type
        f.dataSubType = data_sub_type
        f.dataType = 'DNA'
        f.disease = 'Cancer'
        f.fileType = file_type
        f.file_md5 = synapseclient.utils.md5_for_file(path).hexdigest()

        # Persist up to Synapse.  The value returned by setProvenance is
        # kept so the following files are given that same activity.
        f = syn.store(f)
        provenance = syn.setProvenance(f, provenance)

        # Add or refresh the description wiki.
        try:
            wiki = syn.getWiki(f)
        except synapseclient.exceptions.SynapseHTTPError as err:
            if err.response.status_code != 404:
                # Bare raise keeps the original traceback.
                raise
            # A 404 is treated as "no wiki yet": create a new one.
            wiki = Wiki(title=params['wiki_content']['title'], owner=f,
                        markdown=params['wiki_content']['description'])
        else:
            wiki.title = params['wiki_content']['title']
            wiki.owner = f
            wiki.markdown = params['wiki_content']['description']
        syn.store(wiki)


if __name__ == '__main__':
    main()