Skip to content

Commit

Permalink
Merge pull request #9 from 4dn-dcic/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
SooLee authored Sep 14, 2017
2 parents 3ed4a50 + 91e8fc7 commit d555212
Show file tree
Hide file tree
Showing 6 changed files with 335 additions and 5 deletions.
33 changes: 28 additions & 5 deletions Benchmark/Benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ def benchmark(app_name, input_json, raise_error=False):
return(fastqc_0_11_4_1(input_json))
elif app_name == 'bwa-mem':
return(bwa_mem(input_json))
elif app_name == 'pairsam-parse-sort':
return(pairsam_parse_sort(input_json))
else:
if raise_error:
raise AppNameUnavailableException
Expand Down Expand Up @@ -74,7 +76,6 @@ def fastqc_0_11_4_1(input_json):
return(r.as_dict())


# bwa_mem is still a draft
def bwa_mem(input_json):
assert 'input_size_in_bytes' in input_json
assert 'fastq1' in input_json.get('input_size_in_bytes')
Expand All @@ -91,26 +92,48 @@ def bwa_mem(input_json):
input_sizes = input_json.get('input_size_in_bytes')
data_input_size = input_sizes.get('fastq1') + input_sizes.get('fastq2')
total_input_size = data_input_size + input_sizes.get('bwa_index')
output_bam_size = data_input_size * 1.5
output_bam_size = data_input_size * 2
intermediate_index_size = input_sizes.get('bwa_index') * 2
copied_input_size = data_input_size * 5 # copied and unzipped
copied_input_size = data_input_size * 7 # copied and unzipped
total_intermediate_size \
= intermediate_index_size + output_bam_size + copied_input_size
total_output_size = output_bam_size
additional_size_in_gb = 4.5
additional_size_in_gb = 10

total_file_size_in_bp \
= total_input_size + total_intermediate_size + total_output_size
total_size = total_file_size_in_bp / GB_IN_BYTES + additional_size_in_gb

# mem
mem = input_sizes.get('bwa_index') * 4 / MB_IN_BYTES
mem = input_sizes.get('bwa_index') * 4 / MB_IN_BYTES + (nthreads * 500)

r = BenchmarkResult(size=total_size, mem=mem, cpu=nthreads)

return(r.as_dict())


def pairsam_parse_sort(input_json):
    """Estimate resource requirements for the 'pairsam-parse-sort' app.

    Args:
        input_json: dict that must contain 'input_size_in_bytes', itself a
            dict with a 'bam' entry (size of the input BAM in bytes). An
            optional 'parameters' dict may provide 'nThreads'; 8 is used
            when it is absent.

    Returns:
        The dict produced by ``BenchmarkResult.as_dict()`` for the estimated
        total size (GB), memory and cpu count.

    Raises:
        AssertionError: if 'input_size_in_bytes' or its 'bam' entry is
            missing.
    """
    assert 'input_size_in_bytes' in input_json
    in_size = input_json.get('input_size_in_bytes')
    assert 'bam' in in_size

    # cpu
    nthreads = 8  # default from cwl
    params = input_json.get('parameters', {})
    if 'nThreads' in params:
        nthreads = params.get('nThreads')

    # Disk, in GB. float() forces a true division: with an integer byte count
    # and an integer GB_IN_BYTES, Python 2 would floor every sub-GB input to
    # 0 and the estimate would collapse to nothing.
    # NOTE(review): GB_IN_BYTES is defined elsewhere in this module; this is
    # a no-op if it is already a float or the code only runs on Python 3.
    bamsize = float(in_size.get('bam')) / GB_IN_BYTES
    pairsamsize = bamsize * 10  # very rough number
    # Temporary files are budgeted at the same size as the output.
    tmp_pairsamsize = pairsamsize
    total_size = bamsize + pairsamsize + tmp_pairsamsize

    mem = 48000  # very rough number; presumably MB -- TODO confirm

    # Twice the requested thread count is asked for as cpus.
    r = BenchmarkResult(size=total_size, mem=mem, cpu=nthreads * 2)

    return r.as_dict()


def get_aws_ec2_info_file():
    """Return the path to the EC2 instance comparison CSV.

    The file is looked up in the 'aws' subdirectory next to this module.
    """
    module_dir = os.path.dirname(__file__)
    csv_name = "Amazon EC2 Instance Comparison.csv"
    return os.path.join(module_dir, "aws", csv_name)
Expand Down
80 changes: 80 additions & 0 deletions cwl_awsem/pairsam-filter.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"baseCommand": [
"pipeline_Sorted2noDups.sh"
],
"inputs": [
{
"type": [
"null",
"File"
],
"inputBinding": {
"separate": true,
"position": 1
},
"id": "#pairsam"
},
{
"default": "out",
"type": [
"null",
"string"
],
"inputBinding": {
"separate": true,
"position": 2
},
"id": "#outprefix"
}
],
"outputs": [
{
"outputBinding": {
"glob": "$(inputs.outprefix + '.lossless.bam')"
},
"type": [
"null",
"File"
],
"id": "#lossless_bamfile"
},
{
"outputBinding": {
"glob": "$(inputs.outprefix + '.dedup.pairs.gz')"
},
"type": [
"null",
"File"
],
"id": "#dedup_pairs"
},
{
"outputBinding": {
"glob": "$(inputs.outprefix + '.unmapped.sam.pairs.gz')"
},
"type": [
"null",
"File"
],
"id": "#unmapped_pairsam"
}
],
"cwlVersion": "draft-3",
"hints": [
{
"dockerPull": "duplexa/4dn-hic:v35",
"class": "DockerRequirement"
}
],
"requirements": [
{
"class": "InlineJavascriptRequirement"
}
],
"arguments": [],
"class": "CommandLineTool"
}
56 changes: 56 additions & 0 deletions cwl_awsem/pairsam-markasdup.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"baseCommand": [
"pairsam-markasdup.sh"
],
"inputs": [
{
"type": [
"null",
"File"
],
"inputBinding": {
"separate": true,
"position": 1
},
"id": "#pairsam"
},
{
"default": "out",
"type": [
"null",
"string"
],
"inputBinding": {
"separate": true,
"position": 2
},
"id": "#outprefix"
}
],
"outputs": [
{
"outputBinding": {
"glob": "$(inputs.outprefix + '.marked.sam.pairs.gz')"
},
"type": [
"null",
"File"
],
"id": "#out_markedpairs"
}
],
"cwlVersion": "draft-3",
"hints": [
{
"dockerPull": "duplexa/4dn-hic:v40",
"class": "DockerRequirement"
}
],
"requirements": [
{
"class": "InlineJavascriptRequirement"
}
],
"arguments": [],
"class": "CommandLineTool"
}
72 changes: 72 additions & 0 deletions cwl_awsem/pairsam-merge.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"baseCommand": [
"pairsam-merge.sh"
],
"inputs": [
{
"default": "out",
"type": [
"null",
"string"
],
"inputBinding": {
"separate": true,
"position": 1
},
"id": "#outprefix"
},
{
"type": [
"int"
],
"id": "#nThreads",
"inputBinding": {
"position": 2,
"separate": true
},
"default": 8
},
{
"id": "#input_pairs",
"inputBinding": {
"itemSeparator": " ",
"position": 3,
"separate": true
},
"type": [
"null",
{
"items": "File",
"type": "array"
}
]
}

],
"outputs": [
{
"outputBinding": {
"glob": "$(inputs.outprefix + '.merged.sam.pairs.gz')"
},
"type": [
"null",
"File"
],
"id": "#merged_pairs"
}
],
"cwlVersion": "draft-3",
"hints": [
{
"dockerPull": "duplexa/4dn-hic:v40",
"class": "DockerRequirement"
}
],
"requirements": [
{
"class": "InlineJavascriptRequirement"
}
],
"arguments": [],
"class": "CommandLineTool"
}
90 changes: 90 additions & 0 deletions cwl_awsem/pairsam-parse-sort.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
{
"baseCommand": [
"pairsam-parse-sort.sh"
],
"inputs": [
{
"type": [
"null",
"File"
],
"inputBinding": {
"separate": true,
"position": 1
},
"id": "#bam"
},
{
"type": [
"null",
"File"
],
"inputBinding": {
"separate": true,
"position": 3
},
"id": "#chromsize"
},
{
"default": "out",
"type": [
"null",
"string"
],
"inputBinding": {
"separate": true,
"position": 2
},
"id": "#outprefix"
},
{
"type": [
"int"
],
"id": "#nThreads",
"inputBinding": {
"position": 4,
"separate": true
},
"default": 8
},
{ "default": "lz4c",
"type": [
"null",
"string"
],
"inputBinding": {
"separate": true,
"position": 5
},
"id": "#compress_programm"
}

],
"outputs": [
{
"outputBinding": {
"glob": "$(inputs.outprefix + '.sam.pairs.gz')"
},
"type": [
"null",
"File"
],
"id": "#out_pairs"
}
],
"cwlVersion": "draft-3",
"hints": [
{
"dockerPull": "duplexa/4dn-hic:v40",
"class": "DockerRequirement"
}
],
"requirements": [
{
"class": "InlineJavascriptRequirement"
}
],
"arguments": [],
"class": "CommandLineTool"
}
9 changes: 9 additions & 0 deletions tests/tests_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ def test_benchmark3(self):
assert res['aws']['recommended_instance_type'] == 't2.xlarge'
print(res)

def test_benchmark4(self):
    """pairsam-parse-sort with a 1e9-byte BAM and 16 threads -> c4.8xlarge."""
    input_json = {
        'input_size_in_bytes': {'bam': 1000000000},
        'parameters': {'nThreads': 16},
    }
    res = B.benchmark('pairsam-parse-sort', input_json)
    assert 'aws' in res
    aws_result = res['aws']
    assert 'recommended_instance_type' in aws_result
    assert aws_result['recommended_instance_type'] == 'c4.8xlarge'
    print(res)

def test_benchmark_none1(self):
input_json = {'input_size_in_bytes': {'fastq1': 93520,
'fastq2': 97604,
Expand Down

0 comments on commit d555212

Please sign in to comment.