Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
akotlar committed Nov 11, 2024
1 parent 82622e0 commit daa1399
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 27 deletions.
33 changes: 27 additions & 6 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -196,25 +196,46 @@ process MERGE_ALL_PHASED_VCF {
"""
}

// Publishes VCF files that were set aside ("skipped") upstream into
// "${params.output}/final_vcf" without modifying them.
//
// Input:  skipped_files - one or more VCF files staged into the task directory.
// Output: the same files, re-declared as outputs so that publishDir picks them up.
process COPY_SKIPPED_VCFS {
    label 'copy_skipped_vcfs'

    publishDir "${params.output}/final_vcf", mode: 'copy'

    input:
    path(skipped_files)

    output:
    // publishDir only publishes files declared as process outputs. Staged inputs
    // are excluded from outputs by default, hence includeInputs: true.
    path(skipped_files, includeInputs: true)

    script:
    """
    echo "Copying skipped VCF file: ${skipped_files}"
    """
}

workflow {
println "Welcome to ${params.service.name} (${workflow.manifest.version})"

if (params.imputation.enabled) {
INPUT_VALIDATION()

// Copy skipped VCFs to the 'final_vcf' subdirectory of the output directory
// COPY_SKIPPED_VCFS(
// INPUT_VALIDATION.out.skipped_files
// )

// Proceed with quality control on validated files
QUALITY_CONTROL(
INPUT_VALIDATION.out.validated_files,
INPUT_VALIDATION.out.validation_report,
site_files_ch.collect()
site_files_ch
)

// check if QC chunks exist in case QC failed
// Check if QC chunks exist in case QC failed
QUALITY_CONTROL.out.qc_metafiles.ifEmpty {
error 'QC step failed'
error 'QC step failed'
}

if (params.mode == 'imputation') {
phased_ch = QUALITY_CONTROL.out.qc_metafiles
def phased_ch = QUALITY_CONTROL.out.qc_metafiles

if (phasing_engine != 'no_phasing') {
PHASING(
QUALITY_CONTROL.out.qc_metafiles
Expand All @@ -228,13 +249,13 @@ workflow {
phased_ch
)

if (params.merge_results === true) {
if (params.merge_results == true) {
ENCRYPTION(
IMPUTATION.out.groupTuple()
)
}
} else {
if (params.merge_results === true) {
if (params.merge_results == true) {
MERGE_ALL_PHASED_VCF(
phased_ch.groupTuple()
)
Expand Down
76 changes: 55 additions & 21 deletions modules/local/input_validation/input_validation_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ process INPUT_VALIDATION_VCF {

output:
path("split_vcfs/*.vcf.gz"), emit: validated_files
path("skipped_vcfs/*.vcf.gz"), emit: skipped_files, optional: true
path("validation_report.txt"), emit: validation_report

script:
Expand All @@ -35,11 +36,13 @@ process INPUT_VALIDATION_VCF {
${refpanel_json}
EOF
# Initialize an array to hold split VCF files
# Initialize arrays to hold split VCF files
split_vcfs=()
vcf_files_to_validate=()
skipped_vcfs=()
# Create the directory for split VCF files
mkdir -p split_vcfs
# Create the directories for split VCF files
mkdir -p split_vcfs skipped_vcfs
# Process each VCF file
for vcf in ${vcf_files}; do
Expand Down Expand Up @@ -120,7 +123,7 @@ EOF
if [ "\$num_chromosomes" -eq 1 ]; then
# Only one chromosome, skip splitting and sorting
echo "Only one chromosome detected (\$chromosomes). Skipping split and sort."
output_vcf="split_vcfs/\${base_name}.vcf.gz"
output_vcf="split_vcfs/\${base_name}_\$chromosomes.vcf.gz"
cp "\$vcf" "\$output_vcf"
# Index the output VCF if necessary
if [ ! -f "\$output_vcf.csi" ] && [ ! -f "\$output_vcf.tbi" ]; then
Expand All @@ -144,25 +147,56 @@ EOF
done
# Now we can use the split_vcfs array
echo "Validated VCF files:"
echo "All split VCF files:"
printf '%s\\n' "\${split_vcfs[@]}"
# Run the validation program
java -Xmx${avail_mem}M -jar /opt/imputationserver-utils/imputationserver-utils.jar \\
validate \\
--population ${params.population} \\
--phasing ${params.phasing.engine} \\
--reference reference-panel.json \\
--build ${params.build} \\
--mode ${params.mode} \\
--minSamples ${params.min_samples} \\
--maxSamples ${params.max_samples} \\
--report validation_report.txt \\
--no-index \\
--contactName "${contactName}" \\
--contactEmail "${contactEmail}" \\
"\${split_vcfs[@]}"
exit_code_a=\$?
# Initialize arrays for files to validate and skipped files
vcf_files_to_validate=()
skipped_vcfs=()
# Filter the split_vcfs array to only include files matching the pattern
for f in "\${split_vcfs[@]}"; do
base=\$(basename "\$f")
if [[ "\$base" =~ _([1-9]|1[0-9]|2[0-2]|X|chr([1-9]|1[0-9]|2[0-2]|X))\\.vcf\\.gz\$ ]]; then
vcf_files_to_validate+=("\$f")
else
skipped_vcfs+=("\$f")
fi
done
# Move every skipped VCF, together with any sidecar files sharing its path as a
# prefix (e.g. .csi/.tbi indexes), into skipped_vcfs/.
# The glob must stay OUTSIDE the quotes: a quoted "*" is taken literally and mv
# would look for a file actually named "<file>.*".
for f in "\${skipped_vcfs[@]}"; do
    mv "\$f"* skipped_vcfs/
done
echo "VCF files to validate:"
printf '%s\\n' "\${vcf_files_to_validate[@]}"
echo "Skipped VCF files:"
printf '%s\\n' "\${skipped_vcfs[@]}"
# Run the validation program only if there are files to validate
if [ \${#vcf_files_to_validate[@]} -gt 0 ]; then
java -Xmx${avail_mem}M -jar /opt/imputationserver-utils/imputationserver-utils.jar \\
validate \\
--population ${params.population} \\
--phasing ${params.phasing.engine} \\
--reference reference-panel.json \\
--build ${params.build} \\
--mode ${params.mode} \\
--minSamples ${params.min_samples} \\
--maxSamples ${params.max_samples} \\
--report validation_report.txt \\
--no-index \\
--contactName "${contactName}" \\
--contactEmail "${contactEmail}" \\
"\${vcf_files_to_validate[@]}"
exit_code_a=\$?
else
echo "No VCF files to validate."
# Create an empty validation report
touch validation_report.txt
exit_code_a=0
fi
cat validation_report.txt
exit \$exit_code_a
Expand Down

0 comments on commit daa1399

Please sign in to comment.