# Patch summary: add a --no-PG option that suppresses the @PG line of the SAM header.
#
# src/cmdline.cpp: registers the args::Flag "no-PG" ("Do not output PG header") and,
#   when it is given, sets opt.pg_header = false.
# src/cmdline.hpp: adds the field `bool pg_header { true }` to CommandLineOptions
#   (defaults to true).
# src/main.cpp: sam_header() loses its cmd_line parameter and no longer writes the
#   @PG line; the new pg_header(cmd_line) returns that line (ID and PN "strobealign",
#   VN from version_string(), CL from cmd_line); run_strobealign() writes it right
#   after sam_header() only when opt.pg_header is true.
# tests/run.sh: the test invocations that previously filtered their output with
#   `| grep -v '^@PG'` now pass --no-PG instead.
#
# NOTE(review): this copy looks whitespace-collapsed (the lines of each hunk are
# joined by spaces) and template arguments seem to be missing (e.g. "std::vector&",
# "args::ValueFlag rgid") - presumably lost in extraction; verify against the
# original commit before applying.
diff --git a/src/cmdline.cpp b/src/cmdline.cpp index 8328ac36..10275c59 100644 --- a/src/cmdline.cpp +++ b/src/cmdline.cpp @@ -28,6 +28,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) { args::Flag no_progress(parser, "no-progress", "Disable progress report (enabled by default if output is a terminal)", {"no-progress"}); args::Flag eqx(parser, "eqx", "Emit =/X instead of M CIGAR operations", {"eqx"}); args::Flag x(parser, "x", "Only map reads, no base level alignment (produces PAF file)", {'x'}); + args::Flag no_pg(parser, "no-PG", "Do not output PG header", {"no-PG"}); args::Flag U(parser, "U", "Suppress output of unmapped reads", {'U'}); args::Flag interleaved(parser, "interleaved", "Interleaved input", {"interleaved"}); args::ValueFlag rgid(parser, "ID", "Read group ID", {"rg-id"}); @@ -91,6 +92,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) { if (v) { opt.verbose = true; } if (details) { opt.details = true; } if (no_progress) { opt.show_progress = false; } + if (no_pg) { opt.pg_header = false; } if (eqx) { opt.cigar_eqx = true; } if (x) { opt.is_sam_out = false; } if (U) { opt.output_unmapped = false; } diff --git a/src/cmdline.hpp b/src/cmdline.hpp index acb9e0b0..f3bf5265 100644 --- a/src/cmdline.hpp +++ b/src/cmdline.hpp @@ -22,6 +22,7 @@ struct CommandLineOptions { bool only_gen_index { false }; bool use_index { false }; bool is_sam_out { true }; + bool pg_header { true }; bool output_unmapped { true }; int max_secondary { 0 }; diff --git a/src/main.cpp b/src/main.cpp index 158e4954..e84587b1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -33,7 +33,11 @@ static Logger& logger = Logger::get(); /* * Return formatted SAM header as a string */ -std::string sam_header(const References& references, const std::string& read_group_id, const std::vector& read_group_fields, const std::string& cmd_line) { +std::string sam_header( + const References& references, + const std::string& read_group_id, + const 
std::vector& read_group_fields +) { std::stringstream out; out << "@HD\tVN:1.6\tSO:unsorted\n"; for (size_t i = 0; i < references.size(); ++i) { @@ -46,6 +50,11 @@ std::string sam_header(const References& references, const std::string& read_gro } out << '\n'; } + return out.str(); +} + +std::string pg_header(const std::string& cmd_line) { + std::stringstream out; out << "@PG\tID:strobealign\tPN:strobealign\tVN:" << version_string() << "\tCL:" << cmd_line << std::endl; return out.str(); } @@ -280,7 +289,10 @@ int run_strobealign(int argc, char **argv) { cmd_line << argv[i] << " "; } - out << sam_header(references, opt.read_group_id, opt.read_group_fields, cmd_line.str()); + out << sam_header(references, opt.read_group_id, opt.read_group_fields); + if (opt.pg_header) { + out << pg_header(cmd_line.str()); + } } std::vector log_stats_vec(opt.n_threads); diff --git a/tests/run.sh b/tests/run.sh index 097e64d1..dbdcb541 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -26,18 +26,18 @@ ${strobealign} -h > /dev/null samtools --version > /dev/null # Single-end SAM -${strobealign} --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library -v tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > phix.se.sam +${strobealign} --no-PG --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library -v tests/phix.fasta tests/phix.1.fastq > phix.se.sam diff tests/phix.se.sam phix.se.sam rm phix.se.sam # Single-end SAM, M CIGAR operators -${strobealign} tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > phix.se.m.sam +${strobealign} --no-PG tests/phix.fasta tests/phix.1.fastq > phix.se.m.sam if samtools view phix.se.m.sam | cut -f6 | grep -q '[X=]'; then false; fi rm phix.se.m.sam # Paired-end SAM -${strobealign} --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library tests/phix.fasta tests/phix.1.fastq tests/phix.2.fastq | grep -v '^@PG' > phix.pe.sam +${strobealign} --no-PG --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library tests/phix.fasta tests/phix.1.fastq 
tests/phix.2.fastq > phix.pe.sam diff tests/phix.pe.sam phix.pe.sam rm phix.pe.sam @@ -57,9 +57,9 @@ diff tests/phix.pe.paf phix.pe.paf rm phix.pe.paf # Build a separate index -${strobealign} -r 150 tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > without-sti.sam +${strobealign} --no-PG -r 150 tests/phix.fasta tests/phix.1.fastq > without-sti.sam ${strobealign} -r 150 -i tests/phix.fasta -${strobealign} -r 150 --use-index tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-sti.sam +${strobealign} --no-PG -r 150 --use-index tests/phix.fasta tests/phix.1.fastq > with-sti.sam diff without-sti.sam with-sti.sam rm without-sti.sam with-sti.sam @@ -73,8 +73,8 @@ ${strobealign} --details tests/phix.fasta tests/phix.1.fastq 2> /dev/null | samt # Secondary alignments # No secondary alignments on phix -${strobealign} tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > no-secondary.sam -${strobealign} -N 5 tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-secondary.sam +${strobealign} --no-PG tests/phix.fasta tests/phix.1.fastq > no-secondary.sam +${strobealign} --no-PG -N 5 tests/phix.fasta tests/phix.1.fastq > with-secondary.sam test $(samtools view -f 0x100 -c with-secondary.sam) -eq 0 rm no-secondary.sam with-secondary.sam @@ -82,8 +82,8 @@ rm no-secondary.sam with-secondary.sam cp tests/phix.fasta repeated-phix.fasta echo ">repeated_NC_001422" >> repeated-phix.fasta sed 1d tests/phix.fasta >> repeated-phix.fasta -${strobealign} repeated-phix.fasta tests/phix.1.fastq | grep -v '^@PG' > no-secondary.sam -${strobealign} -N 5 repeated-phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-secondary.sam +${strobealign} --no-PG repeated-phix.fasta tests/phix.1.fastq > no-secondary.sam +${strobealign} --no-PG -N 5 repeated-phix.fasta tests/phix.1.fastq > with-secondary.sam test $(samtools view -f 0x100 -c with-secondary.sam) -gt 0 # Removing secondary alignments gives same result as not producing them in the first place