From 9631fc6b8135051b5325658145958b41dee38f18 Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Tue, 28 Nov 2023 11:10:25 +0100 Subject: [PATCH] Add --no-PG option for not outputting the @PG SAM header This is useful for our tests where we need output to be consistent even if the version number of the program changes. The name of the option is chosen to be consistent with "samtools view". --- src/cmdline.cpp | 2 ++ src/cmdline.hpp | 1 + src/main.cpp | 16 ++++++++++++++-- tests/run.sh | 18 +++++++++--------- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/cmdline.cpp b/src/cmdline.cpp index 8328ac36..10275c59 100644 --- a/src/cmdline.cpp +++ b/src/cmdline.cpp @@ -28,6 +28,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) { args::Flag no_progress(parser, "no-progress", "Disable progress report (enabled by default if output is a terminal)", {"no-progress"}); args::Flag eqx(parser, "eqx", "Emit =/X instead of M CIGAR operations", {"eqx"}); args::Flag x(parser, "x", "Only map reads, no base level alignment (produces PAF file)", {'x'}); + args::Flag no_pg(parser, "no-PG", "Do not output PG header", {"no-PG"}); args::Flag U(parser, "U", "Suppress output of unmapped reads", {'U'}); args::Flag interleaved(parser, "interleaved", "Interleaved input", {"interleaved"}); args::ValueFlag rgid(parser, "ID", "Read group ID", {"rg-id"}); @@ -91,6 +92,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) { if (v) { opt.verbose = true; } if (details) { opt.details = true; } if (no_progress) { opt.show_progress = false; } + if (no_pg) { opt.pg_header = false; } if (eqx) { opt.cigar_eqx = true; } if (x) { opt.is_sam_out = false; } if (U) { opt.output_unmapped = false; } diff --git a/src/cmdline.hpp b/src/cmdline.hpp index acb9e0b0..f3bf5265 100644 --- a/src/cmdline.hpp +++ b/src/cmdline.hpp @@ -22,6 +22,7 @@ struct CommandLineOptions { bool only_gen_index { false }; bool use_index { false }; bool is_sam_out { true }; + bool pg_header { true }; bool output_unmapped { true }; int max_secondary { 0 }; diff --git a/src/main.cpp b/src/main.cpp index 158e4954..e84587b1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -33,7 +33,11 @@ static Logger& logger = Logger::get(); /* * Return formatted SAM header as a string */ -std::string sam_header(const References& references, const std::string& read_group_id, const std::vector& read_group_fields, const std::string& cmd_line) { +std::string sam_header( + const References& references, + const std::string& read_group_id, + const std::vector& read_group_fields +) { std::stringstream out; out << "@HD\tVN:1.6\tSO:unsorted\n"; for (size_t i = 0; i < references.size(); ++i) { @@ -46,6 +50,11 @@ std::string sam_header(const References& references, const std::string& read_gro } out << '\n'; } + return out.str(); +} + +std::string pg_header(const std::string& cmd_line) { + std::stringstream out; out << "@PG\tID:strobealign\tPN:strobealign\tVN:" << version_string() << "\tCL:" << cmd_line << std::endl; return out.str(); } @@ -280,7 +289,10 @@ int run_strobealign(int argc, char **argv) { cmd_line << argv[i] << " "; } - out << sam_header(references, opt.read_group_id, opt.read_group_fields, cmd_line.str()); + out << sam_header(references, opt.read_group_id, opt.read_group_fields); + if (opt.pg_header) { + out << pg_header(cmd_line.str()); + } } std::vector log_stats_vec(opt.n_threads); diff --git a/tests/run.sh b/tests/run.sh index 097e64d1..dbdcb541 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -26,18 +26,18 @@ ${strobealign} -h > /dev/null samtools --version > /dev/null # Single-end SAM -${strobealign} --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library -v tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > phix.se.sam +${strobealign} --no-PG --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library -v tests/phix.fasta tests/phix.1.fastq > phix.se.sam diff tests/phix.se.sam phix.se.sam rm phix.se.sam # Single-end SAM, M CIGAR operators -${strobealign} tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > phix.se.m.sam +${strobealign} --no-PG tests/phix.fasta tests/phix.1.fastq > phix.se.m.sam if samtools view phix.se.m.sam | cut -f6 | grep -q '[X=]'; then false; fi rm phix.se.m.sam # Paired-end SAM -${strobealign} --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library tests/phix.fasta tests/phix.1.fastq tests/phix.2.fastq | grep -v '^@PG' > phix.pe.sam +${strobealign} --no-PG --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library tests/phix.fasta tests/phix.1.fastq tests/phix.2.fastq > phix.pe.sam diff tests/phix.pe.sam phix.pe.sam rm phix.pe.sam @@ -57,9 +57,9 @@ diff tests/phix.pe.paf phix.pe.paf rm phix.pe.paf # Build a separate index -${strobealign} -r 150 tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > without-sti.sam +${strobealign} --no-PG -r 150 tests/phix.fasta tests/phix.1.fastq > without-sti.sam ${strobealign} -r 150 -i tests/phix.fasta -${strobealign} -r 150 --use-index tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-sti.sam +${strobealign} --no-PG -r 150 --use-index tests/phix.fasta tests/phix.1.fastq > with-sti.sam diff without-sti.sam with-sti.sam rm without-sti.sam with-sti.sam @@ -73,8 +73,8 @@ ${strobealign} --details tests/phix.fasta tests/phix.1.fastq 2> /dev/null | samt # Secondary alignments # No secondary alignments on phix -${strobealign} tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > no-secondary.sam -${strobealign} -N 5 tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-secondary.sam +${strobealign} --no-PG tests/phix.fasta tests/phix.1.fastq > no-secondary.sam +${strobealign} --no-PG -N 5 tests/phix.fasta tests/phix.1.fastq > with-secondary.sam test $(samtools view -f 0x100 -c with-secondary.sam) -eq 0 rm no-secondary.sam with-secondary.sam @@ -82,8 +82,8 @@ rm no-secondary.sam with-secondary.sam cp tests/phix.fasta repeated-phix.fasta echo ">repeated_NC_001422" >> repeated-phix.fasta sed 1d tests/phix.fasta >> repeated-phix.fasta -${strobealign} repeated-phix.fasta tests/phix.1.fastq | grep -v '^@PG' > no-secondary.sam -${strobealign} -N 5 repeated-phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-secondary.sam +${strobealign} --no-PG repeated-phix.fasta tests/phix.1.fastq > no-secondary.sam +${strobealign} --no-PG -N 5 repeated-phix.fasta tests/phix.1.fastq > with-secondary.sam test $(samtools view -f 0x100 -c with-secondary.sam) -gt 0 # Removing secondary alignments gives same result as not producing them in the first place