Skip to content

Commit

Permalink
Add --no-PG option for not outputting the @pg SAM header
Browse files Browse the repository at this point in the history
This is useful for our tests where we need output to be consistent even if
the version number of the program changes.

The name of the option is chosen to be consistent with "samtools view".
  • Loading branch information
marcelm committed Nov 28, 2023
1 parent 6fd4c5d commit 9631fc6
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 11 deletions.
2 changes: 2 additions & 0 deletions src/cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) {
args::Flag no_progress(parser, "no-progress", "Disable progress report (enabled by default if output is a terminal)", {"no-progress"});
args::Flag eqx(parser, "eqx", "Emit =/X instead of M CIGAR operations", {"eqx"});
args::Flag x(parser, "x", "Only map reads, no base level alignment (produces PAF file)", {'x'});
args::Flag no_pg(parser, "no-PG", "Do not output PG header", {"no-PG"});
args::Flag U(parser, "U", "Suppress output of unmapped reads", {'U'});
args::Flag interleaved(parser, "interleaved", "Interleaved input", {"interleaved"});
args::ValueFlag<std::string> rgid(parser, "ID", "Read group ID", {"rg-id"});
Expand Down Expand Up @@ -91,6 +92,7 @@ CommandLineOptions parse_command_line_arguments(int argc, char **argv) {
if (v) { opt.verbose = true; }
if (details) { opt.details = true; }
if (no_progress) { opt.show_progress = false; }
if (no_pg) { opt.pg_header = false; }
if (eqx) { opt.cigar_eqx = true; }
if (x) { opt.is_sam_out = false; }
if (U) { opt.output_unmapped = false; }
Expand Down
1 change: 1 addition & 0 deletions src/cmdline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct CommandLineOptions {
bool only_gen_index { false };
bool use_index { false };
bool is_sam_out { true };
bool pg_header { true };
bool output_unmapped { true };
int max_secondary { 0 };

Expand Down
16 changes: 14 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ static Logger& logger = Logger::get();
/*
* Return formatted SAM header as a string
*/
std::string sam_header(const References& references, const std::string& read_group_id, const std::vector<std::string>& read_group_fields, const std::string& cmd_line) {
std::string sam_header(
const References& references,
const std::string& read_group_id,
const std::vector<std::string>& read_group_fields
) {
std::stringstream out;
out << "@HD\tVN:1.6\tSO:unsorted\n";
for (size_t i = 0; i < references.size(); ++i) {
Expand All @@ -46,6 +50,11 @@ std::string sam_header(const References& references, const std::string& read_gro
}
out << '\n';
}
return out.str();
}

std::string pg_header(const std::string& cmd_line) {
std::stringstream out;
out << "@PG\tID:strobealign\tPN:strobealign\tVN:" << version_string() << "\tCL:" << cmd_line << std::endl;
return out.str();
}
Expand Down Expand Up @@ -280,7 +289,10 @@ int run_strobealign(int argc, char **argv) {
cmd_line << argv[i] << " ";
}

out << sam_header(references, opt.read_group_id, opt.read_group_fields, cmd_line.str());
out << sam_header(references, opt.read_group_id, opt.read_group_fields);
if (opt.pg_header) {
out << pg_header(cmd_line.str());
}
}

std::vector<AlignmentStatistics> log_stats_vec(opt.n_threads);
Expand Down
18 changes: 9 additions & 9 deletions tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@ ${strobealign} -h > /dev/null
samtools --version > /dev/null

# Single-end SAM
${strobealign} --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library -v tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > phix.se.sam
${strobealign} --no-PG --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library -v tests/phix.fasta tests/phix.1.fastq > phix.se.sam
diff tests/phix.se.sam phix.se.sam
rm phix.se.sam

# Single-end SAM, M CIGAR operators
${strobealign} tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > phix.se.m.sam
${strobealign} --no-PG tests/phix.fasta tests/phix.1.fastq > phix.se.m.sam
if samtools view phix.se.m.sam | cut -f6 | grep -q '[X=]'; then false; fi

rm phix.se.m.sam

# Paired-end SAM
${strobealign} --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library tests/phix.fasta tests/phix.1.fastq tests/phix.2.fastq | grep -v '^@PG' > phix.pe.sam
${strobealign} --no-PG --eqx --chunk-size 3 --rg-id 1 --rg SM:sample --rg LB:library tests/phix.fasta tests/phix.1.fastq tests/phix.2.fastq > phix.pe.sam
diff tests/phix.pe.sam phix.pe.sam
rm phix.pe.sam

Expand All @@ -57,9 +57,9 @@ diff tests/phix.pe.paf phix.pe.paf
rm phix.pe.paf

# Build a separate index
${strobealign} -r 150 tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > without-sti.sam
${strobealign} --no-PG -r 150 tests/phix.fasta tests/phix.1.fastq > without-sti.sam
${strobealign} -r 150 -i tests/phix.fasta
${strobealign} -r 150 --use-index tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-sti.sam
${strobealign} --no-PG -r 150 --use-index tests/phix.fasta tests/phix.1.fastq > with-sti.sam
diff without-sti.sam with-sti.sam
rm without-sti.sam with-sti.sam

Expand All @@ -73,17 +73,17 @@ ${strobealign} --details tests/phix.fasta tests/phix.1.fastq 2> /dev/null | samt
# Secondary alignments

# No secondary alignments on phix
${strobealign} tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > no-secondary.sam
${strobealign} -N 5 tests/phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-secondary.sam
${strobealign} --no-PG tests/phix.fasta tests/phix.1.fastq > no-secondary.sam
${strobealign} --no-PG -N 5 tests/phix.fasta tests/phix.1.fastq > with-secondary.sam
test $(samtools view -f 0x100 -c with-secondary.sam) -eq 0
rm no-secondary.sam with-secondary.sam

# Secondary alignments for repeated phiX
cp tests/phix.fasta repeated-phix.fasta
echo ">repeated_NC_001422" >> repeated-phix.fasta
sed 1d tests/phix.fasta >> repeated-phix.fasta
${strobealign} repeated-phix.fasta tests/phix.1.fastq | grep -v '^@PG' > no-secondary.sam
${strobealign} -N 5 repeated-phix.fasta tests/phix.1.fastq | grep -v '^@PG' > with-secondary.sam
${strobealign} --no-PG repeated-phix.fasta tests/phix.1.fastq > no-secondary.sam
${strobealign} --no-PG -N 5 repeated-phix.fasta tests/phix.1.fastq > with-secondary.sam
test $(samtools view -f 0x100 -c with-secondary.sam) -gt 0

# Removing secondary alignments gives same result as not producing them in the first place
Expand Down

0 comments on commit 9631fc6

Please sign in to comment.