diff --git a/README.md b/README.md index 745527a..1e7cf81 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ Usage: ./bin/TPMCalculator -g GTF_file [-d BAM_files_directory|-i BAM_file] -q Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value. -o Minimum overlap between a reads and a feature. Default: 8. -e Extended output. This will include transcript level TPM values. Default: No. + -a Print out all features with read counts equal to zero. Default: No. ## Description diff --git a/doc/Gencode_v25_FPKM_HTSeq.png b/doc/Gencode_v25_FPKM_HTSeq.png new file mode 100644 index 0000000..0e36fa5 Binary files /dev/null and b/doc/Gencode_v25_FPKM_HTSeq.png differ diff --git a/doc/Gencode_v25_FPKM_featureCounts.png b/doc/Gencode_v25_FPKM_featureCounts.png new file mode 100644 index 0000000..5614cf5 Binary files /dev/null and b/doc/Gencode_v25_FPKM_featureCounts.png differ diff --git a/doc/Gencode_v25_TPMCalculator_RSeQC.png b/doc/Gencode_v25_TPMCalculator_RSeQC.png new file mode 100644 index 0000000..1580b6d Binary files /dev/null and b/doc/Gencode_v25_TPMCalculator_RSeQC.png differ diff --git a/includes/GenomeFactory.h b/includes/GenomeFactory.h index fbc14df..22cbe92 100644 --- a/includes/GenomeFactory.h +++ b/includes/GenomeFactory.h @@ -9,6 +9,7 @@ #define GENOMEFACTORY_H #include +#include #include #include #include @@ -1191,6 +1192,10 @@ namespace genome { return currentIso; } + + std::deque getChrOrder() const { + return chrOrder; + } GeneMultiSetItr findGeneUpperBound(std::string chrName, unsigned int start, unsigned int end) { GeneMultiSetItr it; @@ -1242,6 +1247,7 @@ namespace genome { try { setCurrentChr(fParser.getWords()[0]); } catch (exceptions::NotFoundException ex) { + chrOrder.push_back(fParser.getWords()[0]); std::pair < ChromosomeUnMapItr, bool> res = chromosomes.insert(std::make_pair(fParser.getWords()[0], std::make_shared> (fParser.getWords()[0]))); if (!res.second) { std::cerr << "Error inserting new Chromosome" << std::endl; @@ -1268,6 +1274,7 @@ namespace genome { } } private: + std::deque chrOrder; ChromosomeUnMap chromosomes; GeneIsoformUnMap transcript2Chr; SPtrIsoform currentIso; diff --git a/includes/ReadFactory.h b/includes/ReadFactory.h index 0f42efc..3d70b81 100644 --- a/includes/ReadFactory.h +++ b/includes/ReadFactory.h @@ -332,7 +332,7 @@ namespace ngs { int processBAMSAMFromDir(std::string dirName, bool onlyProperlyPaired, uint16_t minMAPQ, uint16_t minOverlap); int processReadsFromBAM(std::string bamFileName, std::string sampleName, bool onlyProperlyPaired, uint16_t minMAPQ, uint16_t minOverlap); std::vector processCigar(std::string cigar); - void printResults(bool singleFile, bool extendedOutput); + void printResults(bool singleFile, bool extendedOutput, bool all_feat); void printResultsMatrix(std::string output_name, std::vector tpmColumns); void processReadAtGenomeLevel(std::string chrName, std::string sampleName, std::set < std::pair, coordinateLessCMP> read_coords, uint16_t minOverlap); diff --git a/src/ReadFactory.cpp b/src/ReadFactory.cpp index f68197a..4a00909 100644 --- a/src/ReadFactory.cpp +++ b/src/ReadFactory.cpp @@ -445,10 +445,10 @@ int ReadFactory::processBAMSAMFromDir(std::string dirName, bool onlyProperlyPair return totalCount; } -void ReadFactory::printResults(bool singleFile, bool extendedOutput) { +void ReadFactory::printResults(bool singleFile, bool extendedOutput, bool all_feat) { int count = 0; string sampleFileName; - SPtrChromosomeNGS c; + Chromosome *c; SPtrGeneNGS g; SPtrIsoformNGS i; SPtrFeatureNGS f; @@ -496,11 +496,11 @@ void ReadFactory::printResults(bool singleFile, bool extendedOutput) { exit(-1); } - for (auto cIt = genomeFactory.getChromosomes().begin(); cIt != genomeFactory.getChromosomes().end(); ++cIt) { - c = cIt->second; + for (string chr : getGenomeFactory().getChrOrder()) { + c = getGenomeFactory().findChromosome(chr); for (auto it = c->getGenes().begin(); it != c->getGenes().end(); ++it) { g = *it; - if (g->isProcessed()) { + if (all_feat || g->isProcessed()) { out_gene << g->getId() << "\t" << c->getId() << "\t" << g->getStart() diff --git a/src/main.cpp b/src/main.cpp index 630d3dd..29f183c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -43,6 +43,7 @@ void print_usage(char *program_name, int exit_code) { cerr << "-q Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value.\n"; cerr << "-o Minimum overlap between a reads and a feature. Default: 8.\n"; cerr << "-e Extended output. This will include transcript level TPM values. Default: No.\n"; + cerr << "-a Print out all features with read counts equal to zero. Default: No.\n"; cerr << "\n********************************************************************************\n"; cerr << "\n Roberto Vera Alvarez, PhD\n"; cerr << " Emails: veraalva@ncbi.nlm.nih.gov\n\n"; @@ -64,6 +65,7 @@ int main(int argc, char *argv[]) { bool onlyProperlyPaired = false; bool singleFile = false; bool extendedOutput = false; + bool all_feat = false; setfeatures = {"exon"}; unordered_map featuresToCreate = { {"exon", "intron"} @@ -184,6 +186,8 @@ int main(int argc, char *argv[]) { onlyProperlyPaired = true; } else if (option.compare(1, 1, "e") == 0) { extendedOutput = true; + } else if (option.compare(1, 1, "a") == 0) { + all_feat = true; } else { cerr << "Unsupported option: " << option << endl; print_usage(argv[0], -1); @@ -241,7 +245,7 @@ int main(int argc, char *argv[]) { cerr << "Printing results" << endl; fflush(NULL); - readFactory.printResults(singleFile, extendedOutput); + readFactory.printResults(singleFile, extendedOutput, all_feat); cerr << "Total time: " << uTime.getTotalTimeSec() << " seconds" << endl; return 0;