Skip to content

Commit

Permalink
Printing output data using teh same chromosome order in the GTF file
Browse files Browse the repository at this point in the history
  • Loading branch information
r78v10a07 committed Dec 5, 2018
1 parent f3e3ba8 commit 8531e34
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Usage: ./bin/TPMCalculator -g GTF_file [-d BAM_files_directory|-i BAM_file]
-q Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value.
-o Minimum overlap between a reads and a feature. Default: 8.
-e Extended output. This will include transcript level TPM values. Default: No.
-a Print out all features with read counts equal to zero. Default: No.

## Description

Expand Down
Binary file added doc/Gencode_v25_FPKM_HTSeq.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/Gencode_v25_FPKM_featureCounts.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/Gencode_v25_TPMCalculator_RSeQC.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions includes/GenomeFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define GENOMEFACTORY_H

#include <unordered_map>
#include <deque>
#include <set>
#include <vector>
#include <string>
Expand Down Expand Up @@ -1191,6 +1192,10 @@ namespace genome {

return currentIso;
}

std::deque<std::string> getChrOrder() const {
return chrOrder;
}

GeneMultiSetItr<T> findGeneUpperBound(std::string chrName, unsigned int start, unsigned int end) {
GeneMultiSetItr<T> it;
Expand Down Expand Up @@ -1242,6 +1247,7 @@ namespace genome {
try {
setCurrentChr(fParser.getWords()[0]);
} catch (exceptions::NotFoundException ex) {
chrOrder.push_back(fParser.getWords()[0]);
std::pair < ChromosomeUnMapItr<T>, bool> res = chromosomes.insert(std::make_pair(fParser.getWords()[0], std::make_shared<Chromosome < T >> (fParser.getWords()[0])));
if (!res.second) {
std::cerr << "Error inserting new Chromosome" << std::endl;
Expand All @@ -1268,6 +1274,7 @@ namespace genome {
}
}
private:
std::deque<std::string> chrOrder;
ChromosomeUnMap<T> chromosomes;
GeneIsoformUnMap<T> transcript2Chr;
SPtrIsoform<T> currentIso;
Expand Down
2 changes: 1 addition & 1 deletion includes/ReadFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ namespace ngs {
int processBAMSAMFromDir(std::string dirName, bool onlyProperlyPaired, uint16_t minMAPQ, uint16_t minOverlap);
int processReadsFromBAM(std::string bamFileName, std::string sampleName, bool onlyProperlyPaired, uint16_t minMAPQ, uint16_t minOverlap);
std::vector<BamTools::CigarOp> processCigar(std::string cigar);
void printResults(bool singleFile, bool extendedOutput);
void printResults(bool singleFile, bool extendedOutput, bool all_feat);
void printResultsMatrix(std::string output_name, std::vector<std::string> tpmColumns);

void processReadAtGenomeLevel(std::string chrName, std::string sampleName, std::set < std::pair<unsigned int, unsigned int>, coordinateLessCMP> read_coords, uint16_t minOverlap);
Expand Down
10 changes: 5 additions & 5 deletions src/ReadFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,10 +445,10 @@ int ReadFactory::processBAMSAMFromDir(std::string dirName, bool onlyProperlyPair
return totalCount;
}

void ReadFactory::printResults(bool singleFile, bool extendedOutput) {
void ReadFactory::printResults(bool singleFile, bool extendedOutput, bool all_feat) {
int count = 0;
string sampleFileName;
SPtrChromosomeNGS c;
Chromosome<ReadData> *c;
SPtrGeneNGS g;
SPtrIsoformNGS i;
SPtrFeatureNGS f;
Expand Down Expand Up @@ -496,11 +496,11 @@ void ReadFactory::printResults(bool singleFile, bool extendedOutput) {
exit(-1);
}

for (auto cIt = genomeFactory.getChromosomes().begin(); cIt != genomeFactory.getChromosomes().end(); ++cIt) {
c = cIt->second;
for (string chr : getGenomeFactory().getChrOrder()) {
c = getGenomeFactory().findChromosome(chr);
for (auto it = c->getGenes().begin(); it != c->getGenes().end(); ++it) {
g = *it;
if (g->isProcessed()) {
if (all_feat || g->isProcessed()) {
out_gene << g->getId()
<< "\t" << c->getId()
<< "\t" << g->getStart()
Expand Down
6 changes: 5 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ void print_usage(char *program_name, int exit_code) {
cerr << "-q Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value.\n";
cerr << "-o Minimum overlap between a reads and a feature. Default: 8.\n";
cerr << "-e Extended output. This will include transcript level TPM values. Default: No.\n";
cerr << "-a Print out all features with read counts equal to zero. Default: No.\n";
cerr << "\n********************************************************************************\n";
cerr << "\n Roberto Vera Alvarez, PhD\n";
cerr << " Emails: [email protected]\n\n";
Expand All @@ -64,6 +65,7 @@ int main(int argc, char *argv[]) {
bool onlyProperlyPaired = false;
bool singleFile = false;
bool extendedOutput = false;
bool all_feat = false;
set<string>features = {"exon"};
unordered_map<string, string> featuresToCreate = {
{"exon", "intron"}
Expand Down Expand Up @@ -184,6 +186,8 @@ int main(int argc, char *argv[]) {
onlyProperlyPaired = true;
} else if (option.compare(1, 1, "e") == 0) {
extendedOutput = true;
} else if (option.compare(1, 1, "a") == 0) {
all_feat = true;
} else {
cerr << "Unsupported option: " << option << endl;
print_usage(argv[0], -1);
Expand Down Expand Up @@ -241,7 +245,7 @@ int main(int argc, char *argv[]) {

cerr << "Printing results" << endl;
fflush(NULL);
readFactory.printResults(singleFile, extendedOutput);
readFactory.printResults(singleFile, extendedOutput, all_feat);

cerr << "Total time: " << uTime.getTotalTimeSec() << " seconds" << endl;
return 0;
Expand Down

0 comments on commit 8531e34

Please sign in to comment.