From 37fcb5a79c2b63fafc4fd9b8ceea1aac586887d9 Mon Sep 17 00:00:00 2001
From: "Philip R. Kensche"
Date: Wed, 13 Dec 2023 11:36:31 +0100
Subject: [PATCH 01/79] Made ChrConverter more flexible (second hard-coded
reference) and centralized in GlobalAppConfig.
Signed-off-by: Philip R. Kensche
---
Release_sophia/build-sophia.sh | 24 +-
.../build-sophiaAnnotate.sh | 29 +-
include/Alignment.h | 116 ++++--
include/Breakpoint.h | 19 +-
include/BreakpointReduced.h | 1 +
include/ChrConverter.h | 119 +++---
include/GlobalAppConfig.h | 68 ++++
include/Hg37ChrConverter.h | 44 ++
include/Hg38ChrConverter.h | 44 ++
include/MasterRefProcessor.h | 2 +
include/MrefEntry.h | 2 +
src/Alignment.cpp | 74 +++-
src/AnnotationProcessor.cpp | 18 +-
src/Breakpoint.cpp | 86 +++-
src/BreakpointReduced.cpp | 20 +-
src/ChrConverter.cpp | 310 ++------------
src/GlobalAppConfig.cpp | 62 +++
src/HelperFunctions.cpp | 20 +-
src/Hg37ChrConverter.cpp | 377 ++++++++++++++++++
src/Hg38ChrConverter.cpp | 377 ++++++++++++++++++
src/IndexTable.cpp | 308 ++++++++++++++
src/MasterRefProcessor.cpp | 37 +-
src/MrefEntry.cpp | 9 +-
src/MrefEntryAnno.cpp | 1 -
src/SuppAlignment.cpp | 78 +++-
src/SuppAlignmentAnno.cpp | 9 +-
src/SvEvent.cpp | 21 +-
sophia.cpp => src/sophia.cpp | 112 ++++--
sophiaAnnotate.cpp => src/sophiaAnnotate.cpp | 108 +++--
sophiaMref.cpp => src/sophiaMref.cpp | 53 ++-
30 files changed, 1948 insertions(+), 600 deletions(-)
create mode 100644 include/GlobalAppConfig.h
create mode 100644 include/Hg37ChrConverter.h
create mode 100644 include/Hg38ChrConverter.h
create mode 100644 src/GlobalAppConfig.cpp
create mode 100644 src/Hg37ChrConverter.cpp
create mode 100644 src/Hg38ChrConverter.cpp
create mode 100644 src/IndexTable.cpp
rename sophia.cpp => src/sophia.cpp (57%)
rename sophiaAnnotate.cpp => src/sophiaAnnotate.cpp (62%)
rename sophiaMref.cpp => src/sophiaMref.cpp (51%)
diff --git a/Release_sophia/build-sophia.sh b/Release_sophia/build-sophia.sh
index 286238e..b210941 100755
--- a/Release_sophia/build-sophia.sh
+++ b/Release_sophia/build-sophia.sh
@@ -14,7 +14,7 @@ install_strtk
CPP=x86_64-conda_cos6-linux-gnu-g++
INCLUDES="-I../include -I$CONDA_PREFIX/include"
-CPP_OPTS="-L$CONDA_PREFIX/lib -std=c++1z $INCLUDES -O3 -Wall -Wextra -static -static-libgcc -static-libstdc++ -flto -c -fmessage-length=0 -Wno-attributes"
+CPP_OPTS="-L$CONDA_PREFIX/lib -std=c++17 $INCLUDES -O3 -Wall -Wextra -static -static-libgcc -static-libstdc++ -flto -c -fmessage-length=0 -Wno-attributes"
if [[ "${STATIC:-false}" == "true" ]]; then
CPP_OPTS="-static -static-libgcc -static-libstdc++ $CPP_OPTS"
@@ -23,11 +23,27 @@ fi
$CPP $CPP_OPTS -o "Alignment.o" "../src/Alignment.cpp"
$CPP $CPP_OPTS -o "Breakpoint.o" "../src/Breakpoint.cpp"
$CPP $CPP_OPTS -o "ChosenBp.o" "../src/ChosenBp.cpp"
+$CPP $CPP_OPTS -o "GlobalAppConfig.o" "../src/GlobalAppConfig.cpp"
$CPP $CPP_OPTS -o "ChrConverter.o" "../src/ChrConverter.cpp"
+$CPP $CPP_OPTS -o "Hg37ChrConverter.o" "../src/Hg37ChrConverter.cpp"
+$CPP $CPP_OPTS -o "Hg38ChrConverter.o" "../src/Hg38ChrConverter.cpp"
$CPP $CPP_OPTS -o "SamSegmentMapper.o" "../src/SamSegmentMapper.cpp"
$CPP $CPP_OPTS -o "Sdust.o" "../src/Sdust.cpp"
$CPP $CPP_OPTS -o "SuppAlignment.o" "../src/SuppAlignment.cpp"
$CPP $CPP_OPTS -o "HelperFunctions.o" "../src/HelperFunctions.cpp"
-$CPP $CPP_OPTS -o "sophia.o" "../sophia.cpp"
-
-$CPP -L$CONDA_PREFIX/lib -flto -o "sophia" Alignment.o Breakpoint.o ChosenBp.o ChrConverter.o SamSegmentMapper.o Sdust.o SuppAlignment.o HelperFunctions.o sophia.o -lboost_program_options
+$CPP $CPP_OPTS -o "sophia.o" "../src/sophia.cpp"
+
+$CPP -L$CONDA_PREFIX/lib -flto -o "sophia" \
+ Alignment.o \
+ Breakpoint.o \
+ ChosenBp.o \
+ ChrConverter.o \
+ Hg37ChrConverter.o \
+ Hg38ChrConverter.o \
+ SamSegmentMapper.o \
+ Sdust.o \
+ SuppAlignment.o \
+ HelperFunctions.o \
+ GlobalAppConfig.o \
+ sophia.o \
+ -lboost_program_options
diff --git a/Release_sophiaAnnotate/build-sophiaAnnotate.sh b/Release_sophiaAnnotate/build-sophiaAnnotate.sh
index 205fbb4..a0ccb05 100755
--- a/Release_sophiaAnnotate/build-sophiaAnnotate.sh
+++ b/Release_sophiaAnnotate/build-sophiaAnnotate.sh
@@ -13,7 +13,7 @@ install_strtk
CPP=x86_64-conda_cos6-linux-gnu-g++
INCLUDES="-I../include -I$CONDA_PREFIX/include"
-CPP_OPTS="-L$CONDA_PREFIX/lib -std=c++1z $INCLUDES -O3 -Wall -Wextra -static -static-libgcc -static-libstdc++ -flto -c -fmessage-length=0 -Wno-attributes"
+CPP_OPTS="-L$CONDA_PREFIX/lib -std=c++17 $INCLUDES -O3 -Wall -Wextra -static -static-libgcc -static-libstdc++ -flto -c -fmessage-length=0 -Wno-attributes"
if [[ "${STATIC:-false}" == "true" ]]; then
CPP_OPTS="-static -static-libgcc -static-libstdc++ $CPP_OPTS"
@@ -22,7 +22,10 @@ fi
$CPP $CPP_OPTS -o "AnnotationProcessor.o" "../src/AnnotationProcessor.cpp"
$CPP $CPP_OPTS -o "Breakpoint.o" "../src/Breakpoint.cpp"
$CPP $CPP_OPTS -o "BreakpointReduced.o" "../src/BreakpointReduced.cpp"
+$CPP $CPP_OPTS -o "GlobalAppConfig.o" "../src/GlobalAppConfig.cpp"
$CPP $CPP_OPTS -o "ChrConverter.o" "../src/ChrConverter.cpp"
+$CPP $CPP_OPTS -o "Hg37ChrConverter.o" "../src/Hg37ChrConverter.cpp"
+$CPP $CPP_OPTS -o "Hg38ChrConverter.o" "../src/Hg38ChrConverter.cpp"
$CPP $CPP_OPTS -o "DeFuzzier.o" "../src/DeFuzzier.cpp"
$CPP $CPP_OPTS -o "GermlineMatch.o" "../src/GermlineMatch.cpp"
$CPP $CPP_OPTS -o "MrefEntry.o" "../src/MrefEntry.cpp"
@@ -32,6 +35,24 @@ $CPP $CPP_OPTS -o "SuppAlignment.o" "../src/SuppAlignment.cpp"
$CPP $CPP_OPTS -o "SuppAlignmentAnno.o" "../src/SuppAlignmentAnno.cpp"
$CPP $CPP_OPTS -o "SvEvent.o" "../src/SvEvent.cpp"
$CPP $CPP_OPTS -o "HelperFunctions.o" "../src/HelperFunctions.cpp"
-$CPP $CPP_OPTS -o "sophiaAnnotate.o" "../sophiaAnnotate.cpp"
-
-$CPP -L$CONDA_PREFIX/lib -flto -o "sophiaAnnotate" AnnotationProcessor.o Breakpoint.o BreakpointReduced.o ChrConverter.o DeFuzzier.o GermlineMatch.o MrefEntry.o MrefEntryAnno.o MrefMatch.o SuppAlignment.o SuppAlignmentAnno.o SvEvent.o HelperFunctions.o sophiaAnnotate.o -lz -lboost_system -lboost_iostreams
+$CPP $CPP_OPTS -o "sophiaAnnotate.o" "../src/sophiaAnnotate.cpp"
+
+$CPP -L$CONDA_PREFIX/lib -flto -o "sophiaAnnotate" \
+ AnnotationProcessor.o \
+ Breakpoint.o \
+ BreakpointReduced.o \
+ ChrConverter.o \
+ Hg37ChrConverter.o \
+ Hg38ChrConverter.o \
+ DeFuzzier.o \
+ GermlineMatch.o \
+ MrefEntry.o \
+ MrefEntryAnno.o \
+ MrefMatch.o \
+ SuppAlignment.o \
+ SuppAlignmentAnno.o \
+ SvEvent.o \
+ HelperFunctions.o \
+ GlobalAppConfig.o \
+ sophiaAnnotate.o \
+ -lz -lboost_system -lboost_iostreams
diff --git a/include/Alignment.h b/include/Alignment.h
index efe5459..fb54456 100644
--- a/include/Alignment.h
+++ b/include/Alignment.h
@@ -43,133 +43,167 @@ class Alignment {
public:
Alignment();
+
void continueConstruction();
- static int LOWQUALCLIPTHRESHOLD, BASEQUALITYTHRESHOLD,
- BASEQUALITYTHRESHOLDLOW, CLIPPEDNUCLEOTIDECOUNTTHRESHOLD,
+
+ static int
+ LOWQUALCLIPTHRESHOLD,
+ BASEQUALITYTHRESHOLD,
+ BASEQUALITYTHRESHOLDLOW,
+ CLIPPEDNUCLEOTIDECOUNTTHRESHOLD,
INDELNUCLEOTIDECOUNTTHRESHOLD;
+
static double ISIZEMAX;
+
int getStartPos() const { return startPos; }
+
int getEndPos() const { return endPos; }
+
int getReadType() const { return readType; }
+
const vector &getReadBreakpoints() const { return readBreakpoints; }
+
bool isValidLine() const { return validLine; }
+
const string &getSamLine() const { return samLine; }
+
const vector &getSamChunkPositions() const {
return samChunkPositions;
}
+
bool assessOutlierMateDistance();
+
int getMateChrIndex() const { return mateChrIndex; }
+
int getMatePos() const { return matePos; }
+
const vector &getReadBreakpointTypes() const {
return readBreakpointTypes;
}
+
void setChosenBp(int chosenBpLoc, int alignmentIndex);
+
bool isOverhangEncounteredM() const { return chosenBp->bpEncounteredM; }
+
int getOverhangLength() const { return chosenBp->overhangLength; }
+
int getOverhangStartIndex() const { return chosenBp->overhangStartIndex; }
+
vector generateSuppAlignments(int bpChrIndex, int bpPos);
+
const vector &getSupplementaryAlignments() const {
return chosenBp->supplementaryAlignments;
}
+
int getChrIndex() const { return chrIndex; }
+
const vector &getReadBreakpointsSizes() const {
return readBreakpointSizes;
}
+
bool isLowMapq() const { return lowMapq; }
+
bool isNullMapq() const { return nullMapq; }
+
bool isSupplementary() const { return supplementary; }
+
void addChildNode(int indexIn) { chosenBp->addChildNode(indexIn); }
- void
- addSupplementaryAlignments(const vector &suppAlignments) {
+
+ void addSupplementaryAlignments(const vector &suppAlignments) {
chosenBp->addSupplementaryAlignments(suppAlignments);
}
+
const vector &getChildrenNodes() const {
return chosenBp->childrenNodes;
}
+
int getOriginIndex() const { return chosenBp->selfNodeIndex; }
+
string printOverhang() const;
+
double overhangComplexityMaskRatio() const;
bool isInvertedMate() const { return invertedMate; }
+
bool isDistantMate() const { return distantMate == 1; }
+
private:
+
void mappingQualityCheck();
+
bool isEventCandidate() const;
+
void createCigarChunks();
+
void assignBreakpointsAndOverhangs();
+
void qualityCheckCascade();
+
bool clipCountCheck();
+
bool uniqueSuppCheck();
+
double overhangMedianQuality(const CigarChunk &cigarChunk) const;
+
template
void fullMedianQuality(Iterator qualBegin, Iterator qualEnd,
vector &overhangPerBaseQuality) const;
+
template
double getMedian(Iterator begin, Iterator end) const;
+
void assessReadType();
+
bool lowMapq;
+
bool nullMapq;
+
int distantMate;
+
unique_ptr chosenBp;
+
int chrIndex;
+
int readType;
+
int startPos, endPos;
+
int mateChrIndex, matePos;
+
string samLine;
+
bool validLine;
+
vector samChunkPositions;
+
string::const_iterator saCbegin, saCend;
+
bool hasSa;
+
bool supplementary;
+
bool fwdStrand;
+
bool invertedMate;
+
bool qualChecked;
+
vector cigarChunks;
+
vector readBreakpoints;
+
vector readBreakpointTypes;
+
vector readBreakpointSizes;
+
vector readBreakpointComplexityMaskRatios;
+
deque readBreakpointsEncounteredM;
+
vector readOverhangCoords;
-};
-template
-void
-Alignment::fullMedianQuality(Iterator qualBegin, Iterator qualEnd,
- vector &overhangPerBaseQuality) const {
- overhangPerBaseQuality.reserve(distance(qualBegin, qualEnd));
- auto consecutiveLowQuals = 0;
- for (auto cit = qualBegin; cit != qualEnd; ++cit) {
- if (*cit < BASEQUALITYTHRESHOLDLOW) { // 33 + phred 11
- if (consecutiveLowQuals == 5) {
- overhangPerBaseQuality.clear();
- return;
- }
- ++consecutiveLowQuals;
- } else {
- consecutiveLowQuals = 0;
- }
- overhangPerBaseQuality.push_back(*cit);
- }
-}
-
-// Median Code taken from http://rosettacode.org/wiki/Averages/Median#C.2B.2B
-template
-double
-Alignment::getMedian(Iterator begin, Iterator end) const {
- // this is middle for odd-length, and "upper-middle" for even length
- Iterator middle = begin + (end - begin) / 2;
- // This function runs in O(n) on average, according to the standard
- nth_element(begin, middle, end);
- if ((end - begin) % 2 != 0) { // odd length
- return *middle;
- } else { // even length
- // the "lower middle" is the max of the lower half
- Iterator lower_middle = max_element(begin, middle);
- return (*middle + *lower_middle) / 2.0;
- }
-}
+};
} /* namespace sophia */
diff --git a/include/Breakpoint.h b/include/Breakpoint.h
index 3196193..5e9db4f 100644
--- a/include/Breakpoint.h
+++ b/include/Breakpoint.h
@@ -28,6 +28,7 @@
#include "MateInfo.h"
#include "SuppAlignment.h"
#include "SuppAlignmentAnno.h"
+#include "ChrConverter.h"
#include
#include
#include
@@ -232,24 +233,6 @@ class Breakpoint {
vector supplementsSecondary;
};
-template
-inline void
-Breakpoint::cleanUpVector(vector &objectPool) {
- // cerr << "cleaning up" << endl;
- while (!objectPool.empty() && objectPool.back().isToRemove()) {
- objectPool.pop_back();
- }
- for (auto saIt = objectPool.begin(); saIt != objectPool.end(); ++saIt) {
- if (saIt->isToRemove()) {
- swap(*saIt, objectPool.back());
- }
- while (!objectPool.empty() && objectPool.back().isToRemove()) {
- objectPool.pop_back();
- }
- }
- // cerr << "done" << endl;
-}
-
} /* namespace sophia */
#endif /* BREAKPOINT_H_ */
diff --git a/include/BreakpointReduced.h b/include/BreakpointReduced.h
index 8db13c7..1312914 100644
--- a/include/BreakpointReduced.h
+++ b/include/BreakpointReduced.h
@@ -29,6 +29,7 @@
#include "GermlineMatch.h"
#include "MrefMatch.h"
#include "SuppAlignmentAnno.h"
+#include "ChrConverter.h"
#include
#include
#include
diff --git a/include/ChrConverter.h b/include/ChrConverter.h
index 21afddc..71be479 100644
--- a/include/ChrConverter.h
+++ b/include/ChrConverter.h
@@ -1,11 +1,5 @@
/*
- * ChrConverter.h
- *
- * Created on: 28 Dec 2017
- * Author: Umut H. Toprak, DKFZ Heidelberg (Divisions of Theoretical
- * Bioinformatics, Bioinformatics and Omics Data Analytics and currently
- * Neuroblastoma Genomics) Copyright (C) 2018 Umut H. Toprak, Matthias
- * Schlesner, Roland Eils and DKFZ Heidelberg
+ * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,65 +13,66 @@
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
- * LICENSE: GPL
+ * LICENSE: GPL
*/
-#ifndef CHRCONVERTER_H_
-#define CHRCONVERTER_H_
-#include
+#ifndef _CHRCONVERTER_H_
+#define _CHRCONVERTER_H_
+
+#include
#include
+#include
-#include
namespace sophia {
-using namespace std;
-
-class ChrConverter {
- public:
- static inline int readChromosomeIndex(string::const_iterator startIt,
- char stopChar) {
- int chrIndex{0};
- if (isdigit(*startIt)) {
- for (auto chr_cit = startIt; *chr_cit != stopChar; ++chr_cit) {
- chrIndex = chrIndex * 10 + (*chr_cit - '0');
- }
- return chrIndex;
- } else {
- switch (*startIt) {
- case 'h':
- return 999;
- case 'X':
- return 40;
- case 'G':
- for (auto cit = next(startIt, 2); *cit != '.'; ++cit) {
- chrIndex = 10 * chrIndex + *cit - '0';
- }
- return chrIndex;
- case 'Y':
- return 41;
- case 'M':
- ++startIt;
- if (*startIt == 'T') {
- return 1001;
- } else {
- return 1003;
- }
- case 'N':
- return 1000;
- case 'p':
- return 1002;
- default:
- return 1003;
- }
- }
- return 0;
- }
- static const array indexToChr;
- static const array indexConverter;
- static const array indexToChrCompressedMref;
-};
-
-} /* namespace sophia */
-
-#endif /* CHRCONVERTER_H_ */
+ using namespace std;
+
+ /** ChrConverter contains information about the names of chromosomes in an assembly. */
+ class ChrConverter {
+ protected:
+
+ /** The constructor should be used to initialize the fields from subclasses. It does
+ additional checks of the dimensions of the input vectors. */
+ ChrConverter(const vector& indexToChr,
+ const vector& indexToChrCompressedMref,
+ const vector& chrSizesCompressedMref,
+ const vector& indexConverter);
+
+
+ public:
+
+ virtual ~ChrConverter();
+
+ /** The name of the assembly. */
+ static const string assembly_name;
+
+ /** Mapping indices to chromosome names. */
+ const vector indexToChr;
+
+ /** Mapping indices to chromosome names for compressed mref files. */
+ const vector indexToChrCompressedMref;
+
+ /** Chromosome sizes in base pairs. */
+ const vector chrSizesCompressedMref;
+
+ /** Lookup table indexed by chromosome index; callers skip entries whose value is negative. */
+ const vector indexConverter;
+
+ /** Parse chromosome index. */
+ virtual int readChromosomeIndex(string::const_iterator startIt, char stopChar) const = 0;
+
+ /** Number of chromosomes known to this converter (size of indexToChr). Const-qualified so
+ it is callable through the const reference returned by GlobalAppConfig::getChrConverter(). */
+ size_t n_chromosomes() const { return indexToChr.size(); }
+
+ /** Number of chromosomes in the compressed mref naming (size of indexToChrCompressedMref).
+ Const-qualified so it is callable through const references to the converter. */
+ size_t n_chromosomes_compressed_mref() const { return indexToChrCompressedMref.size(); }
+
+
+ };
+
+}
+
+#endif /* _CHRCONVERTER_H_ */
\ No newline at end of file
diff --git a/include/GlobalAppConfig.h b/include/GlobalAppConfig.h
new file mode 100644
index 0000000..2fa80c2
--- /dev/null
+++ b/include/GlobalAppConfig.h
@@ -0,0 +1,68 @@
+/*
+ * GlobalAppConfig.h
+ *
+ * Author: Philip R. Kensche Copyright (C) 2023 DKFZ Heidelberg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ * LICENSE: GPL
+ */
+
+#ifndef GLOBALAPPCONFIG_H
+#define GLOBALAPPCONFIG_H
+
+#include
+#include
+#include "ChrConverter.h"
+
+
+namespace sophia {
+
+ /** Keep global application config in this singleton. This is mostly to avoid having to hand
+ around configurations. Currently the only setting is the ChrConverter to use. */
+ class GlobalAppConfig {
+
+ private:
+ static GlobalAppConfig *instance_; /**< Pointer to the singleton instance. */
+ static std::mutex mutex_; /**< NOTE(review): presumably guards creation of instance_ -- confirm in GlobalAppConfig.cpp. */
+
+ protected:
+ /** Takes the converter by value as a unique_ptr. NOTE(review): presumably only called from init() -- confirm. */
+ GlobalAppConfig(unique_ptr chrConverter);
+
+ ~GlobalAppConfig();
+
+ /** The chromosome converter. Const: it cannot be re-seated after construction. */
+ const unique_ptr chrConverter;
+
+ public:
+ /** Read-only access to a ChrConverter; presumably the one stored in chrConverter -- confirm. */
+ const ChrConverter &getChrConverter() const;
+
+ /** Prevent copying. */
+ GlobalAppConfig(GlobalAppConfig &other) = delete;
+
+ /** Prevent assignment. */
+ void operator=(const GlobalAppConfig &) = delete;
+
+ /** Factory method. NOTE(review): presumably has to be called before getInstance() -- confirm in GlobalAppConfig.cpp. */
+ static GlobalAppConfig &init(unique_ptr chrConverter);
+
+ /** Getter. Returns a const reference, so only const members are reachable through it. */
+ static const GlobalAppConfig &getInstance();
+
+ };
+
+} /* namespace sophia */
+
+#endif /* GLOBALAPPCONFIG_H */
\ No newline at end of file
diff --git a/include/Hg37ChrConverter.h b/include/Hg37ChrConverter.h
new file mode 100644
index 0000000..31bfa1b
--- /dev/null
+++ b/include/Hg37ChrConverter.h
@@ -0,0 +1,44 @@
+/*
+ * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ * LICENSE: GPL
+ */
+
+#ifndef HG37CHRCONVERTER_H_
+#define HG37CHRCONVERTER_H_
+
+#include "ChrConverter.h"
+#include
+#include
+
+
+namespace sophia {
+
+ using namespace std;
+
+ class Hg37ChrConverter: public ChrConverter {
+ public:
+
+ static const string assembly_name;
+
+ Hg37ChrConverter();
+
+ /** Parse chromosome index; overrides the pure virtual ChrConverter::readChromosomeIndex. */
+ int readChromosomeIndex(string::const_iterator startIt, char stopChar) const override;
+ };
+
+}
+
+#endif /* HG37CHRCONVERTER_H_ */
\ No newline at end of file
diff --git a/include/Hg38ChrConverter.h b/include/Hg38ChrConverter.h
new file mode 100644
index 0000000..53db591
--- /dev/null
+++ b/include/Hg38ChrConverter.h
@@ -0,0 +1,44 @@
+/*
+ * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ * LICENSE: GPL
+ */
+
+#ifndef HG38CHRCONVERTER_H_
+#define HG38CHRCONVERTER_H_
+
+#include "ChrConverter.h"
+#include
+#include
+
+
+namespace sophia {
+
+ using namespace std;
+
+ class Hg38ChrConverter: public ChrConverter {
+ public:
+
+ static const string assembly_name;
+
+ Hg38ChrConverter();
+
+ /** Parse chromosome index; overrides the pure virtual ChrConverter::readChromosomeIndex. */
+ int readChromosomeIndex(string::const_iterator startIt, char stopChar) const override;
+ };
+
+}
+
+#endif /* HG38CHRCONVERTER_H_ */
\ No newline at end of file
diff --git a/include/MasterRefProcessor.h b/include/MasterRefProcessor.h
index deb370d..2601905 100644
--- a/include/MasterRefProcessor.h
+++ b/include/MasterRefProcessor.h
@@ -26,6 +26,7 @@
#define MASTERREFPROCESSOR_H_
#include "SuppAlignment.h"
+#include "ChrConverter.h"
#include
#include
#include
@@ -47,6 +48,7 @@ class MasterRefProcessor {
MasterRefProcessor(const vector &filesIn,
const string &outputRootName, const string &version,
const int defaultReadLengthIn);
+
~MasterRefProcessor() = default;
private:
diff --git a/include/MrefEntry.h b/include/MrefEntry.h
index 0d32a2d..03d7d4f 100644
--- a/include/MrefEntry.h
+++ b/include/MrefEntry.h
@@ -27,6 +27,7 @@
#include "BreakpointReduced.h"
#include "SuppAlignment.h"
+#include "ChrConverter.h"
#include
#include
@@ -39,6 +40,7 @@ class MrefEntry {
static int NUMPIDS;
static int DEFAULTREADLENGTH;
static boost::format doubleFormatter;
+
MrefEntry();
void addEntry(Breakpoint &tmpBreakpoint, int fileIndex);
void addEntry(BreakpointReduced &tmpBreakpoint, int fileIndex);
diff --git a/src/Alignment.cpp b/src/Alignment.cpp
index a9e049a..0bd32fc 100644
--- a/src/Alignment.cpp
+++ b/src/Alignment.cpp
@@ -24,6 +24,7 @@
#include "Alignment.h"
#include "ChrConverter.h"
+#include "GlobalAppConfig.h"
#include "HelperFunctions.h"
#include "MateInfo.h"
#include "Sdust.h"
@@ -35,19 +36,34 @@ namespace sophia {
using namespace std;
-int Alignment::LOWQUALCLIPTHRESHOLD{}, Alignment::BASEQUALITYTHRESHOLD{},
- Alignment::BASEQUALITYTHRESHOLDLOW{}, //
+int Alignment::LOWQUALCLIPTHRESHOLD{},
+ Alignment::BASEQUALITYTHRESHOLD{},
+ Alignment::BASEQUALITYTHRESHOLDLOW{},
Alignment::CLIPPEDNUCLEOTIDECOUNTTHRESHOLD{},
Alignment::INDELNUCLEOTIDECOUNTTHRESHOLD{};
double Alignment::ISIZEMAX{};
Alignment::Alignment()
- : lowMapq{false}, nullMapq{true}, distantMate{0}, chosenBp{nullptr},
- chrIndex{0}, readType{0}, startPos{0}, endPos{0}, mateChrIndex{0},
- matePos{0}, samLine{}, validLine{error_terminating_getline(cin, samLine)},
- samChunkPositions{}, saCbegin{}, saCend{}, hasSa{false},
- supplementary{false}, fwdStrand{true}, invertedMate{false}, qualChecked{
- false} {
+ : lowMapq(false),
+ nullMapq(true),
+ distantMate(0),
+ chosenBp(nullptr),
+ chrIndex(0),
+ readType(0),
+ startPos(0),
+ endPos(0),
+ mateChrIndex(0),
+ matePos(0),
+ samLine(),
+ validLine(error_terminating_getline(cin, samLine)),
+ samChunkPositions(),
+ saCbegin(),
+ saCend(),
+ hasSa(false),
+ supplementary(false),
+ fwdStrand(true),
+ invertedMate(false),
+ qualChecked(false) {
if (validLine) {
auto index = 0;
for (auto it = samLine.cbegin(); it != samLine.cend(); ++it) {
@@ -56,7 +72,7 @@ Alignment::Alignment()
}
++index;
}
- chrIndex = ChrConverter::readChromosomeIndex(
+ chrIndex = GlobalAppConfig::getInstance().getChrConverter().readChromosomeIndex(
next(samLine.cbegin(), samChunkPositions[1] + 1), '\t');
}
}
@@ -157,7 +173,7 @@ Alignment::continueConstruction() {
if (samLine[1 + samChunkPositions[5]] == '=') {
mateChrIndex = chrIndex;
} else {
- mateChrIndex = ChrConverter::readChromosomeIndex(
+ mateChrIndex = GlobalAppConfig::getInstance().getChrConverter().readChromosomeIndex(
next(samLine.cbegin(), 1 + samChunkPositions[5]), '\t');
}
}
@@ -709,4 +725,42 @@ Alignment::overhangComplexityMaskRatio() const {
return maskedIntervalsTotal / fullSizesTotal;
}
+
+// Collects the per-base quality values of [qualBegin, qualEnd) into
+// overhangPerBaseQuality. Despite the name, no median is computed here.
+//
+// A counter tracks consecutive values below BASEQUALITYTHRESHOLDLOW. When a
+// further low value is met while the counter already stands at 5, the output
+// vector is cleared (including anything it held before the call) and the
+// function returns early; callers can detect this by the vector being empty.
+template
+void
+Alignment::fullMedianQuality(Iterator qualBegin, Iterator qualEnd,
+ vector &overhangPerBaseQuality) const {
+ // Reserve space for the whole range up front.
+ overhangPerBaseQuality.reserve(distance(qualBegin, qualEnd));
+ auto consecutiveLowQuals = 0;
+ for (auto cit = qualBegin; cit != qualEnd; ++cit) {
+ if (*cit < BASEQUALITYTHRESHOLDLOW) { // 33 + phred 11
+ if (consecutiveLowQuals == 5) {
+ // Five low values directly precede this one: reject the whole range.
+ overhangPerBaseQuality.clear();
+ return;
+ }
+ ++consecutiveLowQuals;
+ } else {
+ // Any value at or above the threshold ends the current low-quality run.
+ consecutiveLowQuals = 0;
+ }
+ overhangPerBaseQuality.push_back(*cit);
+ }
+}
+
+// Median Code taken from http://rosettacode.org/wiki/Averages/Median#C.2B.2B
+//
+// Returns the median of the values in [begin, end); for an even number of
+// elements this is the mean of the two middle values.
+// The range is reordered in place by nth_element, so the caller's data does
+// not keep its original order.
+// The iterators must support `begin + n` and `end - begin`.
+// NOTE(review): for an empty range (begin == end) the even-length branch
+// dereferences `middle`, which then equals `end` -- callers presumably only
+// pass non-empty ranges; confirm.
+template
+double
+Alignment::getMedian(Iterator begin, Iterator end) const {
+ // this is middle for odd-length, and "upper-middle" for even length
+ Iterator middle = begin + (end - begin) / 2;
+ // This function runs in O(n) on average, according to the standard
+ nth_element(begin, middle, end);
+ if ((end - begin) % 2 != 0) { // odd length
+ return *middle;
+ } else { // even length
+ // the "lower middle" is the max of the lower half
+ Iterator lower_middle = max_element(begin, middle);
+ return (*middle + *lower_middle) / 2.0;
+ }
+}
+
} /* namespace sophia */
diff --git a/src/AnnotationProcessor.cpp b/src/AnnotationProcessor.cpp
index 7f5a9b1..d67f78b 100644
--- a/src/AnnotationProcessor.cpp
+++ b/src/AnnotationProcessor.cpp
@@ -25,6 +25,7 @@
#include "Breakpoint.h"
#include "HelperFunctions.h"
#include "SuppAlignment.h"
+#include "GlobalAppConfig.h"
#include
#include
#include
@@ -57,12 +58,13 @@ AnnotationProcessor::AnnotationProcessor(const string &tumorResultsIn,
tumorGzHandle->push(*tumorInputHandle);
string line;
auto lineIndex = 0;
+ const ChrConverter& chrConverter = GlobalAppConfig::getInstance().getChrConverter();
while (error_terminating_getline(*tumorGzHandle, line)) {
if (line.front() == '#') {
continue;
};
Breakpoint tmpBp{line, true};
- auto chrIndex = ChrConverter::indexConverter[tmpBp.getChrIndex()];
+ auto chrIndex = chrConverter.indexConverter[tmpBp.getChrIndex()];
if (chrIndex < 0) {
continue;
}
@@ -121,6 +123,7 @@ AnnotationProcessor::AnnotationProcessor(
controlGzHandle->push(*controlInputHandle);
string line;
auto lineIndex = 0;
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
while (error_terminating_getline(*controlGzHandle, line)) {
if (line.front() == '#') {
continue;
@@ -217,7 +220,7 @@ AnnotationProcessor::AnnotationProcessor(
}
}
}
- auto chrIndex = ChrConverter::indexConverter[tmpBp.getChrIndex()];
+ auto chrIndex = chrConverter.indexConverter[tmpBp.getChrIndex()];
controlResults[chrIndex].push_back(tmpBp);
++lineIndex;
}
@@ -237,7 +240,7 @@ AnnotationProcessor::AnnotationProcessor(
continue;
};
Breakpoint tmpBp{line, true};
- auto chrIndex = ChrConverter::indexConverter[tmpBp.getChrIndex()];
+ auto chrIndex = chrConverter.indexConverter[tmpBp.getChrIndex()];
if (chrIndex < 0) {
continue;
}
@@ -310,7 +313,8 @@ AnnotationProcessor::searchSa(int chrIndex, int dbIndex,
}
return;
}
- auto saChrIndex = ChrConverter::indexConverter[sa.getChrIndex()];
+ auto saChrIndex = GlobalAppConfig::getInstance().getChrConverter().
+ indexConverter[sa.getChrIndex()];
if (saChrIndex < 0) {
return;
}
@@ -544,7 +548,8 @@ AnnotationProcessor::searchMrefHitsNew(const BreakpointReduced &bpIn,
int distanceThreshold,
int conservativeDistanceThreshold,
vector> &mref) {
- auto convertedChrIndex = ChrConverter::indexConverter[bpIn.getChrIndex()];
+ auto convertedChrIndex = GlobalAppConfig::getInstance().getChrConverter().
+ indexConverter[bpIn.getChrIndex()];
vector suppMatches{};
if (convertedChrIndex < 0) {
return MrefMatch{0, 0, 10000, suppMatches};
@@ -680,7 +685,8 @@ AnnotationProcessor::searchGermlineHitsNew(const BreakpointReduced &bpIn,
if (NOCONTROLMODE) {
return dummyMatchTrue;
}
- auto convertedChrIndex = ChrConverter::indexConverter[bpIn.getChrIndex()];
+ auto convertedChrIndex = GlobalAppConfig::getInstance().getChrConverter().
+ indexConverter[bpIn.getChrIndex()];
if (convertedChrIndex < 0) {
return dummyMatchFalse;
}
diff --git a/src/Breakpoint.cpp b/src/Breakpoint.cpp
index 1cfccc5..ab0e985 100644
--- a/src/Breakpoint.cpp
+++ b/src/Breakpoint.cpp
@@ -24,6 +24,7 @@
#include "Breakpoint.h"
#include "ChrConverter.h"
+#include "GlobalAppConfig.h"
#include "strtk.hpp"
#include
#include
@@ -56,15 +57,52 @@ bool Breakpoint::PROPERPAIRCOMPENSATIONMODE{false};
int Breakpoint::bpindex{0};
Breakpoint::Breakpoint(int chrIndexIn, int posIn)
- : covFinalized{false}, missingInfoBp{false}, chrIndex{chrIndexIn},
- pos{posIn}, normalSpans{0}, lowQualSpansSoft{0}, lowQualSpansHard{0},
- unpairedBreaksSoft{0}, unpairedBreaksHard{0}, breaksShortIndel{0},
- lowQualBreaksSoft{0}, lowQualBreaksHard{0}, repetitiveOverhangBreaks{0},
- pairedBreaksSoft{0}, pairedBreaksHard{0}, leftSideDiscordantCandidates{0},
- rightSideDiscordantCandidates{0}, mateSupport{0}, leftCoverage{0},
- rightCoverage{0}, totalLowMapqHardClips{0},
- hitsInMref{-1}, germline{false}, poolLeft{}, poolRight{},
- poolLowQualLeft{}, poolLowQualRight{} {}
+ : covFinalized{false},
+ missingInfoBp{false},
+ chrIndex{chrIndexIn},
+ pos{posIn},
+ normalSpans{0},
+ lowQualSpansSoft{0},
+ lowQualSpansHard{0},
+ unpairedBreaksSoft{0},
+ unpairedBreaksHard{0},
+ breaksShortIndel{0},
+ lowQualBreaksSoft{0},
+ lowQualBreaksHard{0},
+ repetitiveOverhangBreaks{0},
+ pairedBreaksSoft{0},
+ pairedBreaksHard{0},
+ leftSideDiscordantCandidates{0},
+ rightSideDiscordantCandidates{0},
+ mateSupport{0},
+ leftCoverage{0},
+ rightCoverage{0},
+ totalLowMapqHardClips{0},
+ hitsInMref{-1},
+ germline{false},
+ poolLeft{},
+ poolRight{},
+ poolLowQualLeft{},
+ poolLowQualRight{} {}
+
+
+// Removes every element of objectPool for which isToRemove() returns true.
+// Marked elements are swapped to the back and popped, so the relative order
+// of the remaining elements is not preserved.
+// NOTE(review): this template is defined in Breakpoint.cpp rather than in the
+// header, so only instantiations made in this translation unit exist --
+// confirm that no other source file calls cleanUpVector.
+template
+inline void
+Breakpoint::cleanUpVector(vector &objectPool) {
+ // cerr << "cleaning up" << endl;
+ // Drop marked elements at the tail so that back() is an element to keep
+ // (or the pool is empty) before the scan starts.
+ while (!objectPool.empty() && objectPool.back().isToRemove()) {
+ objectPool.pop_back();
+ }
+ for (auto saIt = objectPool.begin(); saIt != objectPool.end(); ++saIt) {
+ if (saIt->isToRemove()) {
+ // Move the marked element to the back; the loop below pops it.
+ swap(*saIt, objectPool.back());
+ }
+ // Re-establish "back() is an element to keep" after the swap.
+ while (!objectPool.empty() && objectPool.back().isToRemove()) {
+ objectPool.pop_back();
+ }
+ }
+ // cerr << "done" << endl;
+}
void
Breakpoint::addSoftAlignment(shared_ptr alignmentIn) {
@@ -163,7 +201,7 @@ void
Breakpoint::printBreakpointReport(const string &overhangStr) {
string res{};
res.reserve(350);
- res.append(ChrConverter::indexToChr[chrIndex]).append("\t");
+ res.append(GlobalAppConfig::getInstance().getChrConverter().indexToChr[chrIndex]).append("\t");
res.append(strtk::type_to_string(pos)).append("\t");
res.append(strtk::type_to_string(pos + 1)).append("\t");
@@ -1104,12 +1142,26 @@ Breakpoint::collectMateSupportHelper(
}
Breakpoint::Breakpoint(const string &bpIn, bool ignoreOverhang)
- : covFinalized{true}, missingInfoBp{false}, chrIndex{0}, pos{0},
- normalSpans{0}, lowQualSpansSoft{0}, lowQualSpansHard{0},
- unpairedBreaksSoft{0}, unpairedBreaksHard{0}, breaksShortIndel{0},
- lowQualBreaksSoft{0}, lowQualBreaksHard{0}, repetitiveOverhangBreaks{0},
- pairedBreaksSoft{0}, pairedBreaksHard{0}, mateSupport{0}, leftCoverage{0},
- rightCoverage{0}, hitsInMref{0}, germline{false} {
+ : covFinalized{true},
+ missingInfoBp{false},
+ chrIndex{0},
+ pos{0},
+ normalSpans{0},
+ lowQualSpansSoft{0},
+ lowQualSpansHard{0},
+ unpairedBreaksSoft{0},
+ unpairedBreaksHard{0},
+ breaksShortIndel{0},
+ lowQualBreaksSoft{0},
+ lowQualBreaksHard{0},
+ repetitiveOverhangBreaks{0},
+ pairedBreaksSoft{0},
+ pairedBreaksHard{0},
+ mateSupport{0},
+ leftCoverage{0},
+ rightCoverage{0},
+ hitsInMref{0},
+ germline{false} {
auto index = 0;
vector bpChunkPositions{};
bpChunkPositions.reserve(7);
@@ -1119,7 +1171,7 @@ Breakpoint::Breakpoint(const string &bpIn, bool ignoreOverhang)
}
++index;
}
- chrIndex = ChrConverter::readChromosomeIndex(bpIn.cbegin(), '\t');
+ chrIndex = GlobalAppConfig::getInstance().getChrConverter().readChromosomeIndex(bpIn.cbegin(), '\t');
for (auto i = bpChunkPositions[0] + 1; i < bpChunkPositions[1]; ++i) {
pos = pos * 10 + (bpIn[i] - '0');
diff --git a/src/BreakpointReduced.cpp b/src/BreakpointReduced.cpp
index a0dc308..1a887ab 100644
--- a/src/BreakpointReduced.cpp
+++ b/src/BreakpointReduced.cpp
@@ -24,6 +24,7 @@
#include "Breakpoint.h"
#include "ChrConverter.h"
+#include "GlobalAppConfig.h"
#include "strtk.hpp"
#include
#include
@@ -57,17 +58,9 @@ sophia::BreakpointReduced::BreakpointReduced(const Breakpoint &tmpBp,
pairedBreaksHard{tmpBp.getPairedBreaksHard()},
mateSupport{tmpBp.getMateSupport()},
leftCoverage{tmpBp.getLeftCoverage()},
- rightCoverage{tmpBp.getRightCoverage()}, mrefHits{MrefMatch{
- -1,
- -1,
- 10000,
- {},
- }},
- germlineInfo{GermlineMatch{
- 0.0,
- 0.0,
- {},
- }},
+ rightCoverage{tmpBp.getRightCoverage()},
+ mrefHits{MrefMatch{-1, -1, 10000, {}, }},
+ germlineInfo{GermlineMatch{0.0, 0.0, {}, }},
suppAlignments{} {
for (const auto &sa : tmpBp.getDoubleSidedMatches()) {
if (sa.getChrIndex() < 1002) {
@@ -175,10 +168,11 @@ BreakpointReduced::testOverhangBasedCandidacy() const {
}
string
-BreakpointReduced::printOverhang(double germlineClonality, int numHits,
+BreakpointReduced::printOverhang(double germlineClonality,
+ int numHits,
const string &overhang) const {
string res{"##"};
- res.append(ChrConverter::indexToChr[chrIndex]).append("\t");
+ res.append(GlobalAppConfig::getInstance().getChrConverter().indexToChr[chrIndex]).append("\t");
res.append(strtk::type_to_string(pos - 1)).append("\t");
res.append(strtk::type_to_string(pos)).append("\t");
if (germlineClonality > 0.1) {
diff --git a/src/ChrConverter.cpp b/src/ChrConverter.cpp
index a42e50b..c20c238 100644
--- a/src/ChrConverter.cpp
+++ b/src/ChrConverter.cpp
@@ -1,11 +1,5 @@
/*
- * ChrConverter.cpp
- *
- * Created on: 28 Dec 2017
- * Author: Umut H. Toprak, DKFZ Heidelberg (Divisions of Theoretical
- * Bioinformatics, Bioinformatics and Omics Data Analytics and currently
- * Neuroblastoma Genomics) Copyright (C) 2018 Umut H. Toprak, Matthias
- * Schlesner, Roland Eils and DKFZ Heidelberg
+ * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,290 +13,34 @@
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
- * LICENSE: GPL
+ * LICENSE: GPL
*/
-#include
+#include "ChrConverter.h"
+#include
+#include
+#include
namespace sophia {
-using namespace std;
-
-const array ChrConverter::indexToChr{
- "0", "1", "2", "3", "4",
- "5", "6", "7", "8", "9",
- "10", "11", "12", "13", "14",
- "15", "16", "17", "18", "19",
- "20", "21", "22", "23", "24",
- "25", "26", "27", "28", "29",
- "30", "31", "32", "33", "34",
- "35", "36", "37", "38", "39",
- "X", "Y", "42", "43", "44",
- "45", "46", "47", "48", "49",
- "50", "51", "52", "53", "54",
- "55", "56", "57", "58", "59",
- "60", "61", "62", "63", "64",
- "65", "66", "67", "68", "69",
- "70", "71", "72", "73", "74",
- "75", "76", "77", "78", "79",
- "80", "81", "82", "83", "84",
- "85", "86", "87", "88", "89",
- "90", "91", "92", "93", "94",
- "95", "96", "97", "98", "99",
- "100", "101", "102", "103", "104",
- "105", "106", "107", "108", "109",
- "110", "111", "112", "113", "114",
- "115", "116", "117", "118", "119",
- "120", "121", "122", "123", "124",
- "125", "126", "127", "128", "129",
- "130", "131", "132", "133", "134",
- "135", "136", "137", "138", "139",
- "140", "141", "142", "143", "144",
- "145", "146", "147", "148", "149",
- "150", "151", "152", "153", "154",
- "155", "156", "157", "158", "159",
- "160", "161", "162", "163", "164",
- "165", "166", "167", "168", "169",
- "170", "171", "172", "173", "174",
- "175", "176", "177", "178", "179",
- "180", "181", "182", "183", "184",
- "185", "186", "187", "188", "189",
- "190", "GL000191.1", "GL000192.1", "GL000193.1", "GL000194.1",
- "GL000195.1", "GL000196.1", "GL000197.1", "GL000198.1", "GL000199.1",
- "GL000200.1", "GL000201.1", "GL000202.1", "GL000203.1", "GL000204.1",
- "GL000205.1", "GL000206.1", "GL000207.1", "GL000208.1", "GL000209.1",
- "GL000210.1", "GL000211.1", "GL000212.1", "GL000213.1", "GL000214.1",
- "GL000215.1", "GL000216.1", "GL000217.1", "GL000218.1", "GL000219.1",
- "GL000220.1", "GL000221.1", "GL000222.1", "GL000223.1", "GL000224.1",
- "GL000225.1", "GL000226.1", "GL000227.1", "GL000228.1", "GL000229.1",
- "GL000230.1", "GL000231.1", "GL000232.1", "GL000233.1", "GL000234.1",
- "GL000235.1", "GL000236.1", "GL000237.1", "GL000238.1", "GL000239.1",
- "GL000240.1", "GL000241.1", "GL000242.1", "GL000243.1", "GL000244.1",
- "GL000245.1", "GL000246.1", "GL000247.1", "GL000248.1", "GL000249.1",
- "250", "251", "252", "253", "254",
- "255", "256", "257", "258", "259",
- "260", "261", "262", "263", "264",
- "265", "266", "267", "268", "269",
- "270", "271", "272", "273", "274",
- "275", "276", "277", "278", "279",
- "280", "281", "282", "283", "284",
- "285", "286", "287", "288", "289",
- "290", "291", "292", "293", "294",
- "295", "296", "297", "298", "299",
- "300", "301", "302", "303", "304",
- "305", "306", "307", "308", "309",
- "310", "311", "312", "313", "314",
- "315", "316", "317", "318", "319",
- "320", "321", "322", "323", "324",
- "325", "326", "327", "328", "329",
- "330", "331", "332", "333", "334",
- "335", "336", "337", "338", "339",
- "340", "341", "342", "343", "344",
- "345", "346", "347", "348", "349",
- "350", "351", "352", "353", "354",
- "355", "356", "357", "358", "359",
- "360", "361", "362", "363", "364",
- "365", "366", "367", "368", "369",
- "370", "371", "372", "373", "374",
- "375", "376", "377", "378", "379",
- "380", "381", "382", "383", "384",
- "385", "386", "387", "388", "389",
- "390", "391", "392", "393", "394",
- "395", "396", "397", "398", "399",
- "400", "401", "402", "403", "404",
- "405", "406", "407", "408", "409",
- "410", "411", "412", "413", "414",
- "415", "416", "417", "418", "419",
- "420", "421", "422", "423", "424",
- "425", "426", "427", "428", "429",
- "430", "431", "432", "433", "434",
- "435", "436", "437", "438", "439",
- "440", "441", "442", "443", "444",
- "445", "446", "447", "448", "449",
- "450", "451", "452", "453", "454",
- "455", "456", "457", "458", "459",
- "460", "461", "462", "463", "464",
- "465", "466", "467", "468", "469",
- "470", "471", "472", "473", "474",
- "475", "476", "477", "478", "479",
- "480", "481", "482", "483", "484",
- "485", "486", "487", "488", "489",
- "490", "491", "492", "493", "494",
- "495", "496", "497", "498", "499",
- "500", "501", "502", "503", "504",
- "505", "506", "507", "508", "509",
- "510", "511", "512", "513", "514",
- "515", "516", "517", "518", "519",
- "520", "521", "522", "523", "524",
- "525", "526", "527", "528", "529",
- "530", "531", "532", "533", "534",
- "535", "536", "537", "538", "539",
- "540", "541", "542", "543", "544",
- "545", "546", "547", "548", "549",
- "550", "551", "552", "553", "554",
- "555", "556", "557", "558", "559",
- "560", "561", "562", "563", "564",
- "565", "566", "567", "568", "569",
- "570", "571", "572", "573", "574",
- "575", "576", "577", "578", "579",
- "580", "581", "582", "583", "584",
- "585", "586", "587", "588", "589",
- "590", "591", "592", "593", "594",
- "595", "596", "597", "598", "599",
- "600", "601", "602", "603", "604",
- "605", "606", "607", "608", "609",
- "610", "611", "612", "613", "614",
- "615", "616", "617", "618", "619",
- "620", "621", "622", "623", "624",
- "625", "626", "627", "628", "629",
- "630", "631", "632", "633", "634",
- "635", "636", "637", "638", "639",
- "640", "641", "642", "643", "644",
- "645", "646", "647", "648", "649",
- "650", "651", "652", "653", "654",
- "655", "656", "657", "658", "659",
- "660", "661", "662", "663", "664",
- "665", "666", "667", "668", "669",
- "670", "671", "672", "673", "674",
- "675", "676", "677", "678", "679",
- "680", "681", "682", "683", "684",
- "685", "686", "687", "688", "689",
- "690", "691", "692", "693", "694",
- "695", "696", "697", "698", "699",
- "700", "701", "702", "703", "704",
- "705", "706", "707", "708", "709",
- "710", "711", "712", "713", "714",
- "715", "716", "717", "718", "719",
- "720", "721", "722", "723", "724",
- "725", "726", "727", "728", "729",
- "730", "731", "732", "733", "734",
- "735", "736", "737", "738", "739",
- "740", "741", "742", "743", "744",
- "745", "746", "747", "748", "749",
- "750", "751", "752", "753", "754",
- "755", "756", "757", "758", "759",
- "760", "761", "762", "763", "764",
- "765", "766", "767", "768", "769",
- "770", "771", "772", "773", "774",
- "775", "776", "777", "778", "779",
- "780", "781", "782", "783", "784",
- "785", "786", "787", "788", "789",
- "790", "791", "792", "793", "794",
- "795", "796", "797", "798", "799",
- "800", "801", "802", "803", "804",
- "805", "806", "807", "808", "809",
- "810", "811", "812", "813", "814",
- "815", "816", "817", "818", "819",
- "820", "821", "822", "823", "824",
- "825", "826", "827", "828", "829",
- "830", "831", "832", "833", "834",
- "835", "836", "837", "838", "839",
- "840", "841", "842", "843", "844",
- "845", "846", "847", "848", "849",
- "850", "851", "852", "853", "854",
- "855", "856", "857", "858", "859",
- "860", "861", "862", "863", "864",
- "865", "866", "867", "868", "869",
- "870", "871", "872", "873", "874",
- "875", "876", "877", "878", "879",
- "880", "881", "882", "883", "884",
- "885", "886", "887", "888", "889",
- "890", "891", "892", "893", "894",
- "895", "896", "897", "898", "899",
- "900", "901", "902", "903", "904",
- "905", "906", "907", "908", "909",
- "910", "911", "912", "913", "914",
- "915", "916", "917", "918", "919",
- "920", "921", "922", "923", "924",
- "925", "926", "927", "928", "929",
- "930", "931", "932", "933", "934",
- "935", "936", "937", "938", "939",
- "940", "941", "942", "943", "944",
- "945", "946", "947", "948", "949",
- "950", "951", "952", "953", "954",
- "955", "956", "957", "958", "959",
- "960", "961", "962", "963", "964",
- "965", "966", "967", "968", "969",
- "970", "971", "972", "973", "974",
- "975", "976", "977", "978", "979",
- "980", "981", "982", "983", "984",
- "985", "986", "987", "988", "989",
- "990", "991", "992", "993", "994",
- "995", "996", "997", "998", "hs37d5",
- "NC_007605", "MT", "phiX174", "INVALID"};
+ using namespace std;
-const array ChrConverter::indexToChrCompressedMref{
- "1", "2", "3", "4", "5",
- "6", "7", "8", "9", "10",
- "11", "12", "13", "14", "15",
- "16", "17", "18", "19", "20",
- "21", "22", "X", "Y", "GL000191.1",
- "GL000192.1", "GL000193.1", "GL000194.1", "GL000195.1", "GL000196.1",
- "GL000197.1", "GL000198.1", "GL000199.1", "GL000200.1", "GL000201.1",
- "GL000202.1", "GL000203.1", "GL000204.1", "GL000205.1", "GL000206.1",
- "GL000207.1", "GL000208.1", "GL000209.1", "GL000210.1", "GL000211.1",
- "GL000212.1", "GL000213.1", "GL000214.1", "GL000215.1", "GL000216.1",
- "GL000217.1", "GL000218.1", "GL000219.1", "GL000220.1", "GL000221.1",
- "GL000222.1", "GL000223.1", "GL000224.1", "GL000225.1", "GL000226.1",
- "GL000227.1", "GL000228.1", "GL000229.1", "GL000230.1", "GL000231.1",
- "GL000232.1", "GL000233.1", "GL000234.1", "GL000235.1", "GL000236.1",
- "GL000237.1", "GL000238.1", "GL000239.1", "GL000240.1", "GL000241.1",
- "GL000242.1", "GL000243.1", "GL000244.1", "GL000245.1", "GL000246.1",
- "GL000247.1", "GL000248.1", "GL000249.1", "hs37d5", "NC_007605"};
+ ChrConverter::ChrConverter(const vector &indexToChr, // Stores the four lookup tables and validates that the paired tables have equal length.
+ const vector &indexToChrCompressedMref,
+ const vector &chrSizesCompressedMref,
+ const vector &indexConverter) :
+ indexToChr(indexToChr), // each member is initialised from the same-named parameter
+ indexToChrCompressedMref(indexToChrCompressedMref),
+ chrSizesCompressedMref(chrSizesCompressedMref),
+ indexConverter(indexConverter) {
+ if (indexToChr.size() != indexConverter.size()) // inside the body these names resolve to the parameters, which hide the members
+ throw invalid_argument(
+ "indexToChr and indexConverter must have the same size");
+ if (indexToChrCompressedMref.size() != chrSizesCompressedMref.size()) // names and sizes of the compressed mref table must pair up one-to-one
+ throw invalid_argument(
+ "indexToChrCompressedMref and chrSizesCompressedMref must have the same size");
+ }
-const array ChrConverter::indexConverter{
- -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
- 18, 19, 20, 21, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, 22, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
- 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
- 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
- 80, 81, 82, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 83, 84, -2, -2, -2};
+ ChrConverter::~ChrConverter() {} // Empty out-of-line destructor; nothing is released explicitly here.
-} /* namespace sophia */
+}
\ No newline at end of file
diff --git a/src/GlobalAppConfig.cpp b/src/GlobalAppConfig.cpp
new file mode 100644
index 0000000..ed19901
--- /dev/null
+++ b/src/GlobalAppConfig.cpp
@@ -0,0 +1,62 @@
+/*
+ * GlobalAppConfig.cpp
+ *
+ * Author: Philip R. Kensche Copyright (C) 2023 DKFZ Heidelberg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ * LICENSE: GPL
+ */
+
+#include "GlobalAppConfig.h"
+#include
+#include
+#include
+
+
+namespace sophia {
+
+ using namespace std;
+
+    GlobalAppConfig* GlobalAppConfig::instance_ = nullptr;  // singleton pointer; assigned once in init()
+    std::mutex GlobalAppConfig::mutex_;  // default-constructed: std::mutex is neither copyable nor movable, so `= std::mutex()` needs C++17 elision
+
+ const ChrConverter &GlobalAppConfig::getChrConverter() const { // Read-only access to the converter handed over at construction.
+ return *chrConverter; // dereferenced without a null check
+ }
+
+ GlobalAppConfig::GlobalAppConfig(unique_ptr chrConverter): // Takes the converter by value and keeps it.
+ chrConverter(move(chrConverter)) {} // the argument of move() is the parameter; it is moved into the same-named member
+
+ GlobalAppConfig::~GlobalAppConfig() {} // Empty destructor; members clean up through their own destructors.
+
+    GlobalAppConfig &GlobalAppConfig::init(unique_ptr chrConverter)  // One-time creation of the singleton; throws logic_error on a second call.
+    {
+        lock_guard lock(mutex_);  // serialises concurrent init() calls
+        if (GlobalAppConfig::instance_ == nullptr) {
+            GlobalAppConfig::instance_ = new GlobalAppConfig(move(chrConverter));  // kept in a static pointer; this file has no matching delete
+        } else {
+            throw logic_error("GlobalAppConfig already initialized");  // thrown by value: `throw new` would leak and bypass catch (const std::exception &)
+        }
+        return *GlobalAppConfig::instance_;
+    }
+
+    const GlobalAppConfig &GlobalAppConfig::getInstance() {  // Returns the singleton; throws logic_error if it has not been created yet.
+        if (GlobalAppConfig::instance_ == nullptr) {
+            throw logic_error("GlobalAppConfig not initialized");  // thrown by value: `throw new` would leak and bypass catch (const std::exception &)
+        } else {
+            return *GlobalAppConfig::instance_;  // note: read without taking mutex_
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/HelperFunctions.cpp b/src/HelperFunctions.cpp
index 260a144..f5bdd57 100644
--- a/src/HelperFunctions.cpp
+++ b/src/HelperFunctions.cpp
@@ -24,18 +24,18 @@
namespace sophia {
-using namespace std;
+ using namespace std;
-istream &
-error_terminating_getline(istream &is, string &str) {
- getline(is, str);
+ istream &
+ error_terminating_getline(istream &is, string &str) {
+ getline(is, str);
- if (is.bad()) {
- perror("Error reading line from file");
- exit(EXITCODE_IOERROR);
- }
+ if (is.bad()) {
+ perror("Error reading line from file");
+ exit(EXITCODE_IOERROR);
+ }
- return is;
-}
+ return is;
+ }
} /* namespace sophia */
diff --git a/src/Hg37ChrConverter.cpp b/src/Hg37ChrConverter.cpp
new file mode 100644
index 0000000..e57dddd
--- /dev/null
+++ b/src/Hg37ChrConverter.cpp
@@ -0,0 +1,377 @@
+/*
+ * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ * LICENSE: GPL
+ */
+
+#include "Hg37ChrConverter.h"
+
+#include
+#include
+
+
+namespace sophia {
+
+ using namespace std;
+
+ namespace hg37 {
+
+ static const vector indexToChr {
+ "0", "1", "2", "3", "4",
+ "5", "6", "7", "8", "9",
+ "10", "11", "12", "13", "14",
+ "15", "16", "17", "18", "19",
+ "20", "21", "22", "23", "24",
+ "25", "26", "27", "28", "29",
+ "30", "31", "32", "33", "34",
+ "35", "36", "37", "38", "39",
+ "X", "Y", "42", "43", "44",
+ "45", "46", "47", "48", "49",
+ "50", "51", "52", "53", "54",
+ "55", "56", "57", "58", "59",
+ "60", "61", "62", "63", "64",
+ "65", "66", "67", "68", "69",
+ "70", "71", "72", "73", "74",
+ "75", "76", "77", "78", "79",
+ "80", "81", "82", "83", "84",
+ "85", "86", "87", "88", "89",
+ "90", "91", "92", "93", "94",
+ "95", "96", "97", "98", "99",
+ "100", "101", "102", "103", "104",
+ "105", "106", "107", "108", "109",
+ "110", "111", "112", "113", "114",
+ "115", "116", "117", "118", "119",
+ "120", "121", "122", "123", "124",
+ "125", "126", "127", "128", "129",
+ "130", "131", "132", "133", "134",
+ "135", "136", "137", "138", "139",
+ "140", "141", "142", "143", "144",
+ "145", "146", "147", "148", "149",
+ "150", "151", "152", "153", "154",
+ "155", "156", "157", "158", "159",
+ "160", "161", "162", "163", "164",
+ "165", "166", "167", "168", "169",
+ "170", "171", "172", "173", "174",
+ "175", "176", "177", "178", "179",
+ "180", "181", "182", "183", "184",
+ "185", "186", "187", "188", "189",
+ "190", "GL000191.1", "GL000192.1", "GL000193.1", "GL000194.1",
+ "GL000195.1", "GL000196.1", "GL000197.1", "GL000198.1", "GL000199.1",
+ "GL000200.1", "GL000201.1", "GL000202.1", "GL000203.1", "GL000204.1",
+ "GL000205.1", "GL000206.1", "GL000207.1", "GL000208.1", "GL000209.1",
+ "GL000210.1", "GL000211.1", "GL000212.1", "GL000213.1", "GL000214.1",
+ "GL000215.1", "GL000216.1", "GL000217.1", "GL000218.1", "GL000219.1",
+ "GL000220.1", "GL000221.1", "GL000222.1", "GL000223.1", "GL000224.1",
+ "GL000225.1", "GL000226.1", "GL000227.1", "GL000228.1", "GL000229.1",
+ "GL000230.1", "GL000231.1", "GL000232.1", "GL000233.1", "GL000234.1",
+ "GL000235.1", "GL000236.1", "GL000237.1", "GL000238.1", "GL000239.1",
+ "GL000240.1", "GL000241.1", "GL000242.1", "GL000243.1", "GL000244.1",
+ "GL000245.1", "GL000246.1", "GL000247.1", "GL000248.1", "GL000249.1",
+ "250", "251", "252", "253", "254",
+ "255", "256", "257", "258", "259",
+ "260", "261", "262", "263", "264",
+ "265", "266", "267", "268", "269",
+ "270", "271", "272", "273", "274",
+ "275", "276", "277", "278", "279",
+ "280", "281", "282", "283", "284",
+ "285", "286", "287", "288", "289",
+ "290", "291", "292", "293", "294",
+ "295", "296", "297", "298", "299",
+ "300", "301", "302", "303", "304",
+ "305", "306", "307", "308", "309",
+ "310", "311", "312", "313", "314",
+ "315", "316", "317", "318", "319",
+ "320", "321", "322", "323", "324",
+ "325", "326", "327", "328", "329",
+ "330", "331", "332", "333", "334",
+ "335", "336", "337", "338", "339",
+ "340", "341", "342", "343", "344",
+ "345", "346", "347", "348", "349",
+ "350", "351", "352", "353", "354",
+ "355", "356", "357", "358", "359",
+ "360", "361", "362", "363", "364",
+ "365", "366", "367", "368", "369",
+ "370", "371", "372", "373", "374",
+ "375", "376", "377", "378", "379",
+ "380", "381", "382", "383", "384",
+ "385", "386", "387", "388", "389",
+ "390", "391", "392", "393", "394",
+ "395", "396", "397", "398", "399",
+ "400", "401", "402", "403", "404",
+ "405", "406", "407", "408", "409",
+ "410", "411", "412", "413", "414",
+ "415", "416", "417", "418", "419",
+ "420", "421", "422", "423", "424",
+ "425", "426", "427", "428", "429",
+ "430", "431", "432", "433", "434",
+ "435", "436", "437", "438", "439",
+ "440", "441", "442", "443", "444",
+ "445", "446", "447", "448", "449",
+ "450", "451", "452", "453", "454",
+ "455", "456", "457", "458", "459",
+ "460", "461", "462", "463", "464",
+ "465", "466", "467", "468", "469",
+ "470", "471", "472", "473", "474",
+ "475", "476", "477", "478", "479",
+ "480", "481", "482", "483", "484",
+ "485", "486", "487", "488", "489",
+ "490", "491", "492", "493", "494",
+ "495", "496", "497", "498", "499",
+ "500", "501", "502", "503", "504",
+ "505", "506", "507", "508", "509",
+ "510", "511", "512", "513", "514",
+ "515", "516", "517", "518", "519",
+ "520", "521", "522", "523", "524",
+ "525", "526", "527", "528", "529",
+ "530", "531", "532", "533", "534",
+ "535", "536", "537", "538", "539",
+ "540", "541", "542", "543", "544",
+ "545", "546", "547", "548", "549",
+ "550", "551", "552", "553", "554",
+ "555", "556", "557", "558", "559",
+ "560", "561", "562", "563", "564",
+ "565", "566", "567", "568", "569",
+ "570", "571", "572", "573", "574",
+ "575", "576", "577", "578", "579",
+ "580", "581", "582", "583", "584",
+ "585", "586", "587", "588", "589",
+ "590", "591", "592", "593", "594",
+ "595", "596", "597", "598", "599",
+ "600", "601", "602", "603", "604",
+ "605", "606", "607", "608", "609",
+ "610", "611", "612", "613", "614",
+ "615", "616", "617", "618", "619",
+ "620", "621", "622", "623", "624",
+ "625", "626", "627", "628", "629",
+ "630", "631", "632", "633", "634",
+ "635", "636", "637", "638", "639",
+ "640", "641", "642", "643", "644",
+ "645", "646", "647", "648", "649",
+ "650", "651", "652", "653", "654",
+ "655", "656", "657", "658", "659",
+ "660", "661", "662", "663", "664",
+ "665", "666", "667", "668", "669",
+ "670", "671", "672", "673", "674",
+ "675", "676", "677", "678", "679",
+ "680", "681", "682", "683", "684",
+ "685", "686", "687", "688", "689",
+ "690", "691", "692", "693", "694",
+ "695", "696", "697", "698", "699",
+ "700", "701", "702", "703", "704",
+ "705", "706", "707", "708", "709",
+ "710", "711", "712", "713", "714",
+ "715", "716", "717", "718", "719",
+ "720", "721", "722", "723", "724",
+ "725", "726", "727", "728", "729",
+ "730", "731", "732", "733", "734",
+ "735", "736", "737", "738", "739",
+ "740", "741", "742", "743", "744",
+ "745", "746", "747", "748", "749",
+ "750", "751", "752", "753", "754",
+ "755", "756", "757", "758", "759",
+ "760", "761", "762", "763", "764",
+ "765", "766", "767", "768", "769",
+ "770", "771", "772", "773", "774",
+ "775", "776", "777", "778", "779",
+ "780", "781", "782", "783", "784",
+ "785", "786", "787", "788", "789",
+ "790", "791", "792", "793", "794",
+ "795", "796", "797", "798", "799",
+ "800", "801", "802", "803", "804",
+ "805", "806", "807", "808", "809",
+ "810", "811", "812", "813", "814",
+ "815", "816", "817", "818", "819",
+ "820", "821", "822", "823", "824",
+ "825", "826", "827", "828", "829",
+ "830", "831", "832", "833", "834",
+ "835", "836", "837", "838", "839",
+ "840", "841", "842", "843", "844",
+ "845", "846", "847", "848", "849",
+ "850", "851", "852", "853", "854",
+ "855", "856", "857", "858", "859",
+ "860", "861", "862", "863", "864",
+ "865", "866", "867", "868", "869",
+ "870", "871", "872", "873", "874",
+ "875", "876", "877", "878", "879",
+ "880", "881", "882", "883", "884",
+ "885", "886", "887", "888", "889",
+ "890", "891", "892", "893", "894",
+ "895", "896", "897", "898", "899",
+ "900", "901", "902", "903", "904",
+ "905", "906", "907", "908", "909",
+ "910", "911", "912", "913", "914",
+ "915", "916", "917", "918", "919",
+ "920", "921", "922", "923", "924",
+ "925", "926", "927", "928", "929",
+ "930", "931", "932", "933", "934",
+ "935", "936", "937", "938", "939",
+ "940", "941", "942", "943", "944",
+ "945", "946", "947", "948", "949",
+ "950", "951", "952", "953", "954",
+ "955", "956", "957", "958", "959",
+ "960", "961", "962", "963", "964",
+ "965", "966", "967", "968", "969",
+ "970", "971", "972", "973", "974",
+ "975", "976", "977", "978", "979",
+ "980", "981", "982", "983", "984",
+ "985", "986", "987", "988", "989",
+ "990", "991", "992", "993", "994",
+ "995", "996", "997", "998", "hs37d5",
+ "NC_007605", "MT", "phiX174", "INVALID"};
+
+ static const vector indexToChrCompressedMref {
+ "1", "2", "3", "4", "5",
+ "6", "7", "8", "9", "10",
+ "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20",
+ "21", "22", "X", "Y", "GL000191.1",
+ "GL000192.1", "GL000193.1", "GL000194.1", "GL000195.1", "GL000196.1",
+ "GL000197.1", "GL000198.1", "GL000199.1", "GL000200.1", "GL000201.1",
+ "GL000202.1", "GL000203.1", "GL000204.1", "GL000205.1", "GL000206.1",
+ "GL000207.1", "GL000208.1", "GL000209.1", "GL000210.1", "GL000211.1",
+ "GL000212.1", "GL000213.1", "GL000214.1", "GL000215.1", "GL000216.1",
+ "GL000217.1", "GL000218.1", "GL000219.1", "GL000220.1", "GL000221.1",
+ "GL000222.1", "GL000223.1", "GL000224.1", "GL000225.1", "GL000226.1",
+ "GL000227.1", "GL000228.1", "GL000229.1", "GL000230.1", "GL000231.1",
+ "GL000232.1", "GL000233.1", "GL000234.1", "GL000235.1", "GL000236.1",
+ "GL000237.1", "GL000238.1", "GL000239.1", "GL000240.1", "GL000241.1",
+ "GL000242.1", "GL000243.1", "GL000244.1", "GL000245.1", "GL000246.1",
+ "GL000247.1", "GL000248.1", "GL000249.1", "hs37d5", "NC_007605"};
+
+ static const vector chrSizesCompressedMref {
+ 249250622, 243199374, 198022431, 191154277, 180915261, 171115068,
+ 159138664, 146364023, 141213432, 135534748, 135006517, 133851896,
+ 115169879, 107349541, 102531393, 90354754, 81195211, 78077249,
+ 59128984, 63025521, 48129896, 51304567, 155270561, 59373567,
+ 106434, 547497, 189790, 191470, 182897, 38915,
+ 37176, 90086, 169875, 187036, 36149, 40104,
+ 37499, 81311, 174589, 41002, 4263, 92690,
+ 159170, 27683, 166567, 186859, 164240, 137719,
+ 172546, 172295, 172150, 161148, 179199, 161803,
+ 155398, 186862, 180456, 179694, 211174, 15009,
+ 128375, 129121, 19914, 43692, 27387, 40653,
+ 45942, 40532, 34475, 41935, 45868, 39940,
+ 33825, 41934, 42153, 43524, 43342, 39930,
+ 36652, 38155, 36423, 39787, 38503, 35477944,
+ 171824};
+
+ static const vector indexConverter {
+ -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, 22, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 83, 84, -2, -2, -2};
+
+ } /* namespace hg37 */
+
+ const string Hg37ChrConverter::assembly_name = "hg37"; // identifier string for this converter's assembly
+
+ Hg37ChrConverter::Hg37ChrConverter() // Builds the base converter from the hg37 tables defined above in this file.
+ : ChrConverter(hg37::indexToChr,
+ hg37::indexToChrCompressedMref,
+ hg37::chrSizesCompressedMref,
+ hg37::indexConverter) {}
+
+    /* Parses the chromosome name that begins at `startIt` and ends before `stopChar`, and returns
+       its position in hg37::indexToChr. Unrecognised names map to 1003 ("INVALID"). */
+    int Hg37ChrConverter::readChromosomeIndex(string::const_iterator startIt,
+                                              char stopChar) const {
+        int chrIndex {0};
+        if (isdigit(static_cast<unsigned char>(*startIt))) {  // cast first: isdigit() on a negative char value is undefined
+            for (auto chr_cit = startIt; *chr_cit != stopChar; ++chr_cit) {  // no end-of-string check: runs until stopChar is seen
+                chrIndex = chrIndex * 10 + (*chr_cit - '0');  // numeric names are their own index
+            }
+            return chrIndex;
+        } else {
+            switch (*startIt) {  // non-numeric names are told apart by their first character only
+                case 'h':  // "hs37d5"
+                    return 999;
+                case 'X':
+                    return 40;
+                case 'G':  // e.g. "GL000191.1" -> 191
+                    for (auto cit = next(startIt, 2); *cit != '.'; ++cit) {  // skip "GL", read digits up to the '.'
+                        chrIndex = 10 * chrIndex + *cit - '0';
+                    }
+                    return chrIndex;
+                case 'Y':
+                    return 41;
+                case 'M':  // only "MT" is valid
+                    ++startIt;
+                    if (*startIt == 'T') {
+                        return 1001;
+                    } else {
+                        return 1003;
+                    }
+                case 'N':  // "NC_007605"
+                    return 1000;
+                case 'p':  // "phiX174"
+                    return 1002;
+                default:  // "INVALID"
+                    return 1003;
+            }
+        }
+        return 0;  // not reached: every branch above returns
+    }
+
+
+} /* namespace sophia */
diff --git a/src/Hg38ChrConverter.cpp b/src/Hg38ChrConverter.cpp
new file mode 100644
index 0000000..2ca7ade
--- /dev/null
+++ b/src/Hg38ChrConverter.cpp
@@ -0,0 +1,377 @@
+/*
+ * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ * LICENSE: GPL
+ */
+
+#include "Hg38ChrConverter.h"
+
+#include
+#include
+
+
+namespace sophia {
+
+ using namespace std;
+
+ namespace hg38 {
+
+ static const vector indexToChr {
+ "0", "1", "2", "3", "4",
+ "5", "6", "7", "8", "9",
+ "10", "11", "12", "13", "14",
+ "15", "16", "17", "18", "19",
+ "20", "21", "22", "23", "24",
+ "25", "26", "27", "28", "29",
+ "30", "31", "32", "33", "34",
+ "35", "36", "37", "38", "39",
+ "X", "Y", "42", "43", "44",
+ "45", "46", "47", "48", "49",
+ "50", "51", "52", "53", "54",
+ "55", "56", "57", "58", "59",
+ "60", "61", "62", "63", "64",
+ "65", "66", "67", "68", "69",
+ "70", "71", "72", "73", "74",
+ "75", "76", "77", "78", "79",
+ "80", "81", "82", "83", "84",
+ "85", "86", "87", "88", "89",
+ "90", "91", "92", "93", "94",
+ "95", "96", "97", "98", "99",
+ "100", "101", "102", "103", "104",
+ "105", "106", "107", "108", "109",
+ "110", "111", "112", "113", "114",
+ "115", "116", "117", "118", "119",
+ "120", "121", "122", "123", "124",
+ "125", "126", "127", "128", "129",
+ "130", "131", "132", "133", "134",
+ "135", "136", "137", "138", "139",
+ "140", "141", "142", "143", "144",
+ "145", "146", "147", "148", "149",
+ "150", "151", "152", "153", "154",
+ "155", "156", "157", "158", "159",
+ "160", "161", "162", "163", "164",
+ "165", "166", "167", "168", "169",
+ "170", "171", "172", "173", "174",
+ "175", "176", "177", "178", "179",
+ "180", "181", "182", "183", "184",
+ "185", "186", "187", "188", "189",
+ "190", "GL000191.1", "GL000192.1", "GL000193.1", "GL000194.1",
+ "GL000195.1", "GL000196.1", "GL000197.1", "GL000198.1", "GL000199.1",
+ "GL000200.1", "GL000201.1", "GL000202.1", "GL000203.1", "GL000204.1",
+ "GL000205.1", "GL000206.1", "GL000207.1", "GL000208.1", "GL000209.1",
+ "GL000210.1", "GL000211.1", "GL000212.1", "GL000213.1", "GL000214.1",
+ "GL000215.1", "GL000216.1", "GL000217.1", "GL000218.1", "GL000219.1",
+ "GL000220.1", "GL000221.1", "GL000222.1", "GL000223.1", "GL000224.1",
+ "GL000225.1", "GL000226.1", "GL000227.1", "GL000228.1", "GL000229.1",
+ "GL000230.1", "GL000231.1", "GL000232.1", "GL000233.1", "GL000234.1",
+ "GL000235.1", "GL000236.1", "GL000237.1", "GL000238.1", "GL000239.1",
+ "GL000240.1", "GL000241.1", "GL000242.1", "GL000243.1", "GL000244.1",
+ "GL000245.1", "GL000246.1", "GL000247.1", "GL000248.1", "GL000249.1",
+ "250", "251", "252", "253", "254",
+ "255", "256", "257", "258", "259",
+ "260", "261", "262", "263", "264",
+ "265", "266", "267", "268", "269",
+ "270", "271", "272", "273", "274",
+ "275", "276", "277", "278", "279",
+ "280", "281", "282", "283", "284",
+ "285", "286", "287", "288", "289",
+ "290", "291", "292", "293", "294",
+ "295", "296", "297", "298", "299",
+ "300", "301", "302", "303", "304",
+ "305", "306", "307", "308", "309",
+ "310", "311", "312", "313", "314",
+ "315", "316", "317", "318", "319",
+ "320", "321", "322", "323", "324",
+ "325", "326", "327", "328", "329",
+ "330", "331", "332", "333", "334",
+ "335", "336", "337", "338", "339",
+ "340", "341", "342", "343", "344",
+ "345", "346", "347", "348", "349",
+ "350", "351", "352", "353", "354",
+ "355", "356", "357", "358", "359",
+ "360", "361", "362", "363", "364",
+ "365", "366", "367", "368", "369",
+ "370", "371", "372", "373", "374",
+ "375", "376", "377", "378", "379",
+ "380", "381", "382", "383", "384",
+ "385", "386", "387", "388", "389",
+ "390", "391", "392", "393", "394",
+ "395", "396", "397", "398", "399",
+ "400", "401", "402", "403", "404",
+ "405", "406", "407", "408", "409",
+ "410", "411", "412", "413", "414",
+ "415", "416", "417", "418", "419",
+ "420", "421", "422", "423", "424",
+ "425", "426", "427", "428", "429",
+ "430", "431", "432", "433", "434",
+ "435", "436", "437", "438", "439",
+ "440", "441", "442", "443", "444",
+ "445", "446", "447", "448", "449",
+ "450", "451", "452", "453", "454",
+ "455", "456", "457", "458", "459",
+ "460", "461", "462", "463", "464",
+ "465", "466", "467", "468", "469",
+ "470", "471", "472", "473", "474",
+ "475", "476", "477", "478", "479",
+ "480", "481", "482", "483", "484",
+ "485", "486", "487", "488", "489",
+ "490", "491", "492", "493", "494",
+ "495", "496", "497", "498", "499",
+ "500", "501", "502", "503", "504",
+ "505", "506", "507", "508", "509",
+ "510", "511", "512", "513", "514",
+ "515", "516", "517", "518", "519",
+ "520", "521", "522", "523", "524",
+ "525", "526", "527", "528", "529",
+ "530", "531", "532", "533", "534",
+ "535", "536", "537", "538", "539",
+ "540", "541", "542", "543", "544",
+ "545", "546", "547", "548", "549",
+ "550", "551", "552", "553", "554",
+ "555", "556", "557", "558", "559",
+ "560", "561", "562", "563", "564",
+ "565", "566", "567", "568", "569",
+ "570", "571", "572", "573", "574",
+ "575", "576", "577", "578", "579",
+ "580", "581", "582", "583", "584",
+ "585", "586", "587", "588", "589",
+ "590", "591", "592", "593", "594",
+ "595", "596", "597", "598", "599",
+ "600", "601", "602", "603", "604",
+ "605", "606", "607", "608", "609",
+ "610", "611", "612", "613", "614",
+ "615", "616", "617", "618", "619",
+ "620", "621", "622", "623", "624",
+ "625", "626", "627", "628", "629",
+ "630", "631", "632", "633", "634",
+ "635", "636", "637", "638", "639",
+ "640", "641", "642", "643", "644",
+ "645", "646", "647", "648", "649",
+ "650", "651", "652", "653", "654",
+ "655", "656", "657", "658", "659",
+ "660", "661", "662", "663", "664",
+ "665", "666", "667", "668", "669",
+ "670", "671", "672", "673", "674",
+ "675", "676", "677", "678", "679",
+ "680", "681", "682", "683", "684",
+ "685", "686", "687", "688", "689",
+ "690", "691", "692", "693", "694",
+ "695", "696", "697", "698", "699",
+ "700", "701", "702", "703", "704",
+ "705", "706", "707", "708", "709",
+ "710", "711", "712", "713", "714",
+ "715", "716", "717", "718", "719",
+ "720", "721", "722", "723", "724",
+ "725", "726", "727", "728", "729",
+ "730", "731", "732", "733", "734",
+ "735", "736", "737", "738", "739",
+ "740", "741", "742", "743", "744",
+ "745", "746", "747", "748", "749",
+ "750", "751", "752", "753", "754",
+ "755", "756", "757", "758", "759",
+ "760", "761", "762", "763", "764",
+ "765", "766", "767", "768", "769",
+ "770", "771", "772", "773", "774",
+ "775", "776", "777", "778", "779",
+ "780", "781", "782", "783", "784",
+ "785", "786", "787", "788", "789",
+ "790", "791", "792", "793", "794",
+ "795", "796", "797", "798", "799",
+ "800", "801", "802", "803", "804",
+ "805", "806", "807", "808", "809",
+ "810", "811", "812", "813", "814",
+ "815", "816", "817", "818", "819",
+ "820", "821", "822", "823", "824",
+ "825", "826", "827", "828", "829",
+ "830", "831", "832", "833", "834",
+ "835", "836", "837", "838", "839",
+ "840", "841", "842", "843", "844",
+ "845", "846", "847", "848", "849",
+ "850", "851", "852", "853", "854",
+ "855", "856", "857", "858", "859",
+ "860", "861", "862", "863", "864",
+ "865", "866", "867", "868", "869",
+ "870", "871", "872", "873", "874",
+ "875", "876", "877", "878", "879",
+ "880", "881", "882", "883", "884",
+ "885", "886", "887", "888", "889",
+ "890", "891", "892", "893", "894",
+ "895", "896", "897", "898", "899",
+ "900", "901", "902", "903", "904",
+ "905", "906", "907", "908", "909",
+ "910", "911", "912", "913", "914",
+ "915", "916", "917", "918", "919",
+ "920", "921", "922", "923", "924",
+ "925", "926", "927", "928", "929",
+ "930", "931", "932", "933", "934",
+ "935", "936", "937", "938", "939",
+ "940", "941", "942", "943", "944",
+ "945", "946", "947", "948", "949",
+ "950", "951", "952", "953", "954",
+ "955", "956", "957", "958", "959",
+ "960", "961", "962", "963", "964",
+ "965", "966", "967", "968", "969",
+ "970", "971", "972", "973", "974",
+ "975", "976", "977", "978", "979",
+ "980", "981", "982", "983", "984",
+ "985", "986", "987", "988", "989",
+ "990", "991", "992", "993", "994",
+ "995", "996", "997", "998", "hs37d5",
+ "NC_007605", "MT", "phiX174", "INVALID"};
+
+ static const vector indexToChrCompressedMref { // names of the 85 sequences of the compressed mref, in compressed-index order
+ "1", "2", "3", "4", "5",
+ "6", "7", "8", "9", "10",
+ "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20",
+ "21", "22", "X", "Y", "GL000191.1",
+ "GL000192.1", "GL000193.1", "GL000194.1", "GL000195.1", "GL000196.1",
+ "GL000197.1", "GL000198.1", "GL000199.1", "GL000200.1", "GL000201.1",
+ "GL000202.1", "GL000203.1", "GL000204.1", "GL000205.1", "GL000206.1",
+ "GL000207.1", "GL000208.1", "GL000209.1", "GL000210.1", "GL000211.1",
+ "GL000212.1", "GL000213.1", "GL000214.1", "GL000215.1", "GL000216.1",
+ "GL000217.1", "GL000218.1", "GL000219.1", "GL000220.1", "GL000221.1",
+ "GL000222.1", "GL000223.1", "GL000224.1", "GL000225.1", "GL000226.1",
+ "GL000227.1", "GL000228.1", "GL000229.1", "GL000230.1", "GL000231.1",
+ "GL000232.1", "GL000233.1", "GL000234.1", "GL000235.1", "GL000236.1",
+ "GL000237.1", "GL000238.1", "GL000239.1", "GL000240.1", "GL000241.1",
+ "GL000242.1", "GL000243.1", "GL000244.1", "GL000245.1", "GL000246.1",
+ "GL000247.1", "GL000248.1", "GL000249.1", "hs37d5", "NC_007605"};
+
+ static const vector chrSizesCompressedMref { // sequence sizes; entry i belongs to indexToChrCompressedMref[i] (presumably base pairs - TODO confirm)
+ 249250622, 243199374, 198022431, 191154277, 180915261, 171115068,
+ 159138664, 146364023, 141213432, 135534748, 135006517, 133851896,
+ 115169879, 107349541, 102531393, 90354754, 81195211, 78077249,
+ 59128984, 63025521, 48129896, 51304567, 155270561, 59373567,
+ 106434, 547497, 189790, 191470, 182897, 38915,
+ 37176, 90086, 169875, 187036, 36149, 40104,
+ 37499, 81311, 174589, 41002, 4263, 92690,
+ 159170, 27683, 166567, 186859, 164240, 137719,
+ 172546, 172295, 172150, 161148, 179199, 161803,
+ 155398, 186862, 180456, 179694, 211174, 15009,
+ 128375, 129121, 19914, 43692, 27387, 40653,
+ 45942, 40532, 34475, 41935, 45868, 39940,
+ 33825, 41934, 42153, 43524, 43342, 39930,
+ 36652, 38155, 36423, 39787, 38503, 35477944,
+ 171824};
+
+ static const vector indexConverter { // maps an indexToChr index to its compressed-mref index; -2 = no entry in the compressed tables
+ -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, 22, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 83, 84, -2, -2, -2};
+
+ } /* namespace hg38 */
+
+ const string Hg38ChrConverter::assembly_name = "hg38"; // name of the assembly handled by this converter
+
+ Hg38ChrConverter::Hg38ChrConverter() // does nothing but forward the hg38 lookup tables to the base class
+ : ChrConverter(hg38::indexToChr,
+ hg38::indexToChrCompressedMref,
+ hg38::chrSizesCompressedMref,
+ hg38::indexConverter) {}
+
+    /* Parses the chromosome name starting at `startIt` and returns its index into
+       ChrConverter::indexToChr: digit-initial names are read as a decimal number up
+       to `stopChar`, "GL..." contigs use the digits between the prefix and the '.',
+       other names are recognised by their first letter(s); unparsable names yield
+       1003 ("INVALID"). The terminator must be reachable from `startIt`.
+       NOTE(review): same name scheme as Hg37ChrConverter - confirm for hg38 inputs. */
+    int Hg38ChrConverter::readChromosomeIndex(string::const_iterator startIt,
+                                              char stopChar) const {
+        constexpr int invalidIndex {1003};
+        // Reads digits up to `stop`. Anything that is not a digit, or a value beyond the
+        // table, gives invalidIndex instead of a garbage index (or a signed overflow).
+        const auto readNumber = [](string::const_iterator cit, char stop) -> int {
+            int value {0};
+            for (; *cit != stop; ++cit) {
+                // <cctype> functions are only defined for unsigned char values (and EOF).
+                if (!isdigit(static_cast<unsigned char>(*cit))) {
+                    return invalidIndex;
+                }
+                value = 10 * value + (*cit - '0');
+                if (value > invalidIndex) {
+                    return invalidIndex;
+                }
+            }
+            return value;
+        };
+        if (isdigit(static_cast<unsigned char>(*startIt))) {
+            return readNumber(startIt, stopChar);
+        }
+        // Everything else is identified by its first letter (plus the second one for "MT").
+        switch (*startIt) {
+        case 'h': return 999;   // hs37d5
+        case 'X': return 40;
+        case 'Y': return 41;
+        case 'G': return readNumber(next(startIt, 2), '.');   // "GL000191.1" -> 191
+        case 'M': return *next(startIt) == 'T' ? 1001 : invalidIndex;   // "MT"
+        case 'N': return 1000;  // NC_007605
+        case 'p': return 1002;  // phiX174
+        default:  return invalidIndex;
+        }
+    }
+
+
+} /* namespace sophia */
diff --git a/src/IndexTable.cpp b/src/IndexTable.cpp
new file mode 100644
index 0000000..50a7f06
--- /dev/null
+++ b/src/IndexTable.cpp
@@ -0,0 +1,308 @@
+/*
+ * IndexTable.cpp
+ *
+ * Created on: 28 Dec 2017
+ * Author: Umut H. Toprak, DKFZ Heidelberg (Divisions of Theoretical
+ * Bioinformatics, Bioinformatics and Omics Data Analytics and currently
+ * Neuroblastoma Genomics) Copyright (C) 2018 Umut H. Toprak, Matthias
+ * Schlesner, Roland Eils and DKFZ Heidelberg
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ * LICENSE: GPL
+ */
+
+#include
+
+namespace sophia {
+
+ using namespace std;
+
+ const array Hg37IndexTable::indexToChr {
+ "0", "1", "2", "3", "4",
+ "5", "6", "7", "8", "9",
+ "10", "11", "12", "13", "14",
+ "15", "16", "17", "18", "19",
+ "20", "21", "22", "23", "24",
+ "25", "26", "27", "28", "29",
+ "30", "31", "32", "33", "34",
+ "35", "36", "37", "38", "39",
+ "X", "Y", "42", "43", "44",
+ "45", "46", "47", "48", "49",
+ "50", "51", "52", "53", "54",
+ "55", "56", "57", "58", "59",
+ "60", "61", "62", "63", "64",
+ "65", "66", "67", "68", "69",
+ "70", "71", "72", "73", "74",
+ "75", "76", "77", "78", "79",
+ "80", "81", "82", "83", "84",
+ "85", "86", "87", "88", "89",
+ "90", "91", "92", "93", "94",
+ "95", "96", "97", "98", "99",
+ "100", "101", "102", "103", "104",
+ "105", "106", "107", "108", "109",
+ "110", "111", "112", "113", "114",
+ "115", "116", "117", "118", "119",
+ "120", "121", "122", "123", "124",
+ "125", "126", "127", "128", "129",
+ "130", "131", "132", "133", "134",
+ "135", "136", "137", "138", "139",
+ "140", "141", "142", "143", "144",
+ "145", "146", "147", "148", "149",
+ "150", "151", "152", "153", "154",
+ "155", "156", "157", "158", "159",
+ "160", "161", "162", "163", "164",
+ "165", "166", "167", "168", "169",
+ "170", "171", "172", "173", "174",
+ "175", "176", "177", "178", "179",
+ "180", "181", "182", "183", "184",
+ "185", "186", "187", "188", "189",
+ "190", "GL000191.1", "GL000192.1", "GL000193.1", "GL000194.1",
+ "GL000195.1", "GL000196.1", "GL000197.1", "GL000198.1", "GL000199.1",
+ "GL000200.1", "GL000201.1", "GL000202.1", "GL000203.1", "GL000204.1",
+ "GL000205.1", "GL000206.1", "GL000207.1", "GL000208.1", "GL000209.1",
+ "GL000210.1", "GL000211.1", "GL000212.1", "GL000213.1", "GL000214.1",
+ "GL000215.1", "GL000216.1", "GL000217.1", "GL000218.1", "GL000219.1",
+ "GL000220.1", "GL000221.1", "GL000222.1", "GL000223.1", "GL000224.1",
+ "GL000225.1", "GL000226.1", "GL000227.1", "GL000228.1", "GL000229.1",
+ "GL000230.1", "GL000231.1", "GL000232.1", "GL000233.1", "GL000234.1",
+ "GL000235.1", "GL000236.1", "GL000237.1", "GL000238.1", "GL000239.1",
+ "GL000240.1", "GL000241.1", "GL000242.1", "GL000243.1", "GL000244.1",
+ "GL000245.1", "GL000246.1", "GL000247.1", "GL000248.1", "GL000249.1",
+ "250", "251", "252", "253", "254",
+ "255", "256", "257", "258", "259",
+ "260", "261", "262", "263", "264",
+ "265", "266", "267", "268", "269",
+ "270", "271", "272", "273", "274",
+ "275", "276", "277", "278", "279",
+ "280", "281", "282", "283", "284",
+ "285", "286", "287", "288", "289",
+ "290", "291", "292", "293", "294",
+ "295", "296", "297", "298", "299",
+ "300", "301", "302", "303", "304",
+ "305", "306", "307", "308", "309",
+ "310", "311", "312", "313", "314",
+ "315", "316", "317", "318", "319",
+ "320", "321", "322", "323", "324",
+ "325", "326", "327", "328", "329",
+ "330", "331", "332", "333", "334",
+ "335", "336", "337", "338", "339",
+ "340", "341", "342", "343", "344",
+ "345", "346", "347", "348", "349",
+ "350", "351", "352", "353", "354",
+ "355", "356", "357", "358", "359",
+ "360", "361", "362", "363", "364",
+ "365", "366", "367", "368", "369",
+ "370", "371", "372", "373", "374",
+ "375", "376", "377", "378", "379",
+ "380", "381", "382", "383", "384",
+ "385", "386", "387", "388", "389",
+ "390", "391", "392", "393", "394",
+ "395", "396", "397", "398", "399",
+ "400", "401", "402", "403", "404",
+ "405", "406", "407", "408", "409",
+ "410", "411", "412", "413", "414",
+ "415", "416", "417", "418", "419",
+ "420", "421", "422", "423", "424",
+ "425", "426", "427", "428", "429",
+ "430", "431", "432", "433", "434",
+ "435", "436", "437", "438", "439",
+ "440", "441", "442", "443", "444",
+ "445", "446", "447", "448", "449",
+ "450", "451", "452", "453", "454",
+ "455", "456", "457", "458", "459",
+ "460", "461", "462", "463", "464",
+ "465", "466", "467", "468", "469",
+ "470", "471", "472", "473", "474",
+ "475", "476", "477", "478", "479",
+ "480", "481", "482", "483", "484",
+ "485", "486", "487", "488", "489",
+ "490", "491", "492", "493", "494",
+ "495", "496", "497", "498", "499",
+ "500", "501", "502", "503", "504",
+ "505", "506", "507", "508", "509",
+ "510", "511", "512", "513", "514",
+ "515", "516", "517", "518", "519",
+ "520", "521", "522", "523", "524",
+ "525", "526", "527", "528", "529",
+ "530", "531", "532", "533", "534",
+ "535", "536", "537", "538", "539",
+ "540", "541", "542", "543", "544",
+ "545", "546", "547", "548", "549",
+ "550", "551", "552", "553", "554",
+ "555", "556", "557", "558", "559",
+ "560", "561", "562", "563", "564",
+ "565", "566", "567", "568", "569",
+ "570", "571", "572", "573", "574",
+ "575", "576", "577", "578", "579",
+ "580", "581", "582", "583", "584",
+ "585", "586", "587", "588", "589",
+ "590", "591", "592", "593", "594",
+ "595", "596", "597", "598", "599",
+ "600", "601", "602", "603", "604",
+ "605", "606", "607", "608", "609",
+ "610", "611", "612", "613", "614",
+ "615", "616", "617", "618", "619",
+ "620", "621", "622", "623", "624",
+ "625", "626", "627", "628", "629",
+ "630", "631", "632", "633", "634",
+ "635", "636", "637", "638", "639",
+ "640", "641", "642", "643", "644",
+ "645", "646", "647", "648", "649",
+ "650", "651", "652", "653", "654",
+ "655", "656", "657", "658", "659",
+ "660", "661", "662", "663", "664",
+ "665", "666", "667", "668", "669",
+ "670", "671", "672", "673", "674",
+ "675", "676", "677", "678", "679",
+ "680", "681", "682", "683", "684",
+ "685", "686", "687", "688", "689",
+ "690", "691", "692", "693", "694",
+ "695", "696", "697", "698", "699",
+ "700", "701", "702", "703", "704",
+ "705", "706", "707", "708", "709",
+ "710", "711", "712", "713", "714",
+ "715", "716", "717", "718", "719",
+ "720", "721", "722", "723", "724",
+ "725", "726", "727", "728", "729",
+ "730", "731", "732", "733", "734",
+ "735", "736", "737", "738", "739",
+ "740", "741", "742", "743", "744",
+ "745", "746", "747", "748", "749",
+ "750", "751", "752", "753", "754",
+ "755", "756", "757", "758", "759",
+ "760", "761", "762", "763", "764",
+ "765", "766", "767", "768", "769",
+ "770", "771", "772", "773", "774",
+ "775", "776", "777", "778", "779",
+ "780", "781", "782", "783", "784",
+ "785", "786", "787", "788", "789",
+ "790", "791", "792", "793", "794",
+ "795", "796", "797", "798", "799",
+ "800", "801", "802", "803", "804",
+ "805", "806", "807", "808", "809",
+ "810", "811", "812", "813", "814",
+ "815", "816", "817", "818", "819",
+ "820", "821", "822", "823", "824",
+ "825", "826", "827", "828", "829",
+ "830", "831", "832", "833", "834",
+ "835", "836", "837", "838", "839",
+ "840", "841", "842", "843", "844",
+ "845", "846", "847", "848", "849",
+ "850", "851", "852", "853", "854",
+ "855", "856", "857", "858", "859",
+ "860", "861", "862", "863", "864",
+ "865", "866", "867", "868", "869",
+ "870", "871", "872", "873", "874",
+ "875", "876", "877", "878", "879",
+ "880", "881", "882", "883", "884",
+ "885", "886", "887", "888", "889",
+ "890", "891", "892", "893", "894",
+ "895", "896", "897", "898", "899",
+ "900", "901", "902", "903", "904",
+ "905", "906", "907", "908", "909",
+ "910", "911", "912", "913", "914",
+ "915", "916", "917", "918", "919",
+ "920", "921", "922", "923", "924",
+ "925", "926", "927", "928", "929",
+ "930", "931", "932", "933", "934",
+ "935", "936", "937", "938", "939",
+ "940", "941", "942", "943", "944",
+ "945", "946", "947", "948", "949",
+ "950", "951", "952", "953", "954",
+ "955", "956", "957", "958", "959",
+ "960", "961", "962", "963", "964",
+ "965", "966", "967", "968", "969",
+ "970", "971", "972", "973", "974",
+ "975", "976", "977", "978", "979",
+ "980", "981", "982", "983", "984",
+ "985", "986", "987", "988", "989",
+ "990", "991", "992", "993", "994",
+ "995", "996", "997", "998", "hs37d5",
+ "NC_007605", "MT", "phiX174", "INVALID"};
+
+ const array Hg37IndexTable::indexToChrCompressedMref {
+ "1", "2", "3", "4", "5",
+ "6", "7", "8", "9", "10",
+ "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20",
+ "21", "22", "X", "Y", "GL000191.1",
+ "GL000192.1", "GL000193.1", "GL000194.1", "GL000195.1", "GL000196.1",
+ "GL000197.1", "GL000198.1", "GL000199.1", "GL000200.1", "GL000201.1",
+ "GL000202.1", "GL000203.1", "GL000204.1", "GL000205.1", "GL000206.1",
+ "GL000207.1", "GL000208.1", "GL000209.1", "GL000210.1", "GL000211.1",
+ "GL000212.1", "GL000213.1", "GL000214.1", "GL000215.1", "GL000216.1",
+ "GL000217.1", "GL000218.1", "GL000219.1", "GL000220.1", "GL000221.1",
+ "GL000222.1", "GL000223.1", "GL000224.1", "GL000225.1", "GL000226.1",
+ "GL000227.1", "GL000228.1", "GL000229.1", "GL000230.1", "GL000231.1",
+ "GL000232.1", "GL000233.1", "GL000234.1", "GL000235.1", "GL000236.1",
+ "GL000237.1", "GL000238.1", "GL000239.1", "GL000240.1", "GL000241.1",
+ "GL000242.1", "GL000243.1", "GL000244.1", "GL000245.1", "GL000246.1",
+ "GL000247.1", "GL000248.1", "GL000249.1", "hs37d5", "NC_007605"};
+
+ const array Hg37IndexTable::indexConverter {
+ -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, 22, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 83, 84, -2, -2, -2};
+
+} /* namespace sophia */
diff --git a/src/MasterRefProcessor.cpp b/src/MasterRefProcessor.cpp
index 37d27c6..b4ac069 100644
--- a/src/MasterRefProcessor.cpp
+++ b/src/MasterRefProcessor.cpp
@@ -23,6 +23,7 @@
*/
#include "ChrConverter.h"
+#include "GlobalAppConfig.h"
#include "DeFuzzier.h"
#include "HelperFunctions.h"
#include "strtk.hpp"
@@ -42,26 +43,14 @@ MasterRefProcessor::MasterRefProcessor(const vector &filesIn,
const string &version,
const int defaultReadLengthIn)
: NUMPIDS{static_cast(filesIn.size())},
- DEFAULTREADLENGTH{defaultReadLengthIn}, mrefDb{} {
- const vector CHRSIZES{
- 249250622, 243199374, 198022431, 191154277, 180915261, 171115068,
- 159138664, 146364023, 141213432, 135534748, 135006517, 133851896,
- 115169879, 107349541, 102531393, 90354754, 81195211, 78077249,
- 59128984, 63025521, 48129896, 51304567, 155270561, 59373567,
- 106434, 547497, 189790, 191470, 182897, 38915,
- 37176, 90086, 169875, 187036, 36149, 40104,
- 37499, 81311, 174589, 41002, 4263, 92690,
- 159170, 27683, 166567, 186859, 164240, 137719,
- 172546, 172295, 172150, 161148, 179199, 161803,
- 155398, 186862, 180456, 179694, 211174, 15009,
- 128375, 129121, 19914, 43692, 27387, 40653,
- 45942, 40532, 34475, 41935, 45868, 39940,
- 33825, 41934, 42153, 43524, 43342, 39930,
- 36652, 38155, 36423, 39787, 38503, 35477944,
- 171824};
- for (auto i = 0; i < 85; ++i) {
- // mrefDbPtrs.emplace_back(CHRSIZES[i] + 1, nullptr);
- mrefDb.emplace_back(CHRSIZES[i] + 1, MrefEntry{});
+ DEFAULTREADLENGTH{defaultReadLengthIn},
+ mrefDb{} {
+
+ const vector &chrSizes =
+ GlobalAppConfig::getInstance().getChrConverter().chrSizesCompressedMref;
+ for (std::vector::size_type i = 0; i < chrSizes.size(); ++i) {
+ // mrefDbPtrs.emplace_back(chrSizes[i] + 1, nullptr);
+ mrefDb.emplace_back(chrSizes[i] + 1, MrefEntry{});
}
vector header{"#chr", "start", "end"};
for (const auto &gzFile : filesIn) {
@@ -108,6 +97,7 @@ MasterRefProcessor::MasterRefProcessor(const vector &filesIn,
}
auto defuzzier = DeFuzzier{DEFAULTREADLENGTH * 3, true};
- auto i = 84;
+ auto i = static_cast<int>(mrefDb.size()) - 1; // index of mrefDb.back(); no longer assumes 85 chromosomes
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
while (!mrefDb.empty()) {
mrefDb.back().erase(
remove_if(mrefDb.back().begin(), mrefDb.back().end(),
@@ -118,7 +108,7 @@ MasterRefProcessor::MasterRefProcessor(const vector &filesIn,
remove_if(mrefDb.back().begin(), mrefDb.back().end(),
[](const MrefEntry &bp) { return bp.getPos() == -1; }),
mrefDb.back().end());
- auto chromosome = ChrConverter::indexToChrCompressedMref[i];
+ auto chromosome = chrConverter.indexToChrCompressedMref[i];
--i;
for (auto &bp : mrefDb.back()) {
if (bp.getPos() != -1 && bp.getValidityScore() != -1) {
@@ -141,11 +131,12 @@ MasterRefProcessor::processFile(const string &gzPath, short fileIndex) {
string sophiaLine{};
- vector> fileBps{85, vector{}};
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
+ vector> fileBps{chrConverter.chrSizesCompressedMref.size(), vector{}}; // one bucket per compressed-mref chromosome
auto lineIndex = 0;
while (error_terminating_getline(gzStream, sophiaLine)) {
if (sophiaLine[0] != '#') {
auto chrIndex =
- ChrConverter::indexConverter[ChrConverter::readChromosomeIndex(
- sophiaLine.cbegin(), '\t')];
+ chrConverter.indexConverter[chrConverter.readChromosomeIndex(
+ sophiaLine.cbegin(), '\t')];
if (chrIndex < 0) {
continue;
}
diff --git a/src/MrefEntry.cpp b/src/MrefEntry.cpp
index 7db1c71..11405f1 100644
--- a/src/MrefEntry.cpp
+++ b/src/MrefEntry.cpp
@@ -22,6 +22,7 @@
#include "Breakpoint.h"
#include "strtk.hpp"
+#include "GlobalAppConfig.h"
#include
#include
#include
@@ -30,7 +31,7 @@
namespace sophia {
- using namespace std;
+using namespace std;
boost::format MrefEntry::doubleFormatter { "%.5f" };
int MrefEntry::NUMPIDS { };
@@ -52,8 +53,12 @@ void MrefEntry::addEntry(BreakpointReduced& tmpBreakpoint, int fileIndex) {
auto eventTotal = tmpBreakpoint.getPairedBreaksSoft() + tmpBreakpoint.getPairedBreaksHard() + tmpBreakpoint.getUnpairedBreaksSoft() + tmpBreakpoint.getUnpairedBreaksHard() + tmpBreakpoint.getBreaksShortIndel();
auto breakTotal = eventTotal + artifactBreakTotal;
if (breakTotal < 200) {
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
for (auto saPtr : tmpBreakpoint.getSupplementsPtr()) {
- if (saPtr->isSuspicious() || saPtr->isToRemove() || (saPtr->getChrIndex() != 1001 && ChrConverter::indexConverter[saPtr->getChrIndex()] < 0)) {
+ if (saPtr->isSuspicious()
+ || saPtr->isToRemove()
+ || (saPtr->getChrIndex() != 1001
+ && chrConverter.indexConverter[saPtr->getChrIndex()] < 0)) {
continue;
}
auto qualCheck = false;
diff --git a/src/MrefEntryAnno.cpp b/src/MrefEntryAnno.cpp
index ed1060f..c1afb19 100644
--- a/src/MrefEntryAnno.cpp
+++ b/src/MrefEntryAnno.cpp
@@ -25,7 +25,6 @@
#include
#include
#include
-#include "ChrConverter.h"
namespace sophia {
using namespace std;
diff --git a/src/SuppAlignment.cpp b/src/SuppAlignment.cpp
index 694a640..3fabcee 100644
--- a/src/SuppAlignment.cpp
+++ b/src/SuppAlignment.cpp
@@ -25,17 +25,27 @@
#include
#include "strtk.hpp"
#include "ChrConverter.h"
+#include "GlobalAppConfig.h"
//#include
namespace sophia {
- using namespace std;
+using namespace std;
double SuppAlignment::ISIZEMAX { };
int SuppAlignment::DEFAULTREADLENGTH { };
-SuppAlignment::SuppAlignment(string::const_iterator saCbegin, string::const_iterator saCend, bool primaryIn, bool lowMapqSourceIn, bool nullMapqSourceIn, bool alignmentOnForwardStrand, bool bpEncounteredM, int originIndexIn, int bpChrIndex, int bpPos) :
+SuppAlignment::SuppAlignment(string::const_iterator saCbegin,
+ string::const_iterator saCend,
+ bool primaryIn,
+ bool lowMapqSourceIn,
+ bool nullMapqSourceIn,
+ bool alignmentOnForwardStrand,
+ bool bpEncounteredM,
+ int originIndexIn,
+ int bpChrIndex,
+ int bpPos) :
matchFuzziness { 5 * DEFAULTREADLENGTH },
chrIndex { 0 },
pos { 0 },
@@ -71,6 +81,7 @@ SuppAlignment::SuppAlignment(string::const_iterator saCbegin, string::const_iter
// cerr << *cigarString_cit;
// }
// cerr << endl;
+
vector fieldBegins = { saCbegin };
vector fieldEnds;
for (auto it = saCbegin; it != saCend; ++it) {
@@ -80,14 +91,17 @@ SuppAlignment::SuppAlignment(string::const_iterator saCbegin, string::const_iter
}
}
fieldEnds.push_back(saCend);
- chrIndex = ChrConverter::readChromosomeIndex(fieldBegins[0], ',');
+
+ chrIndex = GlobalAppConfig::getInstance().
+ getChrConverter().readChromosomeIndex(fieldBegins[0], ',');
if (chrIndex > 1001) {
return;
}
for (auto it = fieldBegins[1]; it != fieldEnds[1]; ++it) {
pos = 10 * pos + (*it - '0');
}
-//cerr << "guessSupplementOffset" << endl;
+
+ //cerr << "guessSupplementOffset" << endl;
vector cigarChunks;
auto cigarEncounteredM = false;
auto cumulativeNucleotideCount = 0, currentNucleotideCount = 0, chunkIndex = 0, bestChunkIndex = 0, indelAdjustment = 0;
@@ -106,7 +120,10 @@ SuppAlignment::SuppAlignment(string::const_iterator saCbegin, string::const_iter
if (!cigarEncounteredM) {
leftClipAdjustment = currentNucleotideCount;
}
- cigarChunks.emplace_back(*cigarString_cit, cigarEncounteredM, cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, currentNucleotideCount);
+ cigarChunks.emplace_back(*cigarString_cit,
+ cigarEncounteredM,
+ cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment,
+ currentNucleotideCount);
if (largestClip < currentNucleotideCount) {
largestClip = currentNucleotideCount;
bestChunkIndex = chunkIndex;
@@ -115,7 +132,10 @@ SuppAlignment::SuppAlignment(string::const_iterator saCbegin, string::const_iter
cumulativeNucleotideCount += currentNucleotideCount;
break;
case 'H':
- cigarChunks.emplace_back(*cigarString_cit, cigarEncounteredM, cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, currentNucleotideCount);
+ cigarChunks.emplace_back(*cigarString_cit,
+ cigarEncounteredM,
+ cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment,
+ currentNucleotideCount);
if (largestClip < currentNucleotideCount) {
largestClip = currentNucleotideCount;
bestChunkIndex = chunkIndex;
@@ -159,12 +179,24 @@ void SuppAlignment::finalizeSupportingIndices() {
sort(supportingIndices.begin(), supportingIndices.end());
sort(supportingIndicesSecondary.begin(), supportingIndicesSecondary.end());
supportingIndices.erase(unique(supportingIndices.begin(), supportingIndices.end()), supportingIndices.end());
- supportingIndicesSecondary.erase(unique(supportingIndicesSecondary.begin(), supportingIndicesSecondary.end()), supportingIndicesSecondary.end());
+ supportingIndicesSecondary.erase(unique(supportingIndicesSecondary.begin(),
+ supportingIndicesSecondary.end()),
+ supportingIndicesSecondary.end());
support = static_cast(supportingIndices.size());
secondarySupport = static_cast(supportingIndicesSecondary.size());
}
-SuppAlignment::SuppAlignment(int chrIndexIn, int posIn, int mateSupportIn, int expectedDiscordantsIn, bool encounteredMIn, bool invertedIn, int extendedPosIn, bool primaryIn, bool lowMapqSourceIn, bool nullMapqSourceIn, int originIndexIn) :
+SuppAlignment::SuppAlignment(int chrIndexIn,
+ int posIn,
+ int mateSupportIn,
+ int expectedDiscordantsIn,
+ bool encounteredMIn,
+ bool invertedIn,
+ int extendedPosIn,
+ bool primaryIn,
+ bool lowMapqSourceIn,
+ bool nullMapqSourceIn,
+ int originIndexIn) :
matchFuzziness { 5 * DEFAULTREADLENGTH },
chrIndex { chrIndexIn },
pos { posIn },
@@ -213,12 +245,27 @@ string SuppAlignment::print() const {
} else {
invStr.append("|");
}
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
if (!fuzzy) {
- outStr.append(ChrConverter::indexToChr[chrIndex]).append(":").append(strtk::type_to_string(pos));
+ outStr.
+ append(chrConverter.indexToChr[chrIndex]).
+ append(":").
+ append(strtk::type_to_string(pos));
} else {
- outStr.append(ChrConverter::indexToChr[chrIndex]).append(":").append(strtk::type_to_string(pos)).append("-").append(strtk::type_to_string(extendedPos));
+ outStr.
+ append(chrConverter.indexToChr[chrIndex]).
+ append(":").
+ append(strtk::type_to_string(pos)).
+ append("-").
+ append(strtk::type_to_string(extendedPos));
}
- outStr.append(invStr).append("(").append(strtk::type_to_string(support)).append(",").append(strtk::type_to_string(secondarySupport)).append(",");
+ outStr.
+ append(invStr).
+ append("(").
+ append(strtk::type_to_string(support)).
+ append(",").
+ append(strtk::type_to_string(secondarySupport)).
+ append(",");
if (!suspicious) {
outStr.append(strtk::type_to_string(mateSupport));
if (semiSuspicious || nullMapqSource) {
@@ -261,7 +308,8 @@ SuppAlignment::SuppAlignment(const string& saIn) :
if (encounteredM) {
++index;
}
- chrIndex = ChrConverter::readChromosomeIndex(next(saIn.cbegin(), index), ':');
+ chrIndex = GlobalAppConfig::getInstance().
+ getChrConverter().readChromosomeIndex(next(saIn.cbegin(), index), ':');
if (chrIndex > 1001) {
return;
}
@@ -322,7 +370,8 @@ bool SuppAlignment::saCloseness(const SuppAlignment& rhs, int fuzziness) const {
if (inverted == rhs.isInverted() && chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) {
if (strictFuzzy || rhs.isStrictFuzzy()) {
fuzziness = 2.5 * DEFAULTREADLENGTH;
- return (rhs.getPos() - fuzziness) <= (extendedPos + fuzziness) && (pos - fuzziness) <= (rhs.getExtendedPos() + fuzziness);
+ return (rhs.getPos() - fuzziness) <= (extendedPos + fuzziness) &&
+ (pos - fuzziness) <= (rhs.getExtendedPos() + fuzziness);
} else {
return abs(pos - rhs.getPos()) <= fuzziness;
}
@@ -338,7 +387,8 @@ bool SuppAlignment::saDistHomologyRescueCloseness(const SuppAlignment& rhs, int
}
if (chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) {
if (strictFuzzy || rhs.isStrictFuzzy()) {
- return (rhs.getPos() - fuzziness) <= (extendedPos + fuzziness) && (pos - fuzziness) <= (rhs.getExtendedPos() + fuzziness);
+ return (rhs.getPos() - fuzziness) <= (extendedPos + fuzziness) &&
+ (pos - fuzziness) <= (rhs.getExtendedPos() + fuzziness);
} else {
return abs(pos - rhs.getPos()) <= fuzziness;
}
diff --git a/src/SuppAlignmentAnno.cpp b/src/SuppAlignmentAnno.cpp
index f823061..9794157 100644
--- a/src/SuppAlignmentAnno.cpp
+++ b/src/SuppAlignmentAnno.cpp
@@ -23,6 +23,7 @@
*/
#include "SuppAlignmentAnno.h"
+#include "GlobalAppConfig.h"
#include "ChrConverter.h"
#include "strtk.hpp"
#include
@@ -49,7 +50,8 @@ SuppAlignmentAnno::SuppAlignmentAnno(const string &saStrIn)
++index;
}
chrIndex =
- ChrConverter::readChromosomeIndex(next(saStrIn.cbegin(), index), ':');
+ GlobalAppConfig::getInstance().
+ getChrConverter().readChromosomeIndex(next(saStrIn.cbegin(), index), ':');
if (chrIndex > 1001) {
return;
}
@@ -178,12 +180,13 @@ SuppAlignmentAnno::print() const {
} else {
invStr.append("|");
}
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
if (!fuzzy || pos == extendedPos) {
- outStr.append(ChrConverter::indexToChr[chrIndex])
+ outStr.append(chrConverter.indexToChr[chrIndex])
.append(":")
.append(strtk::type_to_string(pos));
} else {
- outStr.append(ChrConverter::indexToChr[chrIndex])
+ outStr.append(chrConverter.indexToChr[chrIndex])
.append(":")
.append(strtk::type_to_string(pos))
.append("-")
diff --git a/src/SvEvent.cpp b/src/SvEvent.cpp
index 68cae30..1e7e3b7 100644
--- a/src/SvEvent.cpp
+++ b/src/SvEvent.cpp
@@ -23,6 +23,7 @@
*/
#include "ChrConverter.h"
+#include "GlobalAppConfig.h"
#include "strtk.hpp"
#include
@@ -139,10 +140,11 @@ SvEvent::SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In,
determineGermlineClonalityBp(bp2In, selectedSa2, germlineClonality2);
germlineStatus2 = germlineClonality2 > 0.15;
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() &&
!selectedSa2.isProperPairErrorProne() &&
- ChrConverter::indexConverter[chrIndex1] < 23 &&
- ChrConverter::indexConverter[chrIndex2] < 23;
+ chrConverter.indexConverter[chrIndex1] < 23 &&
+ chrConverter.indexConverter[chrIndex2] < 23;
auto splitSupportThreshold1 =
(strictNonDecoy && !selectedSa1.isSemiSuspicious() &&
(mateRatio1 >= 0.6))
@@ -336,9 +338,10 @@ SvEvent::SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In,
determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1);
germlineStatus1 = germlineClonality1 > 0.15;
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() &&
- ChrConverter::indexConverter[chrIndex1] < 23 &&
- ChrConverter::indexConverter[chrIndex2] < 23;
+ chrConverter.indexConverter[chrIndex1] < 23 &&
+ chrConverter.indexConverter[chrIndex2] < 23;
auto splitSupportThreshold =
(strictNonDecoy && !selectedSa2.isSemiSuspicious() &&
(mateRatio1 >= 0.66))
@@ -531,9 +534,10 @@ SvEvent::SvEvent(const BreakpointReduced &bp1In, const SuppAlignmentAnno &sa1In,
determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1);
germlineStatus1 = germlineClonality1 > 0.15;
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() &&
- ChrConverter::indexConverter[chrIndex1] < 23 &&
- ChrConverter::indexConverter[chrIndex2] < 23;
+ chrConverter.indexConverter[chrIndex1] < 23 &&
+ chrConverter.indexConverter[chrIndex2] < 23;
auto splitSupportThreshold =
(strictNonDecoy && (mateRatio1 >= 0.66) ? 0 : 2);
@@ -1705,10 +1709,11 @@ string
SvEvent::printMatch(const vector> &overhangDb) const {
vector outputFields;
outputFields.reserve(20);
- outputFields.emplace_back(ChrConverter::indexToChr[chrIndex1]);
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter();
+ outputFields.emplace_back(chrConverter.indexToChr[chrIndex1]);
outputFields.emplace_back(strtk::type_to_string(pos1 - 1));
outputFields.emplace_back(strtk::type_to_string(pos1));
- outputFields.emplace_back(ChrConverter::indexToChr[chrIndex2]);
+ outputFields.emplace_back(chrConverter.indexToChr[chrIndex2]);
outputFields.emplace_back(strtk::type_to_string(pos2 - 1));
outputFields.emplace_back(
inputScore > 0
diff --git a/sophia.cpp b/src/sophia.cpp
similarity index 57%
rename from sophia.cpp
rename to src/sophia.cpp
index 5afecba..f8e4a17 100644
--- a/sophia.cpp
+++ b/src/sophia.cpp
@@ -10,21 +10,30 @@
#include
#include