-
Notifications
You must be signed in to change notification settings - Fork 1
/
pattern.h
131 lines (117 loc) · 3.71 KB
/
pattern.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/**
* This program calculates the variance of a set of patterns with the same length and weight.
* It is possible to improve your pattern set, estimate values for p, q and S, l_hom, li, lj
* from a multiple alignment file in FASTA format, and also read patterns from a file.
*
* pattern object header
*
* For theory please have a look at:
*
* B. Morgenstern, B. Zhu, S. Horwege, C.-A Leimeister (2015)
* Estimating evolutionary distances between genomic sequences from spaced-word matches
* Algorithms for Molecular Biology 10, 5. (http://www.almob.org/content/10/1/5/abstract)
*
*
* @author: Lars Hahn - 03.06.2015, Georg-August-Universitaet Goettingen
* @version: 1.0 06/2015
*/
#ifndef PATTERN_H_
#define PATTERN_H_
#include <iostream>
#include <fstream>
#include <random>
#include <vector>
#include <string>
#include <math.h>
#include <stdlib.h>
#include <string.h>
/**
 * A set of patterns that share one length and one weight, together with the
 * variance associated with that set (see the file header for the theory).
 *
 * NOTE(review): this header only shows declarations. The per-member notes
 * below are derived from names and signatures; anything marked "presumably"
 * must be confirmed against the implementation file.
 */
class pattern{
public:
    /* ---- construction / destruction --------------------------------- */

    pattern();
    /** @param pattern_file path-like C string naming a pattern file
     *  @param align_file   path-like C string naming an alignment file
     *  NOTE(review): ownership of both pointers is not visible here. */
    pattern(char* pattern_file, char* align_file);
    /** Same as above, with every numeric parameter of the model supplied
     *  explicitly (set size, pattern length/weight, l_hom, l1, l2, p, q). */
    pattern(char* pattern_file, char* align_file, int size, int length, int weight, int l_hom, int l1, int l2, double p, double q);
    /** @param size   number of patterns in the set
     *  @param length length of each pattern
     *  @param weight weight of each pattern */
    pattern(int size, int length, int weight);
    ~pattern();

    /** Presumably rebuilds the pattern set from the stored parameters - confirm. */
    void ReinitPattern();

    /* ---- accessors --------------------------------------------------- */

    /** Return the pattern set / the best pattern set by value (a copy). */
    std::vector<std::string> GetPattern();
    std::vector<std::string> GetBestPattern();
    /** @param number index of one pattern inside the set (index base and
     *                bounds handling are not visible here - confirm). */
    std::string GetPattern(int number);
    std::string GetBestPattern(int number);

    /** NOTE(review): Variance() presumably (re)computes the variance while
     *  the Get*Variance() functions return stored values - confirm. */
    double Variance();
    double GetVariance();
    double GetBestVariance();
    double GetNormVariance();
    double GetBestNormVariance();

    double GetP();
    double GetQ();
    int GetWeight();
    int GetSize();
    int GetLength();
    int GetLHom();
    int GetL1();
    int GetL2();
    int GetWorstPatMaxVal();
    int GetWorstPatMaxPat();

    /** @param number index of the pattern to check for uniqueness
     *  NOTE(review): meaning of the return value (true == unique?) to be confirmed. */
    bool UniqPattern(int number);

    /* ---- improvement strategies -------------------------------------- */

    /** @param limit presumably an upper bound on improvement steps - confirm. */
    void Improve(int limit);
    void ImproveLoop(int limit);
    void ImproveMaxValue(int limit);
    void ImproveMaxValuePattern(int limit);
    void ImproveSecure();

    /* ---- output control ---------------------------------------------- */

    /** Presumably affect the `quiet` / `silent` flags declared below - confirm. */
    void Quiet();
    void Silent();
    void Print();
    void PrintBest();

    /** @param number index of the pattern to be changed randomly. */
    void ChangePatternRandom(int number);

protected:
    /* ---- pattern handling --------------------------------------------- */

    void InitMatrix();
    /** @param pattern text to split
     *  @param tokens  C string of separator characters (presumably - confirm) */
    std::vector<std::string> SplitString(std::string pattern, char* tokens);
    bool ValidatePatternsFormat(std::string pattern_form);
    bool ValidatePatternConditions();
    int PatternWeight(std::string pattern_wght);
    std::vector<std::string> CreateRandomPattern();
    std::vector<std::string> PatternCopy(std::vector<std::string>old_pattern);

    /* ---- variance computation ------------------------------------------ */

    double CalcVariance();
    double CalcVarianceAlign();
    int ShiftPos(int p1, int p2, int s);
    int WorstPattern_max_val();
    int WorstPattern_max_pat();
    /** Common entry point taking the strategy as flags; presumably used by
     *  the public Improve*() functions - confirm. */
    void DoImprove(int limit, bool max_val, bool max_pat, bool loop);

    /* ---- alignment-based estimation ------------------------------------ */

    void ReadAlign();
    int LengthSeq(std::vector<std::string> seq);
    std::vector<double> BackgroundProb(std::vector<std::string> sequence);
    void InitQValues();
    void InitPValues();
    double CountMatch(int pos1, int pos2);
    int CountHom(int pos1, int pos2);

    /* ---- numeric helpers ------------------------------------------------ */

    double Gauss();
    double MaxNumberPattern(int p_weight, int p_length);
    double Faculty(int value);

    /** @param errmsg message text
     *  @param pos    position associated with the message */
    void SecureMessage(std::string errmsg, int pos);

private:
    /* Declaration order is kept exactly as before: it determines member
     * layout and initialization order. */
    std::vector<std::vector<double> > q_values;
    std::vector<std::vector<std::string> > seq_matrix;
    std::vector<std::string> pattern_set;
    std::vector<std::string> best_pattern;
    std::vector<std::vector<double> > var_sum;  /*Contains for each pattern pair the share of the complete variance...*/
    std::vector<std::vector<int> > l_hom_val;   /*...which means, the summation of the upper triangular matrix represents the complete variance*/
    std::vector<std::vector<double> > p_values;
    std::vector<int> seq_leng;

    /* Every scalar member carries a default initializer so that no
     * constructor can leave it indeterminate; constructors that assign
     * their own values simply override these. */
    double variance = 0.0;
    double best_variance = 0.0;
    int size = 0;
    int length = 0;
    int weight = 0;
    int l_hom = 0;
    int l1 = 0;
    int l2 = 0;
    double p = 0.0;
    double q = 0.0;
    char* pattern_file = nullptr;   /* NOTE(review): ownership not visible in this header */
    char* align_file = nullptr;     /* NOTE(review): ownership not visible in this header */
    bool improve = false;
    bool quiet = false;
    bool silent = false;
    bool secure = false;
};
#endif