Skip to content

Commit

Permalink
Renamed "old" and "new" edit dist to "ref" and "query"
Browse files Browse the repository at this point in the history
  • Loading branch information
TimD1 committed Feb 9, 2024
1 parent c58b3fa commit d5d6ae9
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 45 deletions.
46 changes: 23 additions & 23 deletions src/dist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,7 @@ void calc_prec_recall(
int prev_ti = ti;
int sync_truth_idx = ti_size;
int prev_sync_truth_idx = ti_size;
int new_ed = 0;
int query_ed = 0;
int query_var_ptr = query_end_idx-1;
int query_var_pos = query_vars->poss.size() && query_var_ptr >= 0 ?
query_vars->poss[query_var_ptr] - beg : 0;
Expand Down Expand Up @@ -1172,8 +1172,8 @@ void calc_prec_recall(
query_vars->errtypes[swap][query_var_ptr] = ERRTYPE_FP;
query_vars->sync_group[swap][query_var_ptr] = sync_group++;
query_vars->credit[swap][query_var_ptr] = 0;
query_vars->old_ed[swap][query_var_ptr] = 0;
query_vars->new_ed[swap][query_var_ptr] = 0;
query_vars->ref_ed[swap][query_var_ptr] = 0;
query_vars->query_ed[swap][query_var_ptr] = 0;
query_vars->callq[swap][query_var_ptr] =
query_vars->var_quals[query_var_ptr];
if (print) printf("%s: QUERY REF='%s'\tALT='%s'\t%s\t%f\n", ctg.data(),
Expand Down Expand Up @@ -1227,29 +1227,29 @@ void calc_prec_recall(
// sync point: set TP/PP
if (sync[i][path_idx]) {

// calculate old edit distance
int old_ed = 0;
// calculate edit distance without variants
int ref_ed = 0;
sync_ref_idx = (hi == ri) ? qri+1 : query_ref_ptrs[i][PTRS][qri]+1;
sync_truth_idx = ti+1;
std::vector< std::vector<int> > offs, ptrs;
wf_ed(ref.substr(sync_ref_idx, prev_sync_ref_idx - sync_ref_idx),
truth[i].substr(sync_truth_idx, prev_sync_truth_idx - sync_truth_idx),
old_ed, offs, ptrs);
ref_ed, offs, ptrs);

// This only happens when the truth VCF contains several variants that, when
// combined, are equivalent to no variants. In this case, the truth VCF should
// be fixed. Output a warning, and allow the evaluation to continue.
if (old_ed == 0 && truth_var_ptr != prev_truth_var_ptr) {
if (ref_ed == 0 && truth_var_ptr != prev_truth_var_ptr) {
WARN("Zero edit distance with truth variants at supercluster %d, %s:%d",
sc_idx, ctg.data(), truth_vars->poss[truth_var_ptr]);
old_ed = 1; // prevent divide-by-zero
ref_ed = 1; // prevent divide-by-zero
}
if (new_ed > old_ed)
if (query_ed > ref_ed)
WARN("New edit distance exceeds old edit distance at supercluster %d, %s:%d",
sc_idx, ctg.data(), query_vars->poss[query_var_ptr]);

/* if (prev_query_var_ptr == query_var_ptr && */
/* old_ed != new_ed) { */
/* ref_ed != query_ed) { */
/* WARN("Non-zero edit distance with no truth variants at supercluster %d, %s:%d", */
/* sc_idx, ctg.data(), query_vars->poss[query_var_ptr]); */
/* } */
Expand All @@ -1273,15 +1273,15 @@ void calc_prec_recall(
// process QUERY variants
for (int query_var_idx = prev_query_var_ptr;
query_var_idx > query_var_ptr; query_var_idx--) {
float credit = 1 - float(new_ed)/old_ed;
float credit = 1 - float(query_ed)/ref_ed;
// don't overwrite FPs
if (query_vars->errtypes[swap][query_var_idx] == ERRTYPE_UN) {
if (credit >= g.credit_threshold) { // TP
query_vars->errtypes[swap][query_var_idx] = ERRTYPE_TP;
query_vars->sync_group[swap][query_var_idx] = sync_group;
query_vars->credit[swap][query_var_idx] = credit;
query_vars->old_ed[swap][query_var_idx] = old_ed;
query_vars->new_ed[swap][query_var_idx] = new_ed;
query_vars->ref_ed[swap][query_var_idx] = ref_ed;
query_vars->query_ed[swap][query_var_idx] = query_ed;
query_vars->callq[swap][query_var_idx] = callq;
if (print) printf("%s:%d QUERY REF='%s'\tALT='%s'\t%s\t%f\n",
ctg.data(), query_vars->poss[query_var_idx],
Expand All @@ -1292,8 +1292,8 @@ void calc_prec_recall(
query_vars->errtypes[swap][query_var_idx] = ERRTYPE_FP;
query_vars->sync_group[swap][query_var_idx] = sync_group;
query_vars->credit[swap][query_var_idx] = credit;
query_vars->old_ed[swap][query_var_idx] = old_ed;
query_vars->new_ed[swap][query_var_idx] = new_ed;
query_vars->ref_ed[swap][query_var_idx] = ref_ed;
query_vars->query_ed[swap][query_var_idx] = query_ed;
query_vars->callq[swap][query_var_idx] = callq;
if (print) printf("%s:%d QUERY REF='%s'\tALT='%s'\t%s\t%f\n",
ctg.data(), query_vars->poss[query_var_idx],
Expand All @@ -1307,13 +1307,13 @@ void calc_prec_recall(
// process TRUTH variants
for (int truth_var_idx = prev_truth_var_ptr;
truth_var_idx > truth_var_ptr; truth_var_idx--) {
float credit = 1 - float(new_ed)/old_ed;
float credit = 1 - float(query_ed)/ref_ed;
if (credit >= g.credit_threshold) { // TP
truth_vars->errtypes[swap][truth_var_idx] = ERRTYPE_TP;
truth_vars->sync_group[swap][truth_var_idx] = sync_group;
truth_vars->credit[swap][truth_var_idx] = credit;
truth_vars->old_ed[swap][truth_var_idx] = old_ed;
truth_vars->new_ed[swap][truth_var_idx] = new_ed;
truth_vars->ref_ed[swap][truth_var_idx] = ref_ed;
truth_vars->query_ed[swap][truth_var_idx] = query_ed;
truth_vars->callq[swap][truth_var_idx] = callq;
if (print) printf("%s:%d TRUTH REF='%s'\tALT='%s'\t%s\t%f\n",
ctg.data(), truth_vars->poss[truth_var_idx],
Expand All @@ -1324,8 +1324,8 @@ void calc_prec_recall(
truth_vars->errtypes[swap][truth_var_idx] = ERRTYPE_FN;
truth_vars->sync_group[swap][truth_var_idx] = sync_group;
truth_vars->credit[swap][truth_var_idx] = credit;
truth_vars->old_ed[swap][truth_var_idx] = old_ed;
truth_vars->new_ed[swap][truth_var_idx] = new_ed;
truth_vars->ref_ed[swap][truth_var_idx] = ref_ed;
truth_vars->query_ed[swap][truth_var_idx] = query_ed;
truth_vars->callq[swap][truth_var_idx] = g.max_qual;
if (print) printf("%s:%d TRUTH REF='%s'\tALT='%s'\t%s\t%f\n",
ctg.data(), truth_vars->poss[truth_var_idx],
Expand All @@ -1338,7 +1338,7 @@ void calc_prec_recall(
if (print) printf("%s: SYNC @%s(%2d,%2d) = REF(%2d,%2d), ED %d->%d, QUERY %d-%d, TRUTH %d-%d\n\n", ctg.data(),
(hi == ri) ? "REF" : "QRY", qri, ti,
(hi == ri) ? qri : query_ref_ptrs[i][PTRS][qri],
truth_ref_ptrs[i][PTRS][ti], old_ed, new_ed,
truth_ref_ptrs[i][PTRS][ti], ref_ed, query_ed,
prev_query_var_ptr, query_var_ptr,
prev_truth_var_ptr, truth_var_ptr
);
Expand All @@ -1352,7 +1352,7 @@ void calc_prec_recall(
prev_truth_var_ptr = truth_var_ptr;
prev_sync_ref_idx = sync_ref_idx;
prev_sync_truth_idx = sync_truth_idx;
new_ed = 0;
query_ed = 0;
}

// update pointers and edit distance
Expand All @@ -1372,7 +1372,7 @@ void calc_prec_recall(
ti = path[i][path_idx].ti;
prev_hi = hi;
hi = path[i][path_idx].hi;
new_ed += edits[i][path_idx];
query_ed += edits[i][path_idx];

// update next variant position
query_var_pos = (query_var_ptr < query_beg_idx) ? -1 :
Expand Down
4 changes: 2 additions & 2 deletions src/phase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ void phaseblockData::write_summary_vcf(std::string out_vcf_fn) {
fprintf(out_vcf, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"GenoType\">\n");
fprintf(out_vcf, "##FORMAT=<ID=BD,Number=1,Type=String,Description=\"Benchmark Decision for call (TP/FP/FN).\">\n");
fprintf(out_vcf, "##FORMAT=<ID=BC,Number=1,Type=Float,Description=\"Benchmark Credit (on the interval [0,1], based on sync group edit distance)\">\n");
fprintf(out_vcf, "##FORMAT=<ID=OD,Number=1,Type=Integer,Description=\"Original sync group edit Distance\">\n");
fprintf(out_vcf, "##FORMAT=<ID=ND,Number=1,Type=Integer,Description=\"New sync group edit Distance\">\n");
fprintf(out_vcf, "##FORMAT=<ID=RD,Number=1,Type=Integer,Description=\"Reference edit Distance from truth within current sync group\">\n");
fprintf(out_vcf, "##FORMAT=<ID=QD,Number=1,Type=Integer,Description=\"Query edit Distance from truth within current sync group\">\n");
fprintf(out_vcf, "##FORMAT=<ID=BK,Number=1,Type=String,Description=\"BenchmarK category ('gm' if credit == 1, 'lm' if credit > 0, else '.')\">\n");
fprintf(out_vcf, "##FORMAT=<ID=QQ,Number=1,Type=Float,Description=\"variant Quality\">\n");
fprintf(out_vcf, "##FORMAT=<ID=SC,Number=1,Type=Integer,Description=\"SuperCluster (index in contig)\">\n");
Expand Down
20 changes: 10 additions & 10 deletions src/print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
if (g.verbosity >= 1) INFO(" Writing query variant results to '%s'", out_query_fn.data());
FILE* out_query = fopen(out_query_fn.data(), "w");
fprintf(out_query, "CONTIG\tPOS\tHAP\tREF\tALT\tQUAL\tTYPE\tERR_TYPE"
"\tCREDIT\tCLUSTER\tSUPERCLUSTER\tSYNC_GROUP\tOLD_DIST\tNEW_DIST\tLOCATION\n");
"\tCREDIT\tCLUSTER\tSUPERCLUSTER\tSYNC_GROUP\tREF_DIST\tQUERY_DIST\tLOCATION\n");
for (std::string ctg : phasedata_ptr->contigs) {

// set pointers to variants and superclusters
Expand Down Expand Up @@ -723,8 +723,8 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
cluster1_idx,
sci,
query1_vars->sync_group[swap][var1_idx],
query1_vars->old_ed[swap][var1_idx],
query1_vars->new_ed[swap][var1_idx],
query1_vars->ref_ed[swap][var1_idx],
query1_vars->query_ed[swap][var1_idx],
region_strs[query1_vars->locs[var1_idx]].data()
);
var1_idx++;
Expand Down Expand Up @@ -760,8 +760,8 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
cluster2_idx,
sci,
query2_vars->sync_group[swap][var2_idx],
query2_vars->old_ed[swap][var2_idx],
query2_vars->new_ed[swap][var2_idx],
query2_vars->ref_ed[swap][var2_idx],
query2_vars->query_ed[swap][var2_idx],
region_strs[query2_vars->locs[var2_idx]].data()
);
var2_idx++;
Expand All @@ -774,7 +774,7 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
std::string out_truth_fn = g.out_prefix + "truth.tsv";
if (g.verbosity >= 1) INFO(" Writing truth variant results to '%s'", out_truth_fn.data());
FILE* out_truth = fopen(out_truth_fn.data(), "w");
fprintf(out_truth, "CONTIG\tPOS\tHAP\tREF\tALT\tQUAL\tTYPE\tERRTYPE\tCREDIT\tCLUSTER\tSUPERCLUSTER\tSYNC_GROUP\tOLD_DIST\tNEW_DIST\tLOCATION\n");
fprintf(out_truth, "CONTIG\tPOS\tHAP\tREF\tALT\tQUAL\tTYPE\tERRTYPE\tCREDIT\tCLUSTER\tSUPERCLUSTER\tSYNC_GROUP\tREF_DIST\tQUERY_DIST\tLOCATION\n");
for (std::string ctg : phasedata_ptr->contigs) {

// set pointers to variants and superclusters
Expand Down Expand Up @@ -822,8 +822,8 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
cluster1_idx,
sci,
truth1_vars->sync_group[swap][var1_idx],
truth1_vars->old_ed[swap][var1_idx],
truth1_vars->new_ed[swap][var1_idx],
truth1_vars->ref_ed[swap][var1_idx],
truth1_vars->query_ed[swap][var1_idx],
region_strs[truth1_vars->locs[var1_idx]].data()
);
var1_idx++;
Expand Down Expand Up @@ -860,8 +860,8 @@ void write_results(std::unique_ptr<phaseblockData> & phasedata_ptr) {
cluster2_idx,
sci,
truth2_vars->sync_group[swap][var2_idx],
truth2_vars->old_ed[swap][var2_idx],
truth2_vars->new_ed[swap][var2_idx],
truth2_vars->ref_ed[swap][var2_idx],
truth2_vars->query_ed[swap][var2_idx],
region_strs[truth2_vars->locs[var2_idx]].data()
);
var2_idx++;
Expand Down
16 changes: 8 additions & 8 deletions src/variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ ctgVariants::ctgVariants() {
this->sync_group.push_back(std::vector<int>());
this->callq.push_back(std::vector<float>());
this->credit.push_back(std::vector<float>());
this->old_ed.push_back(std::vector<int>());
this->new_ed.push_back(std::vector<int>());
this->ref_ed.push_back(std::vector<int>());
this->query_ed.push_back(std::vector<int>());
}
}

Expand All @@ -45,8 +45,8 @@ void ctgVariants::add_var(int pos, int rlen, uint8_t hap, uint8_t type, uint8_t
this->errtypes[i].push_back(ERRTYPE_UN);
this->sync_group[i].push_back(0);
this->credit[i].push_back(0);
this->old_ed[i].push_back(0);
this->new_ed[i].push_back(0);
this->ref_ed[i].push_back(0);
this->query_ed[i].push_back(0);
this->callq[i].push_back(0);
}
}
Expand Down Expand Up @@ -230,14 +230,14 @@ void ctgVariants::print_var_info(FILE* out_fp, std::shared_ptr<fastaData> ref,
char ref_base;
switch (this->types[idx]) {
case TYPE_SUB:
fprintf(out_fp, "%s\t%d\t.\t%s\t%s\t.\tPASS\t.\tGT:BD:BC:OD:ND:BK:QQ:SC:SG:PS:PB:BS:FE",
fprintf(out_fp, "%s\t%d\t.\t%s\t%s\t.\tPASS\t.\tGT:BD:BC:RD:QD:BK:QQ:SC:SG:PS:PB:BS:FE",
ctg.data(), this->poss[idx]+1, this->refs[idx].data(),
this->alts[idx].data());
break;
case TYPE_INS:
case TYPE_DEL:
ref_base = ref->fasta.at(ctg)[this->poss[idx]-1];
fprintf(out_fp, "%s\t%d\t.\t%s\t%s\t.\tPASS\t.\tGT:BD:BC:OD:ND:BK:QQ:SC:SG:PS:PB:BS:FE", ctg.data(),
fprintf(out_fp, "%s\t%d\t.\t%s\t%s\t.\tPASS\t.\tGT:BD:BC:RD:QD:BK:QQ:SC:SG:PS:PB:BS:FE", ctg.data(),
this->poss[idx], (ref_base + this->refs[idx]).data(),
(ref_base + this->alts[idx]).data());
break;
Expand Down Expand Up @@ -275,8 +275,8 @@ void ctgVariants::print_var_sample(FILE* out_fp, int idx, std::string gt,

fprintf(out_fp, "\t%s:%s:%f:%s:%s:%s:%d:%d:%d:%d:%d:%s:%s%s", gt.data(), errtype.data(),
this->credit[swap][idx],
this->old_ed[swap][idx] == 0 ? "." : std::to_string(this->old_ed[swap][idx]).data(),
this->old_ed[swap][idx] == 0 ? "." : std::to_string(this->new_ed[swap][idx]).data(),
this->ref_ed[swap][idx] == 0 ? "." : std::to_string(this->ref_ed[swap][idx]).data(),
this->ref_ed[swap][idx] == 0 ? "." : std::to_string(this->query_ed[swap][idx]).data(),
match_type.data(), int(this->var_quals[idx]), sc_idx,
int(this->sync_group[swap][idx]), this->phase_sets[idx], phase_block,
query ? (phase_switch ? "1" : "0") : "." ,
Expand Down
4 changes: 2 additions & 2 deletions src/variant.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ class ctgVariants {
// or for call, min quality in sync group
std::vector< std::vector<float> > callq;
// percentage reduction in edit dist of sync group with variants
std::vector< std::vector<int> > old_ed;
std::vector< std::vector<int> > new_ed;
std::vector< std::vector<int> > ref_ed;
std::vector< std::vector<int> > query_ed;
std::vector< std::vector<float> > credit;
};

Expand Down

0 comments on commit d5d6ae9

Please sign in to comment.