Skip to content

Commit

Permalink
Merge pull request #1652 from moj-analytical-services/issue_1651
Browse files Browse the repository at this point in the history
Fix issue 1651 - comparison viewer bars sorted improperly
  • Loading branch information
RobinL authored Oct 18, 2023
2 parents 6b784a9 + f7a7c03 commit 24008da
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
27 changes: 14 additions & 13 deletions splink/files/splink_vis_utils/splink_vis_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -10288,7 +10288,7 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
{
encoding: {
color: {
field: "match_probability",
field: "avg_match_probability",
scale: {
domain: [
0,
Expand Down Expand Up @@ -10317,14 +10317,16 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
type: "quantitative"
},
{
field: "match_probability",
field: "avg_match_probability",
type: "quantitative",
format: ",.1%"
format: ",.1%",
title: "Match probability"
},
{
field: "match_weight",
type: "quantitative",
format: ",.2f"
format: ",.2f",
title: "Match weight"
},
{
field: "sum_matches",
Expand All @@ -10335,17 +10337,12 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
field: "proportion_of_comparisons",
type: "quantitative",
format: ",.1%"
},
{
field: "cumulative_comparisons",
type: "quantitative",
format: ",.1%"
}
],
x: {
field: "gam_concat",
sort: {
field: "match_weight",
field: "sort_avg_match_weight",
op: "sum",
order: "ascending"
},
Expand Down Expand Up @@ -10544,7 +10541,7 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
{
encoding: {
color: {
field: "match_probability",
field: "avg_match_probability",
legend: null,
scale: {
domain: [
Expand All @@ -10563,7 +10560,7 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
x: {
field: "gam_concat",
sort: {
field: "match_weight",
field: "sort_avg_match_weight",
op: "sum",
order: "ascending"
},
Expand Down Expand Up @@ -10636,10 +10633,13 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
let sort_field;
data.forEach((d) => {
d.sum_matches = d.match_probability * d.count;

const bf = Math.pow(2, d.sort_avg_match_weight);
d.avg_match_probability = bf / (1 + bf);
});
if (sort_bars == "sort_match_weight") {
data.sort(sort_match_weight);
sort_field = "match_weight";
sort_field = "sort_avg_match_weight";
}
if (sort_bars == "sort_sum_matches") {

Expand Down Expand Up @@ -10693,6 +10693,7 @@ ${splink_vis_utils.comparison_column_table(selected_edge, ss)}`;
row["bayes_factor"] = d[`bf_${data_col_name}`];
const log2 = Math.log2;
row["match_weight"] = log2(d[`bf_${data_col_name}`]);
row["sort_avg_match_weight"] = d["sort_avg_match_weight"];

row["label_for_charts"] = settings_col.comparison_level_lookup[row["gam_value"]]["label_for_charts"];
row["sql_condition"] = settings_col.comparison_level_lookup[row["gam_value"]]["sql_condition"];
Expand Down
11 changes: 11 additions & 0 deletions splink/splink_comparison_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from jinja2 import Template

from .misc import EverythingEncoder, read_resource
from .predict import _combine_prior_and_bfs

# https://stackoverflow.com/questions/39740632/python-type-hinting-without-cyclic-imports
if TYPE_CHECKING:
Expand All @@ -26,11 +27,21 @@ def row_examples(linker: Linker, example_rows_per_category=2):

gam_concat = " || ',' || ".join(gamma_columns)

# See https://github.com/moj-analytical-services/splink/issues/1651
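# Combine only each comparison's base Bayes factor column (no term frequency,
# "tf", adjustment columns) so that the average match weight used for sorting
# isn't affected by tf.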
bf_columns_no_tf = [c._bf_column_name for c in linker._settings_obj.comparisons]

p = linker._settings_obj._probability_two_random_records_match
bf_final_no_tf = _combine_prior_and_bfs(
p, bf_terms=bf_columns_no_tf, sql_infinity_expr=linker._infinity_expression
)[0]

sql = f"""
select
*,
{uid_expr} as rec_comparison_id,
{gam_concat} as gam_concat,
log2({bf_final_no_tf}) as sort_avg_match_weight,
random() as rand_order
from __splink__df_predict
"""
Expand Down

0 comments on commit 24008da

Please sign in to comment.