Skip to content

Commit

Permalink
rename arg
Browse files (browse the repository at this point in the history)
  • Loading branch information
RobinL committed May 15, 2024
1 parent 9d2f82b commit 59bdc38
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 18 deletions.
8 changes: 4 additions & 4 deletions splink/analyse_blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,15 +479,15 @@ def count_comparisons_from_blocking_rule(
compute_post_filter_count: bool = True,
max_rows_limit: int = int(1e9),
) -> dict[str, Union[int, str]]:
blocking_rule = to_blocking_rule_creator(blocking_rule_creator).get_blocking_rule(
db_api.sql_dialect.name
)
blocking_rule_creator = to_blocking_rule_creator(
blocking_rule_creator
).get_blocking_rule(db_api.sql_dialect.name)

splink_df_dict = db_api.register_multiple_tables(table_or_tables)

return _count_comparisons_generated_from_blocking_rule(
splink_df_dict=splink_df_dict,
blocking_rule=blocking_rule,
blocking_rule=blocking_rule_creator,
link_type=link_type,
db_api=db_api,
compute_post_filter_count=compute_post_filter_count,
Expand Down
24 changes: 13 additions & 11 deletions tests/test_analyse_blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,62 +49,64 @@ def test_analyse_blocking_slow_methodology(test_helpers, dialect):
}

res_dict = count_comparisons_from_blocking_rule(
table_or_tables=df_1, blocking_rule="1=1", **args
table_or_tables=df_1, blocking_rule_creator="1=1", **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 4 * 3 / 2

res_dict = count_comparisons_from_blocking_rule(
table_or_tables=df_1, blocking_rule=block_on("first_name"), **args
table_or_tables=df_1, blocking_rule_creator=block_on("first_name"), **args
)

res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 1

args["link_type"] = "link_only"
res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2], blocking_rule="1=1", **args
table_or_tables=[df_1, df_2], blocking_rule_creator="1=1", **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]

assert res == 4 * 3

res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2], blocking_rule=block_on("surname"), **args
table_or_tables=[df_1, df_2], blocking_rule_creator=block_on("surname"), **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 1

res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2], blocking_rule=block_on("first_name"), **args
table_or_tables=[df_1, df_2],
blocking_rule_creator=block_on("first_name"),
**args,
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 3

res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2, df_3], blocking_rule="1=1", **args
table_or_tables=[df_1, df_2, df_3], blocking_rule_creator="1=1", **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 4 * 3 + 4 * 2 + 2 * 3

args["link_type"] = "link_and_dedupe"
res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2], blocking_rule="1=1", **args
table_or_tables=[df_1, df_2], blocking_rule_creator="1=1", **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
expected = 4 * 3 + (4 * 3 / 2) + (3 * 2 / 2)
assert res == expected

rule = "l.first_name = r.first_name and l.surname = r.surname"
res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2], blocking_rule=rule, **args
table_or_tables=[df_1, df_2], blocking_rule_creator=rule, **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 1

rule = block_on("first_name", "surname")
res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_1, df_2], blocking_rule=rule, **args
table_or_tables=[df_1, df_2], blocking_rule_creator=rule, **args
)
res = res_dict["number_of_comparisons_to_be_scored_post_filter_conditions"]
assert res == 1
Expand Down Expand Up @@ -416,7 +418,7 @@ def test_analyse_blocking_fast_methodology_edge_cases():
for br in blocking_rules:
res_dict = count_comparisons_from_blocking_rule(
table_or_tables=df,
blocking_rule=br,
blocking_rule_creator=br,
link_type="dedupe_only",
db_api=db_api,
unique_id_column_name="unique_id",
Expand Down Expand Up @@ -453,7 +455,7 @@ def test_analyse_blocking_fast_methodology_edge_cases():
for br in blocking_rules:
res_dict = count_comparisons_from_blocking_rule(
table_or_tables=[df_l, df_r],
blocking_rule=br,
blocking_rule_creator=br,
link_type="link_only",
db_api=db_api,
unique_id_column_name="unique_id",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_full_example_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_full_example_duckdb(tmp_path):

count_comparisons_from_blocking_rule(
table_or_tables=df,
blocking_rule='l.first_name = r.first_name and l."SUR name" = r."SUR name"',
blocking_rule_creator='l.first_name = r.first_name and l."SUR name" = r."SUR name"', # noqa: E501
link_type="dedupe_only",
db_api=db_api,
unique_id_column_name="unique_id",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_full_example_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_full_example_postgres(tmp_path, pg_engine):

count_comparisons_from_blocking_rule(
table_or_tables=df,
blocking_rule='l.first_name = r.first_name and l."surname" = r."surname"',
blocking_rule_creator='l.first_name = r.first_name and l."surname" = r."surname"', # noqa: E501
link_type="dedupe_only",
db_api=db_api,
unique_id_column_name="unique_id",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_total_comparison_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def make_dummy_frame(row_count):

res_dict = count_comparisons_from_blocking_rule(
table_or_tables=dfs,
blocking_rule="1=1",
blocking_rule_creator="1=1",
link_type=link_type,
db_api=db_api,
unique_id_column_name="unique_id",
Expand Down

0 comments on commit 59bdc38

Please sign in to comment.