From a59041eac486d36319ebc6def8d14ddca8d6a283 Mon Sep 17 00:00:00 2001
From: Robin Linacre
Date: Wed, 1 May 2024 08:43:01 +0100
Subject: [PATCH] blocking redesign initial commit

---
 splink/blocking_analysis.py | 39 ++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/splink/blocking_analysis.py b/splink/blocking_analysis.py
index 6a99f8f6e8..c378a91bc1 100644
--- a/splink/blocking_analysis.py
+++ b/splink/blocking_analysis.py
@@ -6,9 +6,10 @@ from .database_api import DatabaseAPI
 from .misc import ensure_is_iterable
 
 
-blocking_rule_or_rules_type = (
-    Union[BlockingRuleCreator, str, dict, List[Union[BlockingRuleCreator, str, dict]]],
-)
+blocking_rule_or_rules_type = Union[
+    BlockingRuleCreator, str, dict, List[Union[BlockingRuleCreator, str, dict]]
+]
+
 link_type_type = Literal["link_only", "link_and_dedupe", "dedupe_only"]
 
 
@@ -16,24 +17,22 @@ def cumulative_comparisons_from_blocking_rules():
     pass
 
 
-def count_comparisons_from_blocking_rules(
+def count_comparisons_from_blocking_rule(
     table_or_tables,
     *,
-    blocking_rule_or_rules: blocking_rule_or_rules_type,
+    blocking_rule: Union[BlockingRuleCreator, str, dict],
     link_type: link_type_type,
-    database_api: DatabaseAPI,
-    method: str = "comparisons_generated",
+    db_api: DatabaseAPI,
+    compute_post_filter_count: bool = False,
+    post_filter_limit: int = 1e9,
 ):
-    # Something like this? 
-    blocking_rules = [
-        to_blocking_rule_creator(br).get_blocking_rule(database_api.sql_dialect)
-        for br in ensure_is_iterable(blocking_rule_or_rules)
-    ]
-
-    total = 0
-    for blocking_rule in blocking_rules:
-        total = total + count_comparisons_from_blocking_rule_pre_filter_conditions(
-            table_or_tables, blocking_rule, link_type, database_api, method
-        )
-
-    return total
+    blocking_rule = to_blocking_rule_creator(blocking_rule).get_blocking_rule(
+        db_api.sql_dialect
+    )
+
+    pre_filter_total = count_comparisons_from_blocking_rule_pre_filter_conditions()
+
+    return {
+        "number_of_comparison_pre_filter_conditions": pre_filter_total,
+        "number_of_comparison_post_filter_conditions": 0,
+    }