Skip to content

Commit

Permalink
blocking redesign initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinL committed May 1, 2024
1 parent 690d2d4 commit a59041e
Showing 1 changed file with 19 additions and 20 deletions.
39 changes: 19 additions & 20 deletions splink/blocking_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,33 @@
from .database_api import DatabaseAPI
from .misc import ensure_is_iterable

blocking_rule_or_rules_type = (
Union[BlockingRuleCreator, str, dict, List[Union[BlockingRuleCreator, str, dict]]],
)
blocking_rule_or_rules_type = Union[
BlockingRuleCreator, str, dict, List[Union[BlockingRuleCreator, str, dict]]
]

link_type_type = Literal["link_only", "link_and_dedupe", "dedupe_only"]


def cumulative_comparisons_from_blocking_rules():
pass


def count_comparisons_from_blocking_rules(
def count_comparisons_from_blocking_rule(
table_or_tables,
*,
blocking_rule_or_rules: blocking_rule_or_rules_type,
blocking_rule: Union[BlockingRuleCreator, str, dict],
link_type: link_type_type,
database_api: DatabaseAPI,
method: str = "comparisons_generated",
db_api: DatabaseAPI,
compute_post_filter_count: bool = False,
post_filter_limit: int = 1e9,
):
# Something like this?
blocking_rules = [
to_blocking_rule_creator(br).get_blocking_rule(database_api.sql_dialect)
for br in ensure_is_iterable(blocking_rule_or_rules)
]

total = 0
for blocking_rule in blocking_rules:
total = total + count_comparisons_from_blocking_rule_pre_filter_conditions(
table_or_tables, blocking_rule, link_type, database_api, method
)

return total
blocking_rule = to_blocking_rule_creator(blocking_rule).get_blocking_rule(
db_api.sql_dialect
)

pre_filter_total = count_comparisons_from_blocking_rule_pre_filter_conditions()

return {
"number_of_comparison_pre_filter_conditions": pre_filter_total,
"number_of_comparison_post_filter_conditions": 0,
}

0 comments on commit a59041e

Please sign in to comment.