From 95a92ff6f74fda6419b5b95add7c5c103e0a1975 Mon Sep 17 00:00:00 2001 From: Robin Linacre Date: Thu, 16 May 2024 12:35:13 +0100 Subject: [PATCH] fix bugs introduced by none inputcolumn --- splink/internals/blocking_analysis.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/splink/internals/blocking_analysis.py b/splink/internals/blocking_analysis.py index 4ba02a1165..9807fb4b51 100644 --- a/splink/internals/blocking_analysis.py +++ b/splink/internals/blocking_analysis.py @@ -203,11 +203,16 @@ def _row_counts_per_input_table( *, splink_df_dict: dict[str, "SplinkDataFrame"], link_type: backend_link_type_options, - source_dataset_column_name: Optional[str], + source_dataset_input_column: Optional[InputColumn], db_api: DatabaseAPISubClass, ) -> "SplinkDataFrame": pipeline = CTEPipeline() + if source_dataset_input_column: + source_dataset_column_name = source_dataset_input_column.name + else: + source_dataset_column_name = None + sql = vertically_concatenate_sql( splink_df_dict, salting_required=False, @@ -334,7 +339,7 @@ def _cumulative_comparisons_to_be_scored_from_blocking_rules( rc = _row_counts_per_input_table( splink_df_dict=splink_df_dict, link_type=link_type, - source_dataset_column_name=source_dataset_input_column.name, + source_dataset_input_column=source_dataset_input_column, db_api=db_api, ).as_record_dict() @@ -364,7 +369,7 @@ def _cumulative_comparisons_to_be_scored_from_blocking_rules( input_columns = [source_dataset_input_column, unique_id_input_column] sql_select_expr = ",".join( - [item for c in input_columns for item in c.l_r_names_as_l_r] + [item for c in input_columns if c is not None for item in c.l_r_names_as_l_r] ) blocking_input_tablename_l = "__splink__df_concat"