diff --git a/warehouse/models/staging/payments/littlepay/stg_littlepay__authorisations.sql b/warehouse/models/staging/payments/littlepay/stg_littlepay__authorisations.sql index 66a6610017..4204ca3a6a 100644 --- a/warehouse/models/staging/payments/littlepay/stg_littlepay__authorisations.sql +++ b/warehouse/models/staging/payments/littlepay/stg_littlepay__authorisations.sql @@ -32,8 +32,15 @@ clean_columns_and_dedupe_files AS ( ELSE {{ extract_littlepay_filename_date() }} END AS littlepay_export_date, ts, + -- hash all content not generated by us to enable deduping full dup rows + -- hashing at this step will preserve distinction between nulls and empty strings in case that is meaningful upstream + {{ dbt_utils.generate_surrogate_key(['participant_id', + 'aggregation_id', 'acquirer_id', 'request_type', 'transaction_amount', 'currency_code', + 'retrieval_reference_number', 'littlepay_reference_number', 'external_reference_number', + 'response_code', 'status', 'authorisation_date_time_utc']) }} AS content_hash, FROM source -- remove duplicate instances of the same file (file defined as date-level update from LP) + -- use dense rank instead of row number because we need to allow all rows from a given file to be included (allow ties) QUALIFY DENSE_RANK() OVER (PARTITION BY littlepay_export_date ORDER BY littlepay_export_ts DESC, ts DESC) = 1 ), @@ -55,19 +62,15 @@ stg_littlepay__authorisations AS ( _line_number, `instance`, extract_filename, - - littlepay_export_ts, - - CASE - WHEN extract_filename = "24jan_datafeed.psv" THEN DATE '2023-01-24' - WHEN extract_filename = "25jan_datafeed.psv" THEN DATE '2023-01-25' - ELSE littlepay_export_date - END AS littlepay_export_date, + littlepay_export_date, ts, {{ dbt_utils.generate_surrogate_key(['littlepay_export_date', '_line_number', 'instance']) }} AS _key, - {{ dbt_utils.generate_surrogate_key(['aggregation_id', 'retrieval_reference_number', 'authorisation_date_time_utc']) }} AS _payments_key, + {{ dbt_utils.generate_surrogate_key(['aggregation_id', 'authorisation_date_time_utc']) }} AS _payments_key, FROM clean_columns_and_dedupe_files + -- remove full duplicate rows + QUALIFY ROW_NUMBER() + OVER (PARTITION BY content_hash ORDER BY littlepay_export_ts DESC, _line_number ASC) = 1 ) SELECT * FROM stg_littlepay__authorisations