diff --git a/warehouse/models/mart/transit_database/_mart_transit_database.yml b/warehouse/models/mart/transit_database/_mart_transit_database.yml index 191d8557ed..8e2c80afc5 100644 --- a/warehouse/models/mart/transit_database/_mart_transit_database.yml +++ b/warehouse/models/mart/transit_database/_mart_transit_database.yml @@ -79,10 +79,14 @@ models: description: | The organization's ID in the National Transit Database / FTA agency ID number. Data entry processes / sources of truth for this field have changed over time. - As of May 2023, the source is the `raw_ntd_id` field in the Airtable `organizations` table; + As of May 2023, the source is the `raw_ntd_id` field in the Airtable `organizations` table, + which is derived from 2021 NTD IDs; for records before May 2023, sources were some combination of the `NTD agency info` table in Airtable and the `ntd_agency_to_organizations` CSV seed file in this dbt project (now renamed to `_deprecated__ntd_agency_to_organizations`). + - name: ntd_id_2022 + description: | + The 5-digit component of `raw_ntd_id`: the last 5 characters when the ID is at least 10 characters long; shorter IDs are kept unchanged. - name: name description: Organization name - name: organization_type diff --git a/warehouse/models/mart/transit_database/dim_organizations.sql b/warehouse/models/mart/transit_database/dim_organizations.sql index 05387a44e5..353802be84 100644 --- a/warehouse/models/mart/transit_database/dim_organizations.sql +++ b/warehouse/models/mart/transit_database/dim_organizations.sql @@ -32,9 +32,11 @@ dim_organizations AS ( WHEN _valid_from >= '2023-05-23' THEN raw_ntd_id ELSE ntd_to_org.ntd_id END AS ntd_id, + IF(LENGTH(ntd_id) >= 10, + SUBSTR(ntd_id, -5), + ntd_id) AS ntd_id_2022, public_currently_operating, public_currently_operating_fixed_route, - _is_current, _valid_from, _valid_to