Skip to content

Commit

Permalink
Create NTD external tables for 2022 API data (#3465)
Browse files Browse the repository at this point in the history
[#3403]

Co-authored-by: Erika Pacheco <[email protected]>
  • Loading branch information
erikamov and Erika Pacheco authored Sep 20, 2024
1 parent 62a61fd commit d3cffd6
Show file tree
Hide file tree
Showing 34 changed files with 2,318 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# External table definition for the 2022 NTD "breakdowns" API data.
# Reads gzipped newline-delimited JSON objects matching `source_objects`
# from `bucket` and exposes them as
# external_ntd__annual_reporting.2022__breakdowns.
operator: operators.ExternalTable
bucket: gs://calitp-ntd-api-products
source_objects:
  - "breakdowns/2022/*.jsonl.gz"
source_format: NEWLINE_DELIMITED_JSON
use_bq_client: true
# Custom hive partitioning: the partition keys `dt` (DATE) and
# `execution_ts` (TIMESTAMP) are declared in the URI prefix template.
hive_options:
  mode: CUSTOM
  require_partition_filter: false
  source_uri_prefix: "breakdowns/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
destination_project_dataset_table: "external_ntd__annual_reporting.2022__breakdowns"
prefix_bucket: false
# One-row SELECT against the destination table; presumably run by the
# operator after creation as a smoke test -- confirm against the operator.
# Quoted because the value contains a `{{ ... }}` template expression.
post_hook: "SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__breakdowns LIMIT 1;"
# Column list, kept in alphabetical order by name.
schema_fields:
  - name: agency
    type: STRING
  - name: agency_voms
    type: NUMERIC
  - name: city
    type: STRING
  - name: major_mechanical_failures
    type: NUMERIC
  - name: mode
    type: STRING
  - name: mode_name
    type: STRING
  - name: mode_voms
    type: NUMERIC
  # NOTE(review): ntd_id is an identifier typed as NUMERIC; if source IDs
  # can carry leading zeros they would not be preserved -- confirm.
  - name: ntd_id
    type: NUMERIC
  - name: organization_type
    type: STRING
  - name: other_mechanical_failures
    type: NUMERIC
  - name: primary_uza_population
    type: NUMERIC
  - name: report_year
    type: NUMERIC
  - name: reporter_type
    type: STRING
  - name: state
    type: STRING
  - name: total_mechanical_failures
    type: NUMERIC
  - name: train_miles
    type: NUMERIC
  - name: train_revenue_miles
    type: NUMERIC
  - name: type_of_service
    type: STRING
  - name: uace_code
    type: STRING
  - name: uza_name
    type: STRING
  - name: vehicle_passenger_car_miles
    type: NUMERIC
  # NOTE(review): unlike train_revenue_miles, this name ends at `_revenue`;
  # confirm it matches the key in the source JSON exactly.
  - name: vehicle_passenger_car_revenue
    type: NUMERIC
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# External table definition for the 2022 NTD "breakdowns_by_agency" API data.
# Reads gzipped newline-delimited JSON objects matching `source_objects`
# from `bucket` and exposes them as
# external_ntd__annual_reporting.2022__breakdowns_by_agency.
operator: operators.ExternalTable
bucket: gs://calitp-ntd-api-products
source_objects:
  - "breakdowns_by_agency/2022/*.jsonl.gz"
source_format: NEWLINE_DELIMITED_JSON
use_bq_client: true
# Custom hive partitioning: the partition keys `dt` (DATE) and
# `execution_ts` (TIMESTAMP) are declared in the URI prefix template.
hive_options:
  mode: CUSTOM
  require_partition_filter: false
  source_uri_prefix: "breakdowns_by_agency/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
destination_project_dataset_table: "external_ntd__annual_reporting.2022__breakdowns_by_agency"
prefix_bucket: false
# One-row SELECT against the destination table; presumably run by the
# operator after creation as a smoke test -- confirm against the operator.
# Quoted because the value contains a `{{ ... }}` template expression.
post_hook: "SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__breakdowns_by_agency LIMIT 1;"
# Column list, kept in alphabetical order by name. Columns are prefixed
# count_/max_/sum_, which suggests per-agency aggregates -- verify upstream.
schema_fields:
  - name: count_major_mechanical_failures_questionable
    type: NUMERIC
  - name: count_other_mechanical_failures_questionable
    type: NUMERIC
  - name: count_total_mechanical_failures_questionable
    type: NUMERIC
  - name: count_train_miles_questionable
    type: NUMERIC
  - name: count_train_revenue_miles_questionable
    type: NUMERIC
  - name: count_vehicle_passenger_car_miles_questionable
    type: NUMERIC
  - name: max_agency
    type: STRING
  - name: max_agency_voms
    type: NUMERIC
  - name: max_city
    type: STRING
  - name: max_organization_type
    type: STRING
  - name: max_primary_uza_population
    type: NUMERIC
  - name: max_reporter_type
    type: STRING
  - name: max_state
    type: STRING
  - name: max_uace_code
    type: STRING
  - name: max_uza_name
    type: STRING
  # NOTE(review): ntd_id is an identifier typed as NUMERIC; if source IDs
  # can carry leading zeros they would not be preserved -- confirm.
  - name: ntd_id
    type: NUMERIC
  - name: report_year
    type: NUMERIC
  - name: sum_major_mechanical_failures
    type: NUMERIC
  - name: sum_other_mechanical_failures
    type: NUMERIC
  - name: sum_total_mechanical_failures
    type: NUMERIC
  - name: sum_train_miles
    type: NUMERIC
  - name: sum_train_revenue_miles
    type: NUMERIC
  - name: sum_vehicle_passenger_car_miles
    type: NUMERIC
  # NOTE(review): unlike sum_train_revenue_miles, this name ends at
  # `_revenue`; confirm it matches the key in the source JSON exactly.
  - name: sum_vehicle_passenger_car_revenue
    type: NUMERIC
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# External table definition for the 2022 NTD
# "capital_expenses_by_capital_use" API data.
# Reads gzipped newline-delimited JSON objects matching `source_objects`
# from `bucket` and exposes them as
# external_ntd__annual_reporting.2022__capital_expenses_by_capital_use.
operator: operators.ExternalTable
bucket: gs://calitp-ntd-api-products
source_objects:
  - "capital_expenses_by_capital_use/2022/*.jsonl.gz"
source_format: NEWLINE_DELIMITED_JSON
use_bq_client: true
# Custom hive partitioning: the partition keys `dt` (DATE) and
# `execution_ts` (TIMESTAMP) are declared in the URI prefix template.
hive_options:
  mode: CUSTOM
  require_partition_filter: false
  source_uri_prefix: "capital_expenses_by_capital_use/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_by_capital_use"
prefix_bucket: false
# One-row SELECT against the destination table; presumably run by the
# operator after creation as a smoke test -- confirm against the operator.
# Quoted because the value contains a `{{ ... }}` template expression.
post_hook: "SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_by_capital_use LIMIT 1;"
# Column list, kept in alphabetical order by name.
# NOTE(review): the expense category columns (administrative_buildings,
# guideway, stations, ...) are STRING while `total` is NUMERIC -- confirm
# this mirrors how the source JSON encodes those values.
schema_fields:
  - name: administrative_buildings
    type: STRING
  - name: agency
    type: STRING
  - name: agency_voms
    type: NUMERIC
  - name: city
    type: STRING
  - name: communication_information
    type: STRING
  - name: fare_collection_equipment
    type: STRING
  - name: form_type
    type: STRING
  - name: guideway
    type: STRING
  - name: maintenance_buildings
    type: STRING
  - name: mode_name
    type: STRING
  - name: mode_voms
    type: NUMERIC
  - name: modecd
    type: STRING
  # NOTE(review): ntd_id is an identifier typed as NUMERIC; if source IDs
  # can carry leading zeros they would not be preserved -- confirm.
  - name: ntd_id
    type: NUMERIC
  - name: organization_type
    type: STRING
  - name: other
    type: STRING
  - name: other_vehicles
    type: STRING
  - name: passenger_vehicles
    type: STRING
  - name: reduced_reporter
    type: STRING
  - name: report_year
    type: NUMERIC
  - name: reporter_type
    type: STRING
  - name: state
    type: STRING
  - name: stations
    type: STRING
  - name: total
    type: NUMERIC
  - name: typeofservicecd
    type: STRING
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# External table definition for the 2022 NTD "capital_expenses_by_mode"
# API data.
# Reads gzipped newline-delimited JSON objects matching `source_objects`
# from `bucket` and exposes them as
# external_ntd__annual_reporting.2022__capital_expenses_by_mode.
operator: operators.ExternalTable
bucket: gs://calitp-ntd-api-products
source_objects:
  - "capital_expenses_by_mode/2022/*.jsonl.gz"
source_format: NEWLINE_DELIMITED_JSON
use_bq_client: true
# Custom hive partitioning: the partition keys `dt` (DATE) and
# `execution_ts` (TIMESTAMP) are declared in the URI prefix template.
hive_options:
  mode: CUSTOM
  require_partition_filter: false
  source_uri_prefix: "capital_expenses_by_mode/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_by_mode"
prefix_bucket: false
# One-row SELECT against the destination table; presumably run by the
# operator after creation as a smoke test -- confirm against the operator.
# Quoted because the value contains a `{{ ... }}` template expression.
post_hook: "SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_by_mode LIMIT 1;"
# Column list, kept in alphabetical order by name. Columns are prefixed
# count_/max_/sum_, which suggests per-mode aggregates -- verify upstream.
schema_fields:
  - name: count_administrative_buildings_q
    type: NUMERIC
  - name: count_communication_information_q
    type: NUMERIC
  - name: count_fare_collection_equipment_q
    type: NUMERIC
  - name: count_maintenance_buildings_q
    type: NUMERIC
  - name: count_other_q
    type: NUMERIC
  - name: count_other_vehicles_q
    type: NUMERIC
  - name: count_passenger_vehicles_q
    type: NUMERIC
  - name: count_reduced_reporter_q
    type: NUMERIC
  - name: count_stations_q
    type: NUMERIC
  - name: max_agency
    type: STRING
  # NOTE(review): STRING here, but 2022__breakdowns_by_agency declares
  # max_agency_voms as NUMERIC -- confirm against the source data.
  - name: max_agency_voms
    type: STRING
  - name: max_city
    type: STRING
  - name: max_mode_name
    type: STRING
  - name: max_organization_type
    type: STRING
  - name: max_reporter_type
    type: STRING
  - name: max_state
    type: STRING
  - name: modecd
    type: STRING
  # NOTE(review): ntd_id is an identifier typed as NUMERIC; if source IDs
  # can carry leading zeros they would not be preserved -- confirm.
  - name: ntd_id
    type: NUMERIC
  - name: report_year
    type: NUMERIC
  - name: sum_administrative_buildings
    type: NUMERIC
  - name: sum_communication_information
    type: NUMERIC
  - name: sum_fare_collection_equipment
    type: NUMERIC
  - name: sum_guideway
    type: NUMERIC
  - name: sum_maintenance_buildings
    type: NUMERIC
  - name: sum_other
    type: NUMERIC
  - name: sum_other_vehicles
    type: NUMERIC
  - name: sum_passenger_vehicles
    type: NUMERIC
  - name: sum_reduced_reporter
    type: NUMERIC
  - name: sum_stations
    type: NUMERIC
  - name: sum_total
    type: NUMERIC
  - name: typeofservicecd
    type: STRING
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# External table definition for the 2022 NTD
# "capital_expenses_for_existing_service" API data.
# Reads gzipped newline-delimited JSON objects matching `source_objects`
# from `bucket` and exposes them as
# external_ntd__annual_reporting.2022__capital_expenses_for_existing_service.
operator: operators.ExternalTable
bucket: gs://calitp-ntd-api-products
source_objects:
  - "capital_expenses_for_existing_service/2022/*.jsonl.gz"
source_format: NEWLINE_DELIMITED_JSON
use_bq_client: true
# Custom hive partitioning: the partition keys `dt` (DATE) and
# `execution_ts` (TIMESTAMP) are declared in the URI prefix template.
hive_options:
  mode: CUSTOM
  require_partition_filter: false
  source_uri_prefix: "capital_expenses_for_existing_service/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_for_existing_service"
prefix_bucket: false
# One-row SELECT against the destination table; presumably run by the
# operator after creation as a smoke test -- confirm against the operator.
# Quoted because the value contains a `{{ ... }}` template expression.
post_hook: "SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_for_existing_service LIMIT 1;"
# Column list, kept in alphabetical order by name.
schema_fields:
  - name: form_type
    type: STRING
  - name: max_agency
    type: STRING
  # NOTE(review): STRING here, but 2022__breakdowns_by_agency declares
  # max_agency_voms as NUMERIC -- confirm against the source data.
  - name: max_agency_voms
    type: STRING
  - name: max_city
    type: STRING
  - name: max_organization_type
    type: STRING
  # NOTE(review): STRING here, but
  # 2022__capital_expenses_for_expansion_of_service declares
  # max_primary_uza_population as NUMERIC -- confirm against the source data.
  - name: max_primary_uza_population
    type: STRING
  - name: max_reporter_type
    type: STRING
  - name: max_state
    type: STRING
  - name: max_uace_code
    type: STRING
  - name: max_uza_name
    type: STRING
  # NOTE(review): ntd_id is an identifier typed as NUMERIC; if source IDs
  # can carry leading zeros they would not be preserved -- confirm.
  - name: ntd_id
    type: NUMERIC
  - name: report_year
    type: NUMERIC
  - name: sum_administrative_buildings
    type: NUMERIC
  - name: sum_communication_information
    type: NUMERIC
  - name: sum_fare_collection_equipment
    type: NUMERIC
  - name: sum_guideway
    type: NUMERIC
  - name: sum_maintenance_buildings
    type: NUMERIC
  - name: sum_other
    type: NUMERIC
  - name: sum_other_vehicles
    type: NUMERIC
  - name: sum_passenger_vehicles
    type: NUMERIC
  - name: sum_reduced_reporter
    type: NUMERIC
  - name: sum_stations
    type: NUMERIC
  - name: sum_total
    type: NUMERIC
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# External table definition for the 2022 NTD
# "capital_expenses_for_expansion_of_service" API data.
# Reads gzipped newline-delimited JSON objects matching `source_objects`
# from `bucket` and exposes them as
# external_ntd__annual_reporting.2022__capital_expenses_for_expansion_of_service.
operator: operators.ExternalTable
bucket: gs://calitp-ntd-api-products
source_objects:
  - "capital_expenses_for_expansion_of_service/2022/*.jsonl.gz"
source_format: NEWLINE_DELIMITED_JSON
use_bq_client: true
# Custom hive partitioning: the partition keys `dt` (DATE) and
# `execution_ts` (TIMESTAMP) are declared in the URI prefix template.
hive_options:
  mode: CUSTOM
  require_partition_filter: false
  source_uri_prefix: "capital_expenses_for_expansion_of_service/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_for_expansion_of_service"
prefix_bucket: false
# One-row SELECT against the destination table; presumably run by the
# operator after creation as a smoke test -- confirm against the operator.
# Quoted because the value contains a `{{ ... }}` template expression.
post_hook: "SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_for_expansion_of_service LIMIT 1;"
# Column list, kept in alphabetical order by name.
schema_fields:
  - name: form_type
    type: STRING
  - name: max_agency
    type: STRING
  # NOTE(review): STRING here, but 2022__breakdowns_by_agency declares
  # max_agency_voms as NUMERIC -- confirm against the source data.
  - name: max_agency_voms
    type: STRING
  - name: max_city
    type: STRING
  - name: max_organization_type
    type: STRING
  - name: max_primary_uza_population
    type: NUMERIC
  - name: max_reporter_type
    type: STRING
  - name: max_state
    type: STRING
  - name: max_uace_code
    type: STRING
  - name: max_uza_name
    type: STRING
  # NOTE(review): ntd_id is an identifier typed as NUMERIC; if source IDs
  # can carry leading zeros they would not be preserved -- confirm.
  - name: ntd_id
    type: NUMERIC
  - name: report_year
    type: NUMERIC
  - name: sum_administrative_buildings
    type: NUMERIC
  - name: sum_communication_information
    type: NUMERIC
  - name: sum_fare_collection_equipment
    type: NUMERIC
  - name: sum_guideway
    type: NUMERIC
  - name: sum_maintenance_buildings
    type: NUMERIC
  - name: sum_other
    type: NUMERIC
  - name: sum_other_vehicles
    type: NUMERIC
  - name: sum_passenger_vehicles
    type: NUMERIC
  - name: sum_reduced_reporter
    type: NUMERIC
  - name: sum_stations
    type: NUMERIC
  - name: sum_total
    type: NUMERIC
Loading

0 comments on commit d3cffd6

Please sign in to comment.