updates to acceptance criteria scripts
AshishKuls committed Jan 26, 2024
1 parent b4ffbfe · commit 4249850
Showing 4 changed files with 207 additions and 148 deletions.
67 changes: 16 additions & 51 deletions tm2py/acceptance/acceptance.py
@@ -165,6 +165,7 @@ def _make_roadway_network_comparisons(self):
         s_bridge_df = self.s.simulated_bridge_details_df.copy()
 
         o_df["time_period"] = o_df.time_period.str.lower()
+        # o_df = o_df.drop(columns=["standard_link_id"])
         s_trim_df = s_df[
             s_df["ft"] <= self.MAX_FACILITY_TYPE_FOR_ROADWAY_COMPARISONS
         ].copy()
@@ -315,6 +316,8 @@ def _make_transit_network_comparisons(self):
             .reset_index()
         )
 
+        sim_df = self._fix_technology_labels(sim_df, "technology")
+
         rail_df = pd.merge(
             sim_df,
             obs_df,
@@ -334,6 +337,8 @@ def _make_transit_network_comparisons(self):
             )
         ].copy()
 
+        sim_df = self._fix_technology_labels(sim_df, "technology")
+
         non_df = pd.merge(
             sim_df,
             obs_df,
@@ -342,7 +347,7 @@ def _make_transit_network_comparisons(self):
             right_on=["standard_line_name", "daily_line_name", "time_period"],
         )
 
-        boards_df = pd.concat([rail_df, non_df])
+        boards_df = pd.concat([rail_df, non_df], axis="index", ignore_index=True)
 
         boards_df["operator"] = np.where(
             boards_df["operator"].isnull(),
@@ -357,22 +362,23 @@ def _make_transit_network_comparisons(self):
 
         # step 3 -- create a daily shape
         df = pd.DataFrame(self.s.simulated_transit_segments_gdf).copy()
-        am_shape_df = df[~(df["line"].str.contains("pnr_"))].reset_index().copy()
+        am_shape_df = df[~(df["LINE_ID"].str.contains("pnr_"))].reset_index().copy()
         am_shape_df = self.c.aggregate_line_names_across_time_of_day(
-            am_shape_df, "line"
+            am_shape_df, "LINE_ID"
         )
         b_df = (
             am_shape_df.groupby("daily_line_name")
-            .agg({"line": "first"})
+            .agg({"LINE_ID": "first"})
             .reset_index()
             .copy()
         )
         c_df = pd.DataFrame(
             self.s.simulated_transit_segments_gdf[
-                self.s.simulated_transit_segments_gdf["line"].isin(b_df["line"])
+                self.s.simulated_transit_segments_gdf["LINE_ID"].isin(b_df["LINE_ID"])
             ].copy()
         )
-        daily_shape_df = pd.merge(c_df, b_df, how="left", on="line")
+        daily_shape_df = pd.merge(c_df, b_df, how="left", on="LINE_ID")
+        daily_shape_df = daily_shape_df.rename(columns={"INODE": "emme_a_node_id", "JNODE": "emme_b_node_id"})
 
         # step 4 -- join the shapes to the boardings
         # for daily, join boardings to shape, as I care about the boardings more than the daily shapes
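The daily-shape step reduces each daily_line_name to one representative LINE_ID, keeps only that line's segments, and renames the Emme node columns. A sketch of the pattern on hypothetical data; the str.replace below is an assumed stand-in for the real aggregate_line_names_across_time_of_day helper:

    import pandas as pd

    # Hypothetical segments; the real frame is simulated_transit_segments_gdf.
    seg_df = pd.DataFrame({
        "LINE_ID": ["line_am_1", "line_pm_1", "line_am_2"],
        "INODE": [10, 10, 30],
        "JNODE": [20, 20, 40],
    })

    # Stand-in for aggregate_line_names_across_time_of_day: strip the period token.
    am_shape_df = seg_df.copy()
    am_shape_df["daily_line_name"] = am_shape_df["LINE_ID"].str.replace(
        r"_(am|pm)_", "_", regex=True
    )

    # One representative LINE_ID per daily line, then keep only its segments.
    b_df = am_shape_df.groupby("daily_line_name").agg({"LINE_ID": "first"}).reset_index()
    daily_shape_df = pd.merge(
        seg_df[seg_df["LINE_ID"].isin(b_df["LINE_ID"])], b_df, how="left", on="LINE_ID"
    ).rename(columns={"INODE": "emme_a_node_id", "JNODE": "emme_b_node_id"})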
@@ -393,16 +399,16 @@ def _make_transit_network_comparisons(self):
         am_join_df = pd.merge(
             am_shape_df,
             boards_df[boards_df["time_period"] == "am"],
-            how="left",
+            how="outer",
             right_on=["line_name", "daily_line_name"],
-            left_on=["line", "daily_line_name"],
+            left_on=["LINE_ID", "daily_line_name"],
         )
         am_join_df["model_line_id"] = am_join_df["line_name"]
         am_join_df["time_period"] = np.where(
             am_join_df["time_period"].isnull(), "am", am_join_df["time_period"]
         )
         am_join_df = self.s._get_operator_name_from_line_name(
-            am_join_df, "line", "temp_operator"
+            am_join_df, "LINE_ID", "temp_operator"
         )
         am_join_df["operator"] = np.where(
             am_join_df["operator"].isnull()
@@ -531,48 +537,7 @@ def _make_home_work_flow_comparisons(self):
 
     def _make_zero_vehicle_household_comparisons(self):
 
-        # prepare simulated data
-        a_df = (
-            pd.merge(
-                self.s.simulated_zero_vehicle_hhs_df,
-                self.s.simulated_maz_data_df[["MAZ_ORIGINAL", "MAZSEQ"]],
-                left_on="maz",
-                right_on="MAZSEQ",
-                how="left",
-            )
-            .drop(columns=["maz", "MAZSEQ"])
-            .rename(columns={"MAZ_ORIGINAL": "maz"})
-        )
-
-        # TODO: probably better if this is done in Simulated, to avoid using Canonical in this class
-        a_df = pd.merge(
-            a_df,
-            self.c.census_2010_to_maz_crosswalk_df,
-            how="left",
-            on="maz",
-        )
-
-        a_df["product"] = a_df["simulated_zero_vehicle_share"] * a_df["maz_share"]
-
-        b_df = (
-            a_df.groupby("blockgroup")
-            .agg({"product": "sum", "simulated_households": "sum"})
-            .reset_index()
-            .rename(columns={"product": "simulated_zero_vehicle_share"})
-        )
-        b_df["tract"] = b_df["blockgroup"].astype("str").str.slice(stop=-1)
-        b_df["product"] = (
-            b_df["simulated_zero_vehicle_share"] * b_df["simulated_households"]
-        )
-
-        c_df = (
-            b_df.groupby("tract")
-            .agg({"product": "sum", "simulated_households": "sum"})
-            .reset_index()
-        )
-        c_df["simulated_zero_vehicle_share"] = (
-            c_df["product"] / c_df["simulated_households"]
-        )
+        c_df = self.s.reduced_simulated_zero_vehicle_hhs_df
 
         # prepare the observed data
         join_df = self.o.census_2017_zero_vehicle_hhs_df.copy()
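The deleted block rolled MAZ-level zero-vehicle shares up to block groups and tracts as a household-weighted average; the commit moves that reduction into the Simulated class and consumes reduced_simulated_zero_vehicle_hhs_df instead. For reference, the core of the removed calculation was this weighting pattern (hypothetical numbers):

    import pandas as pd

    # Hypothetical blockgroup shares; the tract id is the blockgroup id minus its last digit.
    b_df = pd.DataFrame({
        "blockgroup": [60014001001, 60014001002],
        "simulated_zero_vehicle_share": [0.10, 0.30],
        "simulated_households": [100, 300],
    })
    b_df["tract"] = b_df["blockgroup"].astype("str").str.slice(stop=-1)
    b_df["product"] = b_df["simulated_zero_vehicle_share"] * b_df["simulated_households"]

    c_df = (
        b_df.groupby("tract")
        .agg({"product": "sum", "simulated_households": "sum"})
        .reset_index()
    )
    # Household-weighted tract share: (0.10 * 100 + 0.30 * 300) / 400 = 0.25
    c_df["simulated_zero_vehicle_share"] = c_df["product"] / c_df["simulated_households"]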
6 changes: 1 addition & 5 deletions tm2py/acceptance/canonical.py
@@ -225,11 +225,7 @@ def _read_pems_to_link_crosswalk(self) -> pd.DataFrame:
 
         df = pd.read_csv(os.path.join(file_root, in_file))
 
-        df["pems_station_id"] = df["station"].astype(str) + "_" + df["direction"]
-
-        assert df["pems_station_id"].is_unique  # validate crosswalk - correct location?
-
-        df = df[["pems_station_id", "A", "B"]]
+        df = df[["station_id", "A", "B"]]
 
         self.pems_to_link_crosswalk_df = df
 
22 changes: 11 additions & 11 deletions tm2py/acceptance/observed.py
@@ -22,7 +22,7 @@ class Observed:
     census_tract_centroids_gdf: gpd.GeoDataFrame
 
     RELEVANT_PEMS_OBSERVED_YEARS_LIST = [2014, 2015, 2016]
-    RELEVANT_BRIDGE_TRANSACTIONS_YEARS_LIST = [2014, 2015, 2016]  # ch
+    RELEVANT_BRIDGE_TRANSACTIONS_YEARS_LIST = [2014, 2015, 2016]
     RELEVANT_PEMS_VEHICLE_CLASSES_FOR_LARGE_TRUCK = [6, 7, 8, 9, 10, 11, 12]
 
     ohio_rmse_standards_df = pd.DataFrame(
@@ -325,7 +325,7 @@ def reduce_on_board_survey(self, read_file_from_disk=True):
                 dtype=self.reduced_transit_on_board_df.dtypes.to_dict(),
             )
         else:
-            in_df = pd.read_feather(os.path.join(file_root, in_file))
+            in_df = pd.read_csv(os.path.join(file_root, in_file))
             temp_df = in_df[
                 (in_df["weekpart"].isna()) | (in_df["weekpart"] != "WEEKEND")
             ].copy()
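The survey file is now read with read_csv rather than read_feather; the filter that follows keeps weekday and unlabeled records and drops explicit weekend records. A sketch on stand-in data:

    import numpy as np
    import pandas as pd

    # Stand-in rows; the real frame comes from pd.read_csv(os.path.join(file_root, in_file)).
    in_df = pd.DataFrame({"weekpart": ["WEEKDAY", "WEEKEND", np.nan]})

    # Keep rows whose weekpart is missing or anything other than "WEEKEND".
    temp_df = in_df[(in_df["weekpart"].isna()) | (in_df["weekpart"] != "WEEKEND")].copy()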
@@ -553,11 +553,11 @@ def _make_district_to_district_transit_flows_by_technology(self):
         o_df = self.reduced_transit_spatial_flow_df.copy()
         o_df = o_df[o_df["time_period"] == "am"].copy()
 
-        tm1_district_dict = self.c.taz_to_district_df.set_index("taz_tm1")[
-            "district_tm1"
+        tm2_district_dict = self.c.taz_to_district_df.set_index("taz_tm2")[
+            "district_tm2"
         ].to_dict()
-        o_df["orig_district"] = o_df["orig_taz"].map(tm1_district_dict)
-        o_df["dest_district"] = o_df["dest_taz"].map(tm1_district_dict)
+        o_df["orig_district"] = o_df["orig_taz"].map(tm2_district_dict)
+        o_df["dest_district"] = o_df["dest_taz"].map(tm2_district_dict)
 
         for prefix in self.c.transit_technology_abbreviation_dict.keys():
             o_df["{}".format(prefix.lower())] = (
@@ -872,9 +872,8 @@ def _reduce_pems_counts(self, read_file_from_disk=True):
                 self.c.pems_to_link_crosswalk_df,
                 out_df,
                 how="left",
-                left_on="pems_station_id",
-                right_on="station_id",
-            ).drop(columns=["pems_station_id"])
+                on="station_id",
+            )
 
             out_df = self._join_tm2_node_ids(out_df)
 
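With the crosswalk now carrying station_id directly (see the canonical.py change above), the merge collapses to a single shared-key join and there is no leftover pems_station_id column to drop. A minimal sketch with hypothetical values:

    import pandas as pd

    # Hypothetical crosswalk (station_id -> A/B nodes) and PeMS counts.
    pems_to_link_crosswalk_df = pd.DataFrame({"station_id": ["401_N"], "A": [11], "B": [12]})
    out_df = pd.DataFrame({"station_id": ["401_N"], "observed_flow": [950.0]})

    # A shared key lets on= replace the old left_on/right_on pair.
    out_df = pd.merge(pems_to_link_crosswalk_df, out_df, how="left", on="station_id")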
@@ -894,15 +893,16 @@ def _reduce_pems_counts(self, read_file_from_disk=True):
             .reset_index()
         )
         join_df = out_df[
-            ["A", "B", "observed_flow", "station_id", "type", "vehicle_class"]
+            ["emme_a_node_id", "emme_b_node_id", "time_period", "station_id", "type", "vehicle_class"]
         ].copy()
         return_df = pd.merge(
             median_df,
             join_df,
             how="left",
-            on=["A", "B", "observed_flow", "vehicle_class"],
+            on=["emme_a_node_id", "emme_b_node_id", "time_period", "vehicle_class"],
         ).reset_index(drop=True)
 
+        # return_df = return_df.rename(columns={"model_link_id": "standard_link_id"})
         return_df = self._join_ohio_standards(return_df)
         return_df = self._identify_key_arterials_and_bridges(return_df)
 
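This hunk rekeys the join-back after the median reduction: the grouping keys become the Emme node ids plus time_period, and the station attributes are re-attached on those same keys. The underlying pattern, sketched on hypothetical rows (the drop_duplicates below is an extra guard against fan-out, not part of the commit):

    import pandas as pd

    # Hypothetical per-observation counts keyed by Emme nodes and time period.
    out_df = pd.DataFrame({
        "emme_a_node_id": [1, 1, 2],
        "emme_b_node_id": [5, 5, 6],
        "time_period": ["am", "am", "md"],
        "vehicle_class": ["all", "all", "all"],
        "observed_flow": [100.0, 120.0, 80.0],
        "station_id": ["s1", "s1", "s2"],
        "type": ["ML", "ML", "ML"],
    })

    keys = ["emme_a_node_id", "emme_b_node_id", "time_period", "vehicle_class"]
    median_df = out_df.groupby(keys).agg({"observed_flow": "median"}).reset_index()

    # Re-attach station attributes on the same keys.
    join_df = out_df[keys + ["station_id", "type"]].drop_duplicates()
    return_df = pd.merge(median_df, join_df, how="left", on=keys).reset_index(drop=True)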