Merge pull request #160 from BayAreaMetro/different_relative_gap_for_…

…global_iter Consolidating network fidelity work
BayAreaMetro · Jul 10, 2024 · 9eef264 · 9eef264
2 parents d5d948b + 5253872
commit 9eef264
Show file tree

Hide file tree

Showing 3 changed files with 259 additions and 1 deletion.
diff --git a/scripts/compare_skims.py b/scripts/compare_skims.py
@@ -0,0 +1,56 @@
+#%%
+import pandas as pd
+import openmatrix as omx
+from pathlib import Path
+
+import numpy as np
+
+network_fid_path = Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result")
+# network_fid_path = Path(r"D:\TEMP\TM2.2.1.1-0.05")
+
+#%%
+
+def read_matrix_as_long_df(path: Path, run_name):
+    run = omx.open_file(path, "r")
+    am_time = np.array(run["AM_da_time"])
+    index_lables = list(range(am_time.shape[0]))
+    return pd.DataFrame(am_time, index=index_lables, columns=index_lables).stack().rename(run_name).to_frame()
+
+a = read_matrix_as_long_df(r"D:\TEMP\TM2.2.1.1-New_network_rerun\TM2.2.1.1_new_taz\skim_matrices\highway\HWYSKMAM_taz.omx", "test")
+#%%
+all_skims = []
+for skim_matrix_path in network_fid_path.rglob("*AM_taz.omx"):
+    print(skim_matrix_path)
+    run_name = skim_matrix_path.parts[6]
+    all_skims.append(read_matrix_as_long_df(skim_matrix_path, run_name))
+
+all_skims = pd.concat(all_skims, axis=1)
+# %%
+#%%%
+all_skims.to_csv(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\output_summaries\skim_data\skims.csv")
+# %%
+# %%
+import geopandas as gpd
+from importlib import Path
+import pandas as pd 
+#%%
+output_paths_to_consolidate = Path(r"D:\TEMP\output_summaries")
+all_files = []
+for file in output_paths_to_consolidate.glob("*_roadway_network.geojson"):
+    run_name = file.name[0:5]
+    print(run_name)
+    specific_run = gpd.read_file(file)
+    specific_run["run_number"] = run_name
+    all_files.append(specific_run)
+#%%
+all_files = pd.concat(all_files)
+#%%
+all_files.to_file(output_paths_to_consolidate / "all_runs_concat.gdb")
+
+#%%
+
+all_files.drop(columns="geometry").to_csv(output_paths_to_consolidate / "data.csv")
+#%%
+to_be_shape = all_files[["geometry", "model_link_id"]].drop_duplicates()
+print("outputting")
+to_be_shape.to_file(output_paths_to_consolidate / "geom_package")
diff --git a/scripts/compile_model_runs.py b/scripts/compile_model_runs.py
@@ -0,0 +1,202 @@
+#%%
+import geopandas as gpd
+import pandas as pd
+import numpy as np
+from pathlib import Path
+from tqdm import tqdm
+from shapely.geometry import LineString
+
+input_dir = Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result")
+output_dir = input_dir / "consolidated_3"
+
+
+
+# in_file = next(input_dir.rglob('emme_links.shp'))
+# print("reading", in_file)
+# input2 = gpd.read_file(in_file, engine="pyogrio", use_arrow=True)
+# #%%
+# print("writing")
+# input[["#link_id", "geometry"]].to_file(output_dir / "test_geom.geojson")
+
+scenarios_to_consolidate = (11, 12, 13, 14, 15)
+runs_to_consolidate = (3, 4)
+#%%
+
+def read_file_and_tag(path: Path, columns_to_filter = ("@ft", "VOLAU", "@capacity", "run_number", "scenario_number", "#link_id", "geometry")) -> pd.DataFrame:
+
+    scenario = file.parent.stem
+    scenario_number = int(scenario.split("_")[-1])
+    if scenario_number not in scenarios_to_consolidate:
+        return None
+
+    run = file.parent.parent.stem
+    run_number = int(run.split("_")[-1])
+    if run_number not in runs_to_consolidate:
+        return None
+
+    return_gdf = gpd.read_file(path, engine="pyogrio")
+
+    return_gdf["scenario"] = scenario
+    return_gdf["scenario_number"] = scenario_number
+    return_gdf["run"] = run
+    return_gdf["run_number"] = run_number
+
+    if "VOLAU" not in return_gdf.columns:
+        print(return_gdf.columns)
+        print("... No VOLAU, filling with zero")
+        return_gdf["VOLAU" ] = 0
+
+
+    return_gdf = return_gdf[list(columns_to_filter)]
+
+    # assert return_gdf["#link_id"].is_unique
+
+    return return_gdf
+
+def get_linestring_direction(linestring: LineString) -> str:
+    if not isinstance(linestring, LineString) or len(linestring.coords) < 2:
+        raise ValueError("Input must be a LineString with at least two coordinates")
+
+    start_point = linestring.coords[0]
+    end_point = linestring.coords[-1]
+
+    delta_x = end_point[0] - start_point[0]
+    delta_y = end_point[1] - start_point[1]
+
+    if abs(delta_x) > abs(delta_y):
+        if delta_x > 0:
+            return "East"
+        else:
+            return "West"
+    else:
+        if delta_y > 0:
+            return "North"
+        else:
+            return "South"
+#%%
+
+print("Reading Links...", end="")
+all_links = []
+for file in tqdm(input_dir.rglob('run_*/Scenario_*/emme_links.shp')):
+    print(file)
+    all_links.append(read_file_and_tag(file))
+links_table = pd.concat(all_links)
+
+print("done")
+#%%
+scen_map = {
+    11: "EA",
+    12: "AM",
+    13: "MD",
+    14: "PM",
+    15: "EV"
+}
+
+def get_return_first_gem(row):
+    geom_columns = [col for col in row.index if "geometry" in col]
+    return [row[col] for col in geom_columns if (row[col] is not None) and (row[col] != np.NAN)][0]
+
+def combine_tables(dfs, columns_same):
+
+    return_frame = dfs[0][columns_same]
+
+    for df in dfs:
+        run_number = df["run_number"].iloc[0]
+
+        scen_number = df["scenario_number"].iloc[0]
+        scen_number = scen_map[scen_number]
+        df["saturation"] = df["VOLAU"] / df["@capacity"]
+
+        df = df[["#link_id", "@capacity", "VOLAU", "geometry", "@ft"]].rename(columns = {
+            "@capacity": f"capacity_run{run_number}_scen{scen_number}",
+            "VOLAU": f"@volau_run{run_number}_scen{scen_number}",
+            "saturation": f"@saturation_run{run_number}_scen{scen_number}",
+            "geometry": f"geometry_run{run_number}_scen{scen_number}",
+            "@ft": f"ft_run{run_number}_scen{scen_number}"
+            }
+        )
+        # if there are link_ids that are not in the right frame
+        return_frame = return_frame.merge(df, how="outer", on="#link_id", validate="1:1")
+        geometry = return_frame.apply(get_return_first_gem, axis=1)
+        # remove geometries that are not main geometry
+        return_frame = return_frame.drop(columns=[col for col in return_frame.columns if "geometry_" in col])
+        return_frame["geometry"] = geometry
+
+    return return_frame
+all_links_no_none = [links for links in all_links if (links is not None) and (links["#link_id"].is_unique)]    
+links_wide_table = combine_tables(all_links_no_none, ["#link_id", "geometry"])
+
+links_wide_table["direction"] = links_wide_table["geometry"].apply(get_linestring_direction)
+#%%
+ft_cols = [col for col in links_wide_table.columns if "ft_" in col]
+
+links_wide_table["ft"] = links_wide_table[ft_cols].max(axis=1)
+links_wide_table = links_wide_table.drop(columns=ft_cols)
+
+#%%
+links_wide_table.to_file(
+    Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\output_summaries\all_links_data")
+    / "all_data_wide.geojson")
+
+
+#%%
+num_iter = {
+    (3,11): 3,
+    (3,12): 10,
+    (3,13): 10,
+    (3,14): 19,
+    (3,15): 4,
+    (4,12): 20
+}
+#%%
+all_links_no_none = [links for links in all_links if (links is not None)] #and (links["#link_id"].is_unique)]    
+for df in all_links_no_none:
+    df["saturation"] = df["VOLAU"] / df["@capacity"]
+ft6_sat = [(link["run_number"].iloc[0], link["scenario_number"].iloc[0], (link.loc[link["@ft"] == 6, "saturation"] > 1).mean()) for link in all_links_no_none]
+
+y = [val for val in num_iter.values()]
+x = [x[-1] for x in ft6_sat]
+col = [val[0] for val in num_iter.keys()]
+
+#%%
+import matplotlib.pyplot as plt
+plt.scatter(x, y, c=col)
+
+# Calculate the trendline
+z = np.polyfit(x, y, 1)
+p = np.poly1d(z)
+
+# Plot the trendline
+plt.plot(x, p(x), color='red')
+
+plt.xlabel('proportion of ft 6 with saturation > 1')
+plt.ylabel('number of iterations to solve')
+plt.title('Number of iterations to solve (relative gap = 0.05)')
+plt.show()
+#%%
+import matplotlib.pyplot as plt
+data = [links_wide_table[col] for col in links_wide_table.iloc[:, 2:].columns]
+
+fig = plt.boxplot(data)
+fig.show()
+
+# --------------------------------------------------------------------------
+#%%
+links_table["direction"] = links_table["geometry"].apply(get_linestring_direction)
+# %%
+links_table.to_file(output_dir / "all_data.geojson", index=False)
+#%%
+def get_link_counts(df: pd.DataFrame):
+    ret_val = df.value_counts("@ft").sort_index().to_frame().T
+    total = ret_val.sum(axis=1)
+    total_minus_8 = total - ret_val[8.0].iloc[0]
+    ret_val["total"] = total
+    ret_val["total_minus_8"] = total_minus_8
+
+    ret_val["run_number"] = df["run_number"].iloc[0]
+    ret_val["scenario_number"] = df["scenario_number"].iloc[0]
+    return ret_val
+
+pd.concat(
+    [get_link_counts(df) for df in all_links]
+).sort_values(by=["run_number", "scenario_number"])
diff --git a/tm2py/components/network/highway/highway_assign.py b/tm2py/components/network/highway/highway_assign.py
@@ -152,7 +152,7 @@ def run(self):
             demand.run()
         else:
             self.highway_emmebank.zero_matrix
-        for time in self.time_period_names:
+        for time in ["AM"]: #self.time_period_names:
             scenario = self.highway_emmebank.scenario(time)
             with self._setup(scenario, time):
                 iteration = self.controller.iteration