Skip to content

Commit

Permalink
minor_changes
Browse files Browse the repository at this point in the history
  • Loading branch information
lachlan-git committed Jun 18, 2024
1 parent 8848c5d commit 202cb68
Show file tree
Hide file tree
Showing 2 changed files with 270 additions and 31 deletions.
27 changes: 27 additions & 0 deletions scripts/compare_skims.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np

network_fid_path = Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result")
# network_fid_path = Path(r"D:\TEMP\TM2.2.1.1-0.05")

#%%

Expand All @@ -17,6 +18,7 @@ def read_matrix_as_long_df(path: Path, run_name):

all_skims = []
for skim_matrix_path in network_fid_path.rglob("*AM_taz.omx"):
print(skim_matrix_path)
run_name = skim_matrix_path.parts[6]
all_skims.append(read_matrix_as_long_df(skim_matrix_path, run_name))

Expand All @@ -25,3 +27,28 @@ def read_matrix_as_long_df(path: Path, run_name):
#%%%
all_skims.to_csv(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result\consolidated\skims.csv")
# %%
# %%
import geopandas as gpd
from importlib import Path
import pandas as pd
#%%
output_paths_to_consolidate = Path(r"D:\TEMP\output_summaries")
all_files = []
for file in output_paths_to_consolidate.glob("*_roadway_network.geojson"):
run_name = file.name[0:5]
print(run_name)
specific_run = gpd.read_file(file)
specific_run["run_number"] = run_name
all_files.append(specific_run)
#%%
all_files = pd.concat(all_files)
#%%
all_files.to_file(output_paths_to_consolidate / "all_runs_concat.gdb")

#%%

all_files.drop(columns="geometry").to_csv(output_paths_to_consolidate / "data.csv")
#%%
to_be_shape = all_files[["geometry", "model_link_id"]].drop_duplicates()
print("outputting")
to_be_shape.to_file(output_paths_to_consolidate / "geom_package")
274 changes: 243 additions & 31 deletions scripts/compile_model_runs.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,273 @@
print("running")
#%%
import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
from pathlib import Path
from tqdm import tqdm
from shapely.geometry import LineString

input_dir = Path(r"\\corp.pbwan.net\us\CentralData\DCCLDA00\Standard\sag\projects\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result")
output_dir = input_dir / "consolidated"
input_dir = Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result")
output_dir = input_dir / "consolidated_3"


in_file = next(input_dir.rglob('emme_links.shp'))
print("reading")
input = gpd.read_file(in_file)
print("writing")
input[["geometry"]].to_file(output_dir / "test_geom.geojson")

# def read_file_and_tag(path: Path) -> gpd.GeoDataFrame:
# in_file = next(input_dir.rglob('emme_links.shp'))
# print("reading", in_file)
# input2 = gpd.read_file(in_file, engine="pyogrio", use_arrow=True)
# #%%
# print("writing")
# input[["#link_id", "geometry"]].to_file(output_dir / "test_geom.geojson")

scenarios_to_consolidate = (12, 14)
#%%

def read_file_and_tag(path: Path, columns_to_filter = ("@ft", "VOLAU", "@capacity", "run_number", "scenario_number", "#link_id", "geometry")) -> pd.DataFrame:

scenario = file.parent.stem
scenario_number = int(scenario.split("_")[-1])
if scenario_number not in scenarios_to_consolidate:
return None

run = file.parent.parent.stem
run_number = int(run.split("_")[-1])

return_gdf = gpd.read_file(path, engine="pyogrio")

return_gdf["scenario"] = scenario
return_gdf["scenario_number"] = scenario_number
return_gdf["run"] = run
return_gdf["run_number"] = run_number

if "VOLAU" not in return_gdf.columns:
print(return_gdf.columns)
print("... No VOLAU, filling with zero")
return_gdf["VOLAU" ] = 0


return_gdf = return_gdf[list(columns_to_filter)]

# assert return_gdf["#link_id"].is_unique

return return_gdf

def get_linestring_direction(linestring: LineString) -> str:
if not isinstance(linestring, LineString) or len(linestring.coords) < 2:
raise ValueError("Input must be a LineString with at least two coordinates")

start_point = linestring.coords[0]
end_point = linestring.coords[-1]

delta_x = end_point[0] - start_point[0]
delta_y = end_point[1] - start_point[1]

if abs(delta_x) > abs(delta_y):
if delta_x > 0:
return "East"
else:
return "West"
else:
if delta_y > 0:
return "North"
else:
return "South"
#%%

print("Reading Links...", end="")
all_links = []
for file in tqdm(input_dir.rglob('run_*/Scenario_*/emme_links.shp')):
print(file)
all_links.append(read_file_and_tag(file))
links_table = pd.concat(all_links)

print("done")
#%%
scen_map = {
11: "EA",
12: "AM",
13: "MD",
14: "PM",
15: "EV"
}

def get_return_first_gem(row):
geom_columns = [col for col in row.index if "geometry" in col]
return [row[col] for col in geom_columns if (row[col] is not None) and (row[col] != np.NAN)][0]

def combine_tables(dfs, columns_same):

return_frame = dfs[0][columns_same]

for df in dfs:
run_number = df["run_number"].iloc[0]

scen_number = df["scenario_number"].iloc[0]
scen_number = scen_map[scen_number]
df["saturation"] = df["VOLAU"] / df["@capacity"]

df = df[["#link_id", "@capacity", "VOLAU", "geometry", "@ft"]].rename(columns = {
"@capacity": f"capacity_run{run_number}_scen{scen_number}",
"VOLAU": f"@volau_run{run_number}_scen{scen_number}",
"saturation": f"@saturation_run{run_number}_scen{scen_number}",
"geometry": f"geometry_run{run_number}_scen{scen_number}",
"@ft": f"ft_run{run_number}_scen{scen_number}"
}
)
# if there are link_ids that are not in the right frame
return_frame = return_frame.merge(df, how="outer", on="#link_id", validate="1:1")
geometry = return_frame.apply(get_return_first_gem, axis=1)
# remove geometries that are not main geometry
return_frame = return_frame.drop(columns=[col for col in return_frame.columns if "geometry_" in col])
return_frame["geometry"] = geometry

return return_frame
all_links_no_none = [links for links in all_links if (links is not None) and (links["#link_id"].is_unique)]
links_wide_table = combine_tables(all_links_no_none, ["#link_id", "geometry"])

links_wide_table["direction"] = links_wide_table["geometry"].apply(get_linestring_direction)
#%%
ft_cols = [col for col in links_wide_table.columns if "ft_" in col]

links_wide_table["ft"] = links_wide_table[ft_cols].max(axis=1)
links_wide_table = links_wide_table.drop(columns=ft_cols)

#%%
links_wide_table.to_file(
Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\output_summaries\all_links_data")
/ "all_data_wide.geojson")


#%%
import matplotlib.pyplot as plt
data = [links_wide_table[col] for col in links_wide_table.iloc[:, 2:].columns]

fig = plt.boxplot(links_wide_table, y="total_bill")
fig.show()

# --------------------------------------------------------------------------
#%%
links_table["direction"] = links_table["geometry"].apply(get_linestring_direction)
# %%
links_table.to_file(output_dir / "all_data.geojson", index=False)
#%%
def get_link_counts(df: pd.DataFrame):
ret_val = df.value_counts("@ft").sort_index().to_frame().T
total = ret_val.sum(axis=1)
total_minus_8 = total - ret_val[8.0].iloc[0]
ret_val["total"] = total
ret_val["total_minus_8"] = total_minus_8

ret_val["run_number"] = df["run_number"].iloc[0]
ret_val["scenario_number"] = df["scenario_number"].iloc[0]
return ret_val

pd.concat(
[get_link_counts(df) for df in all_links]
).sort_values(by=["run_number", "scenario_number"])
# #%%
# import geopandas as gpd
# import pandas as pd
# from pathlib import Path
# from tqdm import tqdm

# input_dir = Path(r"Z:\MTC\US0024934.9168\Task_3_runtime_improvements\3.1_network_fidelity\run_result")
# output_dir = input_dir / "consolidated"


# # in_file = next(input_dir.rglob('emme_links.shp'))
# # print("reading", in_file)
# # input2 = gpd.read_file(in_file, engine="pyogrio", use_arrow=True)
# # #%%
# # print("writing")
# # input[["#link_id", "geometry"]].to_file(output_dir / "test_geom.geojson")

# #%%

# def process_frame(return_gdf: pd.DataFrame, path: Path, columns_to_filter = ("@ft", "VOLAU", "@capacity", "run_number", "scenario_number", "#link_id")) -> pd.DataFrame:

# scenario = file.parent.stem
# scenario_number = int(scenario.split("_")[-1])

# run = file.parent.parent.stem
# run_number = int(run.split("_")[-1])

# return_gdf = gpd.read_file(path)
# # return_gdf = #gpd.read_file(path, engine="pyogrio")

# return_gdf["scenario"] = scenario
# return_gdf["scenario_number"] = scenario_number
# return_gdf["run"] = run
# return_gdf["run_number"] = run_number

# return return_gdf
# return_gdf = return_gdf[list(columns_to_filter)]

# # assert return_gdf["#link_id"].is_unique


# return return_gdf
# #%%
# geom_file = next(input_dir.rglob('run_*/Scenario*/emme_links.shp'))
# geometry = gpd.read_file(geom_file, engine="pyogrio")
# #%%
# # geometry[["#link_id", "geometry"]].to_file(output_dir / "test_geom.geojson")

# print("Reading Links...", end="")
# all_links = []
# all_links = {}
# x = 0
# for file in tqdm(input_dir.rglob('emme_links.shp')):
# all_links.append(read_file_and_tag(file))
# if x == 0:
# all_links[-1][["geometry"]].to_file(output_dir / "test_min_geom.geojson")
# x = x + 1
# links_table = pd.concat(all_links)
# links_table = gpd.GeoDataFrame(links_table, geometry="geometry", crs=all_links[0].crs)
# print("done")
# for file in tqdm(input_dir.rglob('run_*/Scenario*/emme_links.shp')):
# print(file)
# all_links[file] = gpd.read_file(file)


# print("reading Nodes...", end="")
# all_nodes = []
# for file in tqdm(input_dir.rglob('emme_nodes.shp')):
# all_nodes.append(read_file_and_tag(file))
# #%%
# processed_values = {path: process_frame(df, path) for path, df in all_links.items()}

# nodes_table = pd.concat(all_nodes)
# nodes_table = gpd.GeoDataFrame(nodes_table, geometry="geometry", crs=all_links[0].crs)
# print("done")
# temp_iter = iter(processed_values)
# wide_data =
# for path, frame in process_name.values():

# print("outputting files...")

# #%%
# links_iter = iter(all_links)
# for frame in
# # %%
# links_table = pd.concat(all_links)
# #%%
# links_table.to_file("links_attr.csv", index=False)
# # #%%
# # print("reading Nodes...", end="")
# # all_nodes = []
# # for file in tqdm(input_dir.rglob('emme_nodes.shp')):
# # all_nodes.append(read_file_and_tag(file))

# # nodes_table = pd.concat(all_nodes)
# # nodes_table = gpd.GeoDataFrame(nodes_table, geometry="geometry", crs=all_links[0].crs)
# # print("done")

# # print("outputting files...")

# links_table.to_file(output_dir/"links.geojson")
# nodes_table.to_file(output_dir/"nodes.geojson")
# nodes_table.to_file(output_dir/"nodes.geojson")


# #%%
# from pathlib import Path
# import os
# import geopandas as gpd
# import pandas as pd
# #%%
# output_paths_to_consolidate = Path(r"D:\TEMP\output_summaries\ss")
# all_files = []
# for file in output_paths_to_consolidate.glob("*_roadway_network.geojson"):
# run_name = file.name[0:5]
# print(run_name)
# specific_run = gpd.read_file(file)
# specific_run["run_number"] = run_name
# all_files.append(specific_run)
# #%%
# all_files = pd.concat(all_files)
# #%%

# all_files.drop(columns="geometry").to_csv(output_paths_to_consolidate / "data.csv")
# #%%
# to_be_shape = all_files[["geometry", "model_link_id"]].drop_duplicates()
# print("outputting")
# to_be_shape.to_file(output_paths_to_consolidate / "geom_package")
# # %%

0 comments on commit 202cb68

Please sign in to comment.