Skip to content

Commit

Permalink
♻️ More clearly delineate the pre-processing and plotting
Browse files Browse the repository at this point in the history
To be consistent with
e-mission#86 (comment)
  • Loading branch information
shankari committed May 5, 2024
1 parent 139a469 commit c16e4eb
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 16 deletions.
20 changes: 10 additions & 10 deletions viz_scripts/generic_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" # We will have text results corresponding to the axes for simplicity and consistency\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct, \"Mode_confirm\", {distance_col: 'count'}, \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, \"primary_mode\", {distance_col: 'count'}, \"Sensed by OpenPATH\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" \n",
" plot_title = plot_title_no_quality + \"\\n\" + \"For Labeled & Sensed: \" + quality_text\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
Expand Down Expand Up @@ -215,7 +215,7 @@
" # Plot entries\n",
" fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n",
" text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
" plot_and_text_stacked_bar_chart(expanded_ct_commute, \"Mode_confirm\", {distance_col: 'count'}, \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_commute.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except Exception as e:\n",
" fig, ax = plt.subplots()\n",
Expand All @@ -242,7 +242,7 @@
"try:\n",
" fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
" text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
" plot_and_text_stacked_bar_chart(expanded_ct, \"Trip_purpose\", {distance_col: 'count'}, \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_purpose, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Trip_purpose\").agg({distance_col: 'count'}), \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_purpose, debug_df)\n",
" plot_title = plot_title_no_quality + \"\\n\" + quality_text\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except Exception as e:\n",
Expand Down Expand Up @@ -287,8 +287,8 @@
" # Plot entries\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct_u80, \"Mode_confirm\", {distance_col: 'count'}, \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, \"primary_mode\", {distance_col: 'count'}, \"Sensed by OpenPATH\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_u80.groupby(\"Mode_confirm\").agg({distance_col: 'count'}), \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80.groupby(\"primary_mode\").agg({distance_col: 'count'}), \"Sensed by OpenPATH\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except AttributeError as e:\n",
" # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
Expand Down Expand Up @@ -323,8 +323,8 @@
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" \n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct, \"Mode_confirm\", {distance_col: 'sum'}, \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, \"primary_mode\", {distance_col: 'sum'}, \"Sensed by OpenPATH\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" plot_and_text_stacked_bar_chart(expanded_ct.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}), \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title, file_name) \n",
"except AttributeError as e:\n",
" # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
Expand Down Expand Up @@ -362,8 +362,8 @@
" sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
" \n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" plot_and_text_stacked_bar_chart(labeled_land_trips_df, \"Mode_confirm\", {distance_col: 'sum'}, \"Labeled by user\\n (Trip distance - Land)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(sensed_land_trips_df, \"primary_mode\", {distance_col: 'sum'}, \"Sensed by OpenPATH\\n (Trip distance - Land)\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" plot_and_text_stacked_bar_chart(labeled_land_trips_df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}), \"Labeled by user\\n (Trip distance - Land)\", ax[0], text_results[0], colors_mode, debug_df)\n",
" plot_and_text_stacked_bar_chart(sensed_land_trips_df.groupby(\"primary_mode\").agg({distance_col: 'sum'}), \"Sensed by OpenPATH\\n (Trip distance - Land)\", ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title, file_name) \n",
"except AttributeError as e:\n",
" # we can have a missing attribute error during the pre-processing, in which case we should show the missing plot\n",
Expand Down
15 changes: 9 additions & 6 deletions viz_scripts/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,22 +92,25 @@ def plot_and_text_error(e, ax, file_name):
return alt_text, alt_html

# Creates/ Appends single bar to the 100% Stacked Bar Chart
def plot_and_text_stacked_bar_chart(df, df_col, agg_query, bar_label, ax, text_result, colors, debug_df):
def plot_and_text_stacked_bar_chart(df, bar_label, ax, text_result, colors, debug_df):
""" Inputs:
df = Data frame corresponding to the bar in a stacked bar chart
bar_name = Text to represent in case data frame is empty (e.g. "Sensed Trip")
df = Data frame corresponding to the bar in a stacked bar chart. It is
expected to have a single column holding the 'value', with the 'label' as its index
bar_label = Text to represent the Bar (e.g. Labeled by user\n (Confirmed trips))
ax = axis information
colors = color mapping dictionary
text_result = will be filled in with the alt_text and alt_html for the plot
"""
if len(df.columns) > 1:
raise ValueError("dataframe should have two columns (labels and values), found %s" % (df.columns))

sns.set(font_scale=1.5)
bar_height = 0.2
bar_width = [0]
try:
grouped_df = df.groupby(df_col).agg(agg_query).reset_index().set_axis(['label', 'vals'], axis='columns').sort_values(by='vals', ascending=False)
grouped_df = df.reset_index().set_axis(['label', 'value'], axis='columns').sort_values(by='value', ascending=False)

# TODO: Do we need this as a separate function?
df_all_entries, df_only_small = process_trip_data(grouped_df.label.tolist(), grouped_df.vals.tolist())
df_all_entries, df_only_small = process_trip_data(grouped_df.label.tolist(), grouped_df.value.tolist())

# TODO: Fix this to be more pandas-like and change the "long" variable name
for label in pd.unique(df_only_small['Label']):
Expand Down

0 comments on commit c16e4eb

Please sign in to comment.