From 75b529a81b2797e1b1fa74ca79dea7b5f0d12ecc Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Sun, 31 Mar 2024 21:21:07 -0600 Subject: [PATCH] remove notebooks replaced by work in #102 --- .../Abby/CanBikeCO_Analysis.ipynb | 940 ------------------ .../Abby/CanBikeCO_DataFiltering.ipynb | 679 ------------- .../Abby/CanBikeCO_Spatial_Analysis.ipynb | 147 --- 3 files changed, 1766 deletions(-) delete mode 100644 viz_scripts/PaperVizualizations/Abby/CanBikeCO_Analysis.ipynb delete mode 100644 viz_scripts/PaperVizualizations/Abby/CanBikeCO_DataFiltering.ipynb delete mode 100644 viz_scripts/PaperVizualizations/Abby/CanBikeCO_Spatial_Analysis.ipynb diff --git a/viz_scripts/PaperVizualizations/Abby/CanBikeCO_Analysis.ipynb b/viz_scripts/PaperVizualizations/Abby/CanBikeCO_Analysis.ipynb deleted file mode 100644 index 5c7de770..00000000 --- a/viz_scripts/PaperVizualizations/Abby/CanBikeCO_Analysis.ipynb +++ /dev/null @@ -1,940 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "b926ac86", - "metadata": {}, - "source": [ - "# Centralizing the Data Vizualizations from the Paper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2d3bef49", - "metadata": {}, - "outputs": [], - "source": [ - "# dependencies\n", - "from collections import defaultdict\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "from sklearn import linear_model\n", - "\n", - "import emission.core.get_database as edb\n", - "from plots import *\n", - "import scaffolding\n", - "\n", - "sns.set_style(\"whitegrid\")\n", - "sns.set()\n", - "%matplotlib inline\n", - "\n", - "params = {'legend.fontsize': 'small',\n", - " 'figure.figsize': (10, 8),\n", - " 'axes.labelsize': 'small',\n", - " 'axes.titlesize':'small',\n", - " 'xtick.labelsize':'small',\n", - " 'ytick.labelsize':'small'}\n", - "plt.rcParams.update(params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fbcfd00", - "metadata": {}, - "outputs": [], - "source": [ - "# loading the data\n", - "data = pd.read_csv(\"filtered_merged_trips.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "746a0a77", - "metadata": {}, - "outputs": [], - "source": [ - "# Summary statistics table #MOVE TO ANALYSIS NOTEBOOK\n", - "mode_of_interest = 'E-bike'\n", - "data_eb = data.query(f\"mode_confirm == '{mode_of_interest}'\")\n", - "print(len(pd.unique(data.user_id)))\n", - "stat_data = data[['distance_miles','duration']]\n", - "stat_data.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "1b7be16b", - "metadata": {}, - "source": [ - "## Modes in Mini vs Full Pilot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2042742", - "metadata": {}, - "outputs": [], - "source": [ - "# processing mini data\n", - "mini_data = pd.read_csv(\"trip_program.csv\")\n", - "mini_data = mini_data[mini_data.program == 'prepilot']\n", - "MINI_DATA = mini_data.copy() #saving a copy for later\n", - "\n", - "# Combine variable categories\n", - "mini_data = mini_data.replace('Gas Car, drove alone', 'Car')\n", - "mini_data = mini_data.replace('Gas Car, with others', 'Shared Car')\n", - "mini_data = mini_data.replace('Bikeshare', 'Shared Micromobility')\n", - "mini_data = mini_data.replace('Scooter share', 'Shared Micromobility')\n", - "mini_data = mini_data.replace('Regular Bike', 'Personal Micromobility')\n", - "mini_data = mini_data.replace('Skate board', 'Personal Micromobility')\n", - "mini_data = mini_data.replace('Train', 'Transit')\n", - "mini_data = mini_data.replace('Free Shuttle', 'Transit')\n", - "mini_data = mini_data.replace('Bus', 'Transit')\n", - "mini_data = mini_data.replace('Walk', 'Walk')\n", - "mini_data = mini_data.replace('Taxi/Uber/Lyft', 'Ridehail')\n", - "mini_data = mini_data.replace('Pilot ebike', 'E-Bike')\n", - "\n", - "#filter out 'not a trip' trips\n", - "mini_data = mini_data[~mini_data['Mode_confirm'].isin(['Not a Trip'])]\n", - "mini_data = mini_data[~mini_data['Replaced_mode'].isin(['Not a Trip'])]\n", - "mini_data = mini_data[~mini_data['Trip_purpose'].isin(['not_a_trip'])]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "181a7001", - "metadata": {}, - "outputs": [], - "source": [ - "mini_data.loc[mini_data['Mode_confirm']=='Personal Micromobility', 'Mode_confirm'] = 'Other'\n", - "mini_data.loc[mini_data['Mode_confirm']=='Shared Micromobility', 'Mode_confirm'] = 'Other'\n", - "\n", - "t1 = mini_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - "t1['proportion'] = t1['distance_miles'] / np.sum(t1.distance_miles)\n", - "t1['trip_type'] = 'All Trips'\n", - "\n", - "t2 = mini_data[mini_data['Trip_purpose']=='Work'].copy()\n", - "t2 = t2.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - "t2['proportion'] = t2['distance_miles'] / np.sum(t2.distance_miles)\n", - "t2['trip_type'] = 'Work Trips'\n", - "t2.loc[1.5] = 'Other', 0, 0, 'Work Trips'\n", - "t2 = t2.sort_index().reset_index(drop=True)\n", - "\n", - "mini_data = pd.concat([t1,t2])\n", - "mini_data['Dataset'] = 'Minipilot'\n", - "mini_data.columns = ['Mode','Count','Proportion','Trip Type', \"Dataset\"]\n", - "\n", - "# processing long data\n", - "plot_data = data.copy()\n", - "plot_data.loc[plot_data['Mode_confirm']=='Personal Micromobility', 'Mode_confirm'] = 'Other'\n", - "plot_data.loc[plot_data['Mode_confirm']=='Shared Micromobility', 'Mode_confirm'] = 'Other'\n", - "\n", - "t1 = plot_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - "t1['proportion'] = t1['distance_miles'] / np.sum(t1.distance_miles)\n", - "t1['trip_type'] = 'All Trips'\n", - "\n", - "t2 = plot_data[plot_data['Trip_purpose']=='Work'].copy()\n", - "t2 = t2.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - "t2['proportion'] = t2['distance_miles'] / np.sum(t2.distance_miles)\n", - "t2['trip_type'] = 'Work Trips'\n", - "\n", - "plot_data = pd.concat([t1,t2])\n", - "plot_data['Dataset'] = 'Long Term'\n", - "plot_data.columns = ['Mode','Count','Proportion','Trip Type', \"Dataset\"]\n", - "\n", - "plot_data = pd.concat([plot_data, mini_data])\n", - "plot_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a94c111", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# make and save the chart\n", - "\n", - "width = 0.8\n", - "fig, ax = plt.subplots(2,1, figsize=(20,10))\n", - "plt.rcParams.update({'font.size': 30}) \n", - "running_total_mini = [0,0]\n", - "running_total_long = [0,0]\n", - "fig_data_mini = plot_data[plot_data['Dataset']=='Minipilot']\n", - "fig_data_long = plot_data[plot_data['Dataset']=='Long Term']\n", - "\n", - "for mode in pd.unique(fig_data_mini.Mode):\n", - " mini = fig_data_mini[fig_data_mini['Mode']==mode]\n", - " long = fig_data_long[fig_data_long['Mode']==mode]\n", - " \n", - " labels = mini['Trip Type']\n", - " vals = mini['Proportion']*100\n", - " bar_labels = mini['Count']\n", - " vals_str = [f'{y:.1f} %\\n({x:,})' if y>5 else '' for x, y in zip(bar_labels, vals)]\n", - " bar = ax[0].barh(labels, vals, width, left=running_total_mini, label=mode)\n", - " ax[0].bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=22)\n", - " running_total_mini[0] = running_total_mini[0]+vals.iloc[0]\n", - " running_total_mini[1] = running_total_mini[1]+vals.iloc[1]\n", - "\n", - " labels = long['Trip Type']\n", - " vals = long['Proportion']*100\n", - " bar_labels = long['Count']\n", - " vals_str = [f'{y:.1f} %\\n({x:,})' if y>5 else '' for x, y in zip(bar_labels, vals)]\n", - " bar = ax[1].barh(labels, vals, width, left=running_total_long, label=mode)\n", - " ax[1].bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=22)\n", - " running_total_long[0] = running_total_long[0]+vals.iloc[0]\n", - " running_total_long[1] = running_total_long[1]+vals.iloc[1]\n", - "\n", - "ax[0].set_title('Minipilot', fontsize=25)\n", - "ax[1].set_title('All Programs', fontsize=25)\n", - "ax[0].legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True, fontsize=25)\n", - "plt.subplots_adjust(bottom=0.20)\n", - "fig.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "c51143bf", - "metadata": {}, - "source": [ - "## Mode Share by Program" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a788461", - "metadata": {}, - "outputs": [], - "source": [ - "# process the data for the clustered chart\n", - "data.program.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "841d2811", - "metadata": {}, - "outputs": [], - "source": [ - "mode_data = data.copy()\n", - "\n", - "#clean up the modes\n", - "mode_data.loc[mode_data['Mode_confirm']=='Personal Micromobility', 'Mode_confirm'] = 'Other'\n", - "mode_data.loc[mode_data['Mode_confirm']=='Shared Micromobility', 'Mode_confirm'] = 'Other'\n", - "\n", - "program_list = ['4c', 'cc', 'fc', 'pc', 'sc', 'vail']\n", - "work_plot_data = []\n", - "all_plot_data = []\n", - "for program in program_list:\n", - " program_data = mode_data[mode_data.program == program]\n", - "\n", - " t1 = program_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - " t1[program] = (t1['distance_miles'] / np.sum(t1.distance_miles)) * 100\n", - " t1 = t1.set_index('Mode_confirm')\n", - " t1 = t1.drop(columns = ['distance_miles'])\n", - " all_plot_data.append(t1)\n", - "\n", - " t2 = program_data[program_data['Trip_purpose']=='Work'].copy()\n", - " t2 = t2.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - " t2[program] = (t2['distance_miles'] / np.sum(t2.distance_miles)) * 100\n", - " t2 = t2.set_index('Mode_confirm')\n", - " t2 = t2.drop(columns = ['distance_miles'])\n", - " work_plot_data.append(t2)\n", - " \n", - "work_trips = pd.concat(work_plot_data, axis = 1)\n", - "all_trips = pd.concat(all_plot_data, axis = 1)\n", - "\n", - "all_trips = all_trips.transpose()\n", - "work_trips = work_trips.transpose()\n", - "\n", - "all_trips['program'] = all_trips.index\n", - "all_trips = all_trips.replace({'4c': 'Four Corners\\n(Durango)', \n", - " 'cc': 'Comunity Cycles\\n(Boulder)',\n", - " 'sc': 'Smart Commute\\n(Denver North)',\n", - " 'pc':'Pueblo',\n", - " 'vail':'Vail',\n", - " 'fc':'Fort Collins'})\n", - "all_trips = all_trips.set_index('program')\n", - "\n", - "work_trips['program'] = work_trips.index\n", - "work_trips = work_trips.replace({'4c': 'Four Corners\\n(Durango)', \n", - " 'cc': 'Comunity Cycles\\n(Boulder)',\n", - " 'sc': 'Smart Commute\\n(Denver North)',\n", - " 'pc':'Pueblo',\n", - " 'vail':'Vail',\n", - " 'fc':'Fort Collins'})\n", - "work_trips = work_trips.set_index('program')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d32d79b", - "metadata": {}, - "outputs": [], - "source": [ - "##COLUMN ORDERS MUST MATCH OR CHART MISREPRESENTS DATA\n", - "\n", - "all_trips" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "596aa44d", - "metadata": {}, - "outputs": [], - "source": [ - "work_trips['Ridehail'] = work_trips['Ridehail'].fillna(0)\n", - "work_trips['Transit'] = work_trips['Transit'].fillna(0)\n", - "work_trips = work_trips[['Car', 'E-bike', 'Other', 'Ridehail', 'Shared Car', 'Transit', 'Walk']]\n", - "\n", - "work_trips" - ] - }, - { - "cell_type": "markdown", - "id": "5c3cb061", - "metadata": {}, - "source": [ - "### make and save the clustered chart" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ee0bfef", - "metadata": {}, - "outputs": [], - "source": [ - "#from https://stackoverflow.com/questions/22787209/how-to-have-clusters-of-stacked-bars\n", - "import pandas as pd\n", - "import matplotlib.cm as cm\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "def plot_clustered_stacked(dfall, labels=None, title=\"multiple stacked bar plot\", H=\"/\", **kwargs):\n", - " \"\"\"Given a list of dataframes, with identical columns and index, create a clustered stacked bar plot. \n", - "labels is a list of the names of the dataframe, used for the legend\n", - "title is a string for the title of the plot\n", - "H is the hatch used for identification of the different dataframe\"\"\"\n", - "\n", - " n_df = len(dfall)\n", - " n_col = len(dfall[0].columns) \n", - " n_ind = len(dfall[0].index)\n", - " axe = plt.subplot(111)\n", - "\n", - " for df in dfall : # for each data frame\n", - " axe = df.plot(kind=\"bar\",\n", - " stacked=True,\n", - " ax=axe,\n", - " legend=False,\n", - " grid=False,\n", - " **kwargs) # make bar plots\n", - "\n", - " h,l = axe.get_legend_handles_labels() # get the handles we want to modify\n", - " for i in range(0, n_df * n_col, n_col): # len(h) = n_col * n_df\n", - " for j, pa in enumerate(h[i:i+n_col]):\n", - " for rect in pa.patches: # for each index\n", - " rect.set_x(rect.get_x() + 1 / float(n_df + 1) * i / float(n_col))\n", - " rect.set_hatch(H * int(i / n_col)) #edited part \n", - " rect.set_width(1 / float(n_df + 1))\n", - "\n", - " axe.set_xticks((np.arange(0, 2 * n_ind, 2) + 1 / float(n_df + 1)) / 2.)\n", - " axe.set_xticklabels(df.index, rotation = 0)\n", - " axe.set_title(title, fontsize = 22)\n", - "\n", - " # Add invisible data to add another legend\n", - " n=[] \n", - " for i in range(n_df):\n", - " n.append(axe.bar(0, 0, color=\"gray\", hatch=H * i))\n", - "\n", - " l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])\n", - " if labels is not None:\n", - " l2 = plt.legend(n, labels, loc=[1.01, 0.1]) \n", - " axe.add_artist(l1)\n", - " return axe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "692c048d", - "metadata": {}, - "outputs": [], - "source": [ - "# Then, just call :\n", - "ax = plot_clustered_stacked([all_trips, work_trips],[\"All Trips\", \"Work Trips\"], title = \"Mode Share by Program\")\n", - "\n", - "for c in ax.containers:\n", - " labels = [f'{round(v.get_height())}' if v.get_height() > 5 else '' for v in c]\n", - " ax.bar_label(c, labels=labels, label_type='center')\n", - " \n", - "ax.set_xticklabels(all_trips.index, rotation=45, ha='right', fontsize=14)\n", - "\n", - "ax.set_xlabel('Program', fontsize = 18)\n", - "ax.set_ylabel('Proportion of Total Trip Count (%)', fontsize = 18)\n", - "\n", - "plt.savefig(\"CanBikeCO_report_mode_share_overview.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "markdown", - "id": "d9cf319f", - "metadata": {}, - "source": [ - "## Trip Purpose Mini vs Full" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ab63059", - "metadata": {}, - "outputs": [], - "source": [ - "#arrange the data\n", - "full_purpose_data = data.copy()\n", - "mini_purpose_data = MINI_DATA.copy()\n", - "\n", - "#MINI DATA\n", - "mini_data = mini_purpose_data.copy()\n", - "mini_data = mini_data[~mini_data['Trip_purpose'].isin(['No travel'])]\n", - "mini_data.loc[mini_data['Trip_purpose']=='Religious', 'Trip_purpose'] = 'Other'\n", - "mini_data.loc[mini_data['Trip_purpose']=='School', 'Trip_purpose'] = 'Other'\n", - "\n", - "t1 = mini_data.groupby(['Trip_purpose'], as_index=False).count()[['Trip_purpose','distance_miles']]\n", - "t1['proportion'] = t1['distance_miles'] / np.sum(t1.distance_miles)\n", - "t1['trip type'] = 'All Trips'\n", - "\n", - "t2 = mini_data[mini_data['Mode_confirm']=='E-bike'].copy()\n", - "t2 = t2.groupby(['Trip_purpose'], as_index=False).count()[['Trip_purpose','distance_miles']]\n", - "t2['proportion'] = t2['distance_miles'] / np.sum(t2.distance_miles)\n", - "t2['trip type'] = 'E-Bike Trips'\n", - "\n", - "mini_data = pd.concat([t1,t2])\n", - "mini_data['Dataset'] = 'Minipilot'\n", - "mini_data.columns = ['Purpose','Count','Proportion','Trip Type', \"Dataset\"]\n", - "\n", - "#FULL DATA\n", - "plot_data = full_purpose_data.copy()\n", - "plot_data.loc[plot_data['Trip_purpose']=='Religious', 'Trip_purpose'] = 'Other'\n", - "plot_data.loc[plot_data['Trip_purpose']=='School', 'Trip_purpose'] = 'Other'\n", - "t1 = plot_data.groupby(['Trip_purpose'], as_index=False).count()[['Trip_purpose','distance_miles']]\n", - "t1['proportion'] = t1['distance_miles'] / np.sum(t1.distance_miles)\n", - "t1['trip type'] = 'All Trips'\n", - "t1.loc[len(t1.index)] = ['Pick-up/Drop off', 0, 0, 'All Trips']\n", - "\n", - "t2 = plot_data[plot_data['Mode_confirm']=='E-bike'].copy()\n", - "t2 = t2.groupby(['Trip_purpose'], as_index=False).count()[['Trip_purpose','distance_miles']]\n", - "t2['proportion'] = t2['distance_miles'] / np.sum(t2.distance_miles)\n", - "t2['trip type'] = 'E-Bike Trips'\n", - "t2.loc[len(t2.index)] = ['Pick-up/Drop off', 0, 0, 'E-Bike Trips']\n", - "\n", - "plot_data = pd.concat([t1,t2])\n", - "plot_data['Dataset'] = 'Long Term'\n", - "plot_data.columns = ['Purpose','Count','Proportion','Trip Type', \"Dataset\"]\n", - "\n", - "plot_data = pd.concat([plot_data, mini_data])\n", - "# plot_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "faa4c5f9", - "metadata": {}, - "outputs": [], - "source": [ - "# create the chart\n", - "\n", - "width = 0.8\n", - "fig, ax = plt.subplots(2,1, figsize=(20,10))\n", - "plt.rcParams.update({'font.size': 25}) \n", - "running_total_mini = [0,0]\n", - "running_total_long = [0,0]\n", - "fig_data_mini = plot_data[plot_data['Dataset']=='Minipilot']\n", - "fig_data_long = plot_data[plot_data['Dataset']=='Long Term']\n", - "\n", - "for purp in pd.unique(fig_data_mini.Purpose):\n", - " mini = fig_data_mini[fig_data_mini['Purpose']==purp]\n", - " long = fig_data_long[fig_data_long['Purpose']==purp]\n", - " \n", - " labels = mini['Trip Type']\n", - " vals = mini['Proportion']*100\n", - " bar_labels = mini['Count']\n", - " vals_str = [f'{y:.1f} %\\n({x:,})' if y>5 else '' for x, y in zip(bar_labels, vals)]\n", - " bar = ax[0].barh(labels, vals, width, left=running_total_mini, label=purp)\n", - " ax[0].bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=20)\n", - " running_total_mini[0] = running_total_mini[0]+vals.iloc[0]\n", - " running_total_mini[1] = running_total_mini[1]+vals.iloc[1]\n", - "\n", - " labels = long['Trip Type']\n", - " vals = long['Proportion']*100\n", - " bar_labels = long['Count']\n", - " vals_str = [f'{y:.1f} %\\n({x:,})' if y>5 else '' for x, y in zip(bar_labels, vals)]\n", - " bar = ax[1].barh(labels, vals, width, left=running_total_long, label=purp)\n", - " ax[1].bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=20)\n", - " running_total_long[0] = running_total_long[0]+vals.iloc[0]\n", - " running_total_long[1] = running_total_long[1]+vals.iloc[1]\n", - "\n", - "ax[0].set_title('Minipilot', fontsize=25)\n", - "ax[1].set_title('All Programs', fontsize=25)\n", - "ax[0].legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True, fontsize=20)\n", - "plt.subplots_adjust(bottom=0.20)\n", - "fig.tight_layout()\n", - "plt.savefig(\"CanBikeCO_report_purp_share.jpeg\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "78aa120b", - "metadata": {}, - "source": [ - "## Show what purposes diffrent programs used the e-bikes for" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15605d79", - "metadata": {}, - "outputs": [], - "source": [ - "full_purpose_data = data.copy()\n", - "e_purpose = full_purpose_data[full_purpose_data.Mode_confirm == 'E-bike']\n", - "\n", - "e_purpose.loc[e_purpose['Trip_purpose']=='Religious', 'Trip_purpose'] = 'Other'\n", - "e_purpose.loc[e_purpose['Trip_purpose']=='School', 'Trip_purpose'] = 'Other'\n", - "\n", - "program_list = ['4c', 'cc', 'fc', 'pc', 'sc', 'vail']\n", - "all_plot_data = []\n", - "for program in program_list:\n", - " program_data = e_purpose[mode_data.program == program]\n", - "\n", - " t1 = program_data.groupby(['Trip_purpose'], as_index=False).sum()[['Trip_purpose','distance_miles']]\n", - " t1['distance_miles'].fillna(0, inplace=True)\n", - " t1[program] = (t1['distance_miles'] / np.sum(t1.distance_miles)) * 100\n", - " t1 = t1.set_index('Trip_purpose')\n", - " t1 = t1.drop(columns = ['distance_miles'])\n", - " all_plot_data.append(t1)\n", - " \n", - "all_trips = pd.concat(all_plot_data, axis = 1)\n", - "\n", - "all_trips = all_trips.transpose()\n", - "\n", - "all_trips['program'] = all_trips.index\n", - "all_trips = all_trips.replace({'4c': 'Four Corners\\n(Durango)', \n", - " 'cc': 'Comunity Cycles\\n(Boulder)',\n", - " 'sc': 'Smart Commute\\n(Denver North)',\n", - " 'pc':'Pueblo',\n", - " 'vail':'Vail',\n", - " 'fc':'Fort Collins'})\n", - "all_trips = all_trips.set_index('program')\n", - "# all_trips = all_trips.drop(columns=['Trip_purpose'])\n", - "\n", - "all_trips" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f72e1e0b", - "metadata": {}, - "outputs": [], - "source": [ - "#https://stackoverflow.com/questions/41296313/stacked-bar-chart-with-centered-labels\n", - "ax = all_trips.plot.bar(stacked=True, \n", - " title=\"E-bike Purpose Share by Program\", \n", - " ylabel=\"Proportion E-bike Mileage (%)\",\n", - " xlabel = \"Program\",\n", - " rot = 45)\n", - "\n", - "for c in ax.containers:\n", - " labels = [f'{round(v.get_height())}' if v.get_height() > 5.5 else '' for v in c]\n", - " ax.bar_label(c, labels=labels, label_type='center')\n", - "\n", - "ax.set_xticklabels(all_trips.index, rotation=45, ha='right')\n", - "ax.legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True, fontsize=20)\n", - "\n", - "plt.savefig(\"CanBikeCO_report_ebike_programs_purp_share.jpeg\", bbox_inches='tight')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92b26796", - "metadata": {}, - "outputs": [], - "source": [ - "## figure 10 replacement\n", - "#want a bar chart with age bins on the x and %of mode share on the y\n", - "\n", - "#so need mode share by age\n", - "#have done mode share by program\n", - "\n", - "mode_data = data.copy() #complete set of cleaned data\n", - "# mode_data.loc[mode_data['Mode_confirm']=='Personal Micromobility', 'Mode_confirm'] = 'Other'\n", - "# mode_data.loc[mode_data['Mode_confirm']=='Shared Micromobility', 'Mode_confirm'] = 'Other'\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4bd2537f", - "metadata": {}, - "outputs": [], - "source": [ - "bins = [0, 25, 50, 75]\n", - "mode_data['age_bin'] = pd.cut(mode_data['AGE'], bins)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f93b58b", - "metadata": {}, - "outputs": [], - "source": [ - "age_modes = mode_data.groupby(['age_bin', 'Mode_confirm'], as_index=False).count()[['age_bin', 'Mode_confirm','distance_miles']]\n", - "age_modes['proportion'] = age_modes['distance_miles'] / np.sum(age_modes.distance_miles)\n", - "\n", - "list_age_modes = []\n", - "for age_bin in age_modes.age_bin.unique():\n", - " age_data = mode_data[mode_data['age_bin'] == age_bin]\n", - "\n", - " t1 = age_data.groupby(['Mode_confirm'], as_index=False).count()[['Mode_confirm','distance_miles']]\n", - " t1['distance_miles'].fillna(0, inplace=True)\n", - " t1[age_bin] = (t1['distance_miles'] / np.sum(t1.distance_miles)) * 100\n", - " t1 = t1.set_index('Mode_confirm')\n", - " t1 = t1.drop(columns = ['distance_miles'])\n", - " list_age_modes.append(t1)\n", - " \n", - "age_modes = pd.concat(list_age_modes, axis = 1)\n", - "\n", - "age_modes = age_modes.transpose()\n", - "\n", - "age_modes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7fac68c6", - "metadata": {}, - "outputs": [], - "source": [ - "#https://stackoverflow.com/questions/41296313/stacked-bar-chart-with-centered-labels\n", - "ax = age_modes.plot.bar(stacked=True, \n", - " title=\"Mode Share by Age\", \n", - " ylabel=\"Proportion of Total Trip Count (%)\",\n", - " xlabel = \"Participant Age\",\n", - " rot = 0)\n", - "\n", - "for c in ax.containers:\n", - " labels = [f'{round(v.get_height())}' if v.get_height() > 5.5 else '' for v in c]\n", - " ax.bar_label(c, labels=labels, label_type='center')\n", - "\n", - "# ax.set_xticklabels(age_modes.index, rotation=45, ha='right')\n", - "ax.legend(bbox_to_anchor=(1,1), fancybox=True, shadow=True, fontsize=20)\n", - "\n", - "plt.savefig(\"CanBikeCO_report_age_mode_share.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8f70e8f9", - "metadata": {}, - "outputs": [], - "source": [ - "expanded_ct = pd.read_csv('expanded_ct.csv')\n", - "\n", - "len(expanded_ct)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d424645", - "metadata": {}, - "outputs": [], - "source": [ - "# Distribution of distances by program\n", - "plot_data = data.copy()\n", - "plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']\n", - "plot_data['Program'] = plot_data['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners','Community Cycles\\n(Boulder)','Fort Collins','Pueblo','Smart Commute\\n(Denver North)','Vail'])\n", - "\n", - "plot_title = 'Distribution of E-Bike Trip Distances by Program'\n", - "ylab = 'Distance (miles)'\n", - "\n", - "fig, ax = plt.subplots(figsize=(10,8))\n", - "sns.boxplot(ax=ax, data=plot_data, x='Program', y='distance_miles', hue='Mode_confirm', showfliers=False).set(title=plot_title, xlabel='', ylabel=ylab)\n", - "plt.subplots_adjust(bottom=0.25)\n", - "plt.xticks(rotation=35, ha='right', fontsize=14)\n", - "plt.yticks(fontsize=14)\n", - "plt.legend([])\n", - "\n", - "# Calculate number of obs per group & median to position labels\n", - "medians = plot_data.groupby(['Program'])['distance_miles'].median().values\n", - "nobs = plot_data['Program'].value_counts().values\n", - "nobs = [str(x) for x in nobs.tolist()]\n", - "nobs = [\"n: \" + i for i in nobs]\n", - " \n", - "# Add it to the plot\n", - "pos = range(len(nobs))\n", - "for tick,label in zip(pos,ax.get_xticklabels()):\n", - " ax.text(pos[tick],\n", - " medians[tick] + 0.03,\n", - " nobs[tick],\n", - " horizontalalignment='center',\n", - " size='12',\n", - " color='w',\n", - " weight='semibold')\n", - " \n", - "plt.savefig(\"CanBikeCO_report_e-bike_miles_dist.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "markdown", - "id": "f8e7f9de", - "metadata": {}, - "source": [ - "## E-bike trips across occuptations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1100f0ba", - "metadata": {}, - "outputs": [], - "source": [ - "data = data.copy()\n", - "data['occupation_cat'] = data['Which best describes your primary job?'].replace(['Sales or service',\n", - " 'Manufacturing, construction, maintenance, or farming', \n", - " 'Janitorial',\n", - " 'Professional, managerial, or technical',\n", - " 'Clerical or administrative support',\n", - " 'Teacher',\n", - " 'Medical',\n", - " 'CNA',\n", - " 'Restaurant manager',\n", - " 'Co op laundry',\n", - " 'Cook',\n", - " 'Nurse',\n", - " 'Dining Services',\n", - " 'Security',\n", - " 'Food service',\n", - " 'Csu custodian',\n", - " 'Residential Dining Services',\n", - " 'education/early childhood',\n", - " 'Inbound cs',\n", - " 'Custodial Maintanace',\n", - " 'Amazon',\n", - " 'Custodian',\n", - " 'Hockey rink',\n", - " 'Pastry chef and line cook', \n", - " 'Cooking',\n", - " 'Education non-profit manager',\n", - " 'Healthcare',\n", - " 'Chef',\n", - " 'Accounting Technician',\n", - " 'Caregiver/ Qmap',\n", - " 'Caregiver',\n", - " 'Health care',\n", - " 'Medical field'],\n", - " ['Sales or Service',\n", - " 'Manufacturing, Construction, Maintenance, or Farming',\n", - " 'Custodial',\n", - " 'Professional, Managerial, or Technical',\n", - " 'Clerical or Administrative Support',\n", - " 'Education',\n", - " 'Medical/Healthcare',\n", - " 'Medical/Healthcare',\n", - " 'Professional, Managerial, or Technical',\n", - " 'Sales or Service',\n", - " 'Sales or Service',\n", - " 'Medical/Healthcare',\n", - " 'Sales or Service',\n", - " 'Professional, Managerial, or Technical',\n", - " 'Sales or Service',\n", - " 'Custodial',\n", - " 'Sales or Service',\n", - " 'Education',\n", - " 'Professional, Managerial, or Technical',\n", - " 'Custodial',\n", - " 'Sales or Service',\n", - " 'Custodial',\n", - " 'Sales or Service',\n", - " 'Sales or Service',\n", - " 'Sales or Service',\n", - " 'Education',\n", - " 'Medical/Healthcare',\n", - " 'Sales or Service',\n", - " 'Professional, Managerial, or Technical',\n", - " 'Medical/Healthcare',\n", - " 'Medical/Healthcare',\n", - " 'Medical/Healthcare',\n", - " 'Medical/Healthcare'])\n", - "data['occupation_cat'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac38a650", - "metadata": {}, - "outputs": [], - "source": [ - "data['occupation_cat']= data['occupation_cat'].replace(['Food Service', 'Cooking ', 'Accounting Technician ','Education ',\n", - " 'Csu custodian ','Custodial ','Maintenance ','Maintenance','Janitorial ',\n", - " 'Amazon ', 'Custodial Maintanace ', 'Hockey rink '],\n", - " ['Sales or Service' , 'Sales or Service', 'Professional, Managerial, or Technical',\n", - " 'Education', 'Custodial', 'Custodial', 'Custodial', 'Custodial', 'Custodial',\n", - " 'Sales or Service','Custodial' ,'Sales or Service'])\n", - "\n", - "data['occupation_cat'] = data['occupation_cat'].replace(['Manufacturing, Construction, Maintenance, or Farming', 'Professional, Managerial, or Technical', 'Clerical or Administrative Support'],\n", - " ['Manufacturing, Construction,\\nMaintenance, or Farming', 'Professional, Managerial,\\nor Technical', 'Clerical or\\nAdministrative Support'])\n", - "data['occupation_cat'].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d42afa3c", - "metadata": {}, - "outputs": [], - "source": [ - "plot_data_1=data[data['occupation_cat'].notnull()]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eedd817f", - "metadata": {}, - "outputs": [], - "source": [ - "# proportion of trips by occupation\n", - "plot_data = plot_data_1.copy()\n", - "\n", - "t1 = plot_data.groupby(['user_id','Mode_confirm'], as_index=False).count()[['user_id','Mode_confirm','distance_miles']]\n", - "t1['distance_miles'].fillna(0, inplace=True)\n", - "t2 = plot_data.groupby(['user_id'], as_index=False).count()[['user_id','distance_miles']]\n", - "plot_data = t1.merge(t2, on='user_id')\n", - "plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']\n", - "plot_data['proportion'].fillna(0, inplace=True)\n", - "t3 = data.copy().groupby(['occupation_cat','user_id'], as_index=False).nth(0)[['occupation_cat','user_id']]\n", - "\n", - "plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']\n", - "plot_data = plot_data.merge(t3, on='user_id')\n", - "\n", - "plot_title = 'E-bike Use (Trips) by Occupation Categories'\n", - "ylab='Occupation Category'\n", - "fig, ax = plt.subplots(figsize=(8,6))\n", - "sns.barplot(data= plot_data, x='proportion' , y='occupation_cat', estimator=np.mean).set(title=plot_title,xlabel='Proportion of Total Trips',ylabel=ylab)\n", - "plt.xticks(rotation=35, ha='right')\n", - "plt.subplots_adjust(bottom=0.25)\n", - "\n", - "plt.savefig(\"CanBikeCO_report_occ_ebike_trips.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1dcd5db7", - "metadata": {}, - "outputs": [], - "source": [ - "data['induced']=np.where(data['Replaced_mode']=='No Travel', 'Induced', 'Non-induced')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "39739e67", - "metadata": {}, - "outputs": [], - "source": [ - "data['Program'] = data['program'].replace(['4c','cc','fc','pc','sc','vail'],['Four Corners (Durango)','Community Cycles (Boulder)','Fort Collins','Pueblo County','Smart Commute (Denver North)','Vail'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8422b27", - "metadata": {}, - "outputs": [], - "source": [ - "# proportion of induced trips by occupation\n", - "plot_data = data.copy()\n", - "plot_data = plot_data[(plot_data['induced']=='Induced') & (plot_data['Trip_purpose']=='Work')]\n", - "\n", - "t1 = plot_data.groupby(['user_id','Mode_confirm'], as_index=False).count()[['user_id','Mode_confirm','distance_miles']]\n", - "t1['distance_miles'].fillna(0, inplace=True)\n", - "t2 = plot_data.groupby(['user_id'], as_index=False).count()[['user_id','distance_miles']]\n", - "plot_data = t1.merge(t2, on='user_id')\n", - "plot_data['proportion'] = plot_data['distance_miles_x'] / plot_data['distance_miles_y']\n", - "plot_data['proportion'].fillna(0, inplace=True)\n", - "t3 = data.copy().groupby(['occupation_cat','user_id'], as_index=False).nth(0)[['occupation_cat','user_id']]\n", - "\n", - "plot_data = plot_data[plot_data['Mode_confirm']=='E-bike']\n", - "plot_data = plot_data.merge(t3, on='user_id')\n", - "\n", - "plot_title = 'Induced Work E-bike Trips by Occupation Categories'\n", - "ylab='Proportion of Total Trips'\n", - "fig, ax = plt.subplots(figsize=(8,6))\n", - "sns.barplot(data= plot_data, x='proportion' , y='occupation_cat', estimator=np.mean).set(title=plot_title,xlabel=ylab,ylabel='Occupation Category')\n", - "plt.xticks(rotation=35, ha='right', fontsize=14)\n", - "plt.yticks(fontsize=14)\n", - "plt.subplots_adjust(bottom=0.25)\n", - "\n", - "plt.savefig(\"CanBikeCO_report_occ_induced_ebike_trips.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c390f1e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/viz_scripts/PaperVizualizations/Abby/CanBikeCO_DataFiltering.ipynb b/viz_scripts/PaperVizualizations/Abby/CanBikeCO_DataFiltering.ipynb deleted file mode 100644 index 0fcc8221..00000000 --- a/viz_scripts/PaperVizualizations/Abby/CanBikeCO_DataFiltering.ipynb +++ /dev/null @@ -1,679 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "e9684305", - "metadata": {}, - "source": [ - "# Can Bike CO Data Cleaning\n", - "\n", - "This notebook performs the data treatments we outline in the paper, narrowing the dataset down from it's original size to the 122 users and 61,496 trips we analyze in the paper. The csv from TSDC can be inputted to the notebook as \"trip_program.csv\" and will be transformed and saved as \"filtered_and_merged_trips.csv\". This filtered and merged file is what can be used as input to the analysis notebook. " - ] - }, - { - "cell_type": "markdown", - "id": "7aee77d3", - "metadata": {}, - "source": [ - "## Setup: dependencies and reading in raw file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6139da5c", - "metadata": {}, - "outputs": [], - "source": [ - "#dependencies\n", - "\n", - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "from collections import defaultdict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "612c368d", - "metadata": {}, - "outputs": [], - "source": [ - "# Loading mapping dictionaries from mapping_dictionaries notebook\n", - "%store -r df_ei\n", - "%store -r dic_re\n", - "%store -r dic_pur\n", - "%store -r dic_fuel\n", - "\n", - "# convert a dictionary to a defaultdict\n", - "dic_re = defaultdict(lambda: 'Other',dic_re)\n", - "dic_pur = defaultdict(lambda: 'Other',dic_pur)\n", - "dic_fuel = defaultdict(lambda: 'Other',dic_fuel)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1606878", - "metadata": {}, - "outputs": [], - "source": [ - "#read in the raw csv -- all labeled trips?\n", - "data = pd.read_csv(\"trip_program.csv\")\n", - "print(len(data)) #92395\n", - "data['user_id'].nunique() #235" - ] - }, - { - "cell_type": "markdown", - "id": "fc47f7c2", - "metadata": {}, - "source": [ - "## Filter out stage & minipilot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7d1f0f", - "metadata": {}, - "outputs": [], - "source": [ - "#count the stage and mini users\n", - "stage_data = data[data.program=='stage']\n", - "print(stage_data.user_id.nunique())\n", - "print(len(stage_data))\n", - "\n", - "mini_data = data[data.program=='prepilot']\n", - "print(mini_data.user_id.nunique())\n", - "print(len(mini_data))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab83b18e", - "metadata": {}, - "outputs": [], - "source": [ - "#filter out stage and mini pilot users and trips from the full trip dataset\n", - "data_non_stage = data[data.program != \"stage\"]\n", - "print(len(data_non_stage))\n", - "data_non_stage_non_mini = data_non_stage[data_non_stage.program != \"prepilot\"]\n", - "print(len(data_non_stage_non_mini))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83703424", - "metadata": {}, - "outputs": [], - "source": [ - "data_non_stage_non_mini['user_id'].nunique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c254747b", - "metadata": {}, - "outputs": [], - "source": [ - "data_non_stage_non_mini['program'].unique()" - ] - }, - { - "cell_type": "markdown", - "id": "5821fcbe", - "metadata": {}, - "source": [ - "## Merge with the Sociodemographic Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79f074c1", - "metadata": {}, - "outputs": [], - "source": [ - "expanded_ct = data.copy()\n", - "socio_data = pd.read_csv(\"Can Do Colorado eBike Program - en.csv\")\n", - "socio_data.rename(columns={'Unique User ID (auto-filled, do not edit)':'user_id',\n", - " 'Please identify which category represents your total household income, before taxes, for last year.':'HHINC',\n", - " 'How many motor vehicles are owned, leased, or available for regular use by the people who currently live in your household?':'VEH',\n", - " 'In which year were you born?':'AGE',\n", - " 'Including yourself, how many people live in your home?':'HHSIZE',\n", - " 'How many children under age 18 live in your home?':'CHILDREN',\n", - " 'What is your gender?':'GENDER',\n", - " 'If you were unable to use your household vehicle(s), which of the following options would be available to you to get you from place to place?':'available_modes',\n", - " 'Are you a student?':'STUDENT'}, inplace=True)\n", - "socio_data = socio_data[~socio_data.user_id.isnull()]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "209a277b", - "metadata": {}, - "outputs": [], - "source": [ - "# socio_data.head() #use to check the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffc7e547", - "metadata": {}, - "outputs": [], - "source": [ - "# Deal with people who have multiple responses by using most recent\n", - "socio_data = socio_data.sort_values(by=['user_id', 'Timestamp'])\n", - "socio_data.drop_duplicates(subset=['user_id'], keep='last', inplace=True)\n", - "socio_data['user_id_socio'] = socio_data.user_id\n", - "socio_data = socio_data.drop(labels='user_id', axis=1)\n", - "\n", - "# socio_data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "66494032", - "metadata": {}, - "outputs": [], - "source": [ - "# Lose some trips due to people with no survey responses\n", - "expanded_ct['user_id_socio'] = expanded_ct.user_id.astype(str)\n", - "expanded_ct.user_id_socio = [i.replace('-','') for i in expanded_ct.user_id_socio] # remove all dashes from strings\n", - "expanded_ct = expanded_ct.merge(socio_data, on='user_id_socio')\n", - "print(len(expanded_ct))\n", - "\n", - "\n", - "# expanded_ct.head()\n", - "#optionally save the csv at this point\n", - "#expanded_ct.to_csv(\"expanded_ct.csv\")" - ] - }, - { - "cell_type": "markdown", - "id": "33602cdc", - "metadata": {}, - "source": [ - "## Filtering the whole dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0eb32fa4", - "metadata": {}, - "outputs": [], - "source": [ - "# Optionally initialize from saved dataset\n", - "#data = pd.read_csv(\"expanded_ct.csv\")\n", - "# Or continue from data loaded from the database\n", - "data = expanded_ct.copy()\n", - "\n", - "# Get timestamp from known year/month/day aggregated to days\n", - "data.rename(columns={'start_local_dt_year':'year','start_local_dt_month':'month','start_local_dt_day':'day'}, inplace=True)\n", - "data['date_time'] = pd.to_datetime(data[['year','month','day']])\n", - "\n", - "# Fix age (birth year to age)\n", - "data['AGE'] = 2022 - data['AGE']\n", - "\n", - "# Number of workers (size of HH - kids)\n", - "data['WORKERS'] = data['HHSIZE'] - data['CHILDREN']\n", - "\n", - "# Duration in minutes (hours to minutes)\n", - "data['duration'] = data['duration'] / 60\n", - "\n", - "# E-bike/not E-Bike variable\n", - "data['is_ebike'] = \"E-Bike Trips\"\n", - "data.loc[data['Mode_confirm']!=\"E-bike\", 'is_ebike'] = \"Non E-Bike Trips\"\n", - "\n", - "data = data[~data['HHINC'].isin(['Prefer not to say', '$150,000'])] # Side note why is 150k (n=7) its own bin?\n", - "data['HHINC_NUM'] = data.HHINC.replace(['Less than $24,999',\n", - " '$25,000-$49,999',\n", - " '$50,000-$99,999',\n", - " '$100,000 -$149,999',\n", - " '$150,000-$199,999',\n", - " '$200,000 or more'], [12500,37500,75000,125000,175000,250000])\n", - "\n", - "# Calculate average income per adult in the household\n", - "data['PINC'] = data['HHINC_NUM'] / data['WORKERS']\n", - "\n", - "# Combine variable categories\n", - "data = data.replace('Gas Car, drove alone', 'Car')\n", - "data = data.replace('Gas Car, with others', 'Shared Car')\n", - "data = data.replace('Bikeshare', 'Shared Micromobility')\n", - "data = data.replace('Scooter share', 'Shared Micromobility')\n", - "data = data.replace('Regular Bike', 'Personal Micromobility')\n", - "data = data.replace('Skate board', 'Personal Micromobility')\n", - "data = data.replace('Train', 'Transit')\n", - "data = data.replace('Free Shuttle', 'Transit')\n", - "data = data.replace('Bus', 'Transit')\n", - "data = data.replace('Walk', 'Walk')\n", - "data = data.replace('Taxi/Uber/Lyft', 'Ridehail')\n", - "data = data.replace('Pilot ebike', 'E-Bike')\n", - "\n", - "# Categorical type will include all days/modes in groupby even if there is no data for a particular tabulation\n", - "data.user_id = pd.Categorical(data.user_id)\n", - "data.date_time = pd.Categorical(data.date_time)\n", - "data.mode_confirm = pd.Categorical(data.mode_confirm, ordered=True, categories=np.unique(list(dic_re.keys())))\n", - "\n", - "# Add order to categorical variables\n", - "data.HHINC = pd.Categorical(data.HHINC, ordered=True, categories=['Less than $24,999',\n", - " '$25,000-$49,999',\n", - " '$50,000-$99,999'])\n", - "data['Mode'] = pd.Categorical(data.Mode_confirm, ordered=True, categories=[\n", - " 'E-bike',\n", - " 'Car',\n", - " 'Shared Car',\n", - " 'Walk',\n", - " 'Transit',\n", - " 'Personal Micromobility',\n", - " 'Shared Micromobility',\n", - " 'Ridehail',\n", - " 'Other'])\n", - "data.VEH = pd.Categorical(data.VEH, ordered=True, categories=['0','1','2','3','4+'])\n", - "data['PINC_NUM'] = data['PINC']\n", - "data.PINC = pd.cut(data.PINC, bins=[0,10000,20000,30000,40000,50000,60000,70000,999999],\n", - " labels=[\"$0-9\",\n", - " \"$10-19\",\n", - " \"$20-29\",\n", - " \"$30-39\",\n", - " \"$40-49\",\n", - " \"$50-59\",\n", - " \"$60-69\",\n", - " \"$70+\"])\n", - "\n", - "# Vehicles per driver\n", - "data['VEH_num'] = data['VEH'].replace(['1','2','3','4+'],[1,2,3,4]).astype(int)\n", - "data['DRIVERS'] = data[\"Including yourself, how many people have a driver's license in your household?\"]\n", - "data['DRIVERS_num'] = data['DRIVERS'].replace\n", - "data['veh_per_driver'] = (data['VEH_num'] / data['DRIVERS']).fillna(0)\n", - "data.loc[data['veh_per_driver']==np.inf, 'veh_per_driver'] = 0\n", - "\n", - "print(len(data))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57a92f6a", - "metadata": {}, - "outputs": [], - "source": [ - "#filtered out ages that were greater than 100\n", - "data = data[data['AGE'] < 100]\n", - "#filter out durations longer than 8 hours\n", - "data = data[data['duration']<480]\n", - "#distances more than 50 miles \n", - "data = data[data['distance_miles']<50]\n", - "#records that had ’prefer not to say’ as a response for household income, household vehicles, and other available modes\n", - "data = data[~data['HHINC'].isin(['Prefer not to say','$100,000 -$149,999','$150,000','$150,000-$199,999','$200,000 or more'])] # Side note why is 150k (n=7) its own bin?\n", - "data = data[~data['VEH'].isin(['Prefer not to say / Prefiero no decir.'])]\n", - "data = data[~data['available_modes'].isin(['None', 'Prefer not to say'])]\n", - "\n", - "#filter household sizes smaller than the number of kids\n", - "data = data[data['HHSIZE']>data['CHILDREN']]\n", - "#filter out households greater than 10\n", - "data = data[data['HHSIZE']<10]\n", - "\n", - "print(len(data))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "043281f6", - "metadata": {}, - "outputs": [], - "source": [ - "#filter out 'not a trip' trips\n", - "data = data[~data['Mode_confirm'].isin(['Not a Trip'])]\n", - "data = data[~data['Replaced_mode'].isin(['Not a Trip'])]\n", - "data = data[~data['Trip_purpose'].isin(['not_a_trip'])]\n", - "\n", - "print(len(data))\n", - "\n", - "a = data[data['AGE']>100]\n", - "print(len(a)) #should be 0" - ] - }, - { - "cell_type": "markdown", - "id": "b6bec354", - "metadata": {}, - "source": [ - "## Filter out trips prior to user's 1st Ebike Trip - done by program\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f694b05", - "metadata": {}, - "outputs": [], - "source": [ - "#separating programs\n", - "four_corners = data[data.program == \"4c\"]\n", - "community_cycles = data[data.program == \"cc\"]\n", - "fort_collins = data[data.program == \"fc\"]\n", - "pueblo = data[data.program == \"pc\"]\n", - "smart_commute = data[data.program == \"sc\"]\n", - "vail = data[data.program == \"vail\"]\n", - "\n", - "print(four_corners['user_id'].nunique())\n", - "print(community_cycles['user_id'].nunique())\n", - "print(fort_collins['user_id'].nunique())\n", - "print(pueblo['user_id'].nunique())\n", - "print(smart_commute['user_id'].nunique())\n", - "print(vail['user_id'].nunique())\n", - "\n", - "print(len(four_corners))\n", - "print(len(community_cycles))\n", - "print(len(fort_collins))\n", - "print(len(pueblo))\n", - "print(len(smart_commute))\n", - "print(len(vail))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9733292", - "metadata": {}, - "outputs": [], - "source": [ - "#filtering each of them\n", - "from datetime import datetime" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c425ea7", - "metadata": {}, - "outputs": [], - "source": [ - "#smart commute filtering\n", - "\n", - "#timestamp conversion\n", - "smart_commute['start_ts']= pd.to_datetime(smart_commute['start_ts'], utc=True, unit='s')\n", - "\n", - "#grouping, counting unique users\n", - "trip_sep=smart_commute.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])\n", - "print(trip_sep['user_id'].nunique())\n", - "\n", - "#consider only trips with E-bike (to get first e-bike trip)\n", - "sc_ebike_first=trip_sep[trip_sep['Mode_confirm']=='E-bike']\n", - "\n", - "#get all the trips by ysers who ever had an e-bike trip\n", - "sc_ebike_user_list= sc_ebike_first['user_id'].tolist()\n", - "smart_commute_incl_ebike = smart_commute[smart_commute['user_id'].isin(sc_ebike_user_list)]\n", - "print(smart_commute_incl_ebike['user_id'].nunique())\n", - "\n", - "#filter to the earliest ebike trip\n", - "for unique_id in sc_ebike_first['user_id']:\n", - " for date in sc_ebike_first['start_ts']:\n", - " smart_commute_ebike_first=smart_commute_incl_ebike[(smart_commute_incl_ebike['start_ts'] >= date)]\n", - "\n", - "sc_unique_ebikefirst=smart_commute_ebike_first['user_id'].unique()\n", - "print(smart_commute_ebike_first['user_id'].nunique()) #11" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ff39352", - "metadata": {}, - "outputs": [], - "source": [ - "#filter four corners\n", - "four_corners['start_ts']= pd.to_datetime(four_corners['start_ts'], utc=True, unit='s')\n", - "\n", - "trip_sep_fc=four_corners.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])\n", - "print(trip_sep_fc['user_id'].nunique())\n", - "\n", - "fc_ebike_first=trip_sep_fc[trip_sep_fc['Mode_confirm']=='E-bike']\n", - "\n", - "fc_ebike_user_list= fc_ebike_first['user_id'].tolist()\n", - "four_corners_incl_ebike = four_corners[four_corners['user_id'].isin(fc_ebike_user_list)]\n", - "print(four_corners_incl_ebike['user_id'].nunique())\n", - "\n", - "for unique_id in fc_ebike_first['user_id']:\n", - " for date in fc_ebike_first['start_ts']:\n", - " four_corners_ebike_first=four_corners_incl_ebike[(four_corners_incl_ebike['start_ts'] >= date)]\n", - " \n", - "fc_unique_ebikefirst=four_corners_ebike_first['user_id'].unique()\n", - "print(four_corners_ebike_first['user_id'].nunique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9e00678", - "metadata": {}, - "outputs": [], - "source": [ - "#filtering community cycles\n", - "community_cycles['start_ts']= pd.to_datetime(community_cycles['start_ts'], utc=True, unit='s')\n", - "\n", - "trip_sep_cc=community_cycles.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])\n", - "print(trip_sep_cc['user_id'].nunique())\n", - "\n", - "cc_ebike_first=trip_sep_cc[trip_sep_cc['Mode_confirm']=='E-bike']\n", - "\n", - "cc_ebike_user_list= cc_ebike_first['user_id'].tolist()\n", - "community_cycles_incl_ebike = community_cycles[community_cycles['user_id'].isin(cc_ebike_user_list)]\n", - "print(community_cycles_incl_ebike['user_id'].nunique())\n", - "\n", - "for unique_id in cc_ebike_first['user_id']:\n", - " for date in cc_ebike_first['start_ts']:\n", - " community_cycles_ebike_first=community_cycles_incl_ebike[(community_cycles_incl_ebike['start_ts'] >= date)]\n", - "\n", - "cc_unique_ebikefirst=community_cycles_ebike_first['user_id'].unique()\n", - "print(community_cycles_ebike_first['user_id'].nunique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ee513f85", - "metadata": {}, - "outputs": [], - "source": [ - "#filtering fort collins\n", - "fort_collins['start_ts']= pd.to_datetime(fort_collins['start_ts'], utc=True, unit='s')\n", - "\n", - "trip_sep_fc=fort_collins.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])\n", - "print(trip_sep_fc['user_id'].nunique())\n", - "\n", - "fc_ebike_first=trip_sep_fc[trip_sep_fc['Mode_confirm']=='E-bike']\n", - "\n", - "fc_ebike_user_list= fc_ebike_first['user_id'].tolist()\n", - "fort_collins_incl_ebike = fort_collins[fort_collins['user_id'].isin(fc_ebike_user_list)]\n", - "print(fort_collins_incl_ebike['user_id'].nunique())\n", - "\n", - "for unique_id in fc_ebike_first['user_id']:\n", - " for date in fc_ebike_first['start_ts']:\n", - " fort_collins_ebike_first=fort_collins_incl_ebike[(fort_collins_incl_ebike['start_ts'] >= date)]\n", - " \n", - "fc_unique_ebikefirst=fort_collins_ebike_first['user_id'].unique()\n", - "print(fort_collins_ebike_first['user_id'].nunique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f1a3bbe", - "metadata": {}, - "outputs": [], - "source": [ - "#filtering pueblo\n", - "pueblo['start_ts']= pd.to_datetime(pueblo['start_ts'], utc=True, unit='s')\n", - "\n", - "trip_sep_pu=pueblo.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])\n", - "print(trip_sep_pu['user_id'].nunique())\n", - "\n", - "pu_ebike_first=trip_sep_pu[trip_sep_pu['Mode_confirm']=='E-bike']\n", - "\n", - "pu_ebike_user_list= pu_ebike_first['user_id'].tolist()\n", - "pueblo_incl_ebike = pueblo[pueblo['user_id'].isin(pu_ebike_user_list)]\n", - "print(pueblo_incl_ebike['user_id'].nunique())\n", - "\n", - "for unique_id in pu_ebike_first['user_id']:\n", - " for date in pu_ebike_first['start_ts']:\n", - " pueblo_ebike_first=pueblo_incl_ebike[(pueblo_incl_ebike['start_ts'] >= date)]\n", - " \n", - "pu_unique_ebikefirst=pueblo_ebike_first['user_id'].unique()\n", - "print(pueblo_ebike_first['user_id'].nunique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31457b29", - "metadata": {}, - "outputs": [], - "source": [ - "#filtering vail\n", - "vail['start_ts']= pd.to_datetime(vail['start_ts'], utc=True, unit='s')\n", - "\n", - "trip_sep_va=vail.groupby(['user_id','Mode_confirm']).apply(lambda x:x[x.start_ts==min(x.start_ts)])\n", - "print(trip_sep_va['user_id'].nunique())\n", - "\n", - "va_ebike_first=trip_sep_va[trip_sep_va['Mode_confirm']=='E-bike']\n", - "\n", - "va_ebike_user_list= va_ebike_first['user_id'].tolist()\n", - "vail_incl_ebike = vail[vail['user_id'].isin(va_ebike_user_list)]\n", - "print(vail_incl_ebike['user_id'].nunique())\n", - "\n", - "for unique_id in va_ebike_first['user_id']:\n", - " for date in va_ebike_first['start_ts']:\n", - " vail_ebike_first=vail_incl_ebike[(vail_incl_ebike['start_ts'] >= date)]\n", - " \n", - "va_unique_ebikefirst=vail_ebike_first['user_id'].unique()\n", - "print(vail_ebike_first['user_id'].nunique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f1ba436d", - "metadata": {}, - "outputs": [], - "source": [ - "## Recombine the Filtered Programs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c05dc74", - "metadata": {}, - "outputs": [], - "source": [ - "#checking num users and num trips in each program\n", - "print(four_corners_ebike_first['user_id'].nunique())\n", - "print(community_cycles_ebike_first['user_id'].nunique())\n", - "print(fort_collins_ebike_first['user_id'].nunique())\n", - "print(pueblo_ebike_first['user_id'].nunique())\n", - "print(smart_commute_ebike_first['user_id'].nunique())\n", - "print(vail_ebike_first['user_id'].nunique())\n", - "\n", - "print(len(four_corners_ebike_first))\n", - "print(len(community_cycles_ebike_first))\n", - "print(len(fort_collins_ebike_first))\n", - "print(len(pueblo_ebike_first))\n", - "print(len(smart_commute_ebike_first))\n", - "print(len(vail_ebike_first))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c25973d", - "metadata": {}, - "outputs": [], - "source": [ - "#combining the filtered datasets\n", - "filtered_merged = pd.concat([four_corners_ebike_first, community_cycles_ebike_first, fort_collins_ebike_first, \n", - " pueblo_ebike_first, smart_commute_ebike_first,vail_ebike_first], axis=0)\n", - "print(len(filtered_merged))\n", - "print(filtered_merged['user_id'].nunique())" - ] - }, - { - "cell_type": "markdown", - "id": "17290400", - "metadata": {}, - "source": [ - "## Save to CSV for use in Analysis Notebooks" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d616fab4", - "metadata": {}, - "outputs": [], - "source": [ - "filtered_merged.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ec980170", - "metadata": {}, - "outputs": [], - "source": [ - "#save as a csv, to be used as input to analysis!\n", - "filtered_merged.to_csv(\"filtered_merged_trips.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "40d77bc9", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ebb4bba", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/viz_scripts/PaperVizualizations/Abby/CanBikeCO_Spatial_Analysis.ipynb b/viz_scripts/PaperVizualizations/Abby/CanBikeCO_Spatial_Analysis.ipynb deleted file mode 100644 index 74ff40d5..00000000 --- a/viz_scripts/PaperVizualizations/Abby/CanBikeCO_Spatial_Analysis.ipynb +++ /dev/null @@ -1,147 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "fc5b7c01", - "metadata": {}, - "outputs": [], - "source": [ - "# Spatial Analysis -- (attention to pie charts)\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "17f2546d", - "metadata": {}, - "outputs": [], - "source": [ - "# load the data from csv -- useful?\n", - "data = pd.read_csv('trip_program.csv')\n", - "data.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "963c7a09", - "metadata": {}, - "outputs": [], - "source": [ - "data[['start_loc', 'end_loc']].head() #these don't look like coords?" - ] - }, - { - "cell_type": "markdown", - "id": "73a35446", - "metadata": {}, - "source": [ - "## we could hard-code the change, for now, since we don't need to change the map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "838cc6b5", - "metadata": {}, - "outputs": [], - "source": [ - "# establish the datasets -- hardcoded from the pie charts in the paper REPLACE ME\n", - "programs = ('sc', 'cc', 'fc', '4c', 'pc', 'vail')\n", - "\n", - "eBikeDominance = {'Program': ['Smart Commute\\n(Denver North)', 'Comunity Cycles\\n(Boulder)', \n", - " 'Fort Collins', 'Four Corners\\n(Durango)', \n", - " 'Pueblo', 'Vail'], \n", - " 'True': [17.9, 64.7, 47.3, 78.9, 35.5, 23.5 ],\n", - " 'False': [82.1, 35.3, 52.7, 21.1, 64.5, 76.5]}\n", - "\n", - "eBikeGood = {'Program': ['Smart Commute\\n(Denver North)', 'Comunity Cycles\\n(Boulder)', \n", - " 'Fort Collins', 'Four Corners\\n(Durango)', \n", - " 'Pueblo', 'Vail'], \n", - " 'True': [19.8, 74.5, 49.1, 84.2, 47.4, 47.1],\n", - " 'False': [80.2, 25.5, 50.9, 15.8, 52.6, 52.9]}\n", - "\n", - "# gather datasets into dataframes\n", - "eBikeDominance = pd.DataFrame(eBikeDominance)\n", - "eBikeGood = pd.DataFrame(eBikeGood)\n", - "\n", - "eBikeDominance = eBikeDominance.set_index('Program')\n", - "eBikeGood = eBikeGood.set_index('Program')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "314c7d6d", - "metadata": {}, - "outputs": [], - "source": [ - "ax = eBikeDominance.plot.bar(stacked=True,\n", - " title=\"Where E-bikes Used More Than Cars\", \n", - " ylabel=\"Proportion of Pixels (%)\",\n", - " xlabel = \"Program\",)\n", - "for c in ax.containers:\n", - " labels = [f'{round(v.get_height(),1)}' for v in c]\n", - " ax.bar_label(c, labels=labels, label_type='center')\n", - " \n", - "ax.set_xticklabels(eBikeDominance.index, rotation=45, ha='right')\n", - "\n", - "plt.savefig(\"CanBikeCO_report_ebike_pixels_moreThanCars.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a61724fe", - "metadata": {}, - "outputs": [], - "source": [ - "ax = eBikeGood.plot.bar(stacked=True,\n", - " title=\"Where E-bikes at Least 50% of Driving Trips\", \n", - " ylabel=\"Proportion of Pixels (%)\",\n", - " xlabel = \"Program\",)\n", - "\n", - "for c in ax.containers:\n", - " labels = [f'{round(v.get_height(),1)}' for v in c]\n", - " ax.bar_label(c, labels=labels, label_type='center')\n", - "\n", - "ax.set_xticklabels(eBikeGood.index, rotation=45, ha='right')\n", - "\n", - "plt.savefig(\"CanBikeCO_report_ebike_pixels_atLeast50OfCars.jpeg\", bbox_inches='tight')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "724588dc", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}