Skip to content

Commit

Permalink
Update the analyse incidents tool
Browse files Browse the repository at this point in the history
  • Loading branch information
gnrgomes committed Jun 7, 2024
1 parent 74fd98a commit 4740a8e
Showing 1 changed file with 58 additions and 5 deletions.
63 changes: 58 additions & 5 deletions src/lisfloodutilities/gridding/tools/analyse_incidents.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import json
from lisfloodutilities.gridding.lib.utils import FileUtils

DELIMITER_INPUT = ';'
DELIMITER_OUTPUT = '\t'

COL_PROVIDER_ID = 'SITE'
COL_STATION_NUM = 'STATION'
Expand All @@ -47,7 +49,11 @@
COL_TOTAL_INCIDENTS = 'totalIncidents'


incident_type_columns = {}


def get_total_incidents(row: pd.Series) -> int:
global incident_type_columns
incidents = row[COL_INCIDENTS]
if incidents is None:
return 0
Expand All @@ -59,14 +65,37 @@ def get_total_incidents(row: pd.Series) -> int:
return 0
total_incidents = 0
for incident_key in incidents_dic:
# Used to store all the incident types available
incident_type_columns[incident_key] = ''
try:
total_incidents += int(incidents_dic[incident_key])
except Exception as e:
print(f'ERROR evaluating row: {row}')
return total_incidents


def get_total_incident_by_type(row: pd.Series, incident_type: str) -> int:
incidents = row[COL_INCIDENTS]
if incidents is None:
return 0
incidents = incidents.strip()
if len(incidents) == 0 or incidents == 'nan' or incidents == '{}':
return 0
incidents_dic = eval(incidents)
if not isinstance(incidents_dic, dict):
return 0
total_incidents = 0
for incident_key in incidents_dic:
if incident_key == incident_type:
try:
total_incidents = int(incidents_dic[incident_key])
except Exception as e:
print(f'ERROR evaluating row: {row}')
return total_incidents


def run(infolder: str, outfolder: str):
global incident_type_columns
inwildcard = '*.csv'

for filename in sorted(Path(infolder).rglob(inwildcard)):
Expand All @@ -75,16 +104,40 @@ def run(infolder: str, outfolder: str):
outfilepath = Path(outfile)
# Create the output parent folders if not exist yet
Path(outfilepath.parent).mkdir(parents=True, exist_ok=True)
df = pd.read_csv(filename, delimiter=';')
df = pd.read_csv(filename, delimiter=DELIMITER_INPUT)
df = df.astype({COL_INCIDENTS: 'str'})
incident_type_columns = {}
df[COL_TOTAL_INCIDENTS] = df.apply(get_total_incidents, axis=1)

df = df.groupby([COL_PROVIDER_ID])[COL_TOTAL_INCIDENTS].agg(['sum','count']).reset_index()

if df is None or df.empty:
# define aggregation functions
agg_funcs = {COL_TOTAL_INCIDENTS: [('Total Incidents', 'sum'), ('Number of Stations', 'count')]}

incident_types = list(incident_type_columns.keys())
for incident_type in incident_types:
agg_funcs[incident_type] = [(incident_type, 'sum')]
df[incident_type] = None
df[incident_type] = df.apply(get_total_incident_by_type, axis=1, incident_type=incident_type)

df = df.groupby(COL_PROVIDER_ID).agg(agg_funcs).reset_index()

# Eliminate the top level of column names since the new names are
# written on the bottom level and insert the name of the the
# aggregation column on the bottom level
df.columns = df.columns.droplevel(0)
df.reset_index(drop=True, inplace=True)
columns = list(df.columns)
columns[0] = 'Provider'
df.columns = columns

df['Incidents per Station'] = df['Total Incidents'].div(df['Number of Stations'])
df['Incidents per Station'] = df['Incidents per Station'].round(2)

df.sort_values(by=['Incidents per Station', 'Total Incidents', 'Number of Stations'], ascending=[False, False, False], inplace=True)

if df.empty:
print(f'WARNING: No data was found in file {filename}')
else:
df.to_csv(outfilepath, index=False, header=True, sep='\t')
df.to_csv(outfilepath, index=False, header=True, sep=DELIMITER_OUTPUT)
print(f'Wrote file: {outfilepath}')
# print(out_df)

Expand Down

0 comments on commit 4740a8e

Please sign in to comment.