Commit 6df4eac: "Add files via upload"
zamanzadeh authored Aug 28, 2024 (parent 546a000)
Showing 1 changed file: Evaluation.ipynb (292 additions, 0 deletions)

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f82ba5b3",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"from sklearn import metrics\n",
"from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, roc_curve, precision_recall_curve"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a587f335",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"def adjust_predicts(label, predict=None, calc_latency=False):\n",
" \n",
" label = np.asarray(label)\n",
" latency = 0\n",
" \n",
" actual = label > 0.1\n",
" anomaly_state = False\n",
" anomaly_count = 0\n",
" for i in range(len(actual)):\n",
" if actual[i] and predict[i] and not anomaly_state:\n",
" anomaly_state = True\n",
" anomaly_count += 1\n",
" for j in range(i, 0, -1):\n",
" if not actual[j]:\n",
" break\n",
" else:\n",
" if not predict[j]:\n",
" predict[j] = True\n",
" latency += 1\n",
" elif not actual[i]:\n",
" anomaly_state = False\n",
" if anomaly_state:\n",
" predict[i] = True\n",
" \n",
" MCM = metrics.multilabel_confusion_matrix(actual, predict, labels = [1, 0])\n",
"\n",
" pa_tn = MCM[0][0, 0]\n",
" pa_tp = MCM[0][1, 1]\n",
" pa_fp = MCM[0][0, 1]\n",
" pa_fn = MCM[0][1, 0]\n",
" \n",
" prec = pa_tp / (pa_tp + pa_fp)\n",
" rec = pa_tp / (pa_tp + pa_fn)\n",
" f1_score = 2 * (prec * rec) / (prec + rec)\n",
" if calc_latency:\n",
" return predict, latency / (anomaly_count + 1e-4), pa_tp, pa_tn, pa_fp, pa_fn, prec , rec, f1_score\n",
" else:\n",
" return predict, prec, rec, f1_score, pa_tp, pa_tn, pa_fp, pa_fn,"
]
},
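{
"cell_type": "markdown",
"id": "pa-demo-md",
"metadata": {},
"source": [
"A minimal sanity check for `adjust_predicts` (added for illustration; the toy\n",
"arrays are invented and not part of the original pipeline). One ground-truth\n",
"segment covers positions 2-4 and the detector only fires at 3-4, so point\n",
"adjustment should back-fill position 2 and report a perfect adjusted F1."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "pa-demo-code",
"metadata": {},
"outputs": [],
"source": [
"toy_label = np.array([0, 0, 1, 1, 1, 0, 0])\n",
"toy_pred = [False, False, False, True, True, False, False]\n",
"adj, lat, tp, tn, fp, fn, p, r, f1 = adjust_predicts(toy_label, toy_pred, True)\n",
"print(adj, lat, f1)  # expected: full segment marked, latency ~1, F1 = 1.0"
]
},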
{
"cell_type": "code",
"execution_count": 3,
"id": "4460860e",
"metadata": {},
"outputs": [],
"source": [
"def add_summary_statistics(res_df):\n",
" # Compute the sum of 'best_tp', 'best_tn', 'best_fp', 'best_fn'\n",
" sum_best_tp = res_df['best_tp'].sum()\n",
" sum_best_tn = res_df['best_tn'].sum()\n",
" sum_best_fp = res_df['best_fp'].sum()\n",
" sum_best_fn = res_df['best_fn'].sum()\n",
"\n",
" # Calculate precision, recall and f1 score\n",
" precision = sum_best_tp / (sum_best_tp + sum_best_fp) if (sum_best_tp + sum_best_fp) > 0 else 0\n",
" recall = sum_best_tp / (sum_best_tp + sum_best_fn) if (sum_best_tp + sum_best_fn) > 0 else 0\n",
" f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0\n",
"\n",
" # Calculate the average and std of 'roc' and 'pr'\n",
" roc_avg = res_df['roc'].mean()\n",
" roc_std = res_df['roc'].std()\n",
" pr_avg = res_df['pr'].mean()\n",
" pr_std = res_df['pr'].std()\n",
"\n",
" # Append the results to the dataframe\n",
" summary_row = pd.Series({\n",
" 'best_tp': sum_best_tp,\n",
" 'best_tn': sum_best_tn,\n",
" 'best_fp': sum_best_fp,\n",
" 'best_fn': sum_best_fn,\n",
" 'best_pre': precision,\n",
" 'best_rec': recall,\n",
" 'b_f_1': f1_score,\n",
" 'roc': roc_avg,\n",
" 'pr': pr_avg\n",
" })\n",
"\n",
" std_row = pd.Series({\n",
" 'roc': roc_std,\n",
" 'pr': pr_std\n",
" })\n",
"\n",
" # Append the rows to the dataframe\n",
" res_df = res_df._append(summary_row, ignore_index=True)\n",
" res_df = res_df._append(std_row, ignore_index=True)\n",
" \n",
" return res_df"
]
},
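{
"cell_type": "markdown",
"id": "summary-demo-md",
"metadata": {},
"source": [
"A hypothetical two-channel example (numbers invented) showing what\n",
"`add_summary_statistics` appends: one row with micro-averaged precision,\n",
"recall and F1 (confusion counts are pooled across channels before the\n",
"ratios are taken) plus mean ROC/PR AUCs, and one row with their stds."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "summary-demo-code",
"metadata": {},
"outputs": [],
"source": [
"toy_res = pd.DataFrame({'best_tp': [8, 2], 'best_tn': [90, 95],\n",
"                        'best_fp': [2, 1], 'best_fn': [0, 2],\n",
"                        'roc': [0.95, 0.80], 'pr': [0.90, 0.60]})\n",
"add_summary_statistics(toy_res)"
]
},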
{
"cell_type": "code",
"execution_count": 4,
"id": "af5cb8af",
"metadata": {},
"outputs": [],
"source": [
"def add_summary_statistics_pa(res_df):\n",
" # Compute the sum of 'best_tp', 'best_tn', 'best_fp', 'best_fn'\n",
" sum_pa_tp = res_df['pa_tp'].sum()\n",
" sum_pa_tn = res_df['pa_tn'].sum()\n",
" sum_pa_fp = res_df['pa_fp'].sum()\n",
" sum_pa_fn = res_df['pa_fn'].sum()\n",
"\n",
" # Calculate precision, recall and f1 score\n",
" precision = sum_pa_tp / (sum_pa_tp + sum_pa_fp) if (sum_pa_tp + sum_pa_fp) > 0 else 0\n",
" recall = sum_pa_tp / (sum_pa_tp + sum_pa_fn) if (sum_pa_tp + sum_pa_fn) > 0 else 0\n",
" f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0\n",
"\n",
"\n",
" # Append the results to the dataframe\n",
" summary_row = pd.Series({\n",
" 'pa_tp': sum_pa_tp,\n",
" 'pa_tn': sum_pa_tn,\n",
" 'pa_fp': sum_pa_fp,\n",
" 'pa_fn': sum_pa_fn,\n",
" 'pa_pre': precision,\n",
" 'pa_rec': recall,\n",
" 'pa_f1': f1_score,\n",
" })\n",
"\n",
"\n",
" # Append the row to the dataframe\n",
" res_df = res_df._append(summary_row, ignore_index=True)\n",
" \n",
" return res_df"
]
},
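{
"cell_type": "markdown",
"id": "thr-demo-md",
"metadata": {},
"source": [
"The next cell runs the full evaluation. Its key step is the best-F1 threshold\n",
"search over the precision-recall curve; the sketch below reproduces it on\n",
"synthetic scores (invented for illustration). Note that\n",
"`precision_recall_curve` returns one fewer threshold than precision/recall\n",
"points, which is why the last PR point is skipped."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "thr-demo-code",
"metadata": {},
"outputs": [],
"source": [
"y_true = np.array([0, 0, 1, 1, 0, 1])\n",
"y_score = np.array([0.10, 0.40, 0.35, 0.80, 0.20, 0.70])\n",
"p, r, thr = precision_recall_curve(y_true, y_score, pos_label=1)\n",
"f1 = 2 * p * r / np.maximum(p + r, 1e-12)\n",
"best = int(np.argmax(f1[:-1]))  # the final PR point has no threshold\n",
"print('best threshold:', thr[best], 'best F1:', f1[best])"
]
},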
{
"cell_type": "code",
"execution_count": 1,
"id": "9bc18dd8",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"res_df = pd.DataFrame(columns=['name', 'tp', 'tn', 'fp', 'fn', 'roc', 'pr', \n",
" 'best_tp', 'best_tn', 'best_fp', 'best_fn', 'best_pre', 'best_rec', 'b_f_1']) \n",
"\n",
"pa_df = pd.DataFrame(columns=['name', 'pa_tp', 'pa_tn', 'pa_fp', 'pa_fn', 'pa_pre', 'pa_rec', 'pa_f1', 'latency'])\n",
"\n",
"\n",
"with open('datasets/MSL_SMAP/labeled_anomalies.csv', 'r') as file:\n",
" csv_reader = pd.read_csv(file, delimiter=',')\n",
"\n",
"data_info = csv_reader[csv_reader['spacecraft'] == 'MSL']\n",
"\n",
"\n",
"# data_info = os.listdir('../datasets/KPI/train/')\n",
"\n",
"# data_info = os.listdir(os.path.join('datasets', 'A1Benchmark')) \n",
"\n",
"# data_info = os.listdir('../datasets/SMD/train/')\n",
"# files = [file for file in data_info if file.startswith('machine-')]\n",
"\n",
"\n",
"for filename in data_info['chan_id']:\n",
" if filename!='.json':\n",
" print(filename)\n",
" df_train = pd.read_csv(\"results/MSL/\" + filename + \"/classification/classification_trainprobs.csv\")\n",
" df_test = pd.read_csv(\"results/MSL/\" + filename + \"/classification/classification_testprobs.csv\")\n",
" cl_num = df_train.shape[1] - 1\n",
"\n",
" df_train['Class'] = np.where((df_train['Class'] == 0), 0, 1)\n",
" df_train['pred']=df_train[df_train.columns[0:cl_num]].idxmax(axis=1)\n",
"\n",
" score_col = df_train['pred'].value_counts().idxmax()\n",
" \n",
" df_test['Class'] = np.where((df_test['Class'] == 0), 0, 1)\n",
" df_test['pred'] = df_test[df_test.columns[0:cl_num]].idxmax(axis=1)\n",
" \n",
" roc_auc, pr_auc, best_tn, best_tp, best_fp, best_fn, best_pre, best_rec, best_f1 = 0, 0, 0, 0, 0, 0, 0, 0, 0\n",
" try:\n",
"\n",
" df_test['pred'] = np.where((df_test['pred'] == score_col), 0, 1)\n",
"\n",
" MCM = metrics.multilabel_confusion_matrix(df_test['Class'], df_test['pred'], labels = [1, 0])\n",
"\n",
" tn = MCM[0][0, 0]\n",
" tp = MCM[0][1, 1]\n",
" fp = MCM[0][0, 1]\n",
" fn = MCM[0][1, 0]\n",
"\n",
" pre=tp/(tp+fp)\n",
" recall = tp/(tp+fn)\n",
" f_1 = 2*pre*recall/(pre+recall)\n",
" print('f-1 : ', f_1)\n",
"\n",
" scores = 1-df_test[score_col]\n",
" # Calculate AU-ROC\n",
" roc_auc = roc_auc_score(df_test['Class'], scores)\n",
" print('AU-ROC : ', roc_auc)\n",
"\n",
" # Calculate AU-PR\n",
" pr_auc = average_precision_score(df_test['Class'], scores)\n",
" print('AU-PR : ', pr_auc)\n",
"\n",
" fpr, tpr, thresholds = roc_curve(df_test['Class'], scores, pos_label=1)\n",
" precision, recall, thresholds = precision_recall_curve(df_test['Class'], scores, pos_label=1)\n",
"\n",
"\n",
" res = pd.DataFrame()\n",
" res['pre'] = precision\n",
" res['rec'] = recall\n",
" res['f1'] = 2*res['pre']*res['rec'] / (res['pre']+res['rec'])\n",
" best_idx = res['f1'].argmax()\n",
" best_f1 = res['f1'][best_idx]\n",
" best_pre = res['pre'][best_idx]\n",
" best_rec = res['rec'][best_idx]\n",
" best_thr = thresholds[best_idx]\n",
" print('Best f1 : ', best_f1, 'best_thr', best_thr)\n",
" anomalies = [True if s >= best_thr else False for s in scores]\n",
"\n",
" best_tn, best_fp, best_fn, best_tp = confusion_matrix(df_test['Class'], anomalies).ravel()\n",
" except ValueError:\n",
" pass\n",
"\n",
" new_row = pd.Series([filename, tp, tn, fp, fn, roc_auc, pr_auc, best_tp, best_tn, best_fp, best_fn, best_pre, best_rec, best_f1],\n",
" index=['name', 'tp', 'tn', 'fp', 'fn', 'roc', 'pr', 'best_tp', 'best_tn', 'best_fp', 'best_fn', 'best_pre', 'best_rec', 'b_f_1'])\n",
" res_df = res_df._append(new_row, ignore_index=True)\n",
" \n",
" \n",
" pa_f1 = -1\n",
" for thr in thresholds:\n",
" preds_pa = [True if s >= thr else False for s in scores]\n",
" pa_prediction, t_latency, t_tp, t_tn, t_fp, t_fn, t_pre, t_rec, t_f1 = adjust_predicts(df_test['Class'], preds_pa, True)\n",
" if t_f1 > pa_f1:\n",
" latency, pa_tp, pa_tn, pa_fp, pa_fn, pa_pre, pa_rec, pa_f1 = t_latency, t_tp, t_tn, t_fp, t_fn, t_pre, t_rec, t_f1\n",
" \n",
" new_row1 = pd.Series([filename, pa_tp, pa_tn, pa_fp, pa_fn, pa_pre, pa_rec, pa_f1, latency],\n",
" index=['name', 'pa_tp', 'pa_tn', 'pa_fp', 'pa_fn', 'pa_pre', 'pa_rec', 'pa_f1', 'latency']) \n",
" pa_df = pa_df._append(new_row1, ignore_index=True)\n",
" \n",
" \n",
"res_df = add_summary_statistics(res_df)\n",
"res_df.to_csv('msl_results.csv')\n",
"\n",
"pa_df = add_summary_statistics_pa(pa_df)\n",
"pa_df.to_csv('msl_results_pa.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tsenv",
"language": "python",
"name": "tsenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
