From d6e28b81f46799219a1ba2041c12d96c853a4851 Mon Sep 17 00:00:00 2001 From: Sverre Nystad Date: Fri, 29 Mar 2024 21:20:10 +0100 Subject: [PATCH] feat: Add dependencies and update code for AutoGluon model --- models/auto_gluon.ipynb | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/models/auto_gluon.ipynb b/models/auto_gluon.ipynb index ee84bcb..9519020 100644 --- a/models/auto_gluon.ipynb +++ b/models/auto_gluon.ipynb @@ -43,6 +43,7 @@ "outputs": [], "source": [ "%pip install autogluon\n", + "%pip install bokeh\n", "%pip install scikit-learn" ] }, @@ -61,15 +62,15 @@ "metadata": {}, "outputs": [], "source": [ - "%autoreload \n", + "%autoreload 2\n", "\n", "from sklearn.metrics import accuracy_score, classification_report\n", "import autogluon.core as ag\n", + "import pandas as pd\n", "from autogluon.tabular import TabularDataset, TabularPredictor\n", "\n", - "from src.features.post_processor import save_predictions\n", - "from src.ml_service import prepare_data, prepare_test_data\n", - "from src.config import TARGET_FEATURES" + "from src.ml_service import prepare_data, prepare_test_data, save_predictions\n", + "from src.config import TARGET_FEATURE" ] }, { @@ -86,10 +87,11 @@ "outputs": [], "source": [ "x_train, _, x_test, y_train, _, y_test = prepare_data(validation_size=0, test_size=0.1)\n", - "for target_feature_name in TARGET_FEATURES:\n", - " x_train[target_feature_name] = y_train\n", "\n", - "data = TabularDataset(x_train)" + "combined_train_data = pd.concat([x_train, y_train], axis=1)\n", + "combined_test_data = pd.concat([x_test, y_test], axis=1)\n", + "training_data = TabularDataset(combined_train_data)\n", + "test_data = TabularDataset(combined_test_data)" ] }, { @@ -122,9 +124,9 @@ "outputs": [], "source": [ "# Initialize the AutoGluon TabularPredictor\n", - "time_limit = 24*60*60 # Set this to longest time you are willing to wait (in seconds)\n", - "metric = 'roc_auc'\n", - "predictor = TabularPredictor(label=target_feature_name, eval_metric=metric).fit(data, time_limit=time_limit, presets='best_quality')" + "time_limit = 3*60 #24*60*60 # Set this to longest time you are willing to wait (in seconds)\n", + "metric = 'log_loss'\n", + "predictor = TabularPredictor(label=TARGET_FEATURE, eval_metric=metric).fit(training_data, time_limit=time_limit, presets='best_quality')" ] }, { @@ -166,7 +168,7 @@ "test_accuracy = accuracy_score(y_test, y_test_pred)\n", "print(\"Test Accuracy: \", test_accuracy)\n", "print(\"Test Classification Report:\\n\", classification_report(y_test, y_test_pred))\n", - "# predictor.leaderboard(x_test, silent=True)\n" + "predictor.leaderboard(test_data)\n" ] }, { @@ -182,8 +184,8 @@ "metadata": {}, "outputs": [], "source": [ - "x_test = prepare_test_data()\n", - "final_predictions = predictor.predict(x_test)" + "x_final_test = prepare_test_data()\n", + "final_predictions = predictor.predict(x_final_test)" ] }, {