From 0766425038b6502b4a549ca8888e8ffbdae4ad28 Mon Sep 17 00:00:00 2001 From: Sverre Nystad Date: Tue, 26 Mar 2024 23:32:47 +0100 Subject: [PATCH] feat: improve performance and documentation of auto gluon --- models/auto_gluon.ipynb | 795 +++------------------------------------- 1 file changed, 61 insertions(+), 734 deletions(-) diff --git a/models/auto_gluon.ipynb b/models/auto_gluon.ipynb index 0f8320b..5e68ec5 100644 --- a/models/auto_gluon.ipynb +++ b/models/auto_gluon.ipynb @@ -4,33 +4,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# AutoGluon" + "# AutoGluon\n", + "AutoGluon is a fully automated machine learning (ML) toolkit that simplifies the process of developing and fine-tuning machine learning models. Designed for both beginners and experienced ML practitioners, AutoGluon enables users to achieve high-quality model performance with minimal effort and domain knowledge. At its core, AutoGluon automates model selection, hyperparameter tuning, and ensemble creation, allowing for the efficient handling of various types of data, including tabular, image, and text datasets. By abstracting the complexity of underlying algorithms, AutoGluon facilitates rapid prototyping and deployment of ML applications, making advanced ML techniques accessible and practical for a wide range of applications. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Setup\n", - "### Correct the path" + "## Setup" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'c:\\\\Users\\\\sverr\\\\OneDrive\\\\Desktop\\\\ai\\\\machine-learning-structure\\\\notebooks'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import sys\n", "import os\n", @@ -49,486 +38,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting autogluon\n", - " Using cached autogluon-1.0.0-py3-none-any.whl (9.9 kB)\n", - "Collecting autogluon.features==1.0.0\n", - " Using cached autogluon.features-1.0.0-py3-none-any.whl (62 kB)\n", - "Collecting autogluon.tabular[all]==1.0.0\n", - " Using cached autogluon.tabular-1.0.0-py3-none-any.whl (306 kB)\n", - "Collecting autogluon.multimodal==1.0.0\n", - " Using cached autogluon.multimodal-1.0.0-py3-none-any.whl (416 kB)\n", - "Collecting autogluon.core[all]==1.0.0\n", - " Using cached autogluon.core-1.0.0-py3-none-any.whl (229 kB)\n", - "Collecting autogluon.timeseries[all]==1.0.0\n", - " Using cached autogluon.timeseries-1.0.0-py3-none-any.whl (120 kB)\n", - "Collecting pandas<2.2.0,>=2.0.0\n", - " Using cached pandas-2.1.4-cp310-cp310-win_amd64.whl (10.7 MB)\n", - "Requirement already satisfied: scipy<1.13,>=1.5.4 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (1.11.4)\n", - "Collecting autogluon.common==1.0.0\n", - " Using cached autogluon.common-1.0.0-py3-none-any.whl (64 kB)\n", - "Requirement already satisfied: matplotlib in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (3.8.3)\n", - "Requirement already satisfied: networkx<4,>=3.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (3.2.1)\n", - 
"Requirement already satisfied: tqdm<5,>=4.38 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (4.65.2)\n", - "Collecting boto3<2,>=1.10\n", - " Using cached boto3-1.34.70-py3-none-any.whl (139 kB)\n", - "Requirement already satisfied: numpy<1.29,>=1.21 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (1.26.4)\n", - "Requirement already satisfied: requests in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (2.31.0)\n", - "Requirement already satisfied: scikit-learn<1.5,>=1.3.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.core[all]==1.0.0->autogluon) (1.4.1.post1)\n", - "Collecting ray[default]<2.7,>=2.6.3\n", - " Using cached ray-2.6.3-cp310-cp310-win_amd64.whl (22.4 MB)\n", - "Collecting hyperopt<0.2.8,>=0.2.7\n", - " Using cached hyperopt-0.2.7-py2.py3-none-any.whl (1.6 MB)\n", - "Collecting async-timeout\n", - " Using cached async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n", - "Collecting torch<2.1,>=2.0\n", - " Using cached torch-2.0.1-cp310-cp310-win_amd64.whl (172.3 MB)\n", - "Collecting timm<0.10.0,>=0.9.5\n", - " Using cached timm-0.9.16-py3-none-any.whl (2.2 MB)\n", - "Requirement already satisfied: text-unidecode<1.4,>=1.3 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.multimodal==1.0.0->autogluon) (1.3)\n", - "Collecting scikit-image<0.21.0,>=0.19.1\n", - " Using cached scikit_image-0.20.0-cp310-cp310-win_amd64.whl (23.7 MB)\n", - "Collecting torchmetrics<1.2.0,>=1.0.0\n", - " Using cached torchmetrics-1.1.2-py3-none-any.whl (764 kB)\n", - "Collecting tensorboard<3,>=2.9\n", - " Using cached tensorboard-2.16.2-py3-none-any.whl (5.5 MB)\n", - "Collecting accelerate<0.22.0,>=0.21.0\n", - " Using cached accelerate-0.21.0-py3-none-any.whl (244 kB)\n", - "Collecting lightning<2.1,>=2.0.0\n", - " Using cached lightning-2.0.9.post0-py3-none-any.whl (1.9 MB)\n", - "Requirement already satisfied: nvidia-ml-py3==7.352.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.multimodal==1.0.0->autogluon) (7.352.0)\n", - "Collecting torchvision<0.16.0,>=0.14.0\n", - " Using cached torchvision-0.15.2-cp310-cp310-win_amd64.whl (1.2 MB)\n", - "Requirement already satisfied: Pillow<11,>=10.0.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.multimodal==1.0.0->autogluon) (10.2.0)\n", - "Collecting pytorch-metric-learning<2.0,>=1.3.0\n", - " Using cached pytorch_metric_learning-1.7.3-py3-none-any.whl (112 kB)\n", - "Collecting nltk<4.0.0,>=3.4.5\n", - " Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)\n", - "Collecting evaluate<0.5.0,>=0.4.0\n", - " Using cached evaluate-0.4.1-py3-none-any.whl (84 kB)\n", - "Requirement already satisfied: jinja2<3.2,>=3.0.3 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.multimodal==1.0.0->autogluon) (3.1.3)\n", - "Collecting defusedxml<0.7.2,>=0.7.1\n", - " Using cached defusedxml-0.7.1-py2.py3-none-any.whl (25 kB)\n", - "Requirement already satisfied: pytesseract<0.3.11,>=0.3.9 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages 
(from autogluon.multimodal==1.0.0->autogluon) (0.3.10)\n", - "Collecting nlpaug<1.2.0,>=1.1.10\n", - " Using cached nlpaug-1.1.11-py3-none-any.whl (410 kB)\n", - "Collecting nptyping<2.5.0,>=1.4.4\n", - " Using cached nptyping-2.4.1-py3-none-any.whl (36 kB)\n", - "Collecting openmim<0.4.0,>=0.3.7\n", - " Using cached openmim-0.3.9-py2.py3-none-any.whl (52 kB)\n", - "Collecting seqeval<1.3.0,>=1.2.2\n", - " Using cached seqeval-1.2.2.tar.gz (43 kB)\n", - " Preparing metadata (setup.py): started\n", - " Preparing metadata (setup.py): finished with status 'done'\n", - "Collecting omegaconf<2.3.0,>=2.1.1\n", - " Using cached omegaconf-2.2.3-py3-none-any.whl (79 kB)\n", - "Collecting transformers[sentencepiece]<4.32.0,>=4.31.0\n", - " Using cached transformers-4.31.0-py3-none-any.whl (7.4 MB)\n", - "Collecting jsonschema<4.18,>=4.14\n", - " Using cached jsonschema-4.17.3-py3-none-any.whl (90 kB)\n", - "Collecting xgboost<2.1,>=1.6\n", - " Using cached xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)\n", - "Collecting catboost<1.3,>=1.1\n", - " Using cached catboost-1.2.3-cp310-cp310-win_amd64.whl (101.0 MB)\n", - "Collecting lightgbm<4.2,>=3.3\n", - " Using cached lightgbm-4.1.0-py3-none-win_amd64.whl (1.3 MB)\n", - "Collecting fastai<2.8,>=2.3.1\n", - " Using cached fastai-2.7.14-py3-none-any.whl (232 kB)\n", - "Collecting gluonts<0.15,>=0.14.0\n", - " Using cached gluonts-0.14.4-py3-none-any.whl (1.5 MB)\n", - "Collecting statsforecast<1.5,>=1.4.0\n", - " Using cached statsforecast-1.4.0-py3-none-any.whl (91 kB)\n", - "Collecting mlforecast<0.10.1,>=0.10.0\n", - " Using cached mlforecast-0.10.0-py3-none-any.whl (47 kB)\n", - "Collecting utilsforecast<0.0.11,>=0.0.10\n", - " Using cached utilsforecast-0.0.10-py3-none-any.whl (30 kB)\n", - "Collecting orjson~=3.9\n", - " Using cached orjson-3.9.15-cp310-none-win_amd64.whl (136 kB)\n", - "Requirement already satisfied: joblib<2,>=1.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.timeseries[all]==1.0.0->autogluon) (1.3.2)\n", - "Requirement already satisfied: statsmodels<0.15,>=0.13.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.timeseries[all]==1.0.0->autogluon) (0.14.1)\n", - "Collecting pytorch-lightning<2.1,>=2.0.0\n", - " Using cached pytorch_lightning-2.0.9.post0-py3-none-any.whl (727 kB)\n", - "Requirement already satisfied: psutil<6,>=5.7.3 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.common==1.0.0->autogluon.core[all]==1.0.0->autogluon) (5.9.8)\n", - "Requirement already satisfied: setuptools in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from autogluon.common==1.0.0->autogluon.core[all]==1.0.0->autogluon) (60.2.0)\n", - "Requirement already satisfied: packaging>=20.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from accelerate<0.22.0,>=0.21.0->autogluon.multimodal==1.0.0->autogluon) (24.0)\n", - "Requirement already satisfied: pyyaml in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from accelerate<0.22.0,>=0.21.0->autogluon.multimodal==1.0.0->autogluon) (6.0.1)\n", - "Collecting botocore<1.35.0,>=1.34.70\n", - " Using cached botocore-1.34.70-py3-none-any.whl (12.0 MB)\n", - "Collecting jmespath<2.0.0,>=0.7.1\n", - " Using cached jmespath-1.0.1-py3-none-any.whl (20 kB)\n", - "Collecting 
s3transfer<0.11.0,>=0.10.0\n", - " Using cached s3transfer-0.10.1-py3-none-any.whl (82 kB)\n", - "Collecting plotly\n", - " Using cached plotly-5.20.0-py3-none-any.whl (15.7 MB)\n", - "Requirement already satisfied: six in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from catboost<1.3,>=1.1->autogluon.tabular[all]==1.0.0->autogluon) (1.16.0)\n", - "Collecting graphviz\n", - " Using cached graphviz-0.20.3-py3-none-any.whl (47 kB)\n", - "Collecting datasets>=2.0.0\n", - " Using cached datasets-2.18.0-py3-none-any.whl (510 kB)\n", - "Collecting dill\n", - " Using cached dill-0.3.8-py3-none-any.whl (116 kB)\n", - "Collecting huggingface-hub>=0.7.0\n", - " Using cached huggingface_hub-0.22.0-py3-none-any.whl (388 kB)\n", - "Collecting fsspec[http]>=2021.05.0\n", - " Using cached fsspec-2024.3.1-py3-none-any.whl (171 kB)\n", - "Collecting multiprocess\n", - " Using cached multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", - "Collecting responses<0.19\n", - " Using cached responses-0.18.0-py3-none-any.whl (38 kB)\n", - "Requirement already satisfied: xxhash in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from evaluate<0.5.0,>=0.4.0->autogluon.multimodal==1.0.0->autogluon) (3.4.1)\n", - "Requirement already satisfied: pip in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from fastai<2.8,>=2.3.1->autogluon.tabular[all]==1.0.0->autogluon) (22.0.4)\n", - "Collecting fastprogress>=0.2.4\n", - " Using cached fastprogress-1.0.3-py3-none-any.whl (12 kB)\n", - "Collecting fastdownload<2,>=0.0.5\n", - " Using cached fastdownload-0.0.7-py3-none-any.whl (12 kB)\n", - "Collecting spacy<4\n", - " Using cached spacy-3.7.4-cp310-cp310-win_amd64.whl (12.1 MB)\n", - "Collecting fastcore<1.6,>=1.5.29\n", - " Using cached fastcore-1.5.29-py3-none-any.whl (67 kB)\n", - "Requirement already satisfied: typing-extensions~=4.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from gluonts<0.15,>=0.14.0->autogluon.timeseries[all]==1.0.0->autogluon) (4.10.0)\n", - "Requirement already satisfied: pydantic<3,>=1.7 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from gluonts<0.15,>=0.14.0->autogluon.timeseries[all]==1.0.0->autogluon) (1.10.14)\n", - "Requirement already satisfied: toolz~=0.10 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from gluonts<0.15,>=0.14.0->autogluon.timeseries[all]==1.0.0->autogluon) (0.12.1)\n", - "Collecting future\n", - " Using cached future-1.0.0-py3-none-any.whl (491 kB)\n", - "Requirement already satisfied: py4j in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from hyperopt<0.2.8,>=0.2.7->autogluon.core[all]==1.0.0->autogluon) (0.10.9.7)\n", - "Collecting cloudpickle\n", - " Using cached cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from jinja2<3.2,>=3.0.3->autogluon.multimodal==1.0.0->autogluon) (2.1.5)\n", - "Requirement already satisfied: attrs>=17.4.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from jsonschema<4.18,>=4.14->autogluon.multimodal==1.0.0->autogluon) (23.2.0)\n", - "Requirement already satisfied: 
pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from jsonschema<4.18,>=4.14->autogluon.multimodal==1.0.0->autogluon) (0.20.0)\n", - "Collecting backoff<4.0,>=2.2.1\n", - " Using cached backoff-2.2.1-py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: websocket-client<3.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (1.7.0)\n", - "Collecting lightning-utilities<2.0,>=0.7.0\n", - " Using cached lightning_utilities-0.11.1-py3-none-any.whl (26 kB)\n", - "Collecting lightning-cloud>=0.5.38\n", - " Using cached lightning_cloud-0.5.65-py3-none-any.whl (1.0 MB)\n", - "Collecting click<10.0\n", - " Using cached click-8.1.7-py3-none-any.whl (97 kB)\n", - "Collecting starsessions<2.0,>=1.2.1\n", - " Using cached starsessions-1.3.0-py3-none-any.whl (10 kB)\n", - "Requirement already satisfied: python-multipart<2.0,>=0.0.5 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (0.0.9)\n", - "Collecting croniter<1.5.0,>=1.3.0\n", - " Using cached croniter-1.4.1-py2.py3-none-any.whl (19 kB)\n", - "Collecting starlette\n", - " Using cached starlette-0.37.2-py3-none-any.whl (71 kB)\n", - "Collecting fastapi<2.0,>=0.92.0\n", - " Using cached fastapi-0.110.0-py3-none-any.whl (92 kB)\n", - "Requirement already satisfied: traitlets<7.0,>=5.3.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (5.14.2)\n", - "Collecting dateutils<2.0\n", - " Using cached dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)\n", - "Requirement already satisfied: urllib3<4.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (1.26.18)\n", - "Collecting beautifulsoup4<6.0,>=4.8.0\n", - " Using cached beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)\n", - "Collecting rich<15.0,>=12.3.0\n", - " Using cached rich-13.7.1-py3-none-any.whl (240 kB)\n", - "Collecting inquirer<5.0,>=2.10.0\n", - " Using cached inquirer-3.2.4-py3-none-any.whl (18 kB)\n", - "Collecting uvicorn<2.0\n", - " Using cached uvicorn-0.29.0-py3-none-any.whl (60 kB)\n", - "Collecting arrow<3.0,>=1.2.0\n", - " Using cached arrow-1.3.0-py3-none-any.whl (66 kB)\n", - "Collecting deepdiff<8.0,>=5.7.0\n", - " Using cached deepdiff-6.7.1-py3-none-any.whl (76 kB)\n", - "Requirement already satisfied: websockets<13.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (12.0)\n", - "Collecting window-ops\n", - " Using cached window_ops-0.0.15-py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: numba in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from mlforecast<0.10.1,>=0.10.0->autogluon.timeseries[all]==1.0.0->autogluon) (0.59.1)\n", - "Collecting gdown>=4.0.0\n", - " Using cached gdown-5.1.0-py3-none-any.whl (17 kB)\n", - "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from nltk<4.0.0,>=3.4.5->autogluon.multimodal==1.0.0->autogluon) (2023.12.25)\n", - 
"Requirement already satisfied: antlr4-python3-runtime==4.9.* in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from omegaconf<2.3.0,>=2.1.1->autogluon.multimodal==1.0.0->autogluon) (4.9.3)\n", - "Collecting opendatalab\n", - " Using cached opendatalab-0.0.10-py3-none-any.whl (29 kB)\n", - "Requirement already satisfied: colorama in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from openmim<0.4.0,>=0.3.7->autogluon.multimodal==1.0.0->autogluon) (0.4.6)\n", - "Collecting model-index\n", - " Using cached model_index-0.1.11-py3-none-any.whl (34 kB)\n", - "Requirement already satisfied: tabulate in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from openmim<0.4.0,>=0.3.7->autogluon.multimodal==1.0.0->autogluon) (0.9.0)\n", - "Requirement already satisfied: tzdata>=2022.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from pandas<2.2.0,>=2.0.0->autogluon.core[all]==1.0.0->autogluon) (2024.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from pandas<2.2.0,>=2.0.0->autogluon.core[all]==1.0.0->autogluon) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from pandas<2.2.0,>=2.0.0->autogluon.core[all]==1.0.0->autogluon) (2023.4)\n", - "Collecting filelock\n", - " Using cached filelock-3.13.3-py3-none-any.whl (11 kB)\n", - "Collecting protobuf!=3.19.5,>=3.15.3\n", - " Using cached protobuf-5.26.0-cp310-abi3-win_amd64.whl (420 kB)\n", - "Collecting aiosignal\n", - " Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", - "Collecting frozenlist\n", - " Using cached frozenlist-1.4.1-cp310-cp310-win_amd64.whl (50 kB)\n", - "Collecting msgpack<2.0.0,>=1.0.0\n", - " Using cached msgpack-1.0.8-cp310-cp310-win_amd64.whl (75 kB)\n", - "Collecting grpcio>=1.42.0\n", - " Using cached grpcio-1.62.1-cp310-cp310-win_amd64.whl (3.8 MB)\n", - "Requirement already satisfied: pyarrow<7.0.0,>=6.0.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (6.0.1)\n", - "Collecting tensorboardX>=1.9\n", - " Using cached tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n", - "Collecting aiohttp>=3.7\n", - " Using cached aiohttp-3.9.3-cp310-cp310-win_amd64.whl (365 kB)\n", - "Collecting colorful\n", - " Using cached colorful-0.5.6-py2.py3-none-any.whl (201 kB)\n", - "Requirement already satisfied: py-spy>=0.2.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (0.3.14)\n", - "Requirement already satisfied: smart-open in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (6.4.0)\n", - "Collecting opencensus\n", - " Using cached opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", - "Collecting prometheus-client>=0.7.1\n", - " Using cached prometheus_client-0.20.0-py3-none-any.whl (54 kB)\n", - "Collecting virtualenv<20.21.1,>=20.0.24\n", - " Using cached virtualenv-20.21.0-py3-none-any.whl (8.7 MB)\n", - "Collecting gpustat>=1.0.0\n", - " Using cached 
gpustat-1.1.1-py3-none-any.whl\n", - "Collecting aiohttp-cors\n", - " Using cached aiohttp_cors-0.7.0-py3-none-any.whl (27 kB)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from requests->autogluon.core[all]==1.0.0->autogluon) (3.3.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from requests->autogluon.core[all]==1.0.0->autogluon) (2024.2.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from requests->autogluon.core[all]==1.0.0->autogluon) (3.6)\n", - "Collecting lazy_loader>=0.1\n", - " Using cached lazy_loader-0.3-py3-none-any.whl (9.1 kB)\n", - "Requirement already satisfied: tifffile>=2019.7.26 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-image<0.21.0,>=0.19.1->autogluon.multimodal==1.0.0->autogluon) (2024.2.12)\n", - "Requirement already satisfied: PyWavelets>=1.1.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-image<0.21.0,>=0.19.1->autogluon.multimodal==1.0.0->autogluon) (1.5.0)\n", - "Collecting imageio>=2.4.1\n", - " Using cached imageio-2.34.0-py3-none-any.whl (313 kB)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-learn<1.5,>=1.3.0->autogluon.core[all]==1.0.0->autogluon) (3.4.0)\n", - "Requirement already satisfied: patsy>=0.5.4 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from statsmodels<0.15,>=0.13.0->autogluon.timeseries[all]==1.0.0->autogluon) (0.5.6)\n", - "Collecting absl-py>=0.4\n", - " Using cached absl_py-2.1.0-py3-none-any.whl (133 kB)\n", - "Collecting markdown>=2.6.8\n", - " Using cached Markdown-3.6-py3-none-any.whl (105 kB)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from tensorboard<3,>=2.9->autogluon.multimodal==1.0.0->autogluon) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from tensorboard<3,>=2.9->autogluon.multimodal==1.0.0->autogluon) (3.0.1)\n", - "Requirement already satisfied: safetensors in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from timm<0.10.0,>=0.9.5->autogluon.multimodal==1.0.0->autogluon) (0.4.2)\n", - "Requirement already satisfied: sympy in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from torch<2.1,>=2.0->autogluon.multimodal==1.0.0->autogluon) (1.12)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from transformers[sentencepiece]<4.32.0,>=4.31.0->autogluon.multimodal==1.0.0->autogluon) (0.13.3)\n", - "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from transformers[sentencepiece]<4.32.0,>=4.31.0->autogluon.multimodal==1.0.0->autogluon) (0.2.0)\n", - 
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from matplotlib->autogluon.core[all]==1.0.0->autogluon) (1.4.5)\n", - "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from matplotlib->autogluon.core[all]==1.0.0->autogluon) (4.50.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from matplotlib->autogluon.core[all]==1.0.0->autogluon) (1.2.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from matplotlib->autogluon.core[all]==1.0.0->autogluon) (3.1.2)\n", - "Requirement already satisfied: cycler>=0.10 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from matplotlib->autogluon.core[all]==1.0.0->autogluon) (0.12.1)\n", - "Collecting multidict<7.0,>=4.5\n", - " Using cached multidict-6.0.5-cp310-cp310-win_amd64.whl (28 kB)\n", - "Collecting yarl<2.0,>=1.0\n", - " Using cached yarl-1.9.4-cp310-cp310-win_amd64.whl (76 kB)\n", - "Requirement already satisfied: types-python-dateutil>=2.8.10 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from arrow<3.0,>=1.2.0->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (2.9.0.20240316)\n", - "Requirement already satisfied: soupsieve>1.2 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from beautifulsoup4<6.0,>=4.8.0->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (2.5)\n", - "Collecting fsspec[http]>=2021.05.0\n", - " Downloading fsspec-2024.2.0-py3-none-any.whl (170 kB)\n", - " -------------------------------------- 170.9/170.9 KB 5.2 MB/s eta 0:00:00\n", - "Collecting datasets>=2.0.0\n", - " Using cached datasets-2.17.1-py3-none-any.whl (536 kB)\n", - " Using cached datasets-2.17.0-py3-none-any.whl (536 kB)\n", - " Using cached datasets-2.16.1-py3-none-any.whl (507 kB)\n", - "Collecting dill\n", - " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", - " ---------------------------------------- 115.3/115.3 KB ? 
eta 0:00:00\n", - "Collecting pyarrow-hotfix\n", - " Using cached pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", - "Collecting datasets>=2.0.0\n", - " Using cached datasets-2.16.0-py3-none-any.whl (507 kB)\n", - " Using cached datasets-2.15.0-py3-none-any.whl (521 kB)\n", - " Using cached datasets-2.14.7-py3-none-any.whl (520 kB)\n", - " Using cached datasets-2.14.6-py3-none-any.whl (493 kB)\n", - " Using cached datasets-2.14.5-py3-none-any.whl (519 kB)\n", - "Collecting fsspec[http]>=2021.05.0\n", - " Using cached fsspec-2023.6.0-py3-none-any.whl (163 kB)\n", - "Collecting datasets>=2.0.0\n", - " Using cached datasets-2.14.4-py3-none-any.whl (519 kB)\n", - " Using cached datasets-2.14.3-py3-none-any.whl (519 kB)\n", - " Using cached datasets-2.14.2-py3-none-any.whl (518 kB)\n", - " Using cached datasets-2.14.1-py3-none-any.whl (492 kB)\n", - " Using cached datasets-2.14.0-py3-none-any.whl (492 kB)\n", - " Using cached datasets-2.13.2-py3-none-any.whl (512 kB)\n", - " Using cached datasets-2.13.1-py3-none-any.whl (486 kB)\n", - " Using cached datasets-2.13.0-py3-none-any.whl (485 kB)\n", - " Using cached datasets-2.12.0-py3-none-any.whl (474 kB)\n", - " Using cached datasets-2.11.0-py3-none-any.whl (468 kB)\n", - " Using cached datasets-2.10.1-py3-none-any.whl (469 kB)\n", - "Collecting dill\n", - " Using cached dill-0.3.6-py3-none-any.whl (110 kB)\n", - "Collecting ordered-set<4.2.0,>=4.0.2\n", - " Using cached ordered_set-4.1.0-py3-none-any.whl (7.6 kB)\n", - "Collecting starlette\n", - " Using cached starlette-0.36.3-py3-none-any.whl (71 kB)\n", - "Collecting blessed>=1.17.1\n", - " Using cached blessed-1.20.0-py2.py3-none-any.whl (58 kB)\n", - "Requirement already satisfied: nvidia-ml-py>=11.450.129 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from gpustat>=1.0.0->ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (12.535.133)\n", - "Collecting readchar>=3.0.6\n", - " Using cached readchar-4.0.6-py3-none-any.whl (8.5 kB)\n", - "Collecting editor>=1.6.0\n", - " Using cached editor-1.6.6-py3-none-any.whl (4.0 kB)\n", - "Requirement already satisfied: pyjwt in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from lightning-cloud>=0.5.38->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (2.8.0)\n", - "Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from numba->mlforecast<0.10.1,>=0.10.0->autogluon.timeseries[all]==1.0.0->autogluon) (0.42.0)\n", - "Collecting markdown-it-py>=2.2.0\n", - " Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from rich<15.0,>=12.3.0->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (2.17.2)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from spacy<4->fastai<2.8,>=2.3.1->autogluon.tabular[all]==1.0.0->autogluon) (2.0.8)\n", - "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from spacy<4->fastai<2.8,>=2.3.1->autogluon.tabular[all]==1.0.0->autogluon) (1.0.5)\n", - "Collecting murmurhash<1.1.0,>=0.28.0\n", - " Using cached 
murmurhash-1.0.10-cp310-cp310-win_amd64.whl (25 kB)\n", - "Collecting thinc<8.3.0,>=8.2.2\n", - " Using cached thinc-8.2.3-cp310-cp310-win_amd64.whl (1.5 MB)\n", - "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from spacy<4->fastai<2.8,>=2.3.1->autogluon.tabular[all]==1.0.0->autogluon) (1.1.2)\n", - "Collecting preshed<3.1.0,>=3.0.2\n", - " Using cached preshed-3.0.9-cp310-cp310-win_amd64.whl (122 kB)\n", - "Collecting typer<0.10.0,>=0.3.0\n", - " Using cached typer-0.9.4-py3-none-any.whl (45 kB)\n", - "Collecting langcodes<4.0.0,>=3.2.0\n", - " Using cached langcodes-3.3.0-py3-none-any.whl (181 kB)\n", - "Collecting catalogue<2.1.0,>=2.0.6\n", - " Using cached catalogue-2.0.10-py3-none-any.whl (17 kB)\n", - "Collecting srsly<3.0.0,>=2.4.3\n", - " Using cached srsly-2.4.8-cp310-cp310-win_amd64.whl (481 kB)\n", - "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from spacy<4->fastai<2.8,>=2.3.1->autogluon.tabular[all]==1.0.0->autogluon) (3.0.12)\n", - "Collecting weasel<0.4.0,>=0.1.0\n", - " Using cached weasel-0.3.4-py3-none-any.whl (50 kB)\n", - "Collecting anyio<5,>=3.4.0\n", - " Using cached anyio-4.3.0-py3-none-any.whl (85 kB)\n", - "Collecting itsdangerous<3.0.0,>=2.0.1\n", - " Using cached itsdangerous-2.1.2-py3-none-any.whl (15 kB)\n", - "Collecting h11>=0.8\n", - " Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", - "Requirement already satisfied: distlib<1,>=0.3.6 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from virtualenv<20.21.1,>=20.0.24->ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (0.3.8)\n", - "Collecting platformdirs<4,>=2.4\n", - " Using cached platformdirs-3.11.0-py3-none-any.whl (17 kB)\n", - "Collecting multiprocess\n", - " Using cached multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", - " Using cached multiprocess-0.70.14-py310-none-any.whl (134 kB)\n", - "Collecting google-api-core<3.0.0,>=1.0.0\n", - " Using cached google_api_core-2.18.0-py3-none-any.whl (138 kB)\n", - "Requirement already satisfied: opencensus-context>=0.1.3 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from opencensus->ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (0.1.3)\n", - "Requirement already satisfied: pycryptodome in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from opendatalab->openmim<0.4.0,>=0.3.7->autogluon.multimodal==1.0.0->autogluon) (3.20.0)\n", - "Requirement already satisfied: pywin32 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from opendatalab->openmim<0.4.0,>=0.3.7->autogluon.multimodal==1.0.0->autogluon) (306)\n", - "Collecting openxlab\n", - " Using cached openxlab-0.0.37-py3-none-any.whl (302 kB)\n", - "Requirement already satisfied: tenacity>=6.2.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from plotly->catboost<1.3,>=1.1->autogluon.tabular[all]==1.0.0->autogluon) (8.2.3)\n", - "Requirement already satisfied: mpmath>=0.19 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from sympy->torch<2.1,>=2.0->autogluon.multimodal==1.0.0->autogluon) (1.3.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in 
c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from anyio<5,>=3.4.0->starlette->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (1.2.0)\n", - "Requirement already satisfied: sniffio>=1.1 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from anyio<5,>=3.4.0->starlette->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (1.3.1)\n", - "Collecting jinxed>=1.1.0\n", - " Using cached jinxed-1.2.1-py2.py3-none-any.whl (33 kB)\n", - "Requirement already satisfied: wcwidth>=0.1.4 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from blessed>=1.17.1->gpustat>=1.0.0->ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (0.2.13)\n", - "Collecting runs\n", - " Using cached runs-1.2.2-py3-none-any.whl (7.0 kB)\n", - "Requirement already satisfied: xmod in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from editor>=1.6.0->inquirer<5.0,>=2.10.0->lightning<2.1,>=2.0.0->autogluon.multimodal==1.0.0->autogluon) (1.8.1)\n", - "Collecting google-auth<3.0.dev0,>=2.14.1\n", - " Using cached google_auth-2.29.0-py2.py3-none-any.whl (189 kB)\n", - "Collecting proto-plus<2.0.0dev,>=1.22.3\n", - " Using cached proto_plus-1.23.0-py3-none-any.whl (48 kB)\n", - "Collecting googleapis-common-protos<2.0.dev0,>=1.56.2\n", - " Using cached googleapis_common_protos-1.63.0-py2.py3-none-any.whl (229 kB)\n", - "Collecting protobuf!=3.19.5,>=3.15.3\n", - " Using cached protobuf-4.25.3-cp310-abi3-win_amd64.whl (413 kB)\n", - "Collecting mdurl~=0.1\n", - " Using cached mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", - "Collecting confection<1.0.0,>=0.0.1\n", - " Using cached confection-0.1.4-py3-none-any.whl (35 kB)\n", - "Collecting blis<0.8.0,>=0.7.8\n", - " Using cached blis-0.7.11-cp310-cp310-win_amd64.whl (6.6 MB)\n", - "Collecting cloudpathlib<0.17.0,>=0.7.0\n", - " Using cached cloudpathlib-0.16.0-py3-none-any.whl (45 kB)\n", - "Collecting rich<15.0,>=12.3.0\n", - " Using cached rich-13.4.2-py3-none-any.whl (239 kB)\n", - "Collecting oss2~=2.17.0\n", - " Using cached oss2-2.17.0.tar.gz (259 kB)\n", - " Preparing metadata (setup.py): started\n", - " Preparing metadata (setup.py): finished with status 'done'\n", - "Collecting requests\n", - " Using cached requests-2.28.2-py3-none-any.whl (62 kB)\n", - "Collecting requests[socks]\n", - " Using cached requests-2.30.0-py3-none-any.whl (62 kB)\n", - " Using cached requests-2.29.0-py3-none-any.whl (62 kB)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from requests->autogluon.core[all]==1.0.0->autogluon) (1.7.1)\n", - "Collecting cachetools<6.0,>=2.0.0\n", - " Using cached cachetools-5.3.3-py3-none-any.whl (9.3 kB)\n", - "Collecting pyasn1-modules>=0.2.1\n", - " Using cached pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n", - "Collecting rsa<5,>=3.1.4\n", - " Using cached rsa-4.9-py3-none-any.whl (34 kB)\n", - "Requirement already satisfied: ansicon in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from jinxed>=1.1.0->blessed>=1.17.1->gpustat>=1.0.0->ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (1.89.0)\n", - "Requirement already satisfied: crcmod>=1.7 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from 
oss2~=2.17.0->openxlab->opendatalab->openmim<0.4.0,>=0.3.7->autogluon.multimodal==1.0.0->autogluon) (1.7)\n", - "Collecting aliyun-python-sdk-kms>=2.4.1\n", - " Using cached aliyun_python_sdk_kms-2.16.2-py2.py3-none-any.whl (94 kB)\n", - "Collecting aliyun-python-sdk-core>=2.13.12\n", - " Using cached aliyun-python-sdk-core-2.15.0.tar.gz (443 kB)\n", - " Preparing metadata (setup.py): started\n", - " Preparing metadata (setup.py): finished with status 'done'\n", - "Collecting jmespath<2.0.0,>=0.7.1\n", - " Using cached jmespath-0.10.0-py2.py3-none-any.whl (24 kB)\n", - "Collecting cryptography>=2.6.0\n", - " Using cached cryptography-42.0.5-cp39-abi3-win_amd64.whl (2.9 MB)\n", - "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]<2.7,>=2.6.3->autogluon.core[all]==1.0.0->autogluon) (0.5.1)\n", - "Collecting cffi>=1.12\n", - " Using cached cffi-1.16.0-cp310-cp310-win_amd64.whl (181 kB)\n", - "Requirement already satisfied: pycparser in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from cffi>=1.12->cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2~=2.17.0->openxlab->opendatalab->openmim<0.4.0,>=0.3.7->autogluon.multimodal==1.0.0->autogluon) (2.21)\n", - "Using legacy 'setup.py install' for seqeval, since package 'wheel' is not installed.\n", - "Using legacy 'setup.py install' for oss2, since package 'wheel' is not installed.\n", - "Using legacy 'setup.py install' for aliyun-python-sdk-core, since package 'wheel' is not installed.\n", - "Installing collected packages: runs, rsa, requests, readchar, pyasn1-modules, protobuf, prometheus-client, plotly, platformdirs, orjson, ordered-set, omegaconf, nptyping, murmurhash, multidict, msgpack, mdurl, markdown, lightning-utilities, lazy_loader, langcodes, jsonschema, jmespath, jinxed, itsdangerous, imageio, h11, grpcio, graphviz, future, fsspec, frozenlist, filelock, fastprogress, fastcore, dill, defusedxml, colorful, cloudpickle, cloudpathlib, click, cffi, catalogue, cachetools, blis, beautifulsoup4, backoff, async-timeout, anyio, absl-py, yarl, xgboost, window-ops, virtualenv, uvicorn, typer, torch, tensorboardX, tensorboard, starlette, srsly, scikit-image, responses, proto-plus, preshed, pandas, nltk, multiprocess, model-index, markdown-it-py, lightgbm, hyperopt, huggingface-hub, googleapis-common-protos, google-auth, fastdownload, editor, deepdiff, dateutils, cryptography, croniter, botocore, blessed, arrow, aiosignal, utilsforecast, transformers, torchvision, torchmetrics, starsessions, seqeval, s3transfer, rich, ray, pytorch-metric-learning, inquirer, gpustat, google-api-core, gluonts, gdown, fastapi, confection, catboost, aliyun-python-sdk-core, aiohttp, accelerate, weasel, timm, thinc, statsforecast, opencensus, nlpaug, mlforecast, boto3, aliyun-python-sdk-kms, aiohttp-cors, spacy, pytorch-lightning, oss2, lightning-cloud, datasets, autogluon.common, openxlab, lightning, fastai, evaluate, autogluon.features, autogluon.core, opendatalab, autogluon.tabular, openmim, autogluon.timeseries, autogluon.multimodal, autogluon\n", - " Attempting uninstall: requests\n", - " Found existing installation: requests 2.31.0\n", - " Uninstalling requests-2.31.0:\n", - " Successfully uninstalled requests-2.31.0\n", - " Attempting uninstall: platformdirs\n", - " Found existing installation: 
platformdirs 4.2.0\n", - " Can't uninstall 'platformdirs'. No files were found to uninstall.\n", - " Attempting uninstall: pandas\n", - " Found existing installation: pandas 2.2.1\n", - " Uninstalling pandas-2.2.1:\n", - " Successfully uninstalled pandas-2.2.1\n", - " Running setup.py install for seqeval: started\n", - " Running setup.py install for seqeval: finished with status 'done'\n", - " Running setup.py install for aliyun-python-sdk-core: started\n", - " Running setup.py install for aliyun-python-sdk-core: finished with status 'done'\n", - " Running setup.py install for oss2: started\n", - " Running setup.py install for oss2: finished with status 'done'\n", - "Successfully installed absl-py-2.1.0 accelerate-0.21.0 aiohttp-3.9.3 aiohttp-cors-0.7.0 aiosignal-1.3.1 aliyun-python-sdk-core-2.15.0 aliyun-python-sdk-kms-2.16.2 anyio-4.3.0 arrow-1.3.0 async-timeout-4.0.3 autogluon-1.0.0 autogluon.common-1.0.0 autogluon.core-1.0.0 autogluon.features-1.0.0 autogluon.multimodal-1.0.0 autogluon.tabular-1.0.0 autogluon.timeseries-1.0.0 backoff-2.2.1 beautifulsoup4-4.12.3 blessed-1.20.0 blis-0.7.11 boto3-1.34.70 botocore-1.34.70 cachetools-5.3.3 catalogue-2.0.10 catboost-1.2.3 cffi-1.16.0 click-8.1.7 cloudpathlib-0.16.0 cloudpickle-3.0.0 colorful-0.5.6 confection-0.1.4 croniter-1.4.1 cryptography-42.0.5 datasets-2.10.1 dateutils-0.6.12 deepdiff-6.7.1 defusedxml-0.7.1 dill-0.3.6 editor-1.6.6 evaluate-0.4.1 fastai-2.7.14 fastapi-0.110.0 fastcore-1.5.29 fastdownload-0.0.7 fastprogress-1.0.3 filelock-3.13.3 frozenlist-1.4.1 fsspec-2024.3.1 future-1.0.0 gdown-5.1.0 gluonts-0.14.4 google-api-core-2.18.0 google-auth-2.29.0 googleapis-common-protos-1.63.0 gpustat-1.1.1 graphviz-0.20.3 grpcio-1.62.1 h11-0.14.0 huggingface-hub-0.22.0 hyperopt-0.2.7 imageio-2.34.0 inquirer-3.2.4 itsdangerous-2.1.2 jinxed-1.2.1 jmespath-0.10.0 jsonschema-4.17.3 langcodes-3.3.0 lazy_loader-0.3 lightgbm-4.1.0 lightning-2.0.9.post0 lightning-cloud-0.5.65 lightning-utilities-0.11.1 markdown-3.6 markdown-it-py-3.0.0 mdurl-0.1.2 mlforecast-0.10.0 model-index-0.1.11 msgpack-1.0.8 multidict-6.0.5 multiprocess-0.70.14 murmurhash-1.0.10 nlpaug-1.1.11 nltk-3.8.1 nptyping-2.4.1 omegaconf-2.2.3 opencensus-0.11.4 opendatalab-0.0.10 openmim-0.3.9 openxlab-0.0.37 ordered-set-4.1.0 orjson-3.9.15 oss2-2.17.0 pandas-2.1.4 platformdirs-3.11.0 plotly-5.20.0 preshed-3.0.9 prometheus-client-0.20.0 proto-plus-1.23.0 protobuf-4.25.3 pyasn1-modules-0.3.0 pytorch-lightning-2.0.9.post0 pytorch-metric-learning-1.7.3 ray-2.6.3 readchar-4.0.6 requests-2.28.2 responses-0.18.0 rich-13.4.2 rsa-4.9 runs-1.2.2 s3transfer-0.10.1 scikit-image-0.20.0 seqeval-1.2.2 spacy-3.7.4 srsly-2.4.8 starlette-0.36.3 starsessions-1.3.0 statsforecast-1.4.0 tensorboard-2.16.2 tensorboardX-2.6.2.2 thinc-8.2.3 timm-0.9.16 torch-2.0.1 torchmetrics-1.1.2 torchvision-0.15.2 transformers-4.31.0 typer-0.9.4 utilsforecast-0.0.10 uvicorn-0.29.0 virtualenv-20.21.0 weasel-0.3.4 window-ops-0.0.15 xgboost-2.0.3 yarl-1.9.4\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Error parsing requirements for platformdirs: [Errno 2] No such file or directory: 'c:\\\\users\\\\sverr\\\\onedrive\\\\desktop\\\\ai\\\\machine-learning-structure\\\\venv\\\\lib\\\\site-packages\\\\platformdirs-4.2.0.dist-info\\\\METADATA'\n", - " WARNING: No metadata found in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - " WARNING: No metadata found in 
c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - " WARNING: No metadata found in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - " WARNING: No metadata found in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - " WARNING: No metadata found in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - " WARNING: No metadata found in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - " WARNING: No metadata found in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\n", - "ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "ydata-profiling 4.7.0 requires pydantic>=2, but you have pydantic 1.10.14 which is incompatible.\n", - "WARNING: You are using pip version 22.0.4; however, version 24.0 is available.\n", - "You should consider upgrading via the 'c:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\Scripts\\python.exe -m pip install --upgrade pip' command.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: scikit-learn in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (1.4.1.post1)\n", - "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-learn) (1.3.2)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-learn) (3.4.0)\n", - "Requirement already satisfied: scipy>=1.6.0 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-learn) (1.11.4)\n", - "Requirement already satisfied: numpy<2.0,>=1.19.5 in c:\\users\\sverr\\onedrive\\desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages (from scikit-learn) (1.26.4)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: You are using pip version 22.0.4; however, version 24.0 is available.\n", - "You should consider upgrading via the 'c:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\Scripts\\python.exe -m pip install --upgrade pip' command.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install autogluon\n", "%pip install scikit-learn" @@ -536,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -545,18 +57,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "%autoreload \n", "\n", @@ -578,24 +81,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "InvalidParameterError", - "evalue": "The 'test_size' parameter of train_test_split must be a float in the range (0.0, 1.0), an int in the range [1, inf) or None. Got 1.0 instead.", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mInvalidParameterError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[12], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m x_train, _, x_test, y_train, _, y_test \u001b[38;5;241m=\u001b[39m \u001b[43mprepare_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalidation_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m target_feature_name \u001b[38;5;129;01min\u001b[39;00m TARGET_FEATURES:\n\u001b[0;32m 3\u001b[0m x_train[target_feature_name] \u001b[38;5;241m=\u001b[39m y_train\n", - "File \u001b[1;32mc:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\notebooks\\..\\src\\features\\ml_service.py:64\u001b[0m, in \u001b[0;36mprepare_data\u001b[1;34m(validation_size, test_size, loader)\u001b[0m\n\u001b[0;32m 56\u001b[0m x_train, x_temp, y_train, y_temp \u001b[38;5;241m=\u001b[39m train_test_split(\n\u001b[0;32m 57\u001b[0m engineered_features,\n\u001b[0;32m 58\u001b[0m y,\n\u001b[0;32m 59\u001b[0m test_size\u001b[38;5;241m=\u001b[39m(validation_size \u001b[38;5;241m+\u001b[39m test_size),\n\u001b[0;32m 60\u001b[0m random_state\u001b[38;5;241m=\u001b[39mRANDOM_STATE,\n\u001b[0;32m 61\u001b[0m )\n\u001b[0;32m 63\u001b[0m \u001b[38;5;66;03m# Split the temporary set into validation and test sets\u001b[39;00m\n\u001b[1;32m---> 64\u001b[0m x_validate, x_test, y_validate, y_test \u001b[38;5;241m=\u001b[39m \u001b[43mtrain_test_split\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[43mx_temp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_temp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtemp_test_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mRANDOM_STATE\u001b[49m\n\u001b[0;32m 66\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x_train, x_validate, x_test, y_train, y_validate, y_test\n", - "File \u001b[1;32mc:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\\sklearn\\utils\\_param_validation.py:203\u001b[0m, in \u001b[0;36mvalidate_params..decorator..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 200\u001b[0m to_ignore \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mself\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcls\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 201\u001b[0m params \u001b[38;5;241m=\u001b[39m {k: v 
\u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m params\u001b[38;5;241m.\u001b[39marguments\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m to_ignore}\n\u001b[1;32m--> 203\u001b[0m \u001b[43mvalidate_parameter_constraints\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mparameter_constraints\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcaller_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__qualname__\u001b[39;49m\n\u001b[0;32m 205\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 209\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 210\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 211\u001b[0m )\n\u001b[0;32m 212\u001b[0m ):\n", - "File \u001b[1;32mc:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\\sklearn\\utils\\_param_validation.py:95\u001b[0m, in \u001b[0;36mvalidate_parameter_constraints\u001b[1;34m(parameter_constraints, params, caller_name)\u001b[0m\n\u001b[0;32m 89\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 90\u001b[0m constraints_str \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 91\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;28mstr\u001b[39m(c)\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39mconstraints[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 92\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstraints[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 93\u001b[0m )\n\u001b[1;32m---> 95\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidParameterError(\n\u001b[0;32m 96\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m parameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcaller_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 97\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstraints_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam_val\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 98\u001b[0m )\n", - "\u001b[1;31mInvalidParameterError\u001b[0m: The 'test_size' parameter of train_test_split must be a float in the range (0.0, 1.0), an int in the range [1, inf) or None. 
Got 1.0 instead."
-    ]
-   }
-  ],
+   "outputs": [],
    "source": [
     "x_train, _, x_test, y_train, _, y_test = prepare_data(validation_size=0, test_size=0.1)\n",
     "for target_feature_name in TARGET_FEATURES:\n",
     "    x_train[target_feature_name] = y_train\n",
@@ -608,199 +96,56 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Train model"
+    "## Train model\n",
+    "\n",
+    "AutoGluon does not require any hyperparameters to be set. It will automatically select the best model and hyperparameters based on the data.\n",
+    "Nor does it need separate tuning data: AutoGluon will automatically split the data into training and validation sets, and it does this split intelligently to fit its needs.\n",
+    "\n",
+    "**Evaluation metrics:**\n",
+    "* 'f1' (for binary classification)\n",
+    "* 'roc_auc' (for binary classification)\n",
+    "* 'log_loss' (for classification)\n",
+    "* 'mean_absolute_error' (for regression)\n",
+    "* 'median_absolute_error' (for regression)\n",
+    "* You can also define your own custom metric function; see the examples in the folder autogluon/core/metrics/\n",
+    "See the AutoGluon documentation for more details: [AutoGluon Documentation](https://auto.gluon.ai/scoredebugweight/tutorials/tabular_prediction/tabular-quickstart.html)\n",
+    "\n",
+    "One should also look at the `num_bag_folds`, `num_bag_sets`, and `num_stack_levels` parameters, which can help to improve the model's performance.\n",
+    "\n",
+    "To see all possible parameters for the `.fit()` method, see the [AutoGluon `.fit()` documentation](https://auto.gluon.ai/scoredebugweight/api/autogluon.predictor.html#autogluon.tabular.TabularPredictor.fit)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "No path specified. Models will be saved in: \"AutogluonModels\\ag-20240325_224410\"\n",
-      "No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.\n",
-      "\tRecommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):\n",
-      "\tpresets='best_quality' : Maximize accuracy. Default time_limit=3600.\n",
-      "\tpresets='high_quality' : Strong accuracy with fast inference speed. Default time_limit=3600.\n",
-      "\tpresets='good_quality' : Good accuracy with very fast inference speed. 
Default time_limit=3600.\n", - "\tpresets='medium_quality' : Fast training time, ideal for initial prototyping.\n", - "Warning: Training may take a very long time because `time_limit` was not specified and `train_data` is large (320000 samples, 430.31 MB).\n", - "\tConsider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.\n", - "Beginning AutoGluon training ...\n", - "AutoGluon will save models to \"AutogluonModels\\ag-20240325_224410\"\n", - "=================== System Info ===================\n", - "AutoGluon Version: 1.0.0\n", - "Python Version: 3.10.5\n", - "Operating System: Windows\n", - "Platform Machine: AMD64\n", - "Platform Version: 10.0.19045\n", - "CPU Count: 12\n", - "Memory Avail: 2.61 GB / 15.86 GB (16.4%)\n", - "Disk Space Avail: 51.90 GB / 475.82 GB (10.9%)\n", - "===================================================\n", - "Train Data Rows: 320000\n", - "Train Data Columns: 24\n", - "Label Column: IsFraud\n", - "AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).\n", - "\t2 unique label values: [0, 1]\n", - "\tIf 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])\n", - "Problem Type: binary\n", - "Preprocessing data ...\n", - "Selected class <--> label mapping: class 1 = 1, class 0 = 0\n", - "Using Feature Generators to preprocess the data ...\n", - "Fitting AutoMLPipelineFeatureGenerator...\n", - "\tAvailable Memory: 3006.59 MB\n", - "\tTrain Data (Original) Memory Usage: 405.49 MB (13.5% of available memory)\n", - "\tWarning: Data size prior to feature transformation consumes 13.5% of available memory. Consider increasing memory or subsampling the data to avoid instability.\n", - "\tInferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.\n", - "\tStage 1 Generators:\n", - "\t\tFitting AsTypeFeatureGenerator...\n", - "\t\t\tNote: Converting 1 features to boolean dtype as they only contain 2 unique values.\n", - "\tStage 2 Generators:\n", - "\t\tFitting FillNaFeatureGenerator...\n", - "\tStage 3 Generators:\n", - "\t\tFitting IdentityFeatureGenerator...\n", - "\t\tFitting CategoryFeatureGenerator...\n", - "\t\t\tFitting CategoryMemoryMinimizeFeatureGenerator...\n", - "\t\tFitting DatetimeFeatureGenerator...\n", - "\t\tFitting TextSpecialFeatureGenerator...\n", - "\t\t\tFitting BinnedFeatureGenerator...\n", - "\t\t\tFitting DropDuplicatesFeatureGenerator...\n", - "\t\tFitting TextNgramFeatureGenerator...\n", - "\t\t\tFitting CountVectorizer for text features: ['Transaction_Text', 'Transaction_Description_1', 'Transaction_Description_2', 'Opposite_party_Adress']\n", - "\t\t\tCountVectorizer fit with vocabulary size = 6831\n", - "\t\tWarning: Due to memory constraints, ngram feature count is being reduced. 
Allocate more memory to maximize model quality.\n", - "\t\tReducing Vectorizer vocab size from 6831 to 211 to avoid OOM error\n", - "\tStage 4 Generators:\n", - "\t\tFitting DropUniqueFeatureGenerator...\n", - "\tStage 5 Generators:\n", - "\t\tFitting DropDuplicatesFeatureGenerator...\n", - "\tUseless Original Features (Count: 3): ['Currency', 'CDB_Location_Country_x', 'CDB_Location_CountryCode']\n", - "\t\tThese features carry no predictive signal and should be manually investigated.\n", - "\t\tThis is typically a feature which has the same value for all rows.\n", - "\t\tThese features do not need to be present at inference time.\n", - "\tUnused Original Features (Count: 1): ['TransactionID']\n", - "\t\tThese features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.\n", - "\t\tFeatures can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.\n", - "\t\tThese features do not need to be present at inference time.\n", - "\t\t('object', []) : 1 | ['TransactionID']\n", - "\tTypes of features in original data (raw dtype, special dtypes):\n", - "\t\t('float', []) : 3 | ['Origin_Amount', 'Amount', 'Opposite_party_ID']\n", - "\t\t('int', []) : 1 | ['CustomerID']\n", - "\t\t('object', []) : 9 | ['Origin_Currency', 'Transaction_Type', 'Merchant_Code', 'Deposit_Withdrawal', 'Transaction_Description_0', ...]\n", - "\t\t('object', ['datetime_as_object']) : 3 | ['Origin_Date', 'System_Date', 'Completed_Date']\n", - "\t\t('object', ['text']) : 4 | ['Transaction_Text', 'Transaction_Description_1', 'Transaction_Description_2', 'Opposite_party_Adress']\n", - "\tTypes of features in processed data (raw dtype, special dtypes):\n", - "\t\t('category', []) : 8 | ['Origin_Currency', 'Transaction_Type', 'Merchant_Code', 'Transaction_Description_0', 'Transaction_Location', ...]\n", - "\t\t('category', ['text_as_category']) : 4 | ['Transaction_Text', 'Transaction_Description_1', 'Transaction_Description_2', 'Opposite_party_Adress']\n", - "\t\t('float', []) : 3 | ['Origin_Amount', 'Amount', 'Opposite_party_ID']\n", - "\t\t('int', []) : 1 | ['CustomerID']\n", - "\t\t('int', ['binned', 'text_special']) : 33 | ['Transaction_Text.char_count', 'Transaction_Text.word_count', 'Transaction_Text.capital_ratio', 'Transaction_Text.lower_ratio', 'Transaction_Text.special_ratio', ...]\n", - "\t\t('int', ['bool']) : 1 | ['Deposit_Withdrawal']\n", - "\t\t('int', ['datetime_as_int']) : 12 | ['Origin_Date', 'Origin_Date.month', 'Origin_Date.day', 'Origin_Date.dayofweek', 'System_Date', ...]\n", - "\t\t('int', ['text_ngram']) : 210 | ['__nlp__.aa', '__nlp__.activity', '__nlp__.ae', '__nlp__.after', '__nlp__.against', ...]\n", - "\t78.1s = Fit runtime\n", - "\t20 features in original data used to generate 272 features in processed data.\n", - "\tTrain Data (Processed) Memory Usage: 183.11 MB (6.0% of available memory)\n", - "Data preprocessing and feature engineering runtime = 78.86s ...\n", - "AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'\n", - "\tTo change this, specify the eval_metric parameter of Predictor()\n", - "Automatically generating train/validation split with holdout_frac=0.01, Train Rows: 316800, Val Rows: 3200\n", - "User-specified model hyperparameters to be fit:\n", - "{\n", - "\t'NN_TORCH': {},\n", - "\t'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],\n", - "\t'CAT': 
{},\n", - "\t'XGB': {},\n", - "\t'FASTAI': {},\n", - "\t'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],\n", - "\t'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],\n", - "\t'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],\n", - "}\n", - "Fitting 13 L1 models ...\n", - "Fitting model: KNeighborsUnif ...\n", - "\tWarning: Not enough memory to safely train model. Estimated to require 1.241 GB out of 2.354 GB available memory (52.721%)... (20.000% of avail memory is the max safe size)\n", - "\tTo force training the model, specify the model hyperparameter \"ag.max_memory_usage_ratio\" to a larger value (currently 1.0, set to >=2.69 to avoid the error)\n", - "\t\tTo set the same value for all models, do the following when calling predictor.fit: `predictor.fit(..., ag_args_fit={\"ag.max_memory_usage_ratio\": VALUE})`\n", - "\t\tSetting \"ag.max_memory_usage_ratio\" to values above 1 may result in out-of-memory errors. You may consider using a machine with more memory as a safer alternative.\n", - "\tNot enough memory to train KNeighborsUnif... Skipping this model.\n", - "Fitting model: KNeighborsDist ...\n", - "\tWarning: Not enough memory to safely train model. Estimated to require 1.241 GB out of 2.343 GB available memory (52.949%)... (20.000% of avail memory is the max safe size)\n", - "\tTo force training the model, specify the model hyperparameter \"ag.max_memory_usage_ratio\" to a larger value (currently 1.0, set to >=2.70 to avoid the error)\n", - "\t\tTo set the same value for all models, do the following when calling predictor.fit: `predictor.fit(..., ag_args_fit={\"ag.max_memory_usage_ratio\": VALUE})`\n", - "\t\tSetting \"ag.max_memory_usage_ratio\" to values above 1 may result in out-of-memory errors. You may consider using a machine with more memory as a safer alternative.\n", - "\tNot enough memory to train KNeighborsDist... Skipping this model.\n", - "Fitting model: LightGBMXT ...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t3.51s\t = Training runtime\n", - "\t0.02s\t = Validation runtime\n", - "Fitting model: LightGBM ...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t2.97s\t = Training runtime\n", - "\t0.02s\t = Validation runtime\n", - "Fitting model: RandomForestGini ...\n", - "\tWarning: Reducing model 'n_estimators' from 300 -> 142 due to low memory. Expected memory usage reduced from 31.51% -> 15.0% of available memory...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t69.34s\t = Training runtime\n", - "\t0.06s\t = Validation runtime\n", - "Fitting model: RandomForestEntr ...\n", - "\tWarning: Reducing model 'n_estimators' from 300 -> 141 due to low memory. 
Expected memory usage reduced from 31.85% -> 15.0% of available memory...\n", - "\t0.9325\t = Validation score (accuracy)\n", - "\t69.97s\t = Training runtime\n", - "\t0.06s\t = Validation runtime\n", - "Fitting model: CatBoost ...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t9.2s\t = Training runtime\n", - "\t0.11s\t = Validation runtime\n", - "Fitting model: ExtraTreesGini ...\n", - "\tWarning: Reducing model 'n_estimators' from 300 -> 131 due to low memory. Expected memory usage reduced from 34.2% -> 15.0% of available memory...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t66.71s\t = Training runtime\n", - "\t0.06s\t = Validation runtime\n", - "Fitting model: ExtraTreesEntr ...\n", - "\tWarning: Reducing model 'n_estimators' from 300 -> 134 due to low memory. Expected memory usage reduced from 33.35% -> 15.0% of available memory...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t83.0s\t = Training runtime\n", - "\t0.06s\t = Validation runtime\n", - "Fitting model: NeuralNetFastAI ...\n", - "c:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\\autogluon\\tabular\\models\\fastainn\\tabular_nn_fastai.py:200: FutureWarning: The 'downcast' keyword in fillna is deprecated and will be removed in a future version. Use res.infer_objects(copy=False) to infer non-object dtype, or pd.to_numeric with the 'downcast' keyword to downcast numeric results.\n", - " df = df.fillna(column_fills, inplace=False, downcast=False)\n", - "c:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\\autogluon\\tabular\\models\\fastainn\\tabular_nn_fastai.py:200: FutureWarning: The 'downcast' keyword in fillna is deprecated and will be removed in a future version. Use res.infer_objects(copy=False) to infer non-object dtype, or pd.to_numeric with the 'downcast' keyword to downcast numeric results.\n", - " df = df.fillna(column_fills, inplace=False, downcast=False)\n", - "No improvement since epoch 2: early stopping\n", - "c:\\Users\\sverr\\OneDrive\\Desktop\\ai\\machine-learning-structure\\venv\\lib\\site-packages\\autogluon\\tabular\\models\\fastainn\\tabular_nn_fastai.py:200: FutureWarning: The 'downcast' keyword in fillna is deprecated and will be removed in a future version. Use res.infer_objects(copy=False) to infer non-object dtype, or pd.to_numeric with the 'downcast' keyword to downcast numeric results.\n", - " df = df.fillna(column_fills, inplace=False, downcast=False)\n", - "\t0.9322\t = Validation score (accuracy)\n", - "\t418.59s\t = Training runtime\n", - "\t0.05s\t = Validation runtime\n", - "Fitting model: XGBoost ...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t6.28s\t = Training runtime\n", - "\t0.07s\t = Validation runtime\n", - "Fitting model: NeuralNetTorch ...\n", - "\t0.9322\t = Validation score (accuracy)\n", - "\t418.79s\t = Training runtime\n", - "\t0.05s\t = Validation runtime\n", - "Fitting model: LightGBMLarge ...\n", - "\t0.9316\t = Validation score (accuracy)\n", - "\t3.43s\t = Training runtime\n", - "\t0.02s\t = Validation runtime\n", - "Fitting model: WeightedEnsemble_L2 ...\n", - "\tEnsemble Weights: {'RandomForestEntr': 1.0}\n", - "\t0.9325\t = Validation score (accuracy)\n", - "\t0.81s\t = Training runtime\n", - "\t0.01s\t = Validation runtime\n", - "AutoGluon training complete, total runtime = 1242.33s ... Best model: \"WeightedEnsemble_L2\"\n", - "TabularPredictor saved. 
To load, use: predictor = TabularPredictor.load(\"AutogluonModels\\ag-20240325_224410\")\n"
- ]
- }
- ],
+ "outputs": [],
 "source": [
 "# Initialize the AutoGluon TabularPredictor\n",
- "predictor = TabularPredictor(label=target_feature_name).fit(train_data=data)"
+ "time_limit = 24 * 60 * 60  # Set this to the longest time you are willing to wait (in seconds)\n",
+ "metric = 'roc_auc'\n",
+ "predictor = TabularPredictor(label=target_feature_name, eval_metric=metric).fit(data, time_limit=time_limit, presets='best_quality')"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
+ "### Loading a pre-trained model\n",
+ "AutoGluon provides a simple way to load a previously trained model from disk."
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
+ "if 'predictor' not in globals() or predictor is None:\n",
+ "    # TODO Correct the model path to the one that was saved during training\n",
+ "    model_path = \"AutogluonModels/ag-20240326_133920/\"\n",
+ "    predictor = TabularPredictor.load(model_path)\n",
+ "predictor.fit_summary()"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
@@ -812,34 +157,16 @@
 },
 {
 "cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
 "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Validation Accuracy: 0.929125\n",
- "Test Accuracy: 0.930575\n",
- "Test Classification Report:\n",
- " precision recall f1-score support\n",
- "\n",
- " 0 0.93 1.00 0.96 37217\n",
- " 1 0.69 0.00 0.01 2783\n",
- "\n",
- " accuracy 0.93 40000\n",
- " macro avg 0.81 0.50 0.49 40000\n",
- "weighted avg 0.91 0.93 0.90 40000\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
 "source": [
 "# Evaluate on the test set\n",
 "y_test_pred = predictor.predict(x_test)\n",
 "test_accuracy = accuracy_score(y_test, y_test_pred)\n",
 "print(\"Test Accuracy: \", test_accuracy)\n",
- "print(\"Test Classification Report:\\n\", classification_report(y_test, y_test_pred))"
+ "print(\"Test Classification Report:\\n\", classification_report(y_test, y_test_pred))\n",
+ "# predictor.leaderboard(x_test, silent=True)\n"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
@@ -851,7 +178,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -861,7 +188,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [