From cd442e5d43077a0080e68cec92ee6d385f6bbfcc Mon Sep 17 00:00:00 2001 From: castolpe <8385345+castolpe@users.noreply.github.com> Date: Fri, 8 Nov 2024 09:41:07 +0100 Subject: [PATCH] Update README.md (file structure) --- README.md | 89 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 929f3e6..8317d0a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This project was created as part of the MLOps boot camp. The project was used to **Developer Team** - Asma Heena Khalil - Ringo Schwabe -- Carolin Stolpe +- Carolin Stolpe (@castolpe) Business Objectives ------------------ @@ -19,41 +19,58 @@ Architecture File structure ------------ - - ├── LICENSE - ├── README.md <- The top-level README for developers using this project. - ├── data - │   ├── interim <- Intermediate data that has been transformed. - │   ├── processed <- The final, canonical data sets for modeling. - │   └── raw <- The original, immutable data dump. - │ - ├── logs <- Logs from training and predicting - │ - ├── models <- Trained and serialized models, model predictions, or model summaries, tracked by DVC - │ - ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. - │ generated with `pip freeze > requirements.txt` - │ - ├── src <- Source code for use in this project. 
- │   ├── __init__.py <- Makes src a Python module - │ │ - │   ├── data <- Scripts to download or generate data - │   │   ├── check_structure.py - │   │   ├── import_raw_data.py - │   │   └── make_dataset.py - │ │ - │   ├── features <- Scripts to turn raw data into features for modeling - │   │   └── build_features.py - │ │ - │   ├── models <- Scripts to train models and then use trained models to make - │ │ │ predictions - │   │   ├── predict_model.py - │   │   └── train_model.py - │ │ - │   ├── visualization <- Scripts to create exploratory and results oriented visualizations - │   │ └── visualize.py - │   └── config <- Describe the parameters used in train_model.py and predict_model.py - +├── .dvc <- Configuration of the data version control +├── .github +│   └── workflows <- GitHub Actions to trigger CI/CD pipeline and data pipeline +| +├── data +│   └── interim <- Intermediate data that has been transformed. +│   └── processed <- The final, canonical data sets for modeling. +│   └── raw <- The original, immutable data dump. +│   └── status.txt <- Indicator of whether the current data is valid. +│ +├── logs <- Logs from training and predicting +│ +├── metrics <- Metrics from the evaluated model. +| +├── models <- Trained and serialized models, model predictions, or model summaries +| +├── monitoring <- All files related to the monitoring of the application. +│   └── alertmanager <- Configuration of the alert manager to inform in the event of deviations. +│   └── grafana <- Configuration of the Grafana dashboard to visualize the metrics collected by Prometheus. +│   └── prometheues <- Configuration to collect metrics about the health status of the app, number of requests etc. +│ +├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering), +│ the creator's initials, and a short `-` delimited description, e.g. +│ `1.0-jqp-initial-data-exploration`. +│ +├── references <- Data dictionaries, manuals, and all other explanatory materials. 
+│ +├── reports <- Generated analysis as HTML, PDF, LaTeX, etc. +│   └── figures <- Generated graphics and figures to be used in reporting +│ +├── src <- Source code for use in this project. +│   └── api <- Definition of API endpoints +│ │ +│   └── data_module_def <- Scripts to download, validate and transform data +│ │ +│   └── models_module_def <- Scripts to train models and then use trained models to make +│ │ predictions +│ └── pipeline_steps <- Scripts for the individual pipeline steps, from data download to model evaluation +│ │ +│ └── utils <- Helper functions and other utils (e.g. logger) +│   └── visualization <- Scripts to create exploratory and results oriented visualizations +│   │ └── visualize.py +| └── config_manager.py <- Create configuration objects for each of the stages +│   └── config.py <- Paths to the config files +│   └── config.yaml <- Values for the required configuration fields +│   └── entity.py <- Definition of the config fields +| dvc.lock <- Locks of the last pipeline run +| dvc.yaml <- Orchestration of pipeline steps (DAG). +├── LICENSE +├── README.md <- The top-level README for developers using this project. +├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. +│ generated with `pip freeze > requirements.txt` -------- ## How to run locally