From cd442e5d43077a0080e68cec92ee6d385f6bbfcc Mon Sep 17 00:00:00 2001
From: castolpe <8385345+castolpe@users.noreply.github.com>
Date: Fri, 8 Nov 2024 09:41:07 +0100
Subject: [PATCH] Update README.md (file structure)

---
 README.md | 89 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 53 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 929f3e6..8317d0a 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ This project was created as part of the MLOps boot camp. The project was used to
 **Developer Team**
 - Asma Heena Khalil
 - Ringo Schwabe
-- Carolin Stolpe
+- Carolin Stolpe (@castolpe)
 
 Business Objectives
 ------------------
@@ -19,41 +19,58 @@ Architecture
 
 File structure
 ------------
-
-    ├── LICENSE
-    ├── README.md          <- The top-level README for developers using this project.
-    ├── data
-    │   ├── interim        <- Intermediate data that has been transformed.
-    │   ├── processed      <- The final, canonical data sets for modeling.
-    │   └── raw            <- The original, immutable data dump.
-    │
-    ├── logs               <- Logs from training and predicting
-    │
-    ├── models             <- Trained and serialized models, model predictions, or model summaries, tracked by DVC
-    │
-    ├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
-    │                         generated with `pip freeze > requirements.txt`
-    │
-    ├── src                <- Source code for use in this project.
-    │   ├── __init__.py    <- Makes src a Python module
-    │   │
-    │   ├── data           <- Scripts to download or generate data
-    │   │   ├── check_structure.py    
-    │   │   ├── import_raw_data.py 
-    │   │   └── make_dataset.py
-    │   │
-    │   ├── features       <- Scripts to turn raw data into features for modeling
-    │   │   └── build_features.py
-    │   │
-    │   ├── models         <- Scripts to train models and then use trained models to make
-    │   │   │                 predictions
-    │   │   ├── predict_model.py
-    │   │   └── train_model.py
-    │   │
-    │   ├── visualization  <- Scripts to create exploratory and results oriented visualizations
-    │   │   └── visualize.py
-    │   └── config         <- Describe the parameters used in train_model.py and predict_model.py
-
+    ├── .dvc                   <- Configuration of the data version control (DVC)
+    ├── .github
+    │   └── workflows          <- GitHub Actions that trigger the CI/CD pipeline and the data pipeline
+    │
+    ├── data
+    │   ├── interim            <- Intermediate data that has been transformed.
+    │   ├── processed          <- The final, canonical data sets for modeling.
+    │   ├── raw                <- The original, immutable data dump.
+    │   └── status.txt         <- Indicator of whether the current data is valid.
+    │
+    ├── logs                   <- Logs from training and predicting
+    │
+    ├── metrics                <- Metrics from the evaluated model.
+    │
+    ├── models                 <- Trained and serialized models, model predictions, or model summaries
+    │
+    ├── monitoring             <- All files related to the monitoring of the application.
+    │   ├── alertmanager       <- Configuration of the alert manager that sends notifications in the event of deviations.
+    │   ├── grafana            <- Configuration of the Grafana dashboard to visualize the metrics collected by Prometheus.
+    │   └── prometheus         <- Configuration to collect metrics about the health status of the app, number of requests etc.
+    │
+    ├── notebooks              <- Jupyter notebooks. Naming convention is a number (for ordering),
+    │                             the creator's initials, and a short `-` delimited description, e.g.
+    │                             `1.0-jqp-initial-data-exploration`.
+    │
+    ├── references             <- Data dictionaries, manuals, and all other explanatory materials.
+    │
+    ├── reports                <- Generated analysis as HTML, PDF, LaTeX, etc.
+    │   └── figures            <- Generated graphics and figures to be used in reporting
+    │
+    ├── src                    <- Source code for use in this project.
+    │   ├── api                <- Definition of API endpoints
+    │   │
+    │   ├── data_module_def    <- Scripts to download, validate and transform data
+    │   │
+    │   ├── models_module_def  <- Scripts to train models and then use trained models to make
+    │   │                         predictions
+    │   ├── pipeline_steps     <- Scripts for the individual pipeline steps, from data download to model evaluation
+    │   │
+    │   ├── utils              <- Helper functions and other utils (e.g. logger)
+    │   ├── visualization      <- Scripts to create exploratory and results oriented visualizations
+    │   │   └── visualize.py
+    │   ├── config_manager.py  <- Create configuration objects for each of the stages
+    │   ├── config.py          <- Paths to the config files
+    │   ├── config.yaml        <- Values for the required configuration fields
+    │   └── entity.py          <- Definition of the config fields
+    ├── dvc.lock               <- Locks of the last pipeline run
+    ├── dvc.yaml               <- Orchestration of pipeline steps (DAG).
+    ├── LICENSE
+    ├── README.md              <- The top-level README for developers using this project.
+    └── requirements.txt       <- The requirements file for reproducing the analysis environment, e.g.
+                                  generated with `pip freeze > requirements.txt`
 --------
 
 ## How to run locally