
Commit

🚧 Update folder structure & kubernetes
AntoinePELAMOURGUES committed Dec 1, 2024
1 parent 93faf86 commit 942741b
Showing 154 changed files with 1,382 additions and 673 deletions.
Empty file modified .env.example
100644 → 100755
Empty file.
Empty file modified .github/workflows/ci_airflow.yml
100644 → 100755
Empty file.
Empty file modified .github/workflows/docker-deploy.yml
100644 → 100755
Empty file.
Empty file modified .github/workflows/python-app.yml
100644 → 100755
Empty file.
Empty file modified .github/workflows/test-databases.yml
100644 → 100755
Empty file.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
55 changes: 16 additions & 39 deletions Makefile
100644 → 100755
@@ -1,7 +1,7 @@
NAMESPACE1 = reco-movies
NAMESPACE2 = airflow

.PHONY: help setup1 setup2 start stop down restart logs-supabase logs-airflow logs-api logs-fastapi clean network all namespace pv secrets configmaps deployments services ingress clean-kube-reco clean-kube-airflow apply-configmap load-data-minikube install-airflow pv-airflow airflow reco
.PHONY: help setup1 setup2 start stop down restart logs-supabase logs-airflow logs-api logs-fastapi clean network all namespace pv secrets configmaps deployments services ingress clean-kube-reco clean-kube-airflow apply-configmap start-minikube start-airflow pv-airflow reco

# Help command to list all available targets
help:
@@ -114,52 +114,29 @@ network:
###### MAKEFILE KUBERNETES
all: namespace start-airflow pv-airflow pv secrets configmaps deployments services ingress

start-minikube:
minikube start --driver=docker --memory=8192 --cpus=4

install-helm:
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh

# Install Airflow via Helm
install-airflow:
start-airflow:
sudo apt-get update
helm repo add apache-airflow https://airflow.apache.org
helm upgrade --install airflow apache-airflow/airflow --namespace airflow --create-namespace -f kubernetes/airflow/my_values.yml
kubectl apply -f kubernetes/airflow/airflow-local-dags-folder-pv.yml -n airflow
kubectl apply -f kubernetes/airflow/airflow-local-dags-folder-pvc.yml -n airflow
kubectl apply -f kubernetes/airflow/airflow-local-logs-folder-pv.yml -n airflow
kubectl apply -f kubernetes/airflow/airflow-local-logs-folder-pvc.yml -n airflow
kubectl apply -f kubernetes/airflow/order/order-data-folder-pv.yaml
kubectl apply -f kubernetes/airflow/order/order-data-folder-pvc.yaml
kubectl apply -f kubernetes/secrets/airflow-secrets.yaml
kubectl apply -f kubernetes/airflow/order/python-transform-job.yaml -n airflow
kubectl apply -f kubernetes/airflow/order/python-load-job.yaml -n airflow


delete-airflow-statefulsets:
kubectl delete statefulset -n airflow airflow-triggerer || true
kubectl delete statefulset -n airflow airflow-worker || true

pv-airflow:
kubectl apply -f kubernetes/airflow/airflow-local-dags-folder-pv.yml -n airflow --validate=false
kubectl apply -f kubernetes/airflow/airflow-local-dags-folder-pvc.yml -n airflow --validate=false
kubectl apply -f kubernetes/airflow/airflow-local-logs-folder-pv.yml -n airflow --validate=false
kubectl apply -f kubernetes/airflow/airflow-local-logs-folder-pvc.yml -n airflow --validate=false
kubectl apply -f kubernetes/airflow/order/order-data-folder-pv.yaml
kubectl apply -f kubernetes/airflow/order/order-data-folder-pvc.yaml
kubectl apply -f kubernetes/storageclass/storageclass.yaml -n airflow
kubectl apply -f kubernetes/persistent-volumes/airflow-local-dags-folder.yml -n airflow
kubectl apply -f kubernetes/persistent-volumes/airflow-local-logs-folder.yml -n airflow
kubectl apply -f kubernetes/secrets/airflow-secrets.yaml -n airflow


delete-pv-airflow:
kubectl delete pv airflow-local-dags-folder || true
kubectl delete pv airflow-local-logs-folder || true
kubectl delete pv order-data-folder || true

airflow: namespace pv-airflow
helm -n airflow upgrade --install airflow apache-airflow/airflow -f kubernetes/airflow/my_values.yml

# Load data into minikube: https://minikube.sigs.k8s.io/docs/handbook/filesync/
load-data-minikube:
mkdir -p ~/.minikube/files/processed_raw
mkdir -p ~/.minikube/files/dags
mkdir -p ~/.minikube/files/logs
cp -r ml/data/processed/* ~/.minikube/files/processed_raw
cp -r postgres/init.sql ~/.minikube/files/init.sql
cp -r prometheus/prometheus.yml ~/.minikube/files/prometheus.yml
cp -r airflow/dags/* ~/.minikube/files/dags
minikube start


# Vérifie si kubectl est connecté à un cluster
check-kube:
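With install-airflow renamed to start-airflow, the intended local bring-up presumably looks like this — a sketch only, since several targets (namespace, pv, secrets, …) are not shown in this hunk:

make start-minikube   # boot a local cluster: docker driver, 8 GB RAM, 4 CPUs
make all              # namespace, Airflow chart, volumes, secrets, deployments, services, ingress
make logs-airflow     # tail Airflow logs; target body not shown above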
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified airflow/.env.example
100644 → 100755
Empty file.
Empty file modified airflow/Dockerfile
100644 → 100755
Empty file.
Empty file modified airflow/dags/predict_knn_model.py
100644 → 100755
Empty file.
Empty file modified airflow/dags/predict_surprise_SVD.py
100644 → 100755
Empty file.
Empty file modified airflow/dags/scrapping.py
100644 → 100755
Empty file.
Empty file modified airflow/docker-compose.yaml
100644 → 100755
Empty file.
Empty file modified airflow/logs/.gitkeep
100644 → 100755
Empty file.
Empty file modified airflow/requirements.txt
100644 → 100755
Empty file.
Empty file modified clean_docker.sh
100644 → 100755
Empty file.
Empty file modified clean_kubernetes.sh
100644 → 100755
Empty file.
95 changes: 95 additions & 0 deletions data/dags_airflow/init_order.py
@@ -0,0 +1,95 @@
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.python_operator import PythonOperator
from airflow import settings
from airflow.models.connection import Connection
from airflow.operators.postgres_operator import PostgresOperator
import os

conn_keys = ['conn_id', 'conn_type', 'host', 'login', 'password', 'schema']

def get_postgres_conn_conf():
    # Read the service coordinates injected by Kubernetes into the pod environment.
    postgres_conn_conf = {}
    postgres_conn_conf['host'] = os.getenv("AIRFLOW_POSTGRESQL_SERVICE_HOST")
    postgres_conn_conf['port'] = os.getenv("AIRFLOW_POSTGRESQL_SERVICE_PORT")
    if postgres_conn_conf['host'] is None:
        raise TypeError("The AIRFLOW_POSTGRESQL_SERVICE_HOST isn't defined")
    elif postgres_conn_conf['port'] is None:
        raise TypeError("The AIRFLOW_POSTGRESQL_SERVICE_PORT isn't defined")
    postgres_conn_conf['conn_id'] = 'postgres'
    postgres_conn_conf['conn_type'] = 'postgres'
    postgres_conn_conf['login'] = 'postgres'
    postgres_conn_conf['password'] = 'postgres'
    postgres_conn_conf['schema'] = 'postgres'
    return postgres_conn_conf

def create_conn(**kwargs):
    # Register the connection in the Airflow metadata database, skipping duplicates.
    session = settings.Session()
    print("Session created")
    connections = session.query(Connection)
    print("Connections listed")
    if kwargs['conn_id'] not in [connection.conn_id for connection in connections]:
        conn_params = {key: kwargs[key] for key in conn_keys}
        conn = Connection(**conn_params)
        session.add(conn)
        session.commit()
        print("Connection created")
    else:
        print("Connection already exists")
    session.close()

postgres_conn_conf = get_postgres_conn_conf()

with DAG(
    dag_id='init_order',
    tags=['order', 'antoine'],
    default_args={
        'owner': 'airflow',
        'start_date': days_ago(0, minute=1),
    },
    catchup=False
) as dag:

    create_postgres_conn = PythonOperator(
        task_id='create_postgres_conn',
        python_callable=create_conn,
        op_kwargs=postgres_conn_conf
    )

    create_tables = PostgresOperator(
        task_id='create_tables',
        postgres_conn_id='postgres',
        sql="""
        CREATE TABLE IF NOT EXISTS movies (
            movieId SERIAL PRIMARY KEY,
            title VARCHAR(200) NOT NULL,
            genres TEXT,
            year INT
        );
        CREATE TABLE IF NOT EXISTS ratings (
            id SERIAL PRIMARY KEY,
            userId INT,
            movieId INT REFERENCES movies(movieId),
            rating FLOAT NOT NULL,
            timestamp INT,
            bayesian_mean FLOAT NOT NULL
        );
        CREATE TABLE IF NOT EXISTS links (
            id SERIAL PRIMARY KEY,
            movieId INT REFERENCES movies(movieId),
            imdbId INT,
            tmdbId INT
        );
        CREATE TABLE IF NOT EXISTS users (
            userId SERIAL PRIMARY KEY,
            username VARCHAR(50) NOT NULL,
            email VARCHAR(100) NOT NULL UNIQUE,
            hached_password VARCHAR(300) NOT NULL
        );
        """
    )

    create_postgres_conn >> create_tables
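A hedged smoke test for this DAG from outside the cluster, assuming the Helm chart's default airflow-webserver deployment name and an Airflow ≥ 2.1 CLI:

kubectl -n airflow exec deploy/airflow-webserver -- airflow dags trigger init_order
kubectl -n airflow exec deploy/airflow-webserver -- airflow connections get postgres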
46 changes: 46 additions & 0 deletions data/dags_airflow/load_transform_save_db.py
@@ -0,0 +1,46 @@
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
from airflow.kubernetes.secret import Secret
from kubernetes.client import models as k8s

# Database credentials pulled from the in-cluster `sql-conn` secret
# and exposed to the pod as environment variables.
secret_database = Secret(
    deploy_type="env",
    deploy_target="DATABASE",
    secret="sql-conn",
    namespace="airflow",
)

secret_user = Secret(
    deploy_type="env",
    deploy_target="USER",
    secret="sql-conn",
    namespace="airflow",
)

secret_password = Secret(
    deploy_type="env",
    deploy_target="PASSWORD",
    secret="sql-conn",
    namespace="airflow",
)

with DAG(
    dag_id='load_transform_save_db',
    tags=['antoine'],
    default_args={
        'owner': 'airflow',
        'start_date': days_ago(0, minute=1),
    },
    schedule_interval=None,  # no automatic scheduling; triggered manually
    catchup=False
) as dag:

    python_transform = KubernetesPodOperator(
        task_id="python_transform",
        image="antoinepela/projet_reco_movies:order-python-transform-latest",
        cmds=["/bin/bash", "-c", "/app/start.sh"],
        namespace="airflow",
        # attach the sql-conn secret so its values reach the pod environment
        secrets=[secret_database, secret_user, secret_password],
    )

    python_transform
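The three Secret objects are meant to surface sql-conn values as DATABASE, USER and PASSWORD inside the pod. What /app/start.sh actually does is not part of this commit, so the following is a purely hypothetical sketch of how the transform container might consume them (transform.py is an assumed name):

#!/bin/bash
# hypothetical /app/start.sh: consume the env vars injected from the sql-conn secret
set -e
echo "Connecting to ${DATABASE} as ${USER}"
python transform.py --db "${DATABASE}" --user "${USER}" --password "${PASSWORD}"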
