From 5f0f47d06ac1f3d43c43b89ae16b6211a081bbb2 Mon Sep 17 00:00:00 2001
From: Isaias Venegas
Date: Sun, 5 Jan 2025 16:01:28 -0300
Subject: [PATCH] Add experiment checking before modifying the dataset

---
 DashAI/back/api/api_v1/endpoints/datasets.py | 112 +++++++++++++++++-
 DashAI/front/src/api/datasets.ts             |  12 ++
 .../datasets/ConvertDatasetModal.jsx         |  41 ++++++-
 .../components/datasets/CopyDatasetModal.jsx |  76 ++++++++++++
 4 files changed, 234 insertions(+), 7 deletions(-)
 create mode 100644 DashAI/front/src/components/datasets/CopyDatasetModal.jsx

diff --git a/DashAI/back/api/api_v1/endpoints/datasets.py b/DashAI/back/api/api_v1/endpoints/datasets.py
index fc44bd879..fbbe5a381 100644
--- a/DashAI/back/api/api_v1/endpoints/datasets.py
+++ b/DashAI/back/api/api_v1/endpoints/datasets.py
@@ -25,7 +25,7 @@
     to_dashai_dataset,
     update_columns_spec,
 )
-from DashAI.back.dependencies.database.models import Dataset
+from DashAI.back.dependencies.database.models import Dataset, Experiment
 from DashAI.back.dependencies.registry import ComponentRegistry
 
 logger = logging.getLogger(__name__)
@@ -182,6 +182,47 @@ async def get_info(
     return info
 
 
+@router.get("/{dataset_id}/experiments-exist")
+@inject
+async def get_experiments_exist(
+    dataset_id: int,
+    session_factory: sessionmaker = Depends(lambda: di["session_factory"]),
+):
+    """Get a boolean indicating if there are experiments associated with the dataset.
+
+    Parameters
+    ----------
+    dataset_id : int
+        id of the dataset to query.
+
+    Returns
+    -------
+    bool
+        True if there are experiments associated with the dataset, False otherwise.
+    """
+    with session_factory() as db:
+        try:
+            dataset = db.get(Dataset, dataset_id)
+            if not dataset:
+                raise HTTPException(
+                    status_code=status.HTTP_404_NOT_FOUND,
+                    detail="Dataset not found",
+                )
+            # Check if there are any experiments associated with the dataset
+            experiments_exist = db.query(Experiment).filter(
+                Experiment.dataset_id == dataset_id
+            ).first() is not None
+
+            return experiments_exist
+
+        except exc.SQLAlchemyError as e:
+            logger.exception(e)
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail="Internal database error",
+            ) from e
+
+
 @router.get("/{dataset_id}/types")
 @inject
 async def get_types(
@@ -344,6 +385,75 @@ async def upload_dataset(
     logger.debug("Dataset creation sucessfully finished.")
     return new_dataset
 
 
+@router.post("/copy", status_code=status.HTTP_201_CREATED)
+@inject
+async def copy_dataset(
+    dataset: Dict[str, int],
+    session_factory: sessionmaker = Depends(lambda: di["session_factory"]),
+    config: Dict[str, Any] = Depends(lambda: di["config"]),
+):
+    """Copy an existing dataset to create a new one.
+
+    Parameters
+    ----------
+    dataset : Dict[str, int]
+        Request body holding the id of the dataset to copy under the
+        "dataset_id" key.
+
+    Returns
+    -------
+    Dataset
+        The newly created dataset.
+ """ + dataset_id = dataset["dataset_id"] + print("COPYING DATASET", dataset_id) + logger.debug(f"Copying dataset with ID {dataset_id}.") + + with session_factory() as db: + # Retrieve the existing dataset + original_dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first() + if not original_dataset: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Original dataset not found.", + ) + + # Create a new folder for the copied dataset + new_name = f"{original_dataset.name}_copy" + new_folder_path = config["DATASETS_PATH"] / new_name + try: + shutil.copytree(original_dataset.file_path, new_folder_path) + except FileExistsError: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"A dataset with the name '{new_name}' already exists.", + ) + except Exception as e: + logger.exception(e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to copy dataset files.", + ) from e + + # Save metadata for the new dataset + try: + new_dataset = Dataset( + name=new_name, + file_path=str(new_folder_path), + ) + db.add(new_dataset) + db.commit() + db.refresh(new_dataset) + except exc.SQLAlchemyError as e: + logger.exception(e) + shutil.rmtree(new_folder_path, ignore_errors=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal database error.", + ) from e + + logger.debug(f"Dataset copied successfully to '{new_name}'.") + return new_dataset + @router.delete("/{dataset_id}") @inject diff --git a/DashAI/front/src/api/datasets.ts b/DashAI/front/src/api/datasets.ts index d6ae60d2b..bc34c7ee9 100644 --- a/DashAI/front/src/api/datasets.ts +++ b/DashAI/front/src/api/datasets.ts @@ -8,6 +8,11 @@ export const uploadDataset = async (formData: object): Promise => { return response.data; }; +export const copyDataset = async (formData: object): Promise => { + const response = await api.post(`${datasetEndpoint}/copy`, formData); + return response.data; +}; + export const getDatasets = async (): Promise => { const response = await api.get(datasetEndpoint); return response.data; @@ -28,6 +33,13 @@ export const getDatasetInfo = async (id: number): Promise => { return response.data; }; +export const getExperimentsExist = async (id: number): Promise => { + const response = await api.get( + `${datasetEndpoint}/${id}/experiments-exist`, + ); + return response.data; +}; + export const updateDataset = async ( id: number, formData: object, diff --git a/DashAI/front/src/components/datasets/ConvertDatasetModal.jsx b/DashAI/front/src/components/datasets/ConvertDatasetModal.jsx index 2ce1e0ade..0535e228c 100644 --- a/DashAI/front/src/components/datasets/ConvertDatasetModal.jsx +++ b/DashAI/front/src/components/datasets/ConvertDatasetModal.jsx @@ -26,12 +26,16 @@ import { getDatasetConverterList, } from "../../api/converter"; import { ConverterListStatus } from "../../types/converter"; +import { getExperimentsExist } from "../../api/datasets"; +import CopyDatasetModal from "./CopyDatasetModal"; function ConvertDatasetModal({ datasetId }) { const { enqueueSnackbar } = useSnackbar(); const [open, setOpen] = useState(false); const [targetColumnIndex, setTargetColumnIndex] = useState(null); const [convertersToApply, setConvertersToApply] = useState([]); + const [openCopyModal, setOpenCopyModal] = useState(false); + const [datasetIdToModify, setDatasetIdToModify] = useState(datasetId); const [converterListId, setConverterListId] = useState(null); const [converterListStatus, setConverterListStatus] = 
   const [running, setRunning] = useState(false);
@@ -73,11 +77,11 @@ function ConvertDatasetModal({ datasetId }) {
     }
   };
 
-  const handleSaveConfig = async () => {
+  const saveAndEnqueueConverterList = async (id) => {
     try {
       // Save the list of converters to apply
       const response = await saveDatasetConverterList(
-        datasetId,
+        id,
         convertersToApply.reduce((acc, { name, params, scope, pipelineId }) => {
           acc[name] = {
             params: params,
@@ -117,14 +121,31 @@ function ConvertDatasetModal({ datasetId }) {
     }
   };
 
+  const handleSaveConfig = async () => {
+    // Check if there are experiments associated with the dataset
+    try {
+      const hasExperiments = await getExperimentsExist(datasetIdToModify);
+      if (hasExperiments) {
+        setOpenCopyModal(true);
+      } else {
+        await saveAndEnqueueConverterList(datasetIdToModify);
+      }
+    } catch (error) {
+      enqueueSnackbar(
+        "Error while trying to check if there are experiments associated with the dataset",
+        {
+          variant: "error",
+        },
+      );
+    }
+  };
+
   const getConverterListStatus = async () => {
     getDatasetConverterList(converterListId)
       .then((convertersFromDB) => {
-        console.log(convertersFromDB);
         setConverterListStatus(convertersFromDB.status);
       })
       .catch((error) => {
-        console.log(error);
         enqueueSnackbar("Error while trying to fetch converters", {
           variant: "error",
         });
@@ -193,7 +214,7 @@ function ConvertDatasetModal({ datasetId }) {
          <Grid item xs={12}>
            <Typography variant="subtitle1">
              Dataset summary
            </Typography>
-            <DatasetSummaryTable datasetId={datasetId} />
+            <DatasetSummaryTable datasetId={datasetIdToModify} />
          </Grid>
          {/* Converter selector */}
@@ -244,7 +265,7 @@ function ConvertDatasetModal({ datasetId }) {
          </Grid>
          <Grid item xs={12}>
            {/* Selected converters table */}
-            <SelectedConvertersTable datasetId={datasetId} />
+            <SelectedConvertersTable datasetId={datasetIdToModify} />
          </Grid>
        </Grid>
      </DialogContent>
@@ -269,6 +290,14 @@ function ConvertDatasetModal({ datasetId }) {
        </DialogActions>
      </Dialog>
 
+      {/* Modal to make a copy of the dataset */}
+      <CopyDatasetModal
+        datasetId={datasetIdToModify}
+        updateDatasetId={setDatasetIdToModify}
+        open={openCopyModal}
+        setOpen={setOpenCopyModal}
+        modifyDataset={saveAndEnqueueConverterList}
+      />
    </React.Fragment>
  );
 }
diff --git a/DashAI/front/src/components/datasets/CopyDatasetModal.jsx b/DashAI/front/src/components/datasets/CopyDatasetModal.jsx
new file mode 100644
index 000000000..bc6d8a947
--- /dev/null
+++ b/DashAI/front/src/components/datasets/CopyDatasetModal.jsx
@@ -0,0 +1,76 @@
+import React from "react";
+import PropTypes from "prop-types";
+import { useSnackbar } from "notistack";
+import {
+  Button,
+  Dialog,
+  DialogActions,
+  DialogContent,
+  DialogContentText,
+  DialogTitle,
+} from "@mui/material";
+import { copyDataset } from "../../api/datasets";
+
+function CopyDatasetModal({
+  datasetId,
+  updateDatasetId,
+  open,
+  setOpen,
+  modifyDataset,
+}) {
+  const { enqueueSnackbar } = useSnackbar();
+
+  const handleDatasetModification = async (id) => {
+    await modifyDataset(id);
+    setOpen(false);
+  };
+
+  const handleDatasetCopyModification = async () => {
+    try {
+      const datasetCopy = await copyDataset({
+        dataset_id: datasetId,
+      });
+      updateDatasetId(datasetCopy.id);
+      enqueueSnackbar("Dataset copied successfully", {
+        variant: "success",
+      });
+
+      await handleDatasetModification(datasetCopy.id);
+    } catch (error) {
+      enqueueSnackbar("Error while trying to create a copy of the dataset.", {
+        variant: "error",
+      });
+    } finally {
+      setOpen(false);
+    }
+  };
+  return (
+    <Dialog open={open} onClose={() => setOpen(false)}>
+      <DialogTitle>Existing experiments</DialogTitle>
+      <DialogContent>
+        <DialogContentText>
+          This dataset is currently used in existing experiments. Modifying it
+          may impact the results of re-running those experiments. Would you like
+          to create and modify a copy of this dataset instead?
+        </DialogContentText>
+      </DialogContent>
+      <DialogActions>
+        <Button onClick={() => handleDatasetModification(datasetId)}>
+          Modify original dataset
+        </Button>
+        <Button onClick={handleDatasetCopyModification} autoFocus>
+          Copy and modify
+        </Button>
+      </DialogActions>
+    </Dialog>
+  );
+}
+CopyDatasetModal.propTypes = {
+  datasetId: PropTypes.number.isRequired,
+  updateDatasetId: PropTypes.func.isRequired,
+  open: PropTypes.bool.isRequired,
+  setOpen: PropTypes.func.isRequired,
+  modifyDataset: PropTypes.func.isRequired,
+};
+
+export default CopyDatasetModal;
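
Note for reviewers: the two new endpoints are intended to be used as a pair — the front end first asks GET /datasets/{id}/experiments-exist, and only falls back to POST /datasets/copy when the dataset is referenced by experiments. Below is a minimal TypeScript sketch of that flow (not part of the patch), reusing getExperimentsExist and copyDataset from DashAI/front/src/api/datasets.ts above; the resolveModifiableDatasetId helper is a hypothetical name introduced only for illustration:

import { copyDataset, getExperimentsExist } from "./api/datasets";

// Returns the id of a dataset that is safe to modify: the original id when no
// experiment references it, otherwise the id of a fresh backend-side copy.
// (Hypothetical helper, not part of this patch.)
async function resolveModifiableDatasetId(datasetId: number): Promise<number> {
  const hasExperiments = await getExperimentsExist(datasetId);
  if (!hasExperiments) {
    return datasetId;
  }
  // The copy endpoint expects the id wrapped in a { dataset_id } request body.
  const datasetCopy = await copyDataset({ dataset_id: datasetId });
  return datasetCopy.id;
}

// Usage: const safeId = await resolveModifiableDatasetId(3);
// then enqueue the converter list against safeId instead of the original id,
// which is what ConvertDatasetModal does via datasetIdToModify.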