
Commit

Add experiment checking before modifying the dataset
IsaiasVenegas committed Jan 5, 2025
1 parent e939ed7 commit 5f0f47d
Showing 4 changed files with 239 additions and 7 deletions.
112 changes: 111 additions & 1 deletion DashAI/back/api/api_v1/endpoints/datasets.py
@@ -25,7 +25,7 @@
    to_dashai_dataset,
    update_columns_spec,
)
from DashAI.back.dependencies.database.models import Dataset
from DashAI.back.dependencies.database.models import Dataset, Experiment
from DashAI.back.dependencies.registry import ComponentRegistry

logger = logging.getLogger(__name__)
@@ -182,6 +182,47 @@ async def get_info(
    return info


@router.get("/{dataset_id}/experiments-exist")
@inject
async def get_experiments_exist(
dataset_id: int,
session_factory: sessionmaker = Depends(lambda: di["session_factory"]),
):
"""Get a boolean indicating if there are experiments associated with the dataset.
Parameters
----------
dataset_id : int
id of the dataset to query.
Returns
-------
bool
True if there are experiments associated with the dataset, False otherwise.
"""
with session_factory() as db:
try:
dataset = db.get(Dataset, dataset_id)
if not dataset:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Dataset not found",
)
# Check if there are any experiments associated with the dataset
experiments_exist = db.query(Experiment).filter(
Experiment.dataset_id == dataset_id
).first() is not None

return experiments_exist

except exc.SQLAlchemyError as e:
logger.exception(e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Internal database error",
) from e

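A minimal usage sketch for this new endpoint (not part of the diff), assuming the API is served locally at http://localhost:8000 with this router mounted under /api/v1/datasets; both the host and the prefix are assumptions, not something this commit establishes:

    import requests  # third-party HTTP client

    # Hypothetical dev URL; adjust host, port, and prefix to your deployment.
    BASE_URL = "http://localhost:8000/api/v1/datasets"

    resp = requests.get(f"{BASE_URL}/1/experiments-exist")
    resp.raise_for_status()
    print(resp.json())  # True if dataset 1 has associated experiments

The handler returns a bare boolean; the ".first() is not None" pattern above could equally be written with SQLAlchemy's exists() to avoid fetching a full row, though both yield the same result.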

@router.get("/{dataset_id}/types")
@inject
async def get_types(
@@ -344,6 +385,75 @@ async def upload_dataset(
    logger.debug("Dataset creation successfully finished.")
    return new_dataset

@router.post("/copy", status_code=status.HTTP_201_CREATED)
@inject
async def copy_dataset(
dataset: Dict[str, int],
session_factory: sessionmaker = Depends(lambda: di["session_factory"]),
config: Dict[str, Any] = Depends(lambda: di["config"]),
):
"""Copy an existing dataset to create a new one.
Parameters
----------
dataset_id : int
ID of the dataset to copy.
Returns
-------
Dataset
The newly created dataset.
"""
dataset_id = dataset["dataset_id"]
print("COPYING DATASET", dataset_id)
logger.debug(f"Copying dataset with ID {dataset_id}.")

with session_factory() as db:
# Retrieve the existing dataset
original_dataset = db.query(Dataset).filter(Dataset.id == dataset_id).first()
if not original_dataset:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Original dataset not found.",
)

# Create a new folder for the copied dataset
new_name = f"{original_dataset.name}_copy"
new_folder_path = config["DATASETS_PATH"] / new_name
try:
shutil.copytree(original_dataset.file_path, new_folder_path)
except FileExistsError:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"A dataset with the name '{new_name}' already exists.",
)
except Exception as e:
logger.exception(e)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to copy dataset files.",
) from e

# Save metadata for the new dataset
try:
new_dataset = Dataset(
name=new_name,
file_path=str(new_folder_path),
)
db.add(new_dataset)
db.commit()
db.refresh(new_dataset)
except exc.SQLAlchemyError as e:
logger.exception(e)
shutil.rmtree(new_folder_path, ignore_errors=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Internal database error.",
) from e

logger.debug(f"Dataset copied successfully to '{new_name}'.")
return new_dataset

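For illustration, a hedged sketch of invoking the copy endpoint; note that the handler reads dataset_id from the JSON body rather than from the path (base URL assumed as in the sketch above, dataset ID 1 illustrative):

    import requests

    BASE_URL = "http://localhost:8000/api/v1/datasets"  # assumed dev URL

    resp = requests.post(f"{BASE_URL}/copy", json={"dataset_id": 1})
    if resp.status_code == 201:
        # Assumes the serialized Dataset includes a "name" field, e.g. "iris_copy".
        print(resp.json()["name"])
    elif resp.status_code == 409:
        print("A dataset with the copy's name already exists.")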

@router.delete("/{dataset_id}")
@inject
12 changes: 12 additions & 0 deletions DashAI/front/src/api/datasets.ts
@@ -8,6 +8,11 @@ export const uploadDataset = async (formData: object): Promise<object> => {
  return response.data;
};

export const copyDataset = async (formData: object): Promise<object> => {
  const response = await api.post<object>(`${datasetEndpoint}/copy`, formData);
  return response.data;
};

export const getDatasets = async (): Promise<IDataset[]> => {
  const response = await api.get<IDataset[]>(datasetEndpoint);
  return response.data;
@@ -28,6 +33,13 @@ export const getDatasetInfo = async (id: number): Promise<object> => {
  return response.data;
};

export const getExperimentsExist = async (id: number): Promise<boolean> => {
  const response = await api.get<boolean>(
    `${datasetEndpoint}/${id}/experiments-exist`,
  );
  return response.data;
};

export const updateDataset = async (
  id: number,
  formData: object,
41 changes: 35 additions & 6 deletions DashAI/front/src/components/datasets/ConvertDatasetModal.jsx
@@ -26,12 +26,16 @@ import {
  getDatasetConverterList,
} from "../../api/converter";
import { ConverterListStatus } from "../../types/converter";
import { getExperimentsExist } from "../../api/datasets";
import CopyDatasetModal from "./CopyDatasetModal";

function ConvertDatasetModal({ datasetId }) {
  const { enqueueSnackbar } = useSnackbar();
  const [open, setOpen] = useState(false);
  const [targetColumnIndex, setTargetColumnIndex] = useState(null);
  const [convertersToApply, setConvertersToApply] = useState([]);
  const [openCopyModal, setOpenCopyModal] = useState(false);
  const [datasetIdToModify, setDatasetIdToModify] = useState(datasetId);
  const [converterListId, setConverterListId] = useState(null);
  const [converterListStatus, setConverterListStatus] = useState(null);
  const [running, setRunning] = useState(false);
@@ -73,11 +77,11 @@ function ConvertDatasetModal({ datasetId }) {
    }
  };

  const handleSaveConfig = async () => {
  const saveAndEnqueueConverterList = async (id) => {
    try {
      // Save the list of converters to apply
      const response = await saveDatasetConverterList(
        datasetId,
        id,
        convertersToApply.reduce((acc, { name, params, scope, pipelineId }) => {
          acc[name] = {
            params: params,
@@ -117,14 +121,31 @@
    }
  };

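  // If the dataset already backs existing experiments, ask the user (via
  // CopyDatasetModal) whether to modify a copy instead; otherwise apply the
  // converter list to the dataset directly.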
  const handleSaveConfig = async () => {
    // Check if there are experiments associated with the dataset
    try {
      const hasExperiments = await getExperimentsExist(datasetIdToModify);
      if (hasExperiments) {
        setOpenCopyModal(true);
      } else {
        await saveAndEnqueueConverterList(datasetIdToModify);
      }
    } catch (error) {
      enqueueSnackbar(
        "Error while trying to check if there are experiments associated with the dataset",
        {
          variant: "error",
        },
      );
    }
  };

  const getConverterListStatus = async () => {
    getDatasetConverterList(converterListId)
      .then((convertersFromDB) => {
        setConverterListStatus(convertersFromDB.status);
      })
      .catch((error) => {
        enqueueSnackbar("Error while trying to fetch converters", {
          variant: "error",
        });
@@ -193,7 +214,7 @@
Dataset summary
</Typography>
</Grid>
<DatasetSummaryTable datasetId={datasetId} />
<DatasetSummaryTable datasetId={datasetIdToModify} />

{/* Converter selector */}
<Grid item xs={12} display={"flex"} alignItems={"center"} gap={2}>
@@ -244,7 +265,7 @@
</Grid>
{/* Selected converters table */}
<ConverterTable
datasetId={datasetId}
datasetId={datasetIdToModify}
convertersToApply={convertersToApply}
setConvertersToApply={setConvertersToApply}
/>
@@ -269,6 +290,14 @@
          </Button>
        </DialogActions>
      </Dialog>
      {/* Modal to make a copy of the dataset */}
      <CopyDatasetModal
        datasetId={datasetIdToModify}
        updateDatasetId={setDatasetIdToModify}
        open={openCopyModal}
        setOpen={setOpenCopyModal}
        modifyDataset={saveAndEnqueueConverterList}
      />
    </React.Fragment>
  );
}
81 changes: 81 additions & 0 deletions DashAI/front/src/components/datasets/CopyDatasetModal.jsx
@@ -0,0 +1,81 @@
import React from "react";
import PropTypes from "prop-types";
import { useSnackbar } from "notistack";
import {
  Button,
  Dialog,
  DialogActions,
  DialogContent,
  DialogContentText,
  DialogTitle,
} from "@mui/material";
import { copyDataset } from "../../api/datasets";

function CopyDatasetModal({
  datasetId,
  updateDatasetId,
  open,
  setOpen,
  modifyDataset,
}) {
  const { enqueueSnackbar } = useSnackbar();

  const handleDatasetModification = async (id) => {
    await modifyDataset(id);
    setOpen(false);
  };

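  // Create a server-side copy of the dataset, point the parent modal at the
  // copy, then apply the pending modification to it.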
  const handleDatasetCopyModification = async () => {
    try {
      const datasetCopy = await copyDataset({
        dataset_id: datasetId,
      });
      updateDatasetId(datasetCopy.id);
      enqueueSnackbar("Dataset copied successfully", {
        variant: "success",
      });

      await handleDatasetModification(datasetCopy.id);
    } catch (error) {
      enqueueSnackbar("Error while trying to create a copy of the dataset.", {
        variant: "error",
      });
    } finally {
      setOpen(false);
    }
  };

  return (
    <Dialog open={open} onClose={() => setOpen(false)}>
      <DialogTitle>Existing experiments</DialogTitle>
      <DialogContent>
        <DialogContentText>
          This dataset is currently used in existing experiments. Modifying it
          may impact the results of re-running those experiments. Would you
          like to create and modify a copy of this dataset instead?
        </DialogContentText>
      </DialogContent>
      <DialogActions>
        <Button onClick={() => setOpen(false)} autoFocus>
          Cancel
        </Button>
        <Button onClick={() => handleDatasetModification(datasetId)}>
          Modify anyway
        </Button>
        <Button
          onClick={handleDatasetCopyModification}
          variant="contained"
          color="primary"
        >
          Make a copy
        </Button>
      </DialogActions>
    </Dialog>
  );
}

CopyDatasetModal.propTypes = {
  datasetId: PropTypes.number.isRequired,
  updateDatasetId: PropTypes.func.isRequired,
  open: PropTypes.bool.isRequired,
  setOpen: PropTypes.func.isRequired,
  modifyDataset: PropTypes.func.isRequired,
};

export default CopyDatasetModal;
