From 84af094113745e69b37b8f2a902f4dc99ee555d1 Mon Sep 17 00:00:00 2001
From: Florence Townend
Date: Wed, 15 May 2024 13:52:54 +0200
Subject: [PATCH] Added documentation for GPU training to the customising
 training section. Currently not implemented for subspace methods, but that
 is on the list.

---
 docs/customising_training.rst                 |  33 +++
 fusilli/data.py                               | 237 ++++++++++--------
 .../fusionmodels/tabularfusion/mcvae_model.py |  17 +-
 3 files changed, 176 insertions(+), 111 deletions(-)

diff --git a/docs/customising_training.rst b/docs/customising_training.rst
index 5a455c4..f24cfdd 100644
--- a/docs/customising_training.rst
+++ b/docs/customising_training.rst
@@ -5,6 +5,7 @@ This page will show you how to customise the training and evaluation of your fus
 
 We will cover the following topics:
 
+* Using a GPU
 * Early stopping
 * Validation metrics
 * Batch size
@@ -13,6 +14,38 @@ We will cover the following topics:
 * Number of workers in PyTorch DataLoader
 * Train/test and cross-validation splitting yourself
 
+Using a GPU
+-----------
+
+If you want to use a GPU to train your model, pass the ``training_modifications`` argument to the :func:`~.fusilli.data.prepare_fusion_data` and :func:`~.fusilli.train.train_and_save_models` functions. By default, the model will train on the CPU.
+
+For example, to train on a single GPU:
+
+.. code-block:: python
+
+    from fusilli.data import prepare_fusion_data
+    from fusilli.train import train_and_save_models
+
+    datamodule = prepare_fusion_data(
+        prediction_task="binary",
+        fusion_model=example_model,
+        data_paths=data_paths,
+        output_paths=output_path,
+    )
+
+    trained_model_list = train_and_save_models(
+        data_module=datamodule,
+        fusion_model=example_model,
+        training_modifications={"accelerator": "gpu", "devices": 1},
+    )
+
+.. warning::
+
+    GPU training is not yet implemented for subspace-based models (as of May 2024).
+    The documentation will be updated when it is supported.
+
+
+
 Early stopping
 --------------
 
diff --git a/fusilli/data.py b/fusilli/data.py
index b6c54ef..5a39f56 100644
--- a/fusilli/data.py
+++ b/fusilli/data.py
@@ -228,7 +228,9 @@ def __init__(self, sources, img_downsample_dims=None):
         if "ID" not in tab1_df.columns:
             raise ValueError("The CSV must have an index column named 'ID'.")
         if "prediction_label" not in tab1_df.columns:
-            raise ValueError("The CSV must have a label column named 'prediction_label'.")
+            raise ValueError(
+                "The CSV must have a label column named 'prediction_label'."
+            )
 
         # if tabular2_source exists, check it has the right columns
         if self.tabular2_source != "":
@@ -236,7 +238,9 @@ def __init__(self, sources, img_downsample_dims=None):
             if "ID" not in tab2_df.columns:
                 raise ValueError("The CSV must have an index column named 'ID'.")
             if "prediction_label" not in tab2_df.columns:
-                raise ValueError("The CSV must have a label column named 'prediction_label'.")
+                raise ValueError(
+                    "The CSV must have a label column named 'prediction_label'."
+ ) def load_tabular1(self): """ @@ -337,11 +341,17 @@ def load_tabular_tabular(self): tab1_df.set_index("ID", inplace=True) tab2_df.set_index("ID", inplace=True) - tab1_pred_features = torch.Tensor(tab1_df.drop(columns=["prediction_label"]).values) - tab2_pred_features = torch.Tensor(tab2_df.drop(columns=["prediction_label"]).values) + tab1_pred_features = torch.Tensor( + tab1_df.drop(columns=["prediction_label"]).values + ) + tab2_pred_features = torch.Tensor( + tab2_df.drop(columns=["prediction_label"]).values + ) prediction_label = tab1_df[["prediction_label"]] - dataset = CustomDataset([tab1_pred_features, tab2_pred_features], prediction_label) + dataset = CustomDataset( + [tab1_pred_features, tab2_pred_features], prediction_label + ) mod1_dim = tab1_pred_features.shape[1] mod2_dim = tab2_pred_features.shape[1] @@ -430,23 +440,23 @@ class TrainTestDataModule(pl.LightningDataModule): """ def __init__( - self, - fusion_model, - sources, - output_paths, - prediction_task, - batch_size, - test_size, - multiclass_dimensions, - subspace_method=None, - image_downsample_size=None, - layer_mods=None, - max_epochs=1000, - extra_log_string_dict=None, - own_early_stopping_callback=None, - num_workers=0, - test_indices=None, - kwargs=None, + self, + fusion_model, + sources, + output_paths, + prediction_task, + batch_size, + test_size, + multiclass_dimensions, + subspace_method=None, + image_downsample_size=None, + layer_mods=None, + max_epochs=1000, + extra_log_string_dict=None, + own_early_stopping_callback=None, + num_workers=0, + test_indices=None, + kwargs=None, ): """ Parameters @@ -539,8 +549,8 @@ def prepare_data(self): self.dataset, self.data_dims = self.modality_methods[self.modality_type]() def setup( - self, - checkpoint_path=None, + self, + checkpoint_path=None, ): """ Splits the data into train and test sets, and runs the subspace method if specified. 
@@ -565,30 +575,31 @@ def setup(
                 self.dataset, [1 - self.test_size, self.test_size]
             )
         else:
-            self.test_dataset = torch.utils.data.Subset(
-                self.dataset, self.test_indices
-            )
+            self.test_dataset = torch.utils.data.Subset(self.dataset, self.test_indices)
             self.train_dataset = torch.utils.data.Subset(
-                self.dataset, list(set(range(len(self.dataset))) - set(self.test_indices))
+                self.dataset,
+                list(set(range(len(self.dataset))) - set(self.test_indices)),
             )
 
         if self.subspace_method is not None:  # if subspace method is specified
             if (
-                    checkpoint_path is None
+                checkpoint_path is None
             ):  # if no checkpoint path specified, train the subspace method
                 self.subspace_method_train = self.subspace_method(
                     datamodule=self,
                     max_epochs=self.max_epochs,
                     k=None,
-                    train_subspace=True
+                    train_subspace=True,
                 )
 
                 # modify the subspace method architecture if specified
                 if self.layer_mods is not None:
-                    self.subspace_method_train = model_modifier.modify_model_architecture(
-                        self.subspace_method_train,
-                        self.layer_mods,
+                    self.subspace_method_train = (
+                        model_modifier.modify_model_architecture(
+                            self.subspace_method_train,
+                            self.layer_mods,
+                        )
                     )
 
                 # train the subspace method and convert train dataset to the latent space
@@ -612,17 +623,16 @@ def setup(
 
             # we have already trained the subspace method, so load it from the checkpoint
             self.subspace_method_train = self.subspace_method(
-                self,
-                max_epochs=self.max_epochs,
-                k=None,
-                train_subspace=False
+                self, max_epochs=self.max_epochs, k=None, train_subspace=False
             )  # will return an initialised subspace method with the subspace models as instance attributes
 
             # modify the subspace method architecture if specified
             if self.layer_mods is not None:
-                self.subspace_method_train = model_modifier.modify_model_architecture(
-                    self.subspace_method_train,
-                    self.layer_mods,
+                self.subspace_method_train = (
+                    model_modifier.modify_model_architecture(
+                        self.subspace_method_train,
+                        self.layer_mods,
+                    )
                 )
 
             # load checkpoint state dict
@@ -656,7 +666,10 @@ def train_dataloader(self):
             Dataloader for training.
         """
         return DataLoader(
-            self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers
+            self.train_dataset,
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=self.num_workers,
         )
 
     def val_dataloader(self):
         """
         Returns
         -------
         dataloader : dataloader
             Dataloader for validation.
""" return DataLoader( - self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers + self.test_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, ) @@ -728,23 +744,23 @@ class KFoldDataModule(pl.LightningDataModule): """ def __init__( - self, - fusion_model, - sources, - output_paths, - prediction_task, - batch_size, - num_folds, - multiclass_dimensions, - subspace_method=None, - image_downsample_size=None, - layer_mods=None, - max_epochs=1000, - extra_log_string_dict=None, - own_early_stopping_callback=None, - num_workers=0, - own_kfold_indices=None, - kwargs=None, + self, + fusion_model, + sources, + output_paths, + prediction_task, + batch_size, + num_folds, + multiclass_dimensions, + subspace_method=None, + image_downsample_size=None, + layer_mods=None, + max_epochs=1000, + extra_log_string_dict=None, + own_early_stopping_callback=None, + num_workers=0, + own_kfold_indices=None, + kwargs=None, ): """ Parameters @@ -877,8 +893,8 @@ def kfold_split(self): return folds # list of tuples of (train_dataset, test_dataset) def setup( - self, - checkpoint_path=None, + self, + checkpoint_path=None, ): """ Splits the data into train and test sets, and runs the subspace method if specified @@ -1014,7 +1030,10 @@ def train_dataloader(self, fold_idx): self.train_dataset, self.test_dataset = self.folds[fold_idx] return DataLoader( - self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers + self.train_dataset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers, ) def val_dataloader(self, fold_idx): @@ -1034,7 +1053,10 @@ def val_dataloader(self, fold_idx): self.train_dataset, self.test_dataset = self.folds[fold_idx] return DataLoader( - self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=self.num_workers + self.test_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, ) @@ -1078,15 +1100,15 @@ class TrainTestGraphDataModule: """ def __init__( - self, - fusion_model, - sources, - graph_creation_method, - test_size, - image_downsample_size=None, - layer_mods=None, - extra_log_string_dict=None, - own_test_indices=None, + self, + fusion_model, + sources, + graph_creation_method, + test_size, + image_downsample_size=None, + layer_mods=None, + extra_log_string_dict=None, + own_test_indices=None, ): """ Parameters @@ -1174,9 +1196,7 @@ def setup(self): self.test_idxs = test_dataset.indices else: self.test_idxs = self.own_test_indices - self.train_idxs = list( - set(range(len(self.dataset))) - set(self.test_idxs) - ) + self.train_idxs = list(set(range(len(self.dataset))) - set(self.test_idxs)) # get the graph data structure self.graph_maker_instance = self.graph_creation_method(self.dataset) @@ -1247,15 +1267,15 @@ class KFoldGraphDataModule: """ def __init__( - self, - num_folds, - fusion_model, - sources, - graph_creation_method, - image_downsample_size=None, - layer_mods=None, - extra_log_string_dict=None, - own_kfold_indices=None, + self, + num_folds, + fusion_model, + sources, + graph_creation_method, + image_downsample_size=None, + layer_mods=None, + extra_log_string_dict=None, + own_kfold_indices=None, ): """ Parameters @@ -1369,7 +1389,7 @@ def setup(self): # modify the graph maker architecture if specified if self.layer_mods is not None: - graph_maker = model_modifier.modify_model_architecture( + self.graph_maker_instance = model_modifier.modify_model_architecture( self.graph_maker_instance, self.layer_mods, ) @@ 
-1414,25 +1434,25 @@ def get_lightning_module(self): def prepare_fusion_data( - prediction_task, - fusion_model, - data_paths, - output_paths, - kfold=False, - num_folds=None, - test_size=0.2, - batch_size=8, - multiclass_dimensions=None, - image_downsample_size=None, - layer_mods=None, - max_epochs=1000, - checkpoint_path=None, - extra_log_string_dict=None, - own_early_stopping_callback=None, - num_workers=0, - test_indices=None, - own_kfold_indices=None, - **kwargs, + prediction_task, + fusion_model, + data_paths, + output_paths, + kfold=False, + num_folds=None, + test_size=0.2, + batch_size=8, + multiclass_dimensions=None, + image_downsample_size=None, + layer_mods=None, + max_epochs=1000, + checkpoint_path=None, + extra_log_string_dict=None, + own_early_stopping_callback=None, + num_workers=0, + test_indices=None, + own_kfold_indices=None, + **kwargs, ): """ Gets the data module for a specific fusion model and training protocol. @@ -1497,7 +1517,8 @@ def prepare_fusion_data( if kfold and own_early_stopping_callback is not None: raise ValueError( - "Cannot use own early stopping callback with kfold cross validation yet. Working on fixing this currently (Nov 2023)") + "Cannot use own early stopping callback with kfold cross validation yet. Working on fixing this currently (Nov 2023)" + ) # Getting the data paths from the data_paths dictionary into a list data_sources = [ @@ -1519,7 +1540,7 @@ def prepare_fusion_data( image_downsample_size=image_downsample_size, layer_mods=layer_mods, extra_log_string_dict=extra_log_string_dict, - # here is where the kfold split will go + own_kfold_indices=own_kfold_indices, ) else: graph_data_module = TrainTestGraphDataModule( @@ -1543,7 +1564,9 @@ def prepare_fusion_data( for dm_instance in data_module: dm_instance.data_dims = graph_data_module.data_dims dm_instance.own_early_stopping_callback = own_early_stopping_callback - dm_instance.graph_maker_instance = graph_data_module.graph_maker_instance + dm_instance.graph_maker_instance = ( + graph_data_module.graph_maker_instance + ) dm_instance.output_paths = output_paths dm_instance.num_folds = num_folds dm_instance.prediction_task = prediction_task diff --git a/fusilli/fusionmodels/tabularfusion/mcvae_model.py b/fusilli/fusionmodels/tabularfusion/mcvae_model.py index d1563ab..f11981e 100644 --- a/fusilli/fusionmodels/tabularfusion/mcvae_model.py +++ b/fusilli/fusionmodels/tabularfusion/mcvae_model.py @@ -11,6 +11,7 @@ import pandas as pd import numpy as np from fusilli.utils.training_utils import get_checkpoint_filenames_for_subspace_models +import sys from fusilli.utils import check_model_validity @@ -136,7 +137,9 @@ def load_ckpt(self, checkpoint_path): init_dict = { "n_channels": 2, "lat_dim": self.num_latent_dims, - "n_feats": tuple([self.datamodule.data_dims[0], self.datamodule.data_dims[1]]), + "n_feats": tuple( + [self.datamodule.data_dims[0], self.datamodule.data_dims[1]] + ), } self.fit_model = Mcvae(**init_dict, sparse=True) @@ -261,7 +264,9 @@ def train(self, train_dataset, val_dataset=None): with contextlib.redirect_stdout(None): mcvae_fit.optimize(epochs=self.max_epochs, data=mcvae_training_data) ideal_epoch = mcvae_early_stopping_tol( - tolerance=mcvae_tolerance, patience=mcvae_patience, loss_logs=mcvae_fit.loss["total"] + tolerance=mcvae_tolerance, + patience=mcvae_patience, + loss_logs=mcvae_fit.loss["total"], ) mcvae_esfit = Mcvae(**init_dict, sparse=True) @@ -284,7 +289,9 @@ def train(self, train_dataset, val_dataset=None): # getting mean latent space mean_latents = 
self.get_latents(mcvae_training_data) - return torch.Tensor(mean_latents), pd.DataFrame(labels, columns=["prediction_label"]) + return torch.Tensor(mean_latents), pd.DataFrame( + labels, columns=["prediction_label"] + ) def convert_to_latent(self, test_dataset): """ @@ -373,7 +380,9 @@ def __init__(self, prediction_task, data_dims, multiclass_dimensions): multiclass_dimensions : int Number of classes in the multiclass classification task. """ - ParentFusionModel.__init__(self, prediction_task, data_dims, multiclass_dimensions) + ParentFusionModel.__init__( + self, prediction_task, data_dims, multiclass_dimensions + ) self.prediction_task = prediction_task
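
Below is a usage sketch of how the new ``training_modifications`` option combines with k-fold cross-validation. It is illustrative only and not part of the patch: the placeholder names ``example_model``, ``data_paths``, and ``output_path`` are reused from the documentation example above, ``num_folds=5`` is an arbitrary choice, and it assumes ``prepare_fusion_data`` accepts ``training_modifications`` through its ``**kwargs``, as the new documentation page states.

.. code-block:: python

    from fusilli.data import prepare_fusion_data
    from fusilli.train import train_and_save_models

    # Accelerator settings; the keys mirror PyTorch Lightning Trainer arguments.
    gpu_settings = {"accelerator": "gpu", "devices": 1}

    # Prepare a 5-fold cross-validation data module. Subspace methods (if the
    # fusion model uses one) are trained during this step, which is the case
    # the new warning in the docs flags as not yet GPU-enabled.
    datamodule = prepare_fusion_data(
        prediction_task="binary",
        fusion_model=example_model,
        data_paths=data_paths,
        output_paths=output_path,
        kfold=True,
        num_folds=5,
        training_modifications=gpu_settings,
    )

    # Train and save one model per fold, each using the GPU settings above.
    trained_model_list = train_and_save_models(
        data_module=datamodule,
        fusion_model=example_model,
        training_modifications=gpu_settings,
    )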