Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
Analyse data from experiments run on multiple days (#57)
Browse files Browse the repository at this point in the history
* Add code to pool directions

* Add notes

* Cleanup

* Fix broken tests

* Recalculate outputs

* Update manifest and pyproject

* Fix package name

* Update package links

* Update dependencies

* Add more descriptions

* Edit pyproject file

* Expand readme with more details on the analysis

* Add docstring

* Test if we can see the schema in the readme file

* Add handling of multiple days

* Improve performance to deal with very large files, from O(n^2) to O(1)

* Delete commented code

* Handle fit fails

* Save the data in a given folder

* Skip ROIs that do not fit

* Add missing fit handling in sftf analysis

* Regenerate mock data according to new "day" values

* Update docstring

* Remove big chunk of duplicated code

* Test image in readme file

* Remove image

* Apply suggestions from code review

Co-authored-by: Niko Sirmpilatze <[email protected]>

* Change prompt message

* Adjust input params of method get_median_subtracted_response_and_params

* Renaming and other small suggested fixes

* Renaming 2

* Small fixes

* Move out the method to make gaussian matrices

* Clean up

* Add tests for multiple days and add more structure and comments regarding creation of mock data

* Update readme

* Small updates in the docs

* Simplify fitting methods according to discussion

* Improve description on dataset creation optimization

* Remove unused config

* Rearrange methods in generate mock data

* Renaming and adding new fixture

* Add missing n_roi fixture calls

* Renaming and folder reorganization

* Remove try-catch construct

* Linting

* Add missing adjustments

* Add readme file with a description of the regression tests

* Add more descriptions / docs

* Revert readme change

* Fix usage of conftest

* Linting

---------

Co-authored-by: Niko Sirmpilatze <[email protected]>
  • Loading branch information
lauraporta and niksirbi authored Jun 15, 2023
1 parent ccaf866 commit 5728e8f
Show file tree
Hide file tree
Showing 13 changed files with 783 additions and 383 deletions.
139 changes: 78 additions & 61 deletions rsp_vision/analysis/spatial_freq_temporal_freq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import pandas as pd
import scipy.stats as ss
from scipy.optimize import OptimizeResult

from rsp_vision.analysis.gaussians_calculations import (
fit_2D_gaussian_to_data,
Expand Down Expand Up @@ -262,7 +263,8 @@ def nonparam_anova_over_rois(self) -> dict:
samples = np.zeros(
(
len(self.data.directions)
* self.data.n_triggers_per_stimulus,
* self.data.n_triggers_per_stimulus
* self.data.total_n_days,
len(self.data.sf_tf_combinations),
)
)
Expand Down Expand Up @@ -525,72 +527,80 @@ def get_median_subtracted_response_and_params(
return median_subtracted_response_2d_matrix, sf_0, tf_0, peak_response

def get_gaussian_fits_for_roi(self, roi_id: int) -> dict:
"""Calculate the best fit parameters for each ROI and direction.
This method takes as input the ROI index, and loops over the
directions to calculate the best fit parameters for each spatial
and temporal frequency. First, it calls the
get_median_subtracted_response_and_params method to extract the
median subtracted response matrix for the given ROI and direction.
Then, the method performs a 2D Gaussian fit to the 2D response
matrix using the fit_2D_gaussian_to_data method. The resulting best
fit parameters are stored in a dictionary, where the keys are the
directions, and the values are tuples containing the
preferred spatial and temporal frequencies and the peak response
amplitude, the best fit parameter values obtained from the Gaussian
fit, and the median-subtracted response matrix.
Args:
roi_id (int): The index of the ROI for which to calculate the best
fit parameters.
"""Calculates the best fit parameters for each direction and for
the pooled data for a given ROI. It calls the `manage_fitting`
        method in order to find the best fit parameters.
Returns:
dict: A dictionary with the best fit parameters for each direction.
The keys are the directions, and the values are tuples
containing the preferred spatial and temporal frequencies
and the peak response amplitude, the best fit parameter values
obtained from the Gaussian fit, and the median-subtracted
response matrix.
Parameters
----------
roi_id (int)
The index of the ROI for which to calculate the best
fit parameters.
Returns
-------
dict
A dictionary with the best fit parameters for each direction
            and for the pooled data. The keys are the directions, and the
values are tuples containing the preferred spatial and temporal
frequencies and the peak response amplitude, the best fit parameter
values obtained from the Gaussian fit, and the median-subtracted
response matrix.
"""

roi_data = {}
for dir in self.data.directions:
(
response_matrix,
sf_0,
tf_0,
peak_response,
) = self.get_median_subtracted_response_and_params(
responses=self.data.responses,
roi_data[dir] = self.manage_fitting(
roi_id=roi_id,
sfs=self.data.spatial_frequencies,
tfs=self.data.temporal_frequencies,
dir=dir,
direction=dir,
pool_directions=False,
)

initial_parameters = [
peak_response,
sf_0,
tf_0,
np.std(self.data.spatial_frequencies, ddof=1),
np.std(self.data.temporal_frequencies, ddof=1),
self.data.config["fitting"]["power_law_exp"],
]
# now the same by pooling directions
roi_data["pooled"] = self.manage_fitting(
roi_id=roi_id,
pool_directions=True,
)

best_result = fit_2D_gaussian_to_data(
self.data.spatial_frequencies,
self.data.temporal_frequencies,
response_matrix,
initial_parameters,
self.data.config,
)
return roi_data

roi_data[dir] = (
(sf_0, tf_0, peak_response),
best_result.x,
response_matrix,
)
def manage_fitting(
self,
roi_id: int,
pool_directions: bool = False,
direction: float = sys.float_info.min,
) -> Tuple[Tuple[float, float, float], np.ndarray, np.ndarray]:
"""
This method is called by the get_gaussian_fits_for_roi method.
It calls the get_median_subtracted_response_and_params method to
extract the median subtracted response matrix for the given ROI and
direction. Then, it calls the fit_2D_gaussian_to_data method to
perform a 2D Gaussian fit to the 2D response matrix. The resulting
best fit parameters are stored in a tuple, where the first element
is the peak response amplitude, the second element is the best fit
parameter values obtained from the Gaussian fit, and the third
element is the median-subtracted response matrix.
# now the same by pooling directions
Parameters
----------
roi_id : int
The ID of the ROI to extract the response matrix from.
pool_directions : bool, optional
Whether to extract the response matrix for a single direction,
by default False
direction : float, optional
The direction to extract the response matrix from. To be used
only if `pool_directions` is set to False. By default
sys.float_info.min
Returns
-------
Tuple[Tuple[float, float, float], np.ndarray, np.ndarray]
A tuple containing the peak response amplitude and its
corresponding SF and TF, the best fit parameter values
obtained from the Gaussian fit, and the median-subtracted
response matrix.
"""
(
response_matrix,
sf_0,
Expand All @@ -601,7 +611,8 @@ def get_gaussian_fits_for_roi(self, roi_id: int) -> dict:
roi_id=roi_id,
sfs=self.data.spatial_frequencies,
tfs=self.data.temporal_frequencies,
pool_directions=True,
dir=direction,
pool_directions=pool_directions,
)

initial_parameters = [
Expand All @@ -621,14 +632,20 @@ def get_gaussian_fits_for_roi(self, roi_id: int) -> dict:
self.data.config,
)

roi_data["pooled"] = (
if best_result is None:
logging.warning(
f"ROI {roi_id} and direction {dir} failed to fit."
+ "Skipping..."
)
best_result = OptimizeResult()
best_result.x = np.nan * np.ones(6)

return (
(sf_0, tf_0, peak_response),
best_result.x,
response_matrix,
)

return roi_data

def get_all_fits(self) -> None:
"""Calculate the Gaussian fits for all ROIs using multiprocessing.
Expand Down
13 changes: 10 additions & 3 deletions rsp_vision/console_application/app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import pickle
import sys
from pathlib import Path

import rich
from fancylog import fancylog
Expand Down Expand Up @@ -47,12 +48,15 @@ def analysis_pipeline() -> None:
# pipeline draft
start_logging()

    # TODO: add TUI or GUI functionality to get input from user
folder_name = Prompt.ask(
" \
Please provide only the dataset name.\n \
Format: Mouse_Id_Hemisphere_BrainRegion_Monitor_position.\n \
Example: AK_1111739_hL_RSPd_monitor_front\n \
Example (1 day): AK_1111739_hL_RSPd_monitor_front\n \
Example (2 days): BY_IAA_1117276_hR_RSPg_monitor_front\n \
Example (1 day, BIG): AS_1112809_hL_V1_monitor_front-right_low\n \
Example (2 days, big file): CX_1112654_hL_RSPd_monitor_front\n \
Example (2 days, big file): CX_1112837_hL_RSPd_monitor_front\n \
📄"
)

Expand All @@ -71,7 +75,10 @@ def analysis_pipeline() -> None:
logging.info("Analysis finished")
logging.info(f"Updated photon_data object: {photon_data}")

with open(f"{folder_name}_data.pickle", "wb") as f:
saving_path = (
Path(config["paths"]["output"]) / f"{folder_name}_data.pickle"
)
with open(saving_path, "wb") as f:
pickle.dump(photon_data, f)
logging.info("Analysis saved")

Expand Down
Loading

0 comments on commit 5728e8f

Please sign in to comment.