egocentric align

sgoldenlab · Dec 3, 2024 · b2cb092 · b2cb092
1 parent 18b87c2
commit b2cb092
Show file tree

Hide file tree

Showing 13 changed files with 235 additions and 136 deletions.
diff --git a/docs/_static/img/EgocentricalAligner_2.webm b/docs/_static/img/EgocentricalAligner_2.webm
diff --git a/docs/tables/egocentrically_align_pose_numba.csv b/docs/tables/egocentrically_align_pose_numba.csv
@@ -0,0 +1,10 @@
+FRAMES (MILLIONS),NUMBA TIME (S),NUMBA TIME (STDEV),NUMPY TIME (S),NUMPY TIME (STDEV)
+1,,,10.138,0.4589
+2,,,16.894,0.264
+4,,,33.813,0.3712255
+8,,,73.43435,0.526412
+16,,,134.0284325,0.858443488
+32,,,270.4346202,1.3789
+64,,,540.896359,1.781485522
+7 BODY-PARTS PER FRAME,,,,
+3 ITERATIONS,,,,
diff --git a/setup.py b/setup.py
@@ -29,7 +29,7 @@
 # Setup configuration
 setuptools.setup(
     name="Simba-UW-tf-dev",
-    version="2.3.8",
+    version="2.3.9",
     author="Simon Nilsson, Jia Jie Choong, Sophia Hwang",
     author_email="[email protected]",
     description="Toolkit for computer classification and analysis of behaviors in experimental animals",

diff --git a/simba/data_processors/egocentric_aligner.py b/simba/data_processors/egocentric_aligner.py
diff --git a/simba/mixins/train_model_mixin.py b/simba/mixins/train_model_mixin.py
@@ -326,10 +326,10 @@ def calc_permutation_importance(self,
         print("Calculating feature permutation importances...")
         timer = SimbaTimer(start=True)
         p_importances = permutation_importance(clf, x_test, y_test, n_repeats=n_repeats, random_state=0)
-        df = pd.DataFrame(
-            np.column_stack([feature_names, p_importances.importances_mean, p_importances.importances_std]),
-            columns=["FEATURE_NAME", "FEATURE_IMPORTANCE_MEAN", "FEATURE_IMPORTANCE_STDEV"])
+        df = pd.DataFrame(np.column_stack([feature_names, p_importances.importances_mean, p_importances.importances_std]), columns=["FEATURE_NAME", "FEATURE_IMPORTANCE_MEAN", "FEATURE_IMPORTANCE_STDEV"])
         df = df.sort_values(by=["FEATURE_IMPORTANCE_MEAN"], ascending=False)
+        df["FEATURE_IMPORTANCE_MEAN"] = df["FEATURE_IMPORTANCE_MEAN"].astype(np.float64)
+        df["FEATURE_IMPORTANCE_STDEV"] = df["FEATURE_IMPORTANCE_STDEV"].astype(np.float64)
         if save_file_no != None:
             save_file_path = os.path.join(save_dir, f'{clf_name}_{save_file_no}_permutations_importances.csv')
             save_file_path_plot = os.path.join(save_dir, f'{clf_name}_{save_file_no}_permutations_importances.png')

diff --git a/simba/model/regression/model.py b/simba/model/regression/model.py
@@ -42,6 +42,7 @@ def fit_xgb(x: pd.DataFrame,
     check_int(name=f'{fit_xgb.__name__} verbosity', value=verbosity, min_value=0, max_value=3)
     check_float(name=f'{fit_xgb.__name__} learning_rate', value=learning_rate, min_value=0.1, max_value=1.0)
     xgb_reg = xgb.XGBRegressor(objective=objective, max_depth=max_depth, n_estimators=n_estimators, verbosity=verbosity)
+
     return xgb_reg.fit(X=x, y=y)
 
 def transform_xgb(x: pd.DataFrame, model: xgb.XGBRegressor):

diff --git a/simba/outlier_tools/outlier_corrector_location.py b/simba/outlier_tools/outlier_corrector_location.py
@@ -108,7 +108,7 @@ def __correct_outliers(self, df: pd.DataFrame, above_criteria_dict: dict):
                 col_names = [f'{body_part_name}_x', f'{body_part_name}_y']
                 if len(frm_idx) > 0:
                     df.loc[frm_idx, col_names] = np.nan
-        return df.fillna(method='ffill', axis=1).fillna(0)
+        return df.fillna(method='ffill', axis=0).fillna(0)
 
     def run(self):
         self.logs, self.frm_cnts = {}, {}
@@ -131,7 +131,6 @@ def run(self):
                     bp_name = animal_bps["X_bps"][bp_cnt][:-2]
                     bp_dict[animal_name][bp_name] = animal_arr[:, bp_col_start: bp_col_start + 2]
             above_criteria_dict = self.__find_location_outliers(bp_dict=bp_dict, animal_criteria=animal_criteria)
-
             df = self.__correct_outliers(df=df, above_criteria_dict=above_criteria_dict)
             write_df(df=df, file_type=self.file_type, save_path=save_path)
             self.logs[video_name], self.frm_cnts[video_name] = above_criteria_dict, len(df)

diff --git a/simba/outlier_tools/outlier_corrector_location_mp.py b/simba/outlier_tools/outlier_corrector_location_mp.py
@@ -50,7 +50,7 @@ def __correct_outliers(df: pd.DataFrame, above_criteria_dict: dict):
                 col_names = [f'{body_part_name}_x', f'{body_part_name}_y']
                 if len(frm_idx) > 0:
                     df.loc[frm_idx, col_names] = np.nan
-        return df.fillna(method='ffill', axis=1).fillna(0)
+        return df.fillna(method='ffill', axis=0).fillna(0)
 
     video_timer = SimbaTimer(start=True)
     _, video_name, _ = get_fn_ext(data_path)

diff --git a/simba/utils/data.py b/simba/utils/data.py
@@ -1423,6 +1423,75 @@ def get_library_version(library_name: str) -> str:
 
 
 
+def egocentrically_align_pose(data: np.ndarray,
+                              anchor_1_idx: int,
+                              anchor_2_idx: int,
+                              anchor_location: np.ndarray,
+                              direction: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+
+    """
+    Aligns a set of 2D points egocentrically based on two anchor points and a target direction.
+
+    Rotates and translates a 3D array of 2D points (e.g., time-series of frame-wise data) such that
+    one anchor point is aligned to a specified location, and the direction between the two anchors is aligned
+    to a target angle.
+
+
+    .. video:: _static/img/EgocentricalAligner_2.webm
+       :width: 800
+       :autoplay:
+       :loop:
+
+    .. note::
+       The aligned points and the returned centers are stored in ``np.int32`` arrays, so any fractional part of the
+       computed coordinates is discarded when the values are written to the output arrays.
+
+    :param np.ndarray data: A 3D array of shape `(num_frames, num_points, 2)` containing 2D points for each frame. Each frame is represented as a 2D array of shape `(num_points, 2)`, where each row corresponds to a point's (x, y) coordinates.
+    :param int anchor_1_idx: The index of the first anchor point in `data` used as the center of alignment. This body-part will be placed at `anchor_location`. Has to be a valid index along axis 1 of `data`.
+    :param int anchor_2_idx: The index of the second anchor point in `data` used to calculate the direction vector. This body-part will be located `direction` degrees from the anchor_1 body-part. Has to be a valid index along axis 1 of `data` and different from `anchor_1_idx`.
+    :param np.ndarray anchor_location: A 1D array of shape `(2,)` specifying the target (x, y) location for `anchor_1_idx` after alignment.
+    :param int direction: The target direction in degrees (0-360) to which the vector between the two anchors will be aligned.
+    :return: A tuple containing the rotated data, and variables required for also rotating the video using the same rules:
+             - `aligned_data`: A 3D array of shape `(num_frames, num_points, 2)` with the aligned 2D points.
+             - `centers`: A 2D array of shape `(num_frames, 2)` containing the original locations of `anchor_1_idx` in each frame before alignment.
+             - `rotation_vectors`: A 3D array of shape `(num_frames, 2, 2)` containing the rotation matrices applied to each frame.
+    :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
+    :raises InvalidInputError: If `anchor_1_idx` and `anchor_2_idx` are the same index.
+
+    :example:
+    >>> data = np.random.randint(0, 500, (100, 7, 2))
+    >>> anchor_1_idx = 5 # E.g., the animal tail-base is the body-part at index 5
+    >>> anchor_2_idx = 6 # E.g., the animal nose is the body-part at index 6 (the last of the 7 body-parts)
+    >>> anchor_location = np.array([250, 250]) # the tail-base (index 5) is placed at x=250, y=250 in the image.
+    >>> direction = 90 # The nose (index 6) will be placed in direction 90 degrees (S) relative to the tailbase.
+    >>> results, centers, rotation_vectors = egocentrically_align_pose(data=data, anchor_1_idx=anchor_1_idx, anchor_2_idx=anchor_2_idx, anchor_location=anchor_location, direction=direction)
+    """
+
+    check_valid_array(data=data, source=egocentrically_align_pose.__name__, accepted_ndims=(3,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    # Valid body-part indexes run from 0 to num_points - 1. Assumes check_int treats max_value as inclusive
+    # (as the 0-360 direction check below suggests), hence the -1 to reject an out-of-range index up front.
+    max_body_part_idx = data.shape[1] - 1
+    check_int(name=f'{egocentrically_align_pose.__name__} anchor_1_idx', min_value=0, max_value=max_body_part_idx, value=anchor_1_idx)
+    check_int(name=f'{egocentrically_align_pose.__name__} anchor_2_idx', min_value=0, max_value=max_body_part_idx, value=anchor_2_idx)
+    if anchor_1_idx == anchor_2_idx: raise InvalidInputError(msg=f'Anchor 1 index ({anchor_1_idx}) cannot be the same as Anchor 2 index ({anchor_2_idx})', source=egocentrically_align_pose.__name__)
+    check_int(name=f'{egocentrically_align_pose.__name__} direction', value=direction, min_value=0, max_value=360)
+    check_valid_array(data=anchor_location, source=egocentrically_align_pose.__name__, accepted_ndims=(1,), accepted_axis_0_shape=[2,], accepted_dtypes=Formats.NUMERIC_DTYPES.value)
+    target_angle = np.deg2rad(direction)
+    # Outputs are pre-allocated; the int32 arrays discard fractional parts on assignment.
+    centers = np.full((data.shape[0], 2), fill_value=-1, dtype=np.int32)
+    rotation_vectors = np.full((data.shape[0], 2, 2), fill_value=-1, dtype=np.float64)
+    results = np.zeros_like(data, dtype=np.int32)
+    for frm_idx in range(data.shape[0]):
+        frm_points = data[frm_idx]
+        frm_anchor_1, frm_anchor_2 = frm_points[anchor_1_idx], frm_points[anchor_2_idx]
+        centers[frm_idx] = frm_anchor_1
+        # Current heading of the anchor_1 -> anchor_2 vector, and the rotation needed to reach the target heading.
+        delta_x, delta_y = frm_anchor_2[0] - frm_anchor_1[0], frm_anchor_2[1] - frm_anchor_1[1]
+        frm_angle = np.arctan2(delta_y, delta_x)
+        frm_rotation_angle = target_angle - frm_angle
+        frm_cos_theta, frm_sin_theta = np.cos(frm_rotation_angle), np.sin(frm_rotation_angle)
+        R = np.array([[frm_cos_theta, -frm_sin_theta], [frm_sin_theta, frm_cos_theta]])
+        rotation_vectors[frm_idx] = R
+        # Rotate all points around anchor_1, then shift so that anchor_1 ends up at anchor_location.
+        keypoints_rotated = np.dot(frm_points - frm_anchor_1, R.T)
+        anchor_1_position_after_rotation = keypoints_rotated[anchor_1_idx]
+        translation_to_target = np.array(anchor_location) - anchor_1_position_after_rotation
+        results[frm_idx] = keypoints_rotated + translation_to_target
+
+    return results, centers, rotation_vectors
+
+
 # run_user_defined_feature_extraction_class(config_path='/Users/simon/Desktop/envs/troubleshooting/circular_features_zebrafish/project_folder/project_config.ini', file_path='/Users/simon/Desktop/fish_feature_extractor_2023_version_5.py')
 
 

diff --git a/simba/utils/enums.py b/simba/utils/enums.py
@@ -171,7 +171,7 @@ class Formats(Enum):
         "box": ["bx.h5", "bx_filtered.h5"],
         "ellipse": ["el.h5", "el_filtered.h5"],
     }
-
+    EXPECTED_VIDEO_INFO_COLS = ["Video", "fps", "Resolution_width", "Resolution_height", "Distance_in_mm", "pixels/mm"]
 
 class Options(Enum):
     ROLLING_WINDOW_DIVISORS = [2, 5, 6, 7.5, 15]

diff --git a/simba/utils/read_write.py b/simba/utils/read_write.py
@@ -48,7 +48,7 @@
                                 check_instance, check_int,
                                 check_nvidea_gpu_available, check_str,
                                 check_valid_array, check_valid_boolean,
-                                check_valid_dataframe, check_valid_lst)
+                                check_valid_dataframe, check_valid_lst, check_if_valid_rgb_tuple)
 from simba.utils.enums import ConfigKey, Dtypes, Formats, Keys, Options
 from simba.utils.errors import (DataHeaderError, DuplicationError,
                                 FFMPEGCodecGPUError, FileExistError,
@@ -353,6 +353,11 @@ def read_project_path_and_file_type(config: configparser.ConfigParser) -> Tuple[
     return project_path, file_type
 
 
+def bgr_to_rgb_tuple(value: Tuple[int, int, int]) -> Tuple[int, int, int]:
+    """
+    Convert a BGR color tuple to an RGB color tuple by swapping the first and third channels.
+
+    :param Tuple[int, int, int] value: Color in (blue, green, red) order. Passed to ``check_if_valid_rgb_tuple`` before conversion.
+    :return: The same color in (red, green, blue) order.
+    :rtype: Tuple[int, int, int]
+    """
+    check_if_valid_rgb_tuple(data=value)
+    red, green, blue = value[2], value[1], value[0]
+    return (red, green, blue)
+
 def read_video_info_csv(file_path: Union[str, os.PathLike]) -> pd.DataFrame:
     """
     Helper to read the project_folder/logs/video_info.csv of the SimBA project in as a pd.DataFrame

diff --git a/...st_projects/two_c57/models/generated_models/Attack_permutations_importances.png b/...st_projects/two_c57/models/generated_models/Attack_permutations_importances.png
diff --git a/tests/test_train_model_mixin.py b/tests/test_train_model_mixin.py
@@ -5,7 +5,7 @@
 from sklearn.ensemble import RandomForestClassifier
 
 from simba.mixins.train_model_mixin import TrainModelMixin
-from simba.utils.read_write import read_config_file, read_df
+from simba.utils.read_write import read_config_file, read_df, read_pickle
 
 
 IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true"
@@ -47,10 +47,11 @@ def test_random_undersampler(sample_ratio):
     assert y_train_out.reset_index(drop=True).equals(pd.Series([1, 0], name='Test'))
 
 @pytest.mark.parametrize("clf_path", ['tests/data/test_projects/two_c57/models/generated_models/Attack.sav'])
+#@pytest.mark.parametrize("clf_path", [r"C:\projects\simba\simba\tests\data\test_projects\two_c57\models\generated_models\Attack.sav"])
 def test_calc_permutation_importance(clf_path):
     x_test = np.array([[1, 2], [1, 2], [1, 2]])
     y_test = np.array([[1], [1], [0]])
-    clf = read_df(file_path=clf_path, file_type='pickle')
+    clf = read_pickle(data_path=clf_path)
     _ = TrainModelMixin().calc_permutation_importance(x_test=x_test, y_test=y_test, clf=clf, feature_names=['Feature_1', 'Feature_2'], clf_name='Attack', save_dir=os.path.dirname(clf_path))
     assert os.path.isfile(os.path.join(os.path.dirname(clf_path), 'Attack_permutations_importances.csv'))