Merge pull request #361 from freemansw1/add_tests_time_cell_min

Add tests for `time_cell_min` and update documentation for the parameter
tobac-project · Dec 1, 2023 · 509a17c · 509a17c
2 parents f1d3a4e + c0c55d4
commit 509a17c
Show file tree

Hide file tree

Showing 4 changed files with 223 additions and 144 deletions.
diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
@@ -423,14 +423,14 @@ def feature_detection_threshold(
         Standard deviation for initial filtering step. Default is 0.5.
 
     n_erosion_threshold: int, optional
-        Number of pixel by which to erode the identified features.
+        Number of pixels by which to erode the identified features.
         Default is 0.
 
     n_min_threshold : int, optional
-        Minimum number of identified features. Default is 0.
+        Minimum number of identified contiguous pixels for a feature to be detected. Default is 0.
 
     min_distance : float, optional
-        Minimum distance between detected features (in meter). Default is 0.
+        Minimum distance between detected features (in meters). Default is 0.
 
     idx_start : int, optional
         Feature id to start with. Default is 0.
@@ -939,14 +939,14 @@ def feature_detection_multithreshold_timestep(
         Standard deviation for initial filtering step. Default is 0.5.
 
     n_erosion_threshold: int, optional
-        Number of pixel by which to erode the identified features.
+        Number of pixels by which to erode the identified features.
         Default is 0.
 
     n_min_threshold : int, optional
-        Minimum number of identified features. Default is 0.
+        Minimum number of identified contiguous pixels for a feature to be detected. Default is 0.
 
     min_distance : float, optional
-        Minimum distance between detected features (in meter). Default is 0.
+        Minimum distance between detected features (in meters). Default is 0.
 
     feature_number_start : int, optional
         Feature id to start with. Default is 1.
@@ -961,10 +961,10 @@ def feature_detection_multithreshold_timestep(
     vertical_axis: int
         The vertical axis number of the data.
     dxy : float
-        Grid spacing in meter.
+        Grid spacing in meters.
 
     wavelength_filtering: tuple, optional
-       Minimum and maximum wavelength for spectral filtering in meter. Default is None.
+       Minimum and maximum wavelength for spectral filtering in meters. Default is None.
 
     strict_thresholding: Bool, optional
         If True, a feature can only be detected if all previous thresholds have been met.
@@ -1174,25 +1174,18 @@ def feature_detection_multithreshold(
         Flag choosing method used for the position of the tracked
         feature. Default is 'center'.
 
-    coord_interp_kind : str, optional
-        The kind of interpolation for coordinates. Default is 'linear'.
-        For 1d interp, {'linear', 'nearest', 'nearest-up', 'zero',
-                        'slinear', 'quadratic', 'cubic',
-                        'previous', 'next'}.
-        For 2d interp, {'linear', 'cubic', 'quintic'}.
-
     sigma_threshold: float, optional
         Standard deviation for initial filtering step. Default is 0.5.
 
     n_erosion_threshold: int, optional
-        Number of pixel by which to erode the identified features.
+        Number of pixels by which to erode the identified features.
         Default is 0.
 
     n_min_threshold : int, optional
-        Minimum number of identified features. Default is 0.
+        Minimum number of identified contiguous pixels for a feature to be detected. Default is 0.
 
     min_distance : float, optional
-        Minimum distance between detected features (in meter). Default is 0.
+        Minimum distance between detected features (in meters). Default is 0.
 
     feature_number_start : int, optional
         Feature id to start with. Default is 1.
@@ -1441,15 +1434,15 @@ def filter_min_distance(
     features:      pandas DataFrame
                    features
     dxy:           float
-        Constant horzontal grid spacing (m).
+        Constant horizontal grid spacing (meters).
     dz: float
-        Constant vertical grid spacing (m), optional. If not specified
+        Constant vertical grid spacing (meters), optional. If not specified
         and the input is 3D, this function requires that `z_coordinate_name` is available
         in the `features` input. If you specify a value here, this function assumes
         that it is the constant z spacing between points, even if `z_coordinate_name`
         is specified.
     min_distance:  float
-        minimum distance between detected features (m)
+        minimum distance between detected features (meters)
     x_coordinate_name: str
         The name of the x coordinate to calculate distance based on in meters.
         This is typically `projection_x_coordinate`. Currently unused.
@@ -1460,7 +1453,7 @@ def filter_min_distance(
         The name of the z coordinate to calculate distance based on in meters.
         This is typically `altitude`. If None, tries to auto-detect.
     target: {'maximum', 'minimum'}, optional
-        Flag to determine if tracking is targetting minima or maxima in
+        Flag to determine if tracking is targeting minima or maxima in
         the data. Default is 'maximum'.
     PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
         Sets whether to use periodic boundaries, and if so in which directions.
@@ -1477,7 +1470,6 @@ def filter_min_distance(
     max_h2: int, optional
         Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size.
 
-
     Returns
     -------
     pandas DataFrame

diff --git a/tobac/segmentation.py b/tobac/segmentation.py
@@ -70,9 +70,9 @@ def add_markers(
          or a box of user-set size
     seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
         This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
-        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
-        seed area for each dimension separately.
-        Note: we recommend the use of odd numbers for this. If you give
+        integer (units of number of pixels), the seed box is identical in all dimensions.
+        If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
+        Note: we strongly recommend the use of odd numbers for this. If you give
         an even number, your seed box will be biased and not centered
         around the feature.
         Note: if two seed boxes overlap, the feature that is seeded will be the
@@ -366,7 +366,7 @@ def segmentation_timestep(
         Default is 3e-3.
 
     target : {'maximum', 'minimum'}, optional
-        Flag to determine if tracking is targetting minima or maxima in
+        Flag to determine if tracking is targeting minima or maxima in
         the data to determine from which direction to approach the threshold
         value. Default is 'maximum'.
 
@@ -376,11 +376,11 @@ def segmentation_timestep(
 
     method : {'watershed'}, optional
         Flag determining the algorithm to use (currently watershedding
-        implemented). 'random_walk' could be uncommented.
+        implemented).
 
     max_distance : float, optional
         Maximum distance from a marker allowed to be classified as
-        belonging to that cell. Default is None.
+        belonging to that cell in meters. Default is None.
 
     vertical_coord : str, optional
         Vertical coordinate in 3D input data. If None, input is checked for
@@ -398,14 +398,18 @@ def segmentation_timestep(
          or a box of user-set size
     seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
         This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
-        integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
-        seed area for each dimension separately. Note: we recommend the use
-        of odd numbers for this. If you give an even number, your seed box will be
-        biased and not centered around the feature.
+        integer (units of number of pixels), the seed box is identical in all dimensions.
+        If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
+        Note: we strongly recommend the use of odd numbers for this. If you give
+        an even number, your seed box will be biased and not centered
+        around the feature.
+        Note: if two seed boxes overlap, the feature that is seeded will be the
+        closer feature.
     segment_number_below_threshold: int
         the marker to use to indicate a segmentation point is below the threshold.
     segment_number_unassigned: int
         the marker to use to indicate a segmentation point is above the threshold but unsegmented.
+        This can be the same as `segment_number_below_threshold`, but can also be set separately.
     statistics: boolean, optional
         Default is None. If True, bulk statistics for the data points assigned to each feature are saved in output.
 
@@ -1129,100 +1133,93 @@ def segmentation(
     statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
 ) -> tuple[iris.cube.Cube, pd.DataFrame]:
     """Use watershedding to determine region above a threshold
-            value around initial seeding position for all time steps of
-            the input data. Works both in 2D (based on single seeding
-            point) and 3D and returns a mask with zeros everywhere around
-            the identified regions and the feature id inside the regions.
-
-            Calls segmentation_timestep at each individal timestep of the
-            input data.
-
-            Parameters
-            ----------
-            features : pandas.DataFrame
-                Output from trackpy/maketrack.
-
-            field : iris.cube.Cube
-                Containing the field to perform the watershedding on.
-
-            dxy : float
-                Grid spacing of the input data.
-
-            statistic : dict, optional
-                Default is None. Optional parameter to calculate bulk statistics within feature detection.
-                Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
-
-    boolean, optional
-                Default is False. If True, bulk statistics for the data points assigned to each feature are saved in output.
-
-            Output:
-            segmentation_out: iris.cube.Cube
-                           Cloud mask, 0 outside and integer numbers according to track inside the cloud
-        =======
-            threshold : float, optional
-                Threshold for the watershedding field to be used for the mask. The watershedding is exclusive of the threshold value, i.e. values greater (less) than the threshold are included in the target region, while values equal to the threshold value are excluded.
-                Default is 3e-3.
-
-            target : {'maximum', 'minimum'}, optional
-                Flag to determine if tracking is targetting minima or maxima in
-                the data. Default is 'maximum'.
-
-            level : slice of iris.cube.Cube, optional
-                Levels at which to seed the cells for the watershedding
-                algorithm. Default is None.
-
-            method : {'watershed'}, optional
-                Flag determining the algorithm to use (currently watershedding
-                implemented). 'random_walk' could be uncommented.
-
-            max_distance : float, optional
-                Maximum distance from a marker allowed to be classified as
-                belonging to that cell. Default is None.
-
-            vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
-                              'geopotential_height'}, optional
-                Name of the vertical coordinate for use in 3D segmentation case
-
-            PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
-                Sets whether to use periodic boundaries, and if so in which directions.
-                'none' means that we do not have periodic boundaries
-                'hdim_1' means that we are periodic along hdim1
-                'hdim_2' means that we are periodic along hdim2
-                'both' means that we are periodic along both horizontal dimensions
-
-            seed_3D_flag: str('column', 'box')
-                Seed 3D field at feature positions with either the full column (default)
-                 or a box of user-set size
-
-            seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
-                This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
-                integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
-                seed area for each dimension separately. Note: we recommend the use
-                of odd numbers for this. If you give an even number, your seed box will be
-                biased and not centered around the feature.
-            segment_number_below_threshold: int
-                the marker to use to indicate a segmentation point is below the threshold.
-            segment_number_unassigned: int
-                the marker to use to indicate a segmentation point is above the threshold but unsegmented.
-        statistic: boolean, optional
-            Default is False. If True, bulk statistics for the data points assigned to each feature are saved in output.
-
-
-            Returns
-            -------
-            segmentation_out : iris.cube.Cube
-                Mask, 0 outside and integer numbers according to track
-                inside the area/volume of the feature.
-
-            features_out : pandas.DataFrame
-                Feature dataframe including the number of cells (2D or 3D) in
-                the segmented area/volume of the feature at the timestep.
-
-            Raises
-            ------
-            ValueError
-                If field_in.ndim is neither 3 nor 4 and 'time' is not included
-                in coords.
+    value around initial seeding position for all time steps of
+    the input data. Works both in 2D (based on single seeding
+    point) and 3D and returns a mask with zeros everywhere around
+    the identified regions and the feature id inside the regions.
+
+    Calls segmentation_timestep at each individual timestep of the
+    input data.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        Output from trackpy/maketrack.
+
+    field : iris.cube.Cube
+        Containing the field to perform the watershedding on.
+
+    dxy : float
+        Grid spacing of the input data in meters.
+
+    threshold : float, optional
+        Threshold for the watershedding field to be used for the mask.
+        Default is 3e-3.
+
+    target : {'maximum', 'minimum'}, optional
+        Flag to determine if tracking is targeting minima or maxima in
+        the data. Default is 'maximum'.
+
+    level : slice of iris.cube.Cube, optional
+        Levels at which to seed the cells for the watershedding
+        algorithm. Default is None.
+
+    method : {'watershed'}, optional
+        Flag determining the algorithm to use (currently watershedding
+        implemented). 'random_walk' could be uncommented.
+
+    max_distance : float, optional
+        Maximum distance from a marker allowed to be classified as
+        belonging to that cell in meters. Default is None.
+
+    vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
+                      'geopotential_height'}, optional
+        Name of the vertical coordinate for use in 3D segmentation case
+
+    PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    seed_3D_flag: str('column', 'box')
+        Seed 3D field at feature positions with either the full column (default)
+         or a box of user-set size
+
+    seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
+        This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
+        integer (units of number of pixels), the seed box is identical in all dimensions.
+        If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
+        Note: we strongly recommend the use of odd numbers for this. If you give
+        an even number, your seed box will be biased and not centered
+        around the feature.
+        Note: if two seed boxes overlap, the feature that is seeded will be the
+        closer feature.
+    segment_number_below_threshold: int
+        the marker to use to indicate a segmentation point is below the threshold.
+    segment_number_unassigned: int
+        the marker to use to indicate a segmentation point is above the threshold but unsegmented.
+    statistic : dict, optional
+        Default is None. Optional parameter to calculate bulk statistics within feature detection.
+        Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
+
+
+    Returns
+    -------
+    segmentation_out : iris.cube.Cube
+        Mask, 0 outside and integer numbers according to track
+        inside the area/volume of the feature.
+
+    features_out : pandas.DataFrame
+        Feature dataframe including the number of cells (2D or 3D) in
+        the segmented area/volume of the feature at the timestep.
+
+    Raises
+    ------
+    ValueError
+        If field_in.ndim is neither 3 nor 4 and 'time' is not included
+        in coords.
     """
     import pandas as pd
     from iris.cube import CubeList