Merge pull request #64 from kmnhan/dev-io

NeXus support in `io`
kmnhan · Oct 22, 2024 · bda6d2c · bda6d2c
2 parents 2f1fd18 + 53e2cf2
commit bda6d2c
Show file tree

Hide file tree

Showing 28 changed files with 1,352 additions and 169 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -171,6 +171,11 @@ def linkcode_resolve(domain, info) -> str | None:
     "ColorType": "`ColorType <matplotlib.typing.ColorType>`",
     "RGBColorType": "`RGBColorType <matplotlib.typing.RGBColorType>`",
     "RGBAColorType": "`RGBAColorType <matplotlib.typing.RGBAColorType>`",
+    "path-like": "`path-like <os.PathLike>`",
+    "NXfield": "`NXfield <nexusformat.nexus.tree.NXfield>`",
+    "NXgroup": "`NXgroup <nexusformat.nexus.tree.NXgroup>`",
+    "NXlink": "`NXlink <nexusformat.nexus.tree.NXlink>`",
+    "NXdata": "`NXdata <nexusformat.nexus.tree.NXdata>`",
 }
 napoleon_custom_sections = [("Signals", "params_style")]
 
@@ -201,6 +206,7 @@ def linkcode_resolve(domain, info) -> str | None:
     "joblib": ("https://joblib.readthedocs.io/en/latest", None),
     "panel": ("https://panel.holoviz.org", None),
     "hvplot": ("https://hvplot.holoviz.org", None),
+    "nexusformat": ("https://nexpy.github.io/nexpy", None),
 }
 
 

diff --git a/docs/source/user-guide/curve-fitting.ipynb b/docs/source/user-guide/curve-fitting.ipynb
@@ -828,6 +828,30 @@
     "result_ds"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "The accessor works with any `lmfit` model, including background models from\n",
+    "[lmfitxps](https://lmfitxps.readthedocs.io/). If you have `lmfitxps` installed,\n",
+    "you can use its `ShirleyBG` model to iteratively fit a Shirley background to the\n",
+    "data:\n",
+    "```python\n",
+    "from lmfitxps.models import ShirleyBG\n",
+    "from lmfit.models import GaussianModel\n",
+    "\n",
+    "darr.modelfit(\"alpha\", GaussianModel() + ShirleyBG())\n",
+    "```"
+   ]
+  },
   {
    "cell_type": "raw",
    "metadata": {

diff --git a/docs/source/user-guide/io.ipynb b/docs/source/user-guide/io.ipynb
@@ -860,7 +860,7 @@
     "# Generate and save cuts with different beta values\n",
     "for i, beta in enumerate(beta_coords):\n",
     "    data = make_data(beta=beta, temp=20.0, hv=50.0)\n",
-    "    filename = f\"{tmp_dir.name}/data_001_S{str(i+1).zfill(3)}.h5\"\n",
+    "    filename = f\"{tmp_dir.name}/data_001_S{str(i + 1).zfill(3)}.h5\"\n",
     "    data.to_netcdf(filename, engine=\"h5netcdf\")\n",
     "\n",
     "# Write scan coordinates to a csv file\n",
@@ -874,7 +874,7 @@
     "# Generate some cuts with different band shifts\n",
     "for i in range(4):\n",
     "    data = make_data(beta=5.0, temp=20.0, hv=50.0, bandshift=-i * 0.05)\n",
-    "    filename = f\"{tmp_dir.name}/data_{str(i+2).zfill(3)}.h5\"\n",
+    "    filename = f\"{tmp_dir.name}/data_{str(i + 2).zfill(3)}.h5\"\n",
     "    data.to_netcdf(filename, engine=\"h5netcdf\")"
    ]
   },
@@ -1148,15 +1148,10 @@
     "        # Get all valid data files in directory\n",
     "        files = {}\n",
     "        for path in erlab.io.utils.get_files(data_dir, extensions=[\".h5\"]):\n",
-    "            # Base name\n",
-    "            data_name = os.path.splitext(os.path.basename(path))[0]\n",
-    "\n",
     "            # If multiple scans, strip the _S### part\n",
-    "            name_match = re.match(r\"(.*?_\\d{3})_(?:_S\\d{3})?\", data_name)\n",
-    "            if name_match is not None:\n",
-    "                data_name = name_match.group(1)\n",
-    "\n",
-    "            files[data_name] = path\n",
+    "            name_match = re.match(r\"(.*?_\\d{3})_(?:_S\\d{3})?\", path.stem)\n",
+    "            data_name = path.stem if name_match is None else name_match.group(1)\n",
+    "            files[data_name] = str(path)\n",
     "\n",
     "        # Map dataframe column names to data attributes\n",
     "        attrs_mapping = {\n",

diff --git a/src/erlab/accessors/general.py b/src/erlab/accessors/general.py
@@ -29,9 +29,12 @@ class PlotAccessor(ERLabDataArrayAccessor):
     def __call__(self, *args, **kwargs):
         """Plot the data.
 
-        If a 2D data array is provided, it is plotted using :func:`plot_array
-        <erlab.plotting.general.plot_array>`. Otherwise, it is equivalent to calling
-        :meth:`xarray.DataArray.plot`.
+        Plots two-dimensional data using :func:`plot_array
+        <erlab.plotting.general.plot_array>`. For non-two-dimensional data, the method
+        falls back to :meth:`xarray.DataArray.plot`.
+
+        Also sets fancy labels using :func:`fancy_labels
+        <erlab.plotting.annotations.fancy_labels>`.
 
         Parameters
         ----------
@@ -99,6 +102,8 @@ def itool(self, *args, **kwargs):
     def hvplot(self, *args, **kwargs):
         """`hvplot <https://hvplot.holoviz.org/>`_-based interactive visualization.
 
+        This method is a convenience wrapper that handles importing ``hvplot``.
+
         Parameters
         ----------
         *args
@@ -313,13 +318,13 @@ def __call__(
         Parameters
         ----------
         indexers
-            Dictionary specifying the dimensions and their values or slices.
-            Position along a dimension can be specified in three ways:
+            Dictionary specifying the dimensions and their values or slices. Position
+            along a dimension can be specified in three ways:
 
             - As a scalar value: `alpha=-1.2`
 
               If no width is specified, the data is selected along the nearest value. It
-              is equivalent to `xarray.DataArray.sel` with `method='nearest'`.
+              is equivalent to :meth:`xarray.DataArray.sel` with `method='nearest'`.
 
             - As a value and width: `alpha=5, alpha_width=0.5`
 
@@ -342,10 +347,23 @@ def __call__(
         DataArray
             The selected and averaged data.
 
-        Raises
-        ------
-        ValueError
-            If a specified dimension is not present in the data.
+
+        Note
+        ----
+        Unlike :meth:`xarray.DataArray.sel`, this method treats all dimensions without
+        coordinates as equivalent to having coordinates assigned from 0 to ``n-1``,
+        where ``n`` is the size of the dimension. For example:
+
+        .. code-block:: python
+
+            da = xr.DataArray(np.random.rand(10), dims=("x",))
+
+            da.sel(x=slice(2, 3))  # This works
+
+            da.sel(x=slice(2.0, 3.0))  # This raises a TypeError
+
+            da.qsel(x=slice(2.0, 3.0))  # This works
+
         """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "qsel")
 
@@ -359,9 +377,16 @@ def __call__(
                     raise ValueError(
                         f"Slice not allowed for width of dimension `{dim}`"
                     )
+
                 bin_widths[dim] = float(width)
                 if dim not in self._obj.dims:
-                    raise ValueError(f"Dimension `{dim}` not found in data.")
+                    raise ValueError(f"Dimension `{dim}` not found in data")
+            else:
+                target_dim = str(dim).removesuffix("_width")
+                if target_dim not in indexers:
+                    raise ValueError(
+                        f"`{target_dim}_width` was specified without `{target_dim}`"
+                    )
 
         scalars: dict[Hashable, float] = {}
         slices: dict[Hashable, slice] = {}
@@ -381,35 +406,53 @@ def __call__(
                         f"Slice not allowed for value of dimension `{dim}` "
                         "with width specified"
                     )
-                slices[dim] = slice(value - width / 2, value + width / 2)
+                slices[dim] = slice(value - width / 2.0, value + width / 2.0)
                 avg_dims.append(dim)
 
+        unindexed_dims: list[Hashable] = [
+            k for k in slices | scalars if k not in self._obj.indexes
+        ]
+
+        if len(unindexed_dims) >= 1:
+            out = self._obj.assign_coords(
+                {k: np.arange(self._obj.sizes[k]) for k in unindexed_dims}
+            )
+        else:
+            out = self._obj
+
         if len(scalars) >= 1:
             for k, v in scalars.items():
-                if v < self._obj[k].min() or v > self._obj[k].max():
+                if v < out[k].min() or v > out[k].max():
                     warnings.warn(
                         f"Selected value {v} for `{k}` is outside coordinate bounds",
                         stacklevel=2,
                     )
-            out = self._obj.sel(
-                {str(k): v for k, v in scalars.items()}, method="nearest"
-            )
-        else:
-            out = self._obj
+            out = out.sel({str(k): v for k, v in scalars.items()}, method="nearest")
 
         if len(slices) >= 1:
             out = out.sel(slices)
 
-            lost_coords = {k: out[k].mean() for k in avg_dims}
+            lost_coords = {
+                k: out[k].mean() for k in avg_dims if k not in unindexed_dims
+            }
             out = out.mean(dim=avg_dims, keep_attrs=True)
             out = out.assign_coords(lost_coords)
 
         if verbose:
-            print(
-                f"Selected data with {scalars} and {slices}, averaging over {avg_dims}"
-            )
+            out_str = "Selected data with "
+            if len(scalars) >= 1:
+                out_str = out_str + f"{scalars}"
+            if len(slices) >= 1:
+                if len(scalars) >= 1:
+                    out_str = out_str + " and "
+                out_str = out_str + f"{slices}"
 
-        return out
+            if len(avg_dims) >= 1:
+                out_str = out_str + f", averaging over {avg_dims}"
+
+            print(out_str)
+
+        return out.drop_vars(unindexed_dims, errors="ignore")
 
     def around(
         self, radius: float | dict[Hashable, float], *, average: bool = True, **sel_kw

diff --git a/src/erlab/analysis/fit/functions/dynamic.py b/src/erlab/analysis/fit/functions/dynamic.py
@@ -154,12 +154,16 @@ class MultiPeakFunction(DynamicFunction):
         `offset`, each corresponding to the Fermi level, temperature in K, and constant
         background.
     background
-        The type of background to include in the model. The options are: ``'constant'``,
-        ``'linear'``, ``'polynomial'``, or ``'none'``. If ``'constant'``, adds a
-        ``const_bkg`` parameter. If ``'linear'``, adds a ``lin_bkg`` parameter and a
-        ``const_bkg`` parameter. If ``'polynomial'``, adds  ``c0``, ``c1``, ...
-        corresponding to the polynomial coefficients. The polynomial degree can be
-        specified with `degree`. If ``'none'``, no background is added.
+        The type of background to include in the model. Possible values are:
+
+        ============  ============================================================
+        Value         Additional parameters
+        ============  ============================================================
+        'none'        None
+        'constant'    ``const_bkg``
+        'linear'      ``lin_bkg``, ``const_bkg``
+        'polynomial'  ``c0``, ``c1``, ... depending on `degree`
+        ============  ============================================================
     degree
         The degree of the polynomial background. Only used if `background` is
         ``'polynomial'``. Default is 2.

diff --git a/src/erlab/analysis/fit/models.py b/src/erlab/analysis/fit/models.py
@@ -267,7 +267,7 @@ def __init__(
         npeaks: int = 1,
         peak_shapes: list[str] | str | None = None,
         fd: bool = True,
-        background: Literal["constant", "linear", "polynomial", "none"] = "linear",
+        background: Literal["none", "constant", "linear", "polynomial"] = "linear",
         degree: int = 2,
         convolve: bool = True,
         **kwargs,

diff --git a/src/erlab/analysis/image.py b/src/erlab/analysis/image.py
@@ -99,28 +99,29 @@ def gaussian_filter(
 
     Parameters
     ----------
-    darr
+    darr : DataArray
         The input DataArray.
-    sigma
+    sigma : float or Sequence of floats or dict
         The standard deviation(s) of the Gaussian filter in data dimensions. If a float,
         the same value is used for all dimensions, each scaled by the data step. If a
         dict, the value can be specified for each dimension using dimension names as
         keys. The filter is only applied to the dimensions specified in the dict. If a
         sequence, the values are used in the same order as the dimensions of the
         DataArray.
-    order
+    order : int or Sequence of ints or dict
         The order of the filter along each dimension. If an int, the same order is used
         for all dimensions. See Notes below for other options. Defaults to 0.
-    mode
+    mode : str or Sequence of str or dict
         The boundary mode used for the filter. If a str, the same mode is used for all
         dimensions. See Notes below for other options. Defaults to 'nearest'.
     cval
         Value to fill past edges of input if mode is 'constant'. Defaults to 0.0.
     truncate
         The truncation value used for the Gaussian filter. Defaults to 4.0.
-    radius
-        The radius of the Gaussian filter in data units. See Notes below. Defaults to
-        None.
+    radius : float or Sequence of floats or dict, optional
+        The radius of the Gaussian filter in data units. See Notes below. If specified,
+        the size of the kernel along each axis will be ``2*radius + 1``, and `truncate`
+        is ignored.
 
     Returns
     -------
@@ -367,30 +368,30 @@ def ndsavgol(
 
     Parameters
     ----------
-    arr
+    arr : array-like
         The input N-dimensional array to be filtered. The array will be cast to float64
         before filtering.
-    window_shape
+    window_shape : int or tuple of ints
         The shape of the window used for filtering. If an integer, the same size will be
         used across all axes.
-    polyorder
+    polyorder : int
         The order of the polynomial used to fit the samples. `polyorder` must be less
         than the minimum of `window_shape`.
-    deriv
+    deriv : int or tuple of ints
         The order of the derivative to compute given as a single integer or a tuple of
         integers. If an integer, the derivative of that order is computed along all
         axes. If a tuple of integers, the derivative of each order is computed along the
         corresponding dimension. The default is 0, which means to filter the data
         without differentiating.
-    delta
+    delta : float or tuple of floats
         The spacing of the samples to which the filter will be applied. If a float, the
         same value is used for all axes. If a tuple, the values are used in the same
         order as in `deriv`. The default is 1.0.
     mode
         Must be 'mirror', 'constant', 'nearest', or 'wrap'. This determines the type of
         extension to use for the padded signal to which the filter is applied.  When
         `mode` is 'constant', the padding value is given by `cval`.
-    cval
+    cval : float
         Value to fill past the edges of the input if `mode` is 'constant'. Default is
         0.0.
     method
@@ -524,8 +525,8 @@ def _calc_savgol(values_ptr, len_values, result, data) -> int:
 
 def gradient_magnitude(
     arr: npt.NDArray[np.float64],
-    dx: np.float64,
-    dy: np.float64,
+    dx: float,
+    dy: float,
     mode: str = "nearest",
     cval: float = 0.0,
 ) -> npt.NDArray[np.float64]: