lsst-dm · eacharles · Sep 24, 2024 · Jul 29, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/python/lsst/meas/pz/estimate_pz_task.py b/python/lsst/meas/pz/estimate_pz_task.py
diff --git a/python/lsst/meas/pz/estimate_pz_task_knn.py b/python/lsst/meas/pz/estimate_pz_task_knn.py
@@ -0,0 +1,98 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+__all__ = [
+    "EstimatePZKNNAlgoConfig",
+    "EstimatePZKNNAlgoTask",
+    "EstimatePZKNNTask",
+    "EstimatePZKNNConfig",
+]
+
+from rail.estimation.estimator import CatEstimator
+from rail.estimation.algos.k_nearneigh import KNearNeighEstimator
+
+from .estimate_pz_task import (
+    EstimatePZAlgoConfigBase,
+    EstimatePZAlgoTask,
+    EstimatePZTaskConfig,
+    EstimatePZTask,
+)
+
+
+class EstimatePZKNNAlgoConfig(EstimatePZAlgoConfigBase):
+    """Config for EstimatePZKNNAlgoTask
+
+    This will select and configure the KNearNeighEstimator p(z)
+    estimation algorithm
+
+    See https://github.com/LSSTDESC/rail_sklearn/blob/main/src/rail/estimation/algos/k_nearneigh.py  # noqa
+    for parameters and default values.
+    """
+
+    @classmethod
+    def estimator_class(cls) -> type[CatEstimator]:
+        """Return the RAIL estimator class that this config describes.
+
+        Returns
+        -------
+        estimator_class : `type` [`rail.estimation.estimator.CatEstimator`]
+            Always `KNearNeighEstimator`.
+        """
+        return KNearNeighEstimator
+
+
+# NOTE(review): `_make_fields` is not defined in this module (presumably it
+# comes from EstimatePZAlgoConfigBase) and appears to populate the config
+# fields from the estimator's parameters -- confirm against the base class.
+EstimatePZKNNAlgoConfig._make_fields()
+
+
+class EstimatePZKNNAlgoTask(EstimatePZAlgoTask):
+    """SubTask that runs RAIL KNN algorithm for p(z) estimation
+
+    See https://github.com/LSSTDESC/rail_sklearn/blob/main/src/rail/estimation/algos/k_nearneigh.py  # noqa
+    for algorithm implementation.
+
+    KNN estimates the p(z) distribution by taking
+    a weighted mixture of the nearest neighbors in
+    color space.
+    """
+
+    # Config class paired with this subtask.
+    ConfigClass = EstimatePZKNNAlgoConfig
+    # Default name of this subtask.
+    _DefaultName = "estimatePZKNNAlgo"
+
+
+class EstimatePZKNNConfig(EstimatePZTaskConfig):
+    """Config for EstimatePZKNNTask
+
+    Overrides setDefaults to use KNN algorithm
+    """
+
+    def setDefaults(self):
+        """Retarget ``pz_algo`` to the KNN subtask and set its defaults.
+
+        The inherited defaults are applied first, then ``pz_algo`` is
+        retargeted to `EstimatePZKNNAlgoTask` and its stage name, output
+        mode, band list, reference band and ``band_a_env`` values are set.
+        """
+        # pex_config requires overrides to chain to the parent so that
+        # inherited defaults are applied before the KNN-specific ones.
+        super().setDefaults()
+        self.pz_algo.retarget(EstimatePZKNNAlgoTask)
+        self.pz_algo.stage_name = "knn"
+        self.pz_algo.output_mode = "return"
+        self.pz_algo.bands = [
+            "mag_g_lsst",
+            "mag_r_lsst",
+            "mag_i_lsst",
+            "mag_z_lsst",
+            "mag_y_lsst",
+        ]
+        self.pz_algo.ref_band = "mag_i_lsst"
+        # NOTE(review): per-band coefficients keyed by band letter; their
+        # physical meaning is defined by the RAIL estimator -- confirm there.
+        self.pz_algo.band_a_env = dict(g=3.64, r=2.70, i=2.06, z=1.58, y=1.31)
+
+
+class EstimatePZKNNTask(EstimatePZTask):
+    """Task that runs RAIL KNN algorithm for p(z) estimation
+
+    This subclass only swaps in `EstimatePZKNNConfig` and a default
+    name; everything else is inherited from `EstimatePZTask`.
+    """
+
+    # Config class whose defaults select the KNN algorithm.
+    ConfigClass = EstimatePZKNNConfig
+    # Default name of this task.
+    _DefaultName = "estimatePZKNN"
diff --git a/python/lsst/meas/pz/estimate_pz_task_trainz.py b/python/lsst/meas/pz/estimate_pz_task_trainz.py
@@ -0,0 +1,111 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+__all__ = [
+    "EstimatePZTrainZAlgoConfig",
+    "EstimatePZTrainZAlgoTask",
+    "EstimatePZTrainZTask",
+    "EstimatePZTrainZConfig",
+]
+
+import numpy as np
+from astropy.table import Table
+from rail.estimation.estimator import CatEstimator
+from rail.estimation.algos.train_z import TrainZEstimator
+
+from .estimate_pz_task import (
+    EstimatePZAlgoConfigBase,
+    EstimatePZAlgoTask,
+    EstimatePZTaskConfig,
+    EstimatePZTask,
+)
+
+
+class EstimatePZTrainZAlgoConfig(EstimatePZAlgoConfigBase):
+    """Config for EstimatePZTrainZAlgoTask
+
+    This will select and configure the TrainZEstimator p(z)
+    estimation algorithm
+
+    See https://github.com/LSSTDESC/rail_base/blob/main/src/rail/estimation/algos/train_z.py  # noqa
+    for parameters and default values.
+    """
+
+    @classmethod
+    def estimator_class(cls) -> type[CatEstimator]:
+        """Return the RAIL estimator class that this config describes.
+
+        Returns
+        -------
+        estimator_class : `type` [`rail.estimation.estimator.CatEstimator`]
+            Always `TrainZEstimator`.
+        """
+        return TrainZEstimator
+
+
+# NOTE(review): `_make_fields` is not defined in this module (presumably it
+# comes from EstimatePZAlgoConfigBase) and appears to populate the config
+# fields from the estimator's parameters -- confirm against the base class.
+EstimatePZTrainZAlgoConfig._make_fields()
+
+
+class EstimatePZTrainZAlgoTask(EstimatePZAlgoTask):
+    """SubTask that runs RAIL TrainZ algorithm for p(z) estimation
+
+    See https://github.com/LSSTDESC/rail_base/blob/main/src/rail/estimation/algos/train_z.py  # noqa
+    for algorithm implementation.
+
+    TrainZ is just a placeholder algorithm that assigns the same
+    p(z) distribution (taken from the input model file) to every object.
+    """
+
+    ConfigClass = EstimatePZTrainZAlgoConfig
+    _DefaultName = "estimatePZTrainZAlgo"
+
+    def _get_mags_and_errs(
+        self,
+        fluxes: Table,
+        mag_offset: float,
+    ) -> dict[str, np.ndarray]:
+        """Convert the per-band fluxes to magnitudes.
+
+        Despite the method name, only magnitudes are computed here; no
+        magnitude-error entries are added to the returned dictionary.
+
+        Parameters
+        ----------
+        fluxes : `astropy.table.Table`
+            Table indexed by the column names that
+            ``self._get_flux_names()`` returns.
+        mag_offset : `float`
+            Offset forwarded unchanged to ``self._flux_to_mag``.
+
+        Returns
+        -------
+        mag_dict : `dict` [`str`, `numpy.ndarray`]
+            One entry per band, keyed by the names that
+            ``self._get_mag_names()`` returns (values are presumably
+            arrays, as produced by ``self._flux_to_mag``).
+        """
+        flux_names = self._get_flux_names()
+        mag_names = self._get_mag_names()
+
+        mag_dict = {}
+        # Loop over bands, convert each flux column to mags and fill dict.
+        for band, flux_name in flux_names.items():
+            # NOTE(review): 99.0 is presumably the magnitude assigned to
+            # fluxes that cannot be converted -- confirm in `_flux_to_mag`.
+            mag_dict[mag_names[band]] = self._flux_to_mag(
+                fluxes[flux_name],
+                mag_offset,
+                99.0,
+            )
+        return mag_dict
+
+
+class EstimatePZTrainZConfig(EstimatePZTaskConfig):
+    """Config for EstimatePZTrainZTask
+
+    Overrides setDefaults to use TrainZ algorithm
+    """
+
+    def setDefaults(self):
+        """Retarget ``pz_algo`` to the TrainZ subtask and set its defaults.
+
+        The inherited defaults are applied first, then ``pz_algo`` is
+        retargeted to `EstimatePZTrainZAlgoTask` and its stage name,
+        output mode and ``band_a_env`` values are set.
+        """
+        # pex_config requires overrides to chain to the parent so that
+        # inherited defaults are applied before the TrainZ-specific ones.
+        super().setDefaults()
+        self.pz_algo.retarget(EstimatePZTrainZAlgoTask)
+        self.pz_algo.stage_name = "trainz"
+        self.pz_algo.output_mode = "return"
+        # NOTE(review): only the i band is listed here; the meaning of the
+        # coefficient is defined by the RAIL estimator -- confirm there.
+        self.pz_algo.band_a_env = dict(i=2.06)
+
+
+class EstimatePZTrainZTask(EstimatePZTask):
+    """Task that runs RAIL TrainZ algorithm for p(z) estimation
+
+    This subclass only swaps in `EstimatePZTrainZConfig` and a default
+    name; everything else is inherited from `EstimatePZTask`.
+    """
+
+    # Config class whose defaults select the TrainZ algorithm.
+    ConfigClass = EstimatePZTrainZConfig
+    # Default name of this task.
+    _DefaultName = "estimatePZTrainZ"
diff --git a/python/lsst/meas/pz/model_formatter.py b/python/lsst/meas/pz/model_formatter.py
@@ -0,0 +1,63 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+__all__ = ("ModelFormatter",)
+
+from typing import Any
+
+from rail.core.model import Model as RailModel
+from lsst.daf.butler import FormatterV2
+from lsst.resources import ResourcePath
+
+
+class ModelFormatter(FormatterV2):
+    """Read and write `rail.core.model.Model` objects.
+
+    Currently assumes only local file reads are possible.
+    """
+
+    supported_write_parameters = frozenset({"format"})
+    supported_extensions = frozenset({".pickle"})
+    can_read_from_local_file = True
+
+    def get_write_extension(self) -> str:
+        """Return the file extension for the requested write format.
+
+        Returns
+        -------
+        extension : `str`
+            ``".pickle"``, the only format currently handled.
+
+        Raises
+        ------
+        RuntimeError
+            Raised if the ``format`` write parameter is not ``"pickle"``.
+        """
+        # Default to pickle but allow configuration via write parameter
+        file_format = self.write_parameters.get("format", "pickle")
+        if file_format == "pickle":
+            return ".pickle"
+        # Other supported formats can be added here
+        raise RuntimeError(
+            f"Requested file format '{file_format}' is not supported for PZModel"
+        )
+
+    def read_from_local_file(
+        self, path: str, component: str | None = None, expected_size: int = -1
+    ) -> Any:
+        """Read a model from a local file.
+
+        Parameters
+        ----------
+        path : `str`
+            Path of the local file to read.
+        component : `str` or `None`, optional
+            Ignored by this formatter.
+        expected_size : `int`, optional
+            Ignored by this formatter.
+
+        Returns
+        -------
+        model : `typing.Any`
+            Whatever ``rail.core.model.Model.read`` returns for ``path``.
+        """
+        # NOTE(review): the only supported extension is ".pickle", so this
+        # presumably unpickles the file; only read models from trusted
+        # repositories -- confirm how `Model.read` loads the data.
+        return RailModel.read(path)  # type: ignore
+
+    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
+        """Write the model to a local file.
+
+        Parameters
+        ----------
+        in_memory_dataset : `typing.Any`
+            Object to persist; its ``write`` method is called with the
+            destination path.
+        uri : `lsst.resources.ResourcePath`
+            Destination; ``uri.ospath`` is used as the local path.
+        """
+        in_memory_dataset.write(uri.ospath)
diff --git a/python/lsst/meas/pz/qp_formatter.py b/python/lsst/meas/pz/qp_formatter.py
@@ -0,0 +1,65 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+__all__ = ("QPFormatter",)
+
+from typing import Any
+
+import qp
+from lsst.daf.butler import FormatterV2
+from lsst.resources import ResourcePath
+
+
+class QPFormatter(FormatterV2):
+    """Read and write `qp.Ensemble` objects.
+
+    Currently assumes only local file reads are possible.
+    """
+
+    supported_write_parameters = frozenset({"format"})
+    supported_extensions = frozenset({".hdf5", ".fits"})
+    can_read_from_local_file = True
+
+    def get_write_extension(self) -> str:
+        """Return the file extension for the requested write format.
+
+        Returns
+        -------
+        extension : `str`
+            ``".hdf5"`` (the default) or ``".fits"``.
+
+        Raises
+        ------
+        RuntimeError
+            Raised if the ``format`` write parameter is neither
+            ``"hdf5"`` nor ``"fits"``.
+        """
+        # Default to hdf5 but allow configuration via write parameter
+        file_format = self.write_parameters.get("format", "hdf5")
+        if file_format == "hdf5":
+            return ".hdf5"
+        if file_format == "fits":
+            return ".fits"
+        # Other supported formats can be added here
+        raise RuntimeError(
+            f"Requested file format '{file_format}' is not supported for qp.Ensemble"
+        )
+
+    def read_from_local_file(
+        self, path: str, component: str | None = None, expected_size: int = -1
+    ) -> Any:
+        """Read an ensemble from a local file.
+
+        Parameters
+        ----------
+        path : `str`
+            Path of the local file to read.
+        component : `str` or `None`, optional
+            Ignored by this formatter.
+        expected_size : `int`, optional
+            Ignored by this formatter.
+
+        Returns
+        -------
+        ensemble : `typing.Any`
+            Whatever ``qp.read`` returns for ``path``.
+        """
+        return qp.read(path)  # type: ignore
+
+    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
+        """Write the ensemble to a local file.
+
+        Parameters
+        ----------
+        in_memory_dataset : `typing.Any`
+            Object to persist; its ``write_to`` method is called with the
+            destination path.
+        uri : `lsst.resources.ResourcePath`
+            Destination; ``uri.ospath`` is used as the local path.
+        """
+        in_memory_dataset.write_to(uri.ospath)
diff --git a/tests/cleanup.sh b/tests/cleanup.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Remove the p(z) test collections, runs and dataset types from the
+# ci_hsc_gen3 butler repository, addressed relative to the current directory.
+
+repo=../ci_hsc_gen3/DATA
+
+butler remove-collections --no-confirm "${repo}" u/testing/pz_rail_testing
+
+# The glob is quoted so that it reaches butler unexpanded.
+for run in "u/testing/pz_rail_testing*" "u/testing/pz_models"; do
+    butler remove-runs --no-confirm "${repo}" "${run}"
+done
+
+# One invocation per dataset type, in the same order as before.
+for dataset_type in \
+    pz_estimate_knn pz_knn_config pz_knn_log pz_knn_metadata \
+    pz_estimate_trainz pz_trainz_config pz_trainz_log pz_trainz_metadata \
+    pzModel_knn pzModel_trainz
+do
+    butler remove-dataset-type "${repo}/" "${dataset_type}"
+done
diff --git a/tests/data/model_inform_train_z_wrap.pickle b/tests/data/model_inform_train_z_wrap.pickle
diff --git a/tests/data/model_table_knn_hsc.csv b/tests/data/model_table_knn_hsc.csv
@@ -0,0 +1,3 @@
+file,instrument
+tests/data/model_inform_knn_hsc_wrap.pickle,HSC
+
diff --git a/tests/data/model_table_knn_lsst.csv b/tests/data/model_table_knn_lsst.csv
@@ -0,0 +1,3 @@
+file,instrument
+tests/data/model_inform_knn_lsst_wrap.pickle,LSST
+
diff --git a/tests/data/model_table_train_z.csv b/tests/data/model_table_train_z.csv
@@ -0,0 +1,3 @@
+file,instrument
+tests/data/model_inform_train_z_wrap.pickle,HSC
+
diff --git a/tests/data/pz_pipeline_hsc.yaml b/tests/data/pz_pipeline_hsc.yaml
@@ -0,0 +1,20 @@
+# Test pipeline that runs the TrainZ and KNN p(z) estimation tasks.
+description: |
+  Photo-z madness
+tasks:
+  # Each task overrides two connection names; presumably `pzModel` is the
+  # input model dataset type and `pzEnsemble` the output p(z) ensemble
+  # dataset type -- confirm against the task's connections class.
+  pz_trainz:
+    class: lsst.meas.pz.estimate_pz_task_trainz.EstimatePZTrainZTask
+    config:
+      connections.pzModel: 'pzModel_trainz'
+      connections.pzEnsemble: 'pz_estimate_trainz'
+  pz_knn:
+    class: lsst.meas.pz.estimate_pz_task_knn.EstimatePZKNNTask
+    config:
+      connections.pzModel: 'pzModel_knn'
+      connections.pzEnsemble: 'pz_estimate_knn'
+subsets:
+  # Convenience subset that selects both tasks defined above.
+  all_pz:
+    subset:
+      - pz_trainz
+      - pz_knn
+    description: |
+      All of the photoz algorithms