lsst-dm · eacharles · Sep 24, 2024 · Jul 29, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/python/lsst/meas/pz/estimate_pz_task.py b/python/lsst/meas/pz/estimate_pz_task.py
diff --git a/python/lsst/meas/pz/estimate_pz_task_knn.py b/python/lsst/meas/pz/estimate_pz_task_knn.py
@@ -0,0 +1,60 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+__all__ = [
+    "EstimatePZKNNTask",
+    "EstimatePZKNNConfig",
+]
+
+
+from rail.estimation.algos.k_nearneigh import KNearNeighEstimator
+
+from .estimate_pz_task import EstimatePZAlgoConfigBase, EstimatePZAlgoTask
+
+
+class EstimatePZKNNConfig(EstimatePZAlgoConfigBase):
+    """Config for EstimatePZKNNTask
+
+    This will select and configure the KNearNeighEstimator p(z)
+    estimation algorithm
+
+    See https://github.com/LSSTDESC/rail_sklearn/blob/main/src/rail/estimation/algos/k_nearneigh.py  # noqa
+    for parameters and default values.
+    """
+
+    estimator_class = KNearNeighEstimator
+
+
+EstimatePZKNNConfig._make_fields()
+
+
+class EstimatePZKNNTask(EstimatePZAlgoTask):
+    """SubTask that runs RAIL KNN algorithm for p(z) estimation
+
+    See https://github.com/LSSTDESC/rail_sklearn/blob/main/src/rail/estimation/algos/k_nearneigh.py  # noqa
+    for algorithm implementation.
+
+    KNN estimates the p(z) distribution by taking
+    a weighted mixture of the nearest neighbors in
+    color space.
+    """
+
+    ConfigClass = EstimatePZKNNConfig
diff --git a/python/lsst/meas/pz/estimate_pz_task_trainz.py b/python/lsst/meas/pz/estimate_pz_task_trainz.py
@@ -0,0 +1,80 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+__all__ = [
+    "EstimatePZTrainZTask",
+    "EstimatePZTrainZConfig",
+]
+
+import numpy as np
+from pandas import DataFrame
+from rail.estimation.algos.train_z import TrainZEstimator
+
+from .estimate_pz_task import EstimatePZAlgoConfigBase, EstimatePZAlgoTask
+
+
+class EstimatePZTrainZConfig(EstimatePZAlgoConfigBase):
+    """Config for EstimatePZTrainZTask
+
+    This will select and configure the TrainZEstimator p(z)
+    estimation algorithm
+
+    See https://github.com/LSSTDESC/rail_base/blob/main/src/rail/estimation/algos/train_z.py  # noqa
+    for parameters and default values.
+    """
+
+    estimator_class = TrainZEstimator
+
+
+EstimatePZTrainZConfig._make_fields()
+
+
+class EstimatePZTrainZTask(EstimatePZAlgoTask):
+    """SubTask that runs RAIL TrainZ algorithm for p(z) estimation
+
+    See https://github.com/LSSTDESC/rail_base/blob/main/src/rail/estimation/algos/train_z.py  # noqa
+    for algorithm implementation.
+
+    TrainZ is just a placeholder algorithm that assigns the same
+    p(z) distribution (taken from the input model file) to every object.
+    """
+
+    ConfigClass = EstimatePZTrainZConfig
+
+    def _get_mags_and_errs(
+        self,
+        fluxes: DataFrame,
+        mag_offset: float,
+    ) -> dict[str, np.array]:
+
+        flux_names = self._get_flux_names()
+        mag_names = self._get_mag_names()
+
+        mag_dict = {}
+        # loop over bands, make mags and mag errors and fill dict
+        for band in flux_names.keys():
+            fluxVals = fluxes[flux_names[band]]
+            mag_dict[mag_names[band]] = self._flux_to_mag(
+                fluxVals,
+                mag_offset,
+                99.0,
+            )
+        return mag_dict
diff --git a/python/lsst/meas/pz/model_formatter.py b/python/lsst/meas/pz/model_formatter.py
@@ -0,0 +1,63 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+__all__ = ("ModelFormatter",)
+
+from typing import Any
+
+from rail.core.model import Model as RailModel
+from lsst.daf.butler import FormatterV2
+from lsst.resources import ResourcePath
+
+
+class ModelFormatter(FormatterV2):
+    """Read and write `rail.core.model.Model` objects.
+
+    Currently assumes only local file reads are possible.
+    """
+
+    supported_write_parameters = frozenset({"format"})
+    supported_extensions = frozenset({".pickle"})
+    can_read_from_local_file = True
+
+    def get_write_extension(self) -> str:
+        # Default to pickle but allow configuration via write parameter
+        format = self.write_parameters.get("format", "pickle")
+        if format == "pickle":
+            return ".pickle"
+        # Other supported formats can be added here
+        raise RuntimeError(
+            f"Requested file format '{format}' is not supported for PZModel"
+        )
+
+    def read_from_local_file(
+        self, path: str, component: str | None = None, expected_size: int = -1
+    ) -> Any:
+        return RailModel.read(path)  # type: ignore
+
+    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
+        in_memory_dataset.write(uri.ospath)
diff --git a/python/lsst/meas/pz/qp_formatter.py b/python/lsst/meas/pz/qp_formatter.py
@@ -0,0 +1,65 @@
+# This file is part of meas_pz.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+__all__ = ("QPFormatter",)
+
+from typing import Any
+
+import qp
+from lsst.daf.butler import FormatterV2
+from lsst.resources import ResourcePath
+
+
+class QPFormatter(FormatterV2):
+    """Read and write `qp.Ensemble` objects.
+
+    Currently assumes only local file reads are possible.
+    """
+
+    supported_write_parameters = frozenset({"format"})
+    supported_extensions = frozenset({".hdf5", ".fits"})
+    can_read_from_local_file = True
+
+    def get_write_extension(self) -> str:
+        # Default to hdf5 but allow configuration via write parameter
+        format = self.write_parameters.get("format", "hdf5")
+        if format == "hdf5":
+            return ".hdf5"
+        if format == "fits":
+            return ".fits"
+        # Other supported formats can be added here
+        raise RuntimeError(
+            f"Requested file format '{format}' is not supported for Table"
+        )
+
+    def read_from_local_file(
+        self, path: str, component: str | None = None, expected_size: int = -1
+    ) -> Any:
+        return qp.read(path)  # type: ignore
+
+    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
+        in_memory_dataset.write_to(uri.ospath)
diff --git a/tests/cleanup_script b/tests/cleanup_script
@@ -0,0 +1,13 @@
+butler remove-collections --no-confirm ../ci_hsc_gen3/DATA u/testing/pz_rail_testing
+butler remove-runs --no-confirm ../ci_hsc_gen3/DATA "u/testing/pz_rail_testing*"
+butler remove-runs --no-confirm ../ci_hsc_gen3/DATA "u/testing/pz_models"
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_estimate_knn
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_knn_config
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_knn_log
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_knn_metadata
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_estimate_trainz
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_trainz_config
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_trainz_log
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pz_trainz_metadata
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pzModel_knn
+butler remove-dataset-type ../ci_hsc_gen3/DATA/ pzModel_trainz
diff --git a/tests/model_inform_train_z_wrap.pickle b/tests/model_inform_train_z_wrap.pickle
diff --git a/tests/model_table_knn_hsc.csv b/tests/model_table_knn_hsc.csv
@@ -0,0 +1,3 @@
+file,instrument
+tests/model_inform_knn_hsc_wrap.pickle,HSC
+
diff --git a/tests/model_table_knn_lsst.csv b/tests/model_table_knn_lsst.csv
@@ -0,0 +1,3 @@
+file,instrument
+tests/model_inform_knn_lsst_wrap.pickle,LSST
+
diff --git a/tests/model_table_train_z.csv b/tests/model_table_train_z.csv
@@ -0,0 +1,3 @@
+file,instrument
+tests/model_inform_train_z_wrap.pickle,HSC
+
diff --git a/tests/pz_pipeline_hsc.yaml b/tests/pz_pipeline_hsc.yaml
@@ -0,0 +1,41 @@
+description: |
+  Photo-z madness
+tasks:
+  pz_trainz:
+    class: lsst.meas.pz.estimate_pz_task.EstimatePZTask
+    config:
+      connections.pzModel: 'pzModel_trainz'
+      connections.pzEnsemble: 'pz_estimate_trainz'
+      python: |
+         from lsst.meas.pz.estimate_pz_task_trainz import EstimatePZTrainZTask
+         config.pz_algo.retarget(EstimatePZTrainZTask)
+         config.pz_algo.stage_name='trainz'
+         config.pz_algo.output_mode='return'
+         config.pz_algo.band_a_env=dict(i=2.06)
+  pz_knn:
+    class: lsst.meas.pz.estimate_pz_task.EstimatePZTask
+    config:
+      connections.pzModel: 'pzModel_knn'
+      connections.pzEnsemble: 'pz_estimate_knn'
+      python: |
+         from lsst.meas.pz.estimate_pz_task_knn import EstimatePZKNNTask
+         config.pz_algo.retarget(EstimatePZKNNTask)
+         config.pz_algo.stage_name='knn'
+         config.pz_algo.output_mode='return'
+         config.pz_algo.bands=['mag_g_lsst','mag_r_lsst','mag_i_lsst','mag_z_lsst','mag_y_lsst']
+         config.pz_algo.ref_band='mag_i_lsst'
+         config.pz_algo.band_a_env=dict(g=3.64,r=2.70,i=2.06,z=1.58,y=1.31)
+subsets:
+  all_pz:
+    subset:
+      - pz_trainz
+      - pz_knn
+    description: |
+      All of the photoz algorithms
+# run me with
+# pipetask run
+#  -b $CI_HSC_GEN3_DIR/DATA
+#  -i HSC/runs/ci_hsc
+#  -o u/echarles/pz_rail_testing
+#  -p "${MEAS_PZ_DIR}/tests/pz_pipeline_hsc.yaml"
+#  -d "skymap='discrete/ci_hsc' AND tract=0 AND patch=69"