From 1b76f9f195cf6a0732dff27c6c3c56ebb530df9d Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Mon, 15 Jul 2024 12:01:33 +0100 Subject: [PATCH 1/8] Add mock catalog type requiring no calibration, and add example pipeline of just 2 stages --- examples/mock_shear/config.yml | 17 +++++ examples/mock_shear/pipeline.yml | 48 ++++++++++++++ txpipe/calibrate.py | 1 + txpipe/source_selector.py | 48 ++++++++++++++ txpipe/utils/calibration_tools.py | 101 +++++++++++++++++++++++++++++- txpipe/utils/calibrators.py | 2 +- 6 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 examples/mock_shear/config.yml create mode 100644 examples/mock_shear/pipeline.yml diff --git a/examples/mock_shear/config.yml b/examples/mock_shear/config.yml new file mode 100644 index 000000000..bffa66536 --- /dev/null +++ b/examples/mock_shear/config.yml @@ -0,0 +1,17 @@ +global: + chunk_rows: 100000 + pixelization: healpix + nside: 64 + sparse: True # Generate sparse maps - faster if using small areas + + +TXSourceSelectorSimple: + input_pz: False + true_z: False + bands: riz + T_cut: 0.5 + s2n_cut: 10.0 + max_rows: 1000 + delta_gamma: 0.02 + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + shear_prefix: "" diff --git a/examples/mock_shear/pipeline.yml b/examples/mock_shear/pipeline.yml new file mode 100644 index 000000000..508f5e8e9 --- /dev/null +++ b/examples/mock_shear/pipeline.yml @@ -0,0 +1,48 @@ + +# Stages to run +stages: + - name: TXSourceSelectorSimple # select and split objects into source bins + - name: TXShearCalibration # Calibrate and split the source sample tomographically + + + +# modules and packages to import that have pipeline +# stages defined in them +modules: > + txpipe + +# where to find any modules that are not in this repo, +# and any other code we need. +python_paths: + - submodules/WLMassMap/python/desc/ + +# Where to put outputs +output_dir: data/example/mock_shear_outputs + +# How to run the pipeline: mini, parsl, or cwl +launcher: + name: mini + interval: 1.0 + +# Where to run the pipeline: cori-interactive, cori-batch, or local +site: + name: local + max_threads: 2 + +# configuration settings +config: examples/mock_shear/config.yml + +# These are overall inputs to the whole pipeline, not generated within it +inputs: + # See README for paths to download these files + shear_catalog: data/example/inputs/simple_shear_catalog.hdf5 + calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat + + +# if supported by the launcher, restart the pipeline where it left off +# if interrupted +resume: True +# where to put output logs for individual stages +log_dir: data/example/mock_shear_logs +# where to put an overall parsl pipeline log +pipeline_log: data/example/mock_shear_log.txt diff --git a/txpipe/calibrate.py b/txpipe/calibrate.py index 0ec88e3b2..68e9a50d3 100644 --- a/txpipe/calibrate.py +++ b/txpipe/calibrate.py @@ -62,6 +62,7 @@ def run(self): # is needed tomo_file = self.get_input("shear_tomography_catalog") cals, cal2d = Calibrator.load(tomo_file, null=use_true) + print("Using calibration method:", cal2d.__class__.__name__) # The catalog columns are named differently in different cases #  Get the correct shear catalogs diff --git a/txpipe/source_selector.py b/txpipe/source_selector.py index dc2c90bd8..82d1e19ca 100755 --- a/txpipe/source_selector.py +++ b/txpipe/source_selector.py @@ -17,12 +17,14 @@ LensfitCalculator, HSCCalculator, MetaDetectCalculator, + MockCalculator, ) from .utils.calibrators import ( MetaCalibrator, MetaDetectCalibrator, LensfitCalibrator, HSCCalibrator, + NullCalibrator, ) from .binning import build_tomographic_classifier, apply_classifier import numpy as np @@ -791,6 +793,52 @@ def compute_output_stats(self, calculator, mean, variance): return BinStats(N, Neff, mean_e, sigma_e, calibrator) +class TXSourceSelectorSimple(TXSourceSelectorBase): + """ + Source selection and tomography for mock catalogs that do not + require any calibration. + """ + name = "TXSourceSelectorSimple" + config_options = TXSourceSelectorBase.config_options.copy() + + def data_iterator(self): + chunk_rows = self.config["chunk_rows"] + bands = self.config["bands"] + + # Select columns we need. + shear_cols = [ + "psf_T_mean", + "weight", + "flags", + "T", + "s2n", + "g1", + "g2", + "weight", + ] + shear_cols += band_variants(bands, "mag", "mag_err", shear_catalog_type="hsc") + if self.config["input_pz"]: + shear_cols += ["mean_z"] + elif self.config["true_z"]: + shear_cols += ["redshift_true"] + + # Iterate using parent class method + return self.iterate_hdf("shear_catalog", "shear", shear_cols, chunk_rows) + + def setup_response_calculators(self, nbin_source): + calculators = [MockCalculator(self.select) for i in range(nbin_source)] + calculators.append(MockCalculator(self.select_2d)) + return calculators + + def compute_output_stats(self, calculator, mean, variance): + # Collate calibration values + N, Neff = calculator.collect(self.comm, allgather=True) + calibrator = NullCalibrator(mean) + mean_e = mean.copy() + sigma_e = np.sqrt(0.5 * (variance[0] + variance[1])) + return BinStats(N, Neff, mean_e, sigma_e, calibrator) + + class TXSourceSelectorHSC(TXSourceSelectorBase): """ diff --git a/txpipe/utils/calibration_tools.py b/txpipe/utils/calibration_tools.py index 9da47890b..0e857d356 100755 --- a/txpipe/utils/calibration_tools.py +++ b/txpipe/utils/calibration_tools.py @@ -13,7 +13,6 @@ def read_shear_catalog_type(stage): with stage.open_input("shear_catalog", wrapper=True) as f: shear_catalog_type = f.catalog_type stage.config["shear_catalog_type"] = shear_catalog_type - print('Temporarily setting cat_type to: ', f.catalog_type) return shear_catalog_type @@ -715,6 +714,106 @@ def collect(self, comm=None, allgather=False): _, R = self.R.collect(comm, mode) _, K = self.K.collect(comm, mode) return R, K, count, sum_weights**2/sum_weights_sq + +class MockCalculator: + """ + This class calculates tomographic statistics for mock catalogs + where no calibration is necessary. + + It only accumulates the statistics of the selected object weights + instead of any calibration quantities like its sibling classes. + + """ + + def __init__(self, selector): + """ + Initialize the Calibrator using the function you will use to select + objects. That function should take at least one argument, + the chunk of data to select on. + + The selector can take further *args and **kwargs, passed in when adding + data. + + Parameters + ---------- + selector: function + Function that selects objects + """ + self.selector = selector + self.count = 0 + self.sum_weights = 0 + self.sum_weights_sq = 0 + + + def add_data(self, data, *args, **kwargs): + """Select objects from a new chunk of data and tally their responses + + Parameters + ---------- + data: dict + Dictionary of data columns to select on and add + + *args + Positional arguments to be passed to the selection function + **kwargs + Keyword arguments to be passed to the selection function + + """ + data = _DataWrapper(data, "") + sel = self.selector(data, *args, **kwargs) + + # Extract the calibration quantities for the selected objects + w = data["weight"] + n = w[sel].size + self.count += n + w = w[sel] + self.sum_weights += np.sum(w) + self.sum_weights_sq += np.sum(w) + + return sel + + def collect(self, comm=None, allgather=False): + """ + Finalize and sum up all the response values, returning calibration + quantities. + + Parameters + ---------- + comm: MPI Communicator + If supplied, all processors response values will be combined together. + All processes will return the same final value + + Returns + ------- + N: int + Total object count + + Neff: float + Total effective number of galaxies + + + """ + # The total number of objects is just the + # number from all the processes summed together. + if comm is not None: + if allgather: + count = comm.allreduce(self.count) + sum_weights = comm.allreduce(self.sum_weights) + sum_weights_sq = comm.allreduce(self.sum_weights_sq) + + else: + count = comm.reduce(self.count) + sum_weights = comm.reduce(self.sum_weights) + sum_weights_sq = comm.reduce(self.sum_weights_sq) + else: + count = self.count + sum_weights = self.sum_weights + sum_weights_sq = self.sum_weights_sq + # Collect the weighted means of these numbers. + # this collects all the values from the different + # processes and over all the chunks of data + mode = "allgather" if allgather else "gather" + return count, sum_weights**2/sum_weights_sq class MeanShearInBins: diff --git a/txpipe/utils/calibrators.py b/txpipe/utils/calibrators.py index 590eddf48..e2f448c9e 100644 --- a/txpipe/utils/calibrators.py +++ b/txpipe/utils/calibrators.py @@ -48,7 +48,7 @@ def load(cls, tomo_file, null=False): cat_type = f["tomography"].attrs["catalog_type"] # choose a subclass based on this - if null: + if null or cat_type == "simple": subcls = NullCalibrator elif cat_type == "metacal": subcls = MetaCalibrator From 5ab179e1d452493e1a000c8dde547b4c2f84dc3a Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Mon, 15 Jul 2024 13:32:36 +0000 Subject: [PATCH 2/8] WIP on ingest --- examples/mock_shear/config.yml | 6 +-- txpipe/ingest/mocks.py | 67 +++++++++++++++++++++++++++++++++- txpipe/source_selector.py | 3 +- 3 files changed, 71 insertions(+), 5 deletions(-) diff --git a/examples/mock_shear/config.yml b/examples/mock_shear/config.yml index bffa66536..d08d805e2 100644 --- a/examples/mock_shear/config.yml +++ b/examples/mock_shear/config.yml @@ -2,12 +2,12 @@ global: chunk_rows: 100000 pixelization: healpix nside: 64 - sparse: True # Generate sparse maps - faster if using small areas + sparse: true TXSourceSelectorSimple: - input_pz: False - true_z: False + input_pz: false + true_z: true bands: riz T_cut: 0.5 s2n_cut: 10.0 diff --git a/txpipe/ingest/mocks.py b/txpipe/ingest/mocks.py index 24eac94f7..980f44a2b 100755 --- a/txpipe/ingest/mocks.py +++ b/txpipe/ingest/mocks.py @@ -1,5 +1,5 @@ from ..base_stage import PipelineStage -from ..data_types import ShearCatalog, HDFFile +from ..data_types import ShearCatalog, HDFFile, TextFile from ..utils import ( band_variants, metacal_variants, @@ -1216,6 +1216,70 @@ def generate_mock_metacal_mag_responses(bands, nobj): return mag_responses +class TXSimpleMock: + """ + Load an ascii astropy table and put it in shear catalog format. + """ + inputs = [("mock_shear_catalog", TextFile)] + outputs = [("shear_catalog", ShearCatalog)] + config_options = { + "mock_size_snr": False, + } + def run(self): + from astropy.table import Table + import numpy as np + + # Load the data. We are assuming here it is small enough to fit in memory + input_filename = self.get_input("mock_shear_catalog") + input_data = Table.read(input_filename, format="ascii") + n = len(input_data) + + data = {} + # required columns + for col in ["ra", "dec", "g1", "g2", "s2n", "T"]: + data[col] = input_data[col] + + # It's most likely we will have a redshift column. + # Check for both that and "redshift_true" + if "redshift" in input_data.colnames: + data["redshift_true"] = input_data["redshift"] + elif "redshift_true" in input_data.colnames: + data["redshift_true"] = input_data["redshift_true"] + + # If there is an ID column then use it, but otherwise just use + # sequential IDs + if "id" in input_data.colnames: + data["galaxy_id"] = input_data["id"] + else: + data["galaxy_id"] = np.arange(len(input_data)) + + # if these catalogs are not present then we fake them. + defaults = { + "T_err": 0.0, + "psf_g1": 0.0, + "psf_g2": 0.0, + "psf_T_mean": 0.202, # this corresponds to a FWHM of 0.75 arcsec + "weight": 1.0, + } + + for key, value in defaults.items(): + if key in input_data.colnames: + data[key] = input_data[key] + else: + data[key] = np.full(n, value) + + self.save_catalog(data) + + def save_catalog(self, data): + with self.open_output("shear_catalog") as f: + g = f.create_group("shear") + g.attrs["catalog_type"] = "simple" + for key, value in data.items(): + g.create_dataset(key, data=value) + + + + def test(): import pylab @@ -1232,3 +1296,4 @@ def test(): results = make_mock_photometry(n_visit, bands, data, True) pylab.hist(results["snr_r"], bins=50, histtype="step") pylab.savefig("snr_r.png") + diff --git a/txpipe/source_selector.py b/txpipe/source_selector.py index 82d1e19ca..86118b849 100755 --- a/txpipe/source_selector.py +++ b/txpipe/source_selector.py @@ -816,11 +816,12 @@ def data_iterator(self): "g2", "weight", ] - shear_cols += band_variants(bands, "mag", "mag_err", shear_catalog_type="hsc") if self.config["input_pz"]: shear_cols += ["mean_z"] elif self.config["true_z"]: shear_cols += ["redshift_true"] + else: + shear_cols += band_variants(bands, "mag", "mag_err", shear_catalog_type="hsc") # Iterate using parent class method return self.iterate_hdf("shear_catalog", "shear", shear_cols, chunk_rows) From 43138ded4e0674962260cae53fec14daaaab55b7 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 16 Jul 2024 13:32:48 +0100 Subject: [PATCH 3/8] Working mock shear pipeline --- .github/workflows/ci.yml | 41 ++++++++++++++++++- bin/generate-nfw-shear-cat.py | 70 ++++++++++++++++++++++++++++++++ examples/mock_shear/config.yml | 1 + examples/mock_shear/pipeline.yml | 8 ++-- txpipe/ingest/mocks.py | 4 +- 5 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 bin/generate-nfw-shear-cat.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ea10d3e5..ceaec3d57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ on: - cron: '0 12 * * 1' env: - EXAMPLE_DATA_FILE_VERSION: v2 + EXAMPLE_DATA_FILE_VERSION: v3 jobs: # Run a download step first so that it is in @@ -256,6 +256,45 @@ jobs: run: | tail -n +1 data/example/logs_redmagic/* + Mock_Shear_Pipeline: + runs-on: ubuntu-latest + + needs: Download_Data + + container: + image: ghcr.io/lsstdesc/txpipe:v0.9 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + submodules: true + + - name: Restore example data + uses: actions/cache/restore@v3 + with: + path: example.tar.gz + # update this when we change package contents and want + # to force an update + key: example-data-${{ env.EXAMPLE_DATA_FILE_VERSION }} + fail-on-cache-miss: true + + - name: Extract test data + run: | + tar -zxvf example.tar.gz + + - name: Run mock shear pipeline + run: | + ceci examples/mock_shear/pipeline.yml + test -f data/example/outputs_mock/binned_shear_catalog.hdf5 + + - name: Show logs + if: ${{ always() }} + run: | + tail -n +1 data/example/logs_redmagic/* + + + Other_Pipeline_Dry_Runs: runs-on: ubuntu-latest diff --git a/bin/generate-nfw-shear-cat.py b/bin/generate-nfw-shear-cat.py new file mode 100644 index 000000000..076a03336 --- /dev/null +++ b/bin/generate-nfw-shear-cat.py @@ -0,0 +1,70 @@ +import argparse + + + +parser = argparse.ArgumentParser() +parser.add_argument("--output", type=str, default="simple_cat.txt") +parser.add_argument("--nmax", type=int, default=10000, help="Number of objects to generate, though cat size is reduced by redshift cut") +parser.add_argument("--mass", type=float, default=10.0e14, help="Cluster mass in Msun") +parser.add_argument("--cluster_z", type=float, default=0.22, help="Cluster redshift") +parser.add_argument("--concentration", type=float, default=4.0, help="Cluster concentration parameter") +parser.add_argument("--size", type=float, default=240.0, help="Size of the region in arcmin") +parser.add_argument("--mean-z", type=float, default=0.5, help="Mean redshift of the background galaxies") +parser.add_argument("--sigma-z", type=float, default=0.1, help="Redshift std dev of the background galaxies") +parser.add_argument("--mean-snr", type=float, default=20., help="Mean SNR") +parser.add_argument("--mean-size", type=float, default=0.3, help="Galaxy mean size^2 T parameter in arcsec") +parser.add_argument("--sigma-size", type=float, default=0.3, help="Galaxy std dev size^2 T parameter in arcsec") +args = parser.parse_args() + +mass = args.mass +cluster_z = args.cluster_z +concentration = args.concentration + + +import galsim +import numpy as np +from astropy.table import Table +import argparse + +nfw = galsim.NFWHalo(mass, concentration, cluster_z) + +half_size = args.size / 2 +xmin = -half_size +xmax = half_size +ymin = -half_size +ymax = half_size + +N = 10000 + +x = np.random.uniform(xmin, xmax, N) +y = np.random.uniform(ymin, ymax, N) +z = np.random.normal(args.mean_z, args.sigma_z, N) + +w = np.where(z > nfw.z) +x1 = x[w] +y1 = y[w] +z1 = z[w] +n = z1.size + +ra = np.zeros(n) + x1 / 3600 +dec = np.zeros(n) + y1 / 3600 +g1, g2 = nfw.getShear([x1, y1], z1, reduced=False) +s2n = np.random.exponential(args.mean_snr, size=n) +print(s2n.mean()) + +# This should give plenty of selected objects since we cut on T/Tpsf > 0.5 by default +# and Tpsf default is ~.2 +T = np.random.normal(args.mean_size, args.sigma_size, size=n).clip(0.01, np.inf) + +data = { + "ra": ra, + "dec": dec, + "g1": g1, + "g2": g2, + "s2n": s2n, + "T": T, + "redshift": z1, +} + +table = Table(data=data) +table.write(args.output, overwrite=True, format="ascii.commented_header") diff --git a/examples/mock_shear/config.yml b/examples/mock_shear/config.yml index d08d805e2..6e9d92e1c 100644 --- a/examples/mock_shear/config.yml +++ b/examples/mock_shear/config.yml @@ -15,3 +15,4 @@ TXSourceSelectorSimple: delta_gamma: 0.02 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: "" + verbose: true \ No newline at end of file diff --git a/examples/mock_shear/pipeline.yml b/examples/mock_shear/pipeline.yml index 508f5e8e9..fb47230d7 100644 --- a/examples/mock_shear/pipeline.yml +++ b/examples/mock_shear/pipeline.yml @@ -1,6 +1,7 @@ # Stages to run stages: + - name: TXSimpleMock - name: TXSourceSelectorSimple # select and split objects into source bins - name: TXShearCalibration # Calibrate and split the source sample tomographically @@ -17,7 +18,7 @@ python_paths: - submodules/WLMassMap/python/desc/ # Where to put outputs -output_dir: data/example/mock_shear_outputs +output_dir: data/example/outputs_mock # How to run the pipeline: mini, parsl, or cwl launcher: @@ -34,8 +35,7 @@ config: examples/mock_shear/config.yml # These are overall inputs to the whole pipeline, not generated within it inputs: - # See README for paths to download these files - shear_catalog: data/example/inputs/simple_shear_catalog.hdf5 + mock_shear_catalog: data/example/inputs/mock_nfw_shear_catalog.txt calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat @@ -43,6 +43,6 @@ inputs: # if interrupted resume: True # where to put output logs for individual stages -log_dir: data/example/mock_shear_logs +log_dir: data/example/logs_mock # where to put an overall parsl pipeline log pipeline_log: data/example/mock_shear_log.txt diff --git a/txpipe/ingest/mocks.py b/txpipe/ingest/mocks.py index 980f44a2b..f27801e91 100755 --- a/txpipe/ingest/mocks.py +++ b/txpipe/ingest/mocks.py @@ -1216,10 +1216,11 @@ def generate_mock_metacal_mag_responses(bands, nobj): return mag_responses -class TXSimpleMock: +class TXSimpleMock(PipelineStage): """ Load an ascii astropy table and put it in shear catalog format. """ + name = "TXSimpleMock" inputs = [("mock_shear_catalog", TextFile)] outputs = [("shear_catalog", ShearCatalog)] config_options = { @@ -1260,6 +1261,7 @@ def run(self): "psf_g2": 0.0, "psf_T_mean": 0.202, # this corresponds to a FWHM of 0.75 arcsec "weight": 1.0, + "flags": 0, } for key, value in defaults.items(): From bf10d098cae431a6fc526c4c44d5cebb5a33b97a Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 16 Jul 2024 13:36:03 +0100 Subject: [PATCH 4/8] no op --- examples/mock_shear/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mock_shear/config.yml b/examples/mock_shear/config.yml index 6e9d92e1c..82d8bf68c 100644 --- a/examples/mock_shear/config.yml +++ b/examples/mock_shear/config.yml @@ -15,4 +15,4 @@ TXSourceSelectorSimple: delta_gamma: 0.02 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: "" - verbose: true \ No newline at end of file + verbose: true From 96e18d117ba99640d9e372412f0649fe4d6a9e1e Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 16 Jul 2024 13:39:44 +0100 Subject: [PATCH 5/8] Add note in comment --- examples/mock_shear/pipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mock_shear/pipeline.yml b/examples/mock_shear/pipeline.yml index fb47230d7..30bec5579 100644 --- a/examples/mock_shear/pipeline.yml +++ b/examples/mock_shear/pipeline.yml @@ -1,8 +1,8 @@ # Stages to run stages: - - name: TXSimpleMock - - name: TXSourceSelectorSimple # select and split objects into source bins + - name: TXSimpleMock # Convert a text file mock catalog to HDF5 + - name: TXSourceSelectorSimple # select and split objects into source bins - name: TXShearCalibration # Calibrate and split the source sample tomographically From f017bd8b4afb2a73392fe9f05b2cb582448a1226 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 16 Jul 2024 14:02:35 +0100 Subject: [PATCH 6/8] correct mock dir --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ceaec3d57..29352f9ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -291,7 +291,7 @@ jobs: - name: Show logs if: ${{ always() }} run: | - tail -n +1 data/example/logs_redmagic/* + tail -n +1 data/example/logs_mock/* From bd90e83dd3b7dd369d6e079c7416948d198b3d77 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 18 Jul 2024 16:17:16 +0100 Subject: [PATCH 7/8] add mock truth p(z) estimator and add to pipeline --- .github/workflows/ci.yml | 2 +- examples/mock_shear/config.yml | 5 +++++ examples/mock_shear/pipeline.yml | 5 +++++ txpipe/ingest/mocks.py | 35 +++++++++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29352f9ea..8b8b1f6bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ on: - cron: '0 12 * * 1' env: - EXAMPLE_DATA_FILE_VERSION: v3 + EXAMPLE_DATA_FILE_VERSION: v4 jobs: # Run a download step first so that it is in diff --git a/examples/mock_shear/config.yml b/examples/mock_shear/config.yml index 82d8bf68c..a5993cda8 100644 --- a/examples/mock_shear/config.yml +++ b/examples/mock_shear/config.yml @@ -16,3 +16,8 @@ TXSourceSelectorSimple: source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: "" verbose: true + +TXMockTruthPZ: + mock_sigma_z: 0.001 + aliases: + photoz_pdfs: source_photoz_pdfs diff --git a/examples/mock_shear/pipeline.yml b/examples/mock_shear/pipeline.yml index 30bec5579..e57afa5e9 100644 --- a/examples/mock_shear/pipeline.yml +++ b/examples/mock_shear/pipeline.yml @@ -4,6 +4,8 @@ stages: - name: TXSimpleMock # Convert a text file mock catalog to HDF5 - name: TXSourceSelectorSimple # select and split objects into source bins - name: TXShearCalibration # Calibrate and split the source sample tomographically + - name: TXMockTruthPZ # Generate PDFs as narrow gaussian centered on the true redshifts + - name: CLClusterShearCatalogs # Extract and weight the shear catalog around every cluster @@ -37,6 +39,9 @@ config: examples/mock_shear/config.yml inputs: mock_shear_catalog: data/example/inputs/mock_nfw_shear_catalog.txt calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat + cluster_catalog: data/example/inputs/mock_single_cluster_catalog.hdf5 + fiducial_cosmology: data/fiducial_cosmology.yml + # if supported by the launcher, restart the pipeline where it left off diff --git a/txpipe/ingest/mocks.py b/txpipe/ingest/mocks.py index f27801e91..91ea09db7 100755 --- a/txpipe/ingest/mocks.py +++ b/txpipe/ingest/mocks.py @@ -1,5 +1,5 @@ from ..base_stage import PipelineStage -from ..data_types import ShearCatalog, HDFFile, TextFile +from ..data_types import ShearCatalog, HDFFile, TextFile, QPPDFFile from ..utils import ( band_variants, metacal_variants, @@ -1280,6 +1280,38 @@ def save_catalog(self, data): g.create_dataset(key, data=value) +class TXMockTruthPZ(PipelineStage): + name = "TXMockTruthPZ" + inputs = [("shear_catalog", ShearCatalog)] + outputs = [("photoz_pdfs", QPPDFFile)] + config_options = { + "mock_sigma_z": 0.001, + } + def run(self): + import qp + import numpy as np + sigma_z = self.config["mock_sigma_z"] + + + # read the input truth redshifts + with self.open_input("shear_catalog", wrapper=True) as f: + group = f.file[f.get_primary_catalog_group()] + n = group["ra"].size + redshifts = group["redshift_true"][:] + + zgrid = np.linspace(0, 3, 301) + pdfs = np.zeros((n, len(zgrid))) + + spread_z = sigma_z * (1 + redshifts) + # make a gaussian PDF for each object + delta = zgrid[np.newaxis, :] - redshifts[:, np.newaxis] + pdfs = np.exp(-0.5 * (delta / spread_z[:, np.newaxis])**2) / np.sqrt(2 * np.pi) / spread_z[:, np.newaxis] + + q = qp.Ensemble(qp.interp, data=dict(xvals=zgrid, yvals=pdfs)) + q.set_ancil(dict(zmode=redshifts, zmean=redshifts, zmedian=redshifts)) + q.write_to(self.get_output("photoz_pdfs")) + + def test(): @@ -1299,3 +1331,4 @@ def test(): pylab.hist(results["snr_r"], bins=50, histtype="step") pylab.savefig("snr_r.png") + From 6ccb0715a5e1fc8f8e41aeb49a5d0d56e2341507 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 18 Jul 2024 17:00:23 +0100 Subject: [PATCH 8/8] add script and bump example version --- .github/workflows/ci.yml | 2 +- bin/make_single_cluster_catalog.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 bin/make_single_cluster_catalog.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b8b1f6bd..3fcb499b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ on: - cron: '0 12 * * 1' env: - EXAMPLE_DATA_FILE_VERSION: v4 + EXAMPLE_DATA_FILE_VERSION: v5 jobs: # Run a download step first so that it is in diff --git a/bin/make_single_cluster_catalog.py b/bin/make_single_cluster_catalog.py new file mode 100644 index 000000000..c4e6d901d --- /dev/null +++ b/bin/make_single_cluster_catalog.py @@ -0,0 +1,24 @@ +import numpy as np +import h5py +ra = np.array([0.0]) +dec = np.array([0.0]) +z = np.array([0.22]) +z_err = np.array([0.01]) +ids = np.array([0]) +richness = np.array([10.0]) +richness_err = np.array([1.0]) +scale = np.array([1.0]) + +with h5py.File("mock_single_cluster_catalog.hdf5", "w") as f: + g = f.create_group("clusters") + g + g.create_dataset("cluster_id", data=ids) + g.create_dataset("ra", data=ra) + g.create_dataset("dec", data=dec) + g.create_dataset("redshift", data=z) + g.create_dataset("redshift_err", data=z_err) + g.create_dataset("richness", data=richness) + g.create_dataset("richness_err", data=richness_err) + g.create_dataset("scaleval", data=scale) + + g.attrs["n_clusters"] = 1