diff --git a/CL_pipeline.png b/CL_pipeline.png new file mode 100644 index 000000000..3f8f6227c Binary files /dev/null and b/CL_pipeline.png differ diff --git a/examples/cosmodc2/1deg2-in2p3.sub b/examples/cosmodc2/1deg2-in2p3.sub new file mode 100644 index 000000000..a104d767c --- /dev/null +++ b/examples/cosmodc2/1deg2-in2p3.sub @@ -0,0 +1,9 @@ +#!/usr/bin/bash +#SBATCH --time=01:00:00 +#SBATCH --partition=hpc +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=128000 + +source /pbs/throng/lsst/users/jzuntz/txpipe-environments/setup-txpipe +ceci examples/cosmodc2/pipeline-1deg2-CL-in2p3.yml diff --git a/examples/cosmodc2/1deg2-nersc.sub b/examples/cosmodc2/1deg2-nersc.sub new file mode 100644 index 000000000..3ed6809a4 --- /dev/null +++ b/examples/cosmodc2/1deg2-nersc.sub @@ -0,0 +1,10 @@ +#!/bin/bash +#SBATCH -A m1727 +#SBATCH -C cpu +#SBATCH --qos=debug +#SBATCH --time=00:30:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=32 + +source $CFS/lsst/groups/WL/users/zuntz/setup-txpipe +tx ceci examples/cosmodc2/pipeline-1deg2-CL-nersc.yml diff --git a/examples/cosmodc2/20deg2-in2p3.sub b/examples/cosmodc2/20deg2-in2p3.sub index 717d168cd..e628081c4 100644 --- a/examples/cosmodc2/20deg2-in2p3.sub +++ b/examples/cosmodc2/20deg2-in2p3.sub @@ -6,4 +6,4 @@ #SBATCH --mem=128000 source /pbs/throng/lsst/users/jzuntz/txpipe-environments/setup-txpipe -ceci examples/cosmodc2/pipeline-20deg2-clmm.yml +ceci examples/cosmodc2/pipeline-20deg2-CL-in2p3.yml diff --git a/examples/cosmodc2/20deg2-nersc.sub b/examples/cosmodc2/20deg2-nersc.sub index d2cabb3f3..50a4d2d56 100644 --- a/examples/cosmodc2/20deg2-nersc.sub +++ b/examples/cosmodc2/20deg2-nersc.sub @@ -7,5 +7,4 @@ #SBATCH --ntasks-per-node=32 source $CFS/lsst/groups/WL/users/zuntz/setup-txpipe - -tx ceci examples/cosmodc2/pipeline-20deg2-clmm-nersc.yml +tx ceci examples/cosmodc2/pipeline-20deg2-CL-nersc.yml diff --git a/examples/cosmodc2/config-20deg2-clmm.yml b/examples/cosmodc2/config-1deg2-CL.yml similarity index 
81% rename from examples/cosmodc2/config-20deg2-clmm.yml rename to examples/cosmodc2/config-1deg2-CL.yml index 0322e132d..91a4e7a4f 100644 --- a/examples/cosmodc2/config-20deg2-clmm.yml +++ b/examples/cosmodc2/config-1deg2-CL.yml @@ -63,7 +63,25 @@ BPZ_lite: mag_i: 28.62 mag_z: 27.98 +CLClusterBinningRedshiftRichness: + zedge : [0.1, 0.4, 0.6, 0.8] + richedge : [5., 10., 20.,25.] CLClusterShearCatalogs: max_radius: 5.0 # Mpc delta_z: 0.2 # redshift buffer + +CLClusterEnsembleProfiles: + #radial bin definition + r_min : 0.2 #in Mpc + r_max : 5.0 #in Mpc + #type of profile + delta_sigma_profile : True + shear_profile : False + magnification_profile : False + #stacking step or not + stack_profile : True + + + + diff --git a/examples/cosmodc2/config-20deg2-CL.yml b/examples/cosmodc2/config-20deg2-CL.yml new file mode 100644 index 000000000..9fdd92ea9 --- /dev/null +++ b/examples/cosmodc2/config-20deg2-CL.yml @@ -0,0 +1,88 @@ +TXSourceSelectorMetadetect: + input_pz: False + bands: riz #used for selection + T_cut: 0.5 + s2n_cut: 10.0 + max_rows: 1000 + delta_gamma: 0.02 + source_zbin_edges: [0.1, 3.0] + chunk_rows: 100000 + true_z: False + shear_prefix: '' + +Inform_BPZ_lite: + aliases: + input: spectroscopic_catalog + model: photoz_model + zmin: 0.0 + zmax: 3.0 + nzbins: 301 + columns_file: ./data/bpz_riz.columns + data_path: ./data/example/rail-bpz-inputs + spectra_file: CWWSB4.list + prior_band: i + ref_band: i + # Not sure about this + prior_file: hdfn_gen + p_min: 0.005 + gauss_kernel: 0.0 + mag_err_min: 0.005 + inform_options: {'save_train': False, 'load_model': False, 'modelfile': 'BPZpriormodel.out'} + madau_reddening: no + bands: riz + zp_errors: [0.01, 0.01, 0.01] + hdf5_groupname: photometry + + + +BPZ_lite: + aliases: + model: photoz_model + input: shear_catalog + output: source_photoz_pdfs + zmin: 0.0 + zmax: 3.0 + dz: 0.01 + nzbins: 301 + data_path: ./data/example/rail-bpz-inputs + bands: [mag_r, mag_i, mag_z] + err_bands: [mag_err_r, mag_err_i, mag_err_z] 
+ hdf5_groupname: shear/00 + nondetect_val: .inf + columns_file: ./data/bpz_riz.columns + spectra_file: CWWSB4.list + ref_band: mag_i + prior_file: hdfn_gen + p_min: 0.005 + gauss_kernel: 0.0 + zp_errors: [0.01, 0.01, 0.01] + mag_err_min: 0.005 + madau_reddening: false + mag_limits: + mag_r: 29.06 + mag_i: 28.62 + mag_z: 27.98 + +CLClusterBinningRedshiftRichness: + zedge : [0.1, 0.4, 0.6, 0.8] + richedge : [5., 10., 20.,25.] + +CLClusterShearCatalogs: + max_radius: 5.0 # Mpc + delta_z: 0.2 # redshift buffer + +CLClusterEnsembleProfiles: + #radial bin definition + r_min : 0.2 #in Mpc + r_max : 5.0 #in Mpc + #type of profile + delta_sigma_profile : True + shear_profile : False + magnification_profile : False + #stacking step or not + stack_profile : True + + + + + diff --git a/examples/cosmodc2/pipeline-1deg2-CL-in2p3.yml b/examples/cosmodc2/pipeline-1deg2-CL-in2p3.yml new file mode 100644 index 000000000..cd9bccbc2 --- /dev/null +++ b/examples/cosmodc2/pipeline-1deg2-CL-in2p3.yml @@ -0,0 +1,57 @@ +#this step depends on where you run +#for CCin2p3 +site: + name: cc-parallel + mpi_command: "mpirun -n" + +#for NERSC +#site: +# name: cori-batch +# image: ghcr.io/lsstdesc/txpipe-dev + + +#all the following steps should not depend on where you run +launcher: + name: mini + interval: 3.0 + +modules: > + txpipe + rail.estimation.algos.bpz_lite + +python_paths: [] + +stages: +# - name: TXSourceSelectorMetadetect +# nprocess: 1 +# - name: Inform_BPZ_lite +# nprocess: 1 +# - name: BPZ_lite +# nprocess: 1 + - name: CLClusterBinningRedshiftRichness + nprocess: 1 +# - name: CLClusterShearCatalogs +# nprocess: 1 +# - name: CLClusterEnsembleProfiles +# nprocess: 1 +# - name: CLClusterDataVector +# nprocess: 1 + + + +output_dir: ./data/cosmodc2/outputs-1deg2-CL +config: examples/cosmodc2/config-1deg2-CL.yml + +inputs: + # See README for paths to download these files + #shear_catalog: ./data/example/inputs/metadetect_shear_catalog.hdf5 + #photometry_catalog: 
./data/example/inputs/photometry_catalog.hdf5 + #fiducial_cosmology: ./data/fiducial_cosmology.yml + #calibration_table: ./data/example/inputs/sample_cosmodc2_w10year_errors.dat + #spectroscopic_catalog: ./data/example/inputs/mock_spectroscopic_catalog.hdf5 + cluster_catalog: ./data/example/inputs/cluster_catalog.hdf5 + +resume: True +log_dir: ./data/cosmodc2/logs +pipeline_log: ./data/cosmodc2/log_1deg2.txt + diff --git a/examples/cosmodc2/pipeline-1deg2-CL-nersc.yml b/examples/cosmodc2/pipeline-1deg2-CL-nersc.yml new file mode 100644 index 000000000..f2a3d6c5f --- /dev/null +++ b/examples/cosmodc2/pipeline-1deg2-CL-nersc.yml @@ -0,0 +1,57 @@ +#this step depends on where you run +#for CCin2p3 +# site: +# name: cc-parallel +# mpi_command: "mpirun -n" + +#for NERSC +site: + name: cori-batch + image: ghcr.io/lsstdesc/txpipe-dev + + +#all the following steps should not depend on where you run +launcher: + name: mini + interval: 3.0 + +modules: > + txpipe + rail.estimation.algos.bpz_lite + +python_paths: [] + +stages: +# - name: TXSourceSelectorMetadetect +# nprocess: 1 +# - name: Inform_BPZ_lite +# nprocess: 1 +# - name: BPZ_lite +# nprocess: 1 + - name: CLClusterBinningRedshiftRichness + nprocess: 1 +# - name: CLClusterShearCatalogs +# nprocess: 1 +# - name: CLClusterEnsembleProfiles +# nprocess: 1 +# - name: CLClusterDataVector +# nprocess: 1 + + + +output_dir: ./data/cosmodc2/outputs-1deg2-CL +config: ./examples/cosmodc2/config-1deg2-CL.yml + +inputs: + # See README for paths to download these files +# shear_catalog: ./data/example/inputs/metadetect_shear_catalog.hdf5 +# photometry_catalog: ./data/example/inputs/photometry_catalog.hdf5 +# fiducial_cosmology: ./data/fiducial_cosmology.yml +# calibration_table: ./data/example/inputs/sample_cosmodc2_w10year_errors.dat +# spectroscopic_catalog: ./data/example/inputs/mock_spectroscopic_catalog.hdf5 + cluster_catalog: ./data/example/inputs/cluster_catalog.hdf5 + +resume: True +log_dir: ./data/cosmodc2/logs +pipeline_log: 
./data/cosmodc2/log_1deg2.txt + diff --git a/examples/cosmodc2/pipeline-20deg2-CL-in2p3.yml b/examples/cosmodc2/pipeline-20deg2-CL-in2p3.yml new file mode 100644 index 000000000..80370648c --- /dev/null +++ b/examples/cosmodc2/pipeline-20deg2-CL-in2p3.yml @@ -0,0 +1,56 @@ +#this step depends on where you run +#for CCin2p3 +site: + name: cc-parallel + mpi_command: "mpirun -n" + +#for NERSC +#site: +# name: cori-batch +# image: ghcr.io/lsstdesc/txpipe-dev + + +#all the following steps should not depend on where you run +launcher: + name: mini + interval: 3.0 +modules: > + txpipe + rail.estimation.algos.bpz_lite + +python_paths: [] + +stages: +# - name: TXSourceSelectorMetadetect +# nprocess: 30 +# - name: Inform_BPZ_lite +# nprocess: 1 +# - name: BPZ_lite +# nprocess: 30 + - name: CLClusterBinningRedshiftRichness + nprocess: 1 +# - name: CLClusterShearCatalogs +# nprocess: 30 +# - name: CLClusterEnsembleProfiles +# nprocess: 30 +# - name: CLClusterDataVector +# nprocess: 1 + + + +output_dir: ./data/cosmodc2/outputs-20deg2-CL +config: ./examples/cosmodc2/config-20deg2-CL.yml + +inputs: + # See README for paths to download these files + #shear_catalog: ./data/cosmodc2/20deg2/shear_catalog.hdf5 + #photometry_catalog: ./data/cosmodc2/20deg2/photometry_catalog.hdf5 + #fiducial_cosmology: ./data/fiducial_cosmology.yml + #calibration_table: ./data/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat + #spectroscopic_catalog: ./data/cosmodc2/20deg2/spectroscopic_catalog.hdf5 + cluster_catalog: ./data/cosmodc2/20deg2/cluster_catalog.hdf5 + +resume: True +log_dir: ./data/cosmodc2/logs +pipeline_log: ./data/cosmodc2/log_20deg2.txt + diff --git a/examples/cosmodc2/pipeline-20deg2-CL-nersc.yml b/examples/cosmodc2/pipeline-20deg2-CL-nersc.yml new file mode 100644 index 000000000..c9d8b0cdd --- /dev/null +++ b/examples/cosmodc2/pipeline-20deg2-CL-nersc.yml @@ -0,0 +1,56 @@ +#this step depends on where you run +#for CCin2p3 +# site: +# name: cc-parallel +# mpi_command: "mpirun -n" + 
+#for NERSC +site: + name: cori-batch + image: ghcr.io/lsstdesc/txpipe-dev + + +#all the following steps should not depend on where you run +launcher: + name: mini + interval: 3.0 +modules: > + txpipe + rail.estimation.algos.bpz_lite + +python_paths: [] + +stages: +# - name: TXSourceSelectorMetadetect +# nprocess: 30 +# - name: Inform_BPZ_lite +# nprocess: 1 +# - name: BPZ_lite +# nprocess: 30 + - name: CLClusterBinningRedshiftRichness + nprocess: 1 +# - name: CLClusterShearCatalogs +# nprocess: 30 +# - name: CLClusterEnsembleProfiles +# nprocess: 1 +# - name: CLClusterDataVector +# nprocess: 1 + + + +output_dir: ./data/cosmodc2/outputs-20deg2-CL +config: ./examples/cosmodc2/config-20deg2-CL.yml + +inputs: + # See README for paths to download these files + #shear_catalog: ./data/cosmodc2/20deg2/shear_catalog.hdf5 + #photometry_catalog: ./data/cosmodc2/20deg2/photometry_catalog.hdf5 + #fiducial_cosmology: ./data/fiducial_cosmology.yml + #calibration_table: ./data/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat + #spectroscopic_catalog: ./data/cosmodc2/20deg2/spectroscopic_catalog.hdf5 + cluster_catalog: ./data/cosmodc2/20deg2/cluster_catalog.hdf5 + +resume: True +log_dir: ./data/cosmodc2/logs +pipeline_log: ./data/cosmodc2/log_20deg2.txt + diff --git a/examples/cosmodc2/pipeline-20deg2-clmm-nersc.yml b/examples/cosmodc2/pipeline-20deg2-clmm-nersc.yml deleted file mode 100644 index 8b80c78b6..000000000 --- a/examples/cosmodc2/pipeline-20deg2-clmm-nersc.yml +++ /dev/null @@ -1,43 +0,0 @@ -launcher: - name: mini - interval: 3.0 - -site: - name: cori-batch - image: ghcr.io/lsstdesc/txpipe-dev - - -modules: > - txpipe - rail.estimation.algos.bpz_lite - -python_paths: [] - -stages: - - name: TXSourceSelectorMetadetect - nprocess: 30 - - name: Inform_BPZ_lite - nprocess: 1 - - name: BPZ_lite - nprocess: 30 - - name: CLClusterShearCatalogs - nprocess: 30 - - - -output_dir: data/cosmodc2/outputs-20deg2 -config: examples/cosmodc2/config-20deg2-clmm.yml - -inputs: - # See 
README for paths to download these files - shear_catalog: ./data/cosmodc2/20deg2/shear_catalog.hdf5 - photometry_catalog: ./data/cosmodc2/20deg2/photometry_catalog.hdf5 - fiducial_cosmology: data/fiducial_cosmology.yml - calibration_table: ./data/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat - spectroscopic_catalog: ./data/cosmodc2/20deg2/spectroscopic_catalog.hdf5 - cluster_catalog: ./data/cosmodc2/20deg2/cluster_catalog.hdf5 - -resume: True -log_dir: data/cosmodc2/logs -pipeline_log: data/cosmodc2/log.txt - diff --git a/examples/cosmodc2/pipeline-20deg2-clmm.yml b/examples/cosmodc2/pipeline-20deg2-clmm.yml deleted file mode 100644 index b286a0a8d..000000000 --- a/examples/cosmodc2/pipeline-20deg2-clmm.yml +++ /dev/null @@ -1,43 +0,0 @@ -launcher: - name: mini - interval: 3.0 - -site: - name: cc-parallel - mpi_command: "mpirun -n" - - -modules: > - txpipe - rail.estimation.algos.bpz_lite - -python_paths: [] - -stages: - - name: TXSourceSelectorMetadetect - nprocess: 30 - - name: Inform_BPZ_lite - nprocess: 1 - - name: BPZ_lite - nprocess: 30 - - name: CLClusterShearCatalogs - nprocess: 30 - - - -output_dir: data/cosmodc2/outputs-20deg2 -config: examples/cosmodc2/config-20deg2-clmm.yml - -inputs: - # See README for paths to download these files - shear_catalog: ./data/cosmodc2/20deg2/shear_catalog.hdf5 - photometry_catalog: ./data/cosmodc2/20deg2/photometry_catalog.hdf5 - fiducial_cosmology: data/fiducial_cosmology.yml - calibration_table: ./data/cosmodc2/20deg2/sample_cosmodc2_w10year_errors.dat - spectroscopic_catalog: ./data/cosmodc2/20deg2/spectroscopic_catalog.hdf5 - cluster_catalog: ./data/cosmodc2/20deg2/cluster_catalog.hdf5 - -resume: True -log_dir: data/cosmodc2/logs -pipeline_log: data/cosmodc2/log.txt - diff --git a/notebooks/Run_CL_counts_pipeline.ipynb b/notebooks/Run_CL_counts_pipeline.ipynb new file mode 100644 index 000000000..9cb3e2e41 --- /dev/null +++ b/notebooks/Run_CL_counts_pipeline.ipynb @@ -0,0 +1,831 @@ +{ + "cells": [ + { + 
"cell_type": "code", + "execution_count": 1, + "id": "a01235ba-b30a-413c-a381-eef2712dfcb9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "from pprint import pprint\n", + "import numpy as np\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import Image\n", + "import ceci\n", + "import h5py\n", + "import yaml" + ] + }, + { + "cell_type": "markdown", + "id": "f1a90282-ed70-4496-bc4b-bc2d2e8ee302", + "metadata": {}, + "source": [ + "Make sure to change your path in the next cell that leads to your TXPipe directory. See examples for IN2P3 and NERSC below." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d5eb6757-e79f-445c-a2c6-4d25af5f6f65", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#user specific paths -- IN2P3 example\n", + "my_txpipe_dir = \"/pbs/home/m/mricci/throng_mricci/desc/TXPipe\"\n", + "\n", + "#user specific paths -- NERSC example\n", + "#my_txpipe_dir = \"/pscratch/sd/a/avestruz/TXPipe\"\n", + "\n", + "os.chdir(my_txpipe_dir)\n", + "\n", + "import txpipe" + ] + }, + { + "cell_type": "markdown", + "id": "33d82af0-d1c4-43cc-ac0b-a2d6f1e66889", + "metadata": {}, + "source": [ + "# Let's start working with the 1deg2 data file on Jupyter" + ] + }, + { + "cell_type": "markdown", + "id": "f4e0ef7e-9659-4a03-b0ca-ab856c5e5f54", + "metadata": {}, + "source": [ + "First we will do some runs on the 1 deg^2 example data set with around 80k galaxies. This is small enough that we can do it all in jupyter.\n", + "\n", + "The data set, which is based on CosmoDC2, contains pre-computed photo-z and and contains a RedMapper cluster catalog for the field." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "274c2ea4-b366-4e3d-8690-efa8af3f6d96", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Options for this pipeline and their defaults (this may be override by config file):\n", + "{'zedge': [0.2, 0.4, 0.6, 0.8, 1.0], 'richedge': [5.0, 10.0, 20.0], 'initial_size': 100000, 'chunk_rows': 100000}\n" + ] + } + ], + "source": [ + "print(\"Options for this pipeline and their defaults (this may be override by config file):\")\n", + "print(txpipe.extensions.CLClusterBinningRedshiftRichness.config_options)\n", + "\n", + "pip_stage = txpipe.extensions.CLClusterBinningRedshiftRichness.make_stage(\n", + " # This is the initial cluster catalog - RAs, Decs, richness, redshift, etc.\n", + " cluster_catalog=\"./data/example/inputs/cluster_catalog.hdf5\",\n", + " \n", + " # This is the output for this stage\n", + " cluster_catalog_tomography=\"./data/example/cluster_catalog_tomography.hdf5\",\n", + "\n", + " # This contains all the options for this stage. 
You can override them here, \n", + " #although this example keeps the values from the config file.\n", + " config=\"examples/cosmodc2/config-1deg2-CL.yml\", \n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "0df094c5-3e3a-4335-89ff-4df52bfb8a43", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pip_stage.run()\n", + "pip_stage.finalize()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "009eb8b0-1c54-497f-a419-055e869e2ece", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Actual options used for this pipeline (as defined in config file or default):\n", + "{zedge:[0.1, 0.4, 0.6, 0.8],richedge:[5.0, 10.0, 20.0, 25.0],initial_size:100000,chunk_rows:100000,cluster_catalog:./data/example/inputs/cluster_catalog.hdf5,cluster_catalog_tomography:./data/example/cluster_catalog_tomography.hdf5,config:examples/cosmodc2/config-1deg2-CL.yml,aliases:{},}\n" + ] + } + ], + "source": [ + "print(\"Actual options used for this pipeline (as defined in config file or default):\")\n", + "print(pip_stage.config)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8ee60427-f399-4fbe-ab97-39ea2387d40e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'./data/example/inputs/cluster_catalog.hdf5'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pip_stage.config['cluster_catalog']" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "22639ea6-38d4-4061-acb4-e402939a3fd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'./data/example/cluster_catalog_tomography.hdf5'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pip_stage.config['cluster_catalog_tomography']" + ] + }, + { + "cell_type": "markdown", + "id": "6174784d-ba23-4e33-bbf9-559295cb9d0f", + "metadata": {}, + 
"source": [ + "# Open cluster catalog input and compare to binning outputs" + ] + }, + { + "cell_type": "markdown", + "id": "b7101483-b7a2-45ce-a8b1-c6030e15da0d", + "metadata": {}, + "source": [ + "## Open cluster catalog input " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3218ac13-3d30-4bc9-bd8d-25d40a3eeddd", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./data/example/inputs/cluster_catalog.hdf5\n" + ] + } + ], + "source": [ + "filename_in = pip_stage.config['cluster_catalog']\n", + "print(filename_in)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "d8c38523-a8f7-4a43-9831-9ae273adeaa6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "f_in = h5py.File(filename_in, \"r\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "c6d59b2a-8857-44be-93ad-912d8a893ec0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(f_in.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "1ecd9464-7ef8-4600-b931-8b8c76830564", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dset_in = f_in['clusters']" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f5624d30-cea6-4999-8f53-0ee81030b632", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['cluster_id', 'dec', 'ra', 'redshift', 'redshift_err', 'richness', 'richness_err', 'scaleval']\n" + ] + } + ], + "source": [ + "cols = [col for col in dset_in]\n", + "print(cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "4349ca71-0548-490b-b5c3-5af0ce98f7f2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'richness')" + ] + }, + "execution_count": 26, + "metadata": {}, + 
"output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.semilogy(dset_in['redshift'][()], dset_in['richness'][()],'.', alpha=1)\n", + "\n", + "plt.xlabel('redshift')\n", + "plt.ylabel('richness')" + ] + }, + { + "cell_type": "markdown", + "id": "bfd77d78-6514-4c26-babb-24f3f167956e", + "metadata": {}, + "source": [ + "## Open binning output" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "0b4c65de-82c8-4f99-89de-dd0a598a31f0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./data/example/cluster_catalog_tomography.hdf5\n" + ] + } + ], + "source": [ + "filename_out = pip_stage.config['cluster_catalog_tomography'] #output_dir + \"/cluster_catalog_tomography.hdf5\"\n", + "print (filename_out)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "da3761cd-c7ed-47a2-a0bc-562cd7ad3381", + "metadata": {}, + "outputs": [], + "source": [ + "f_out = h5py.File(filename_out, \"r\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "b3274869-a729-45c7-b531-05d4f8ef69da", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(f_out.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "7785436c-8652-4c98-9ecb-373abd2dc5d2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dat_out = f_out['provenance']\n", + "dset_out = f_out['cluster_bin']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "736b4dc3-381b-45f4-a93e-f9c00b9e5e17", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(dset_out.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "9ad81296-06fb-4c27-bef9-116e8bf7dee8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "bin_zbin_0_richbin_0 {'rich_max': 10.0, 'rich_min': 5.0, 'z_max': 0.4, 'z_min': 0.1} 5\n", + "bin_zbin_0_richbin_1 {'rich_max': 20.0, 'rich_min': 10.0, 'z_max': 0.4, 'z_min': 0.1} 3\n", + "bin_zbin_0_richbin_2 {'rich_max': 25.0, 'rich_min': 20.0, 'z_max': 0.4, 'z_min': 0.1} 0\n", + "bin_zbin_1_richbin_0 {'rich_max': 10.0, 'rich_min': 5.0, 'z_max': 0.6, 'z_min': 0.4} 20\n", + "bin_zbin_1_richbin_1 {'rich_max': 20.0, 'rich_min': 10.0, 'z_max': 0.6, 'z_min': 0.4} 8\n", + "bin_zbin_1_richbin_2 {'rich_max': 25.0, 'rich_min': 20.0, 'z_max': 0.6, 'z_min': 0.4} 0\n", + "bin_zbin_2_richbin_0 {'rich_max': 10.0, 'rich_min': 5.0, 'z_max': 0.8, 'z_min': 0.6} 13\n", + "bin_zbin_2_richbin_1 {'rich_max': 20.0, 'rich_min': 10.0, 'z_max': 0.8, 'z_min': 0.6} 3\n", + "bin_zbin_2_richbin_2 {'rich_max': 25.0, 'rich_min': 20.0, 'z_max': 0.8, 'z_min': 0.6} 2\n" + ] + } + ], + "source": [ + "[print (i, dict(dset_out[i].attrs), dset_out[i]['redshift'][:].size) for i in dset_out.keys()];" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "38be57ca-e370-480d-a2e0-c2d45baf7b13", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['cluster_id', 'dec', 'ra', 'redshift', 'redshift_err', 'richness', 'richness_err', 'scaleval']\n" + ] + } + ], + "source": [ + "print ([col for col in dset_out['bin_zbin_0_richbin_0']])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "9b7670e8-3672-41c4-bd24-f17b1b3a5622", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file contains 9 keys corresponding to 3 redshift bins times 3 richness bins\n" + ] + } + ], + "source": [ + "print('The file contains',len(dset_out.keys()), 'keys corresponding to',\n", + " len(pip_stage.config.zedge) - 1, ' redshift bins times', \n", + " len(pip_stage.config.richedge) - 1,'richness bins')" + ] + }, + { 
+ "cell_type": "markdown", + "id": "99017d4e-0ad1-44da-ace2-4fa54fb28b2e", + "metadata": {}, + "source": [ + "## Compare the two " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "2b146a75-3aff-4fa7-be65-679c6f49bf6e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#plot data from input catalog\n", + "plt.semilogy(dset_in['redshift'][()], dset_in['richness'][()],'k.', alpha=1)\n", + "plt.xlabel('redshift')\n", + "plt.ylabel('richness')\n", + "\n", + "#plot bin limits as defined in the config file\n", + "[plt.axvline(i,linestyle='dashed', color='black') for i in pip_stage.config.zedge]\n", + "[plt.axhline(i,linestyle='dotted', color='black') for i in pip_stage.config.richedge]\n", + "\n", + "#overplot data from output file to make sure the bins are ordered correctly\n", + "markers=['s','o', 'D', 'P', '^']\n", + "\n", + "for i in range(len(pip_stage.config.zedge)-1):\n", + " for j in range(len(pip_stage.config.richedge)-1):\n", + " plt.scatter(dset_out['bin_zbin_'+str(i)+'_richbin_'+str(j)]['redshift'][:], \n", + " dset_out['bin_zbin_'+str(i)+'_richbin_'+str(j)]['richness'][:], marker=markers[j], label='bin_zbin_'+str(i)+'_richbin_'+str(j))\n", + " \n", + " plt.legend(fontsize='x-small')" + ] + }, + { + "cell_type": "markdown", + "id": "93633ac1-206d-42ac-88a8-b33c7b6656ac", + "metadata": {}, + "source": [ + "# Now let's do the same using the pipeline approach\n", + "\n", + "Here we will use the 20deg2, but we can also use the 1deg2 files (just need to change 20deg2 to 1deg2 in the name of the files)" + ] + }, + { + "cell_type": "markdown", + "id": "3e233ec3-29eb-45b1-ae97-7e03f3719478", + "metadata": {}, + "source": [ + "### Launching a pipeline\n", + "\n", + "Let's have a look at the submission script for this pipeline:\n", + "- to work at CCin2p3 we can use: `examples/cosmodc2/20deg2-in2p3.sub`\n", + "- to work at NERSC we can use: `examples/cosmodc2/20deg2-nersc.sub`\n", + "\n", + "If we use the CCin2p3 example:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "9660328e-0f08-4e92-9350-d4831c308e55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#!/usr/bin/bash\n", 
+ "#SBATCH --time=01:00:00\n", + "#SBATCH --partition=hpc\n", + "#SBATCH --ntasks=30\n", + "#SBATCH --cpus-per-task=1\n", + "#SBATCH --mem=128000\n", + "\n", + "source /pbs/throng/lsst/users/jzuntz/txpipe-environments/setup-txpipe\n", + "ceci examples/cosmodc2/pipeline-20deg2-CL-in2p3.yml\n" + ] + } + ], + "source": [ + "! cat examples/cosmodc2/20deg2-in2p3.sub" + ] + }, + { + "cell_type": "markdown", + "id": "94939a10-4dab-4cc2-9570-3de230423b48", + "metadata": {}, + "source": [ + "If we use the NERSC example:" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "dc09313d-ab83-4ab8-8d39-3cf1d495ebd5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#!/bin/bash\n", + "#SBATCH -A m1727\n", + "#SBATCH -C cpu\n", + "#SBATCH --qos=debug\n", + "#SBATCH --time=00:30:00\n", + "#SBATCH --nodes=1\n", + "#SBATCH --ntasks-per-node=32\n", + "\n", + "source $CFS/lsst/groups/WL/users/zuntz/setup-txpipe\n", + "tx ceci examples/cosmodc2/pipeline-20deg2-CL-nersc.yml\n" + ] + } + ], + "source": [ + "! cat examples/cosmodc2/20deg2-nersc.sub" + ] + }, + { + "cell_type": "markdown", + "id": "53430447-8c6d-413f-9138-9806050f01fe", + "metadata": {}, + "source": [ + "This will launch a job of up to one hour (it should finish in 30 min) on a single CC-IN2P3 node to run a pipeline. 
After the first run, the output files are created and following runs take much less time.\n", + "\n", + "In a terminal, **navigate to your TXPipe directory on IN2P3 and run**:\n", + "\n", + "```\n", + "sbatch examples/cosmodc2/20deg2-in2p3.sub\n", + "```\n", + "to set it running.\n", + "\n", + "If you are **on NERSC, you will instead run**:\n", + "```\n", + "sbatch examples/cosmodc2/20deg2-nersc.sub\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "b61f90b0-7f33-4fea-8ab7-310a1e00a6ca", + "metadata": {}, + "source": [ + "Below, you will need to select the appropriate yaml file to comment/uncomment for `pipeline_file`, depending on if you are in IN2P3 or on NERSC. " + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "af7452ac-b01e-454d-a521-3426392ca4dc", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Read the appropriate pipeline configuration, and ask for a flow-chart.\n", + "pipeline_file = \"examples/cosmodc2/pipeline-20deg2-CL-in2p3.yml\"\n", + "#pipeline_file = \"examples/cosmodc2/pipeline-20deg2-CL-nersc.yml\"\n", + "flowchart_file = \"CL_pipeline.png\"\n", + "\n", + "\n", + "pipeline_config = ceci.Pipeline.build_config(\n", + " pipeline_file,\n", + " flow_chart=flowchart_file,\n", + " dry_run=True\n", + ")\n", + "\n", + "# Run the flow-chart pipeline\n", + "ceci.run_pipeline(pipeline_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "a88eb44a-3a78-41f1-bffe-b1aa8d061842", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(flowchart_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "59f26536-3416-4229-8664-bea506599179", + "metadata": {}, + "outputs": 
[], + "source": [ + "## Open the corresponding pipeline file to load correct input/output file names" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "7077bf36-77a3-4c3b-8e49-47687f989807", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with open(pipeline_file, 'r') as file:\n", + " pipeline_content = yaml.safe_load(file)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "80ed821b-354d-4ae2-8240-cf28e2f3909f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./data/cosmodc2/20deg2/cluster_catalog.hdf5\n" + ] + } + ], + "source": [ + "#open input cluster catalog\n", + "filename_in = pipeline_content['inputs']['cluster_catalog']\n", + "print(filename_in)\n", + "f_in = h5py.File(filename_in, \"r\")\n", + "dset_in = f_in['clusters']" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "6346d5b9-5196-4342-8a67-a69ca84705cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./data/cosmodc2/outputs-20deg2-CL/cluster_catalog_tomography.hdf5\n" + ] + } + ], + "source": [ + "#open output binning output\n", + "filename_out =pipeline_content['output_dir']+\"/cluster_catalog_tomography.hdf5\"\n", + "print (filename_out)\n", + "f_out = h5py.File(filename_out, \"r\")\n", + "dat_out = f_out['provenance']\n", + "dset_out = f_out['cluster_bin']" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "604121fa-0cc8-4844-92c7-ec8d80642385", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#plot data from input catalog\n", + "plt.semilogy(dset_in['redshift'][()], dset_in['richness'][()],'k.', alpha=1)\n", + "plt.xlabel('redshift')\n", + "plt.ylabel('richness')\n", + "\n", + "#plot bin limits as defined in the config file\n", + "[plt.axvline(i,linestyle='dashed', color='black') for i in pip_stage.config.zedge]\n", + "[plt.axhline(i,linestyle='dotted', color='black') for i in pip_stage.config.richedge]\n", + "\n", + "#overplot data from output file to make sure the bins are ordered correctly\n", + "markers=['s','o', 'D', 'P', '^']\n", + "\n", + "for i in range(len(pip_stage.config.zedge)-1):\n", + " for j in range(len(pip_stage.config.richedge)-1):\n", + " plt.scatter(dset_out['bin_zbin_'+str(i)+'_richbin_'+str(j)]['redshift'][:], \n", + " dset_out['bin_zbin_'+str(i)+'_richbin_'+str(j)]['richness'][:], marker=markers[j], label='bin_zbin_'+str(i)+'_richbin_'+str(j))\n", + " \n", + " plt.legend(fontsize='x-small')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TXPipe-2023-Jul-12", + "language": "python", + "name": "txpipe-2023-jul-12" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/txpipe/__init__.py b/txpipe/__init__.py index 2436ccc40..a92beb959 100755 --- a/txpipe/__init__.py +++ b/txpipe/__init__.py @@ -41,5 +41,5 @@ # Here are the stages that mostly will be used for other projects # such as the self-calibration of Intrinsic alignment. 
class CLClusterBinningRedshiftRichness(PipelineStage):
    """Split a cluster catalog into bins of redshift and richness.

    The stage reads the "clusters" group of the ``cluster_catalog`` input
    in chunks and writes, for every (redshift bin, richness bin) pair, the
    selected columns of the clusters that fall in that bin to the
    ``cluster_catalog_tomography`` output, under the ``cluster_bin`` group.

    Configuration options
    ---------------------
    zedge : list of float
        Redshift bin edges; ``len(zedge) - 1`` bins are produced.
    richedge : list of float
        Richness bin edges; ``len(richedge) - 1`` bins are produced.
    initial_size : int
        Initial size handed to the splitter for every bin.
    chunk_rows : int
        Number of catalog rows read per iteration.
    """

    name = "CLClusterBinningRedshiftRichness"
    parallel = False
    inputs = [("cluster_catalog", HDFFile)]
    outputs = [("cluster_catalog_tomography", HDFFile)]
    config_options = {
        "zedge": [0.2, 0.4, 0.6, 0.8, 1.0],
        "richedge": [5., 10., 20.],
        "initial_size": 100_000,
        "chunk_rows": 100_000,
    }

    def run(self):
        """Bin the clusters and write one sub-group per (z, richness) bin.

        Clusters whose redshift or richness lies outside the configured
        edges are not written to any bin.  The bin edges of each bin are
        stored as attributes (``rich_min``, ``rich_max``, ``z_min``,
        ``z_max``) on the corresponding sub-group, and the bin counts
        ``nr`` and ``nz`` are stored on the ``cluster_bin`` group.
        """
        initial_size = self.config["initial_size"]
        chunk_rows = self.config["chunk_rows"]

        zedge = np.array(self.config['zedge'])
        richedge = np.array(self.config['richedge'])

        nz = len(zedge) - 1
        nr = len(richedge) - 1

        # Add infinities to either end to catch objects that spill out of
        # the configured range; they land in the padding bins, which are
        # never written.
        zedge = np.concatenate([[-np.inf], zedge, [np.inf]])
        richedge = np.concatenate([[-np.inf], richedge, [np.inf]])

        # All pairs of (z bin, richness bin) indices, and the matching bin
        # names mapped to the initial size passed to the splitter.
        bins = list(itertools.product(range(nz), range(nr)))
        bin_names = {f"zbin_{i}_richbin_{j}": initial_size for i, j in bins}

        # Columns we want to save for each object
        cols = [
            'cluster_id',
            'dec',
            'ra',
            'redshift',
            'redshift_err',
            'richness',
            'richness_err',
            'scaleval',
        ]

        f = self.open_output("cluster_catalog_tomography")
        # Make sure the output file is closed even if reading or writing
        # fails part-way through.
        try:
            g = f.create_group("cluster_bin")
            g.attrs['nr'] = nr
            g.attrs['nz'] = nz
            splitter = DynamicSplitter(g, "bin", cols, bin_names)

            # Make an iterator that will read a chunk of data at a time
            it = self.iterate_hdf("cluster_catalog", "clusters", cols, chunk_rows)

            # Loop through the chunks of data; each time `data` will be a
            # dictionary of column names -> numpy arrays
            for _, _, data in it:
                # Figure out which bin each cluster is in.  The edges were
                # padded with one extra entry at the low end, so subtracting
                # 2 makes the first configured bin index 0; values below the
                # first edge give -1 and values at or above the last edge
                # give nz (or nr), neither of which is visited below.
                zbin = np.digitize(data['redshift'], zedge) - 2
                richbin = np.digitize(data["richness"], richedge) - 2

                for zi in range(0, nz):
                    for ri in range(0, nr):
                        w = np.where((zbin == zi) & (richbin == ri))
                        # If there are no objects in this bin in this
                        # chunk, there is nothing to write.
                        if w[0].size == 0:
                            continue

                        # Otherwise we extract the bit of this chunk of
                        # data that is in this bin and have our splitter
                        # object write it out.
                        d = {name: col[w] for name, col in data.items()}
                        bin_name = f"zbin_{zi}_richbin_{ri}"
                        splitter.write_bin(d, bin_name)

            # Truncate arrays to correct size
            splitter.finish()

            # Save metadata.  The +1 / +2 offsets skip the -inf padding
            # entry added to the edge arrays above.
            for (i, j), name in zip(bins, bin_names):
                metadata = splitter.subgroups[name].attrs
                metadata['rich_min'] = richedge[j+1]
                metadata['rich_max'] = richedge[j+2]
                metadata['z_min'] = zedge[i+1]
                metadata['z_max'] = zedge[i+2]
        finally:
            f.close()