Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mock catalog type requiring no calibration, and add example pipeline #361

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ on:
- cron: '0 12 * * 1'

env:
EXAMPLE_DATA_FILE_VERSION: v2
# Setting this did not appear to work. Instead will need to
# find/replace when changing it.
# CONTAINER_IMAGE: ghcr.io/lsstdesc/txpipe:latest
EXAMPLE_DATA_FILE_VERSION: v5

jobs:
# Run a download step first so that it is in
Expand Down Expand Up @@ -258,6 +255,45 @@ jobs:
run: |
tail -n +1 data/example/logs_redmagic/*

Mock_Shear_Pipeline:
runs-on: ubuntu-latest

needs: Download_Data

container:
image: ghcr.io/lsstdesc/txpipe:v0.9

steps:
- name: Checkout repository
uses: actions/checkout@v2
with:
submodules: true

- name: Restore example data
uses: actions/cache/restore@v3
with:
path: example.tar.gz
# update this when we change package contents and want
# to force an update
key: example-data-${{ env.EXAMPLE_DATA_FILE_VERSION }}
fail-on-cache-miss: true

- name: Extract test data
run: |
tar -zxvf example.tar.gz

- name: Run mock shear pipeline
run: |
ceci examples/mock_shear/pipeline.yml
test -f data/example/outputs_mock/binned_shear_catalog.hdf5

- name: Show logs
if: ${{ always() }}
run: |
tail -n +1 data/example/logs_mock/*



Other_Pipeline_Dry_Runs:
runs-on: ubuntu-latest

Expand Down
70 changes: 70 additions & 0 deletions bin/generate-nfw-shear-cat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import argparse



parser = argparse.ArgumentParser()
parser.add_argument("--output", type=str, default="simple_cat.txt")
parser.add_argument("--nmax", type=int, default=10000, help="Number of objects to generate, though cat size is reduced by redshift cut")
parser.add_argument("--mass", type=float, default=10.0e14, help="Cluster mass in Msun")
parser.add_argument("--cluster_z", type=float, default=0.22, help="Cluster redshift")
parser.add_argument("--concentration", type=float, default=4.0, help="Cluster concentration parameter")
parser.add_argument("--size", type=float, default=240.0, help="Size of the region in arcmin")
parser.add_argument("--mean-z", type=float, default=0.5, help="Mean redshift of the background galaxies")
parser.add_argument("--sigma-z", type=float, default=0.1, help="Redshift std dev of the background galaxies")
parser.add_argument("--mean-snr", type=float, default=20., help="Mean SNR")
parser.add_argument("--mean-size", type=float, default=0.3, help="Galaxy mean size^2 T parameter in arcsec")
parser.add_argument("--sigma-size", type=float, default=0.3, help="Galaxy std dev size^2 T parameter in arcsec")
args = parser.parse_args()

mass = args.mass
cluster_z = args.cluster_z
concentration = args.concentration


import galsim
import numpy as np
from astropy.table import Table
import argparse

nfw = galsim.NFWHalo(mass, concentration, cluster_z)

half_size = args.size / 2
xmin = -half_size
xmax = half_size
ymin = -half_size
ymax = half_size

N = 10000

x = np.random.uniform(xmin, xmax, N)
y = np.random.uniform(ymin, ymax, N)
z = np.random.normal(args.mean_z, args.sigma_z, N)

w = np.where(z > nfw.z)
x1 = x[w]
y1 = y[w]
z1 = z[w]
n = z1.size

ra = np.zeros(n) + x1 / 3600
dec = np.zeros(n) + y1 / 3600
g1, g2 = nfw.getShear([x1, y1], z1, reduced=False)
s2n = np.random.exponential(args.mean_snr, size=n)
print(s2n.mean())

# This should give plenty of selected objects since we cut on T/Tpsf > 0.5 by default
# and Tpsf default is ~.2
T = np.random.normal(args.mean_size, args.sigma_size, size=n).clip(0.01, np.inf)

data = {
"ra": ra,
"dec": dec,
"g1": g1,
"g2": g2,
"s2n": s2n,
"T": T,
"redshift": z1,
}

table = Table(data=data)
table.write(args.output, overwrite=True, format="ascii.commented_header")
24 changes: 24 additions & 0 deletions bin/make_single_cluster_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np
import h5py
ra = np.array([0.0])
dec = np.array([0.0])
z = np.array([0.22])
z_err = np.array([0.01])
ids = np.array([0])
richness = np.array([10.0])
richness_err = np.array([1.0])
scale = np.array([1.0])

with h5py.File("mock_single_cluster_catalog.hdf5", "w") as f:
g = f.create_group("clusters")
g
g.create_dataset("cluster_id", data=ids)
g.create_dataset("ra", data=ra)
g.create_dataset("dec", data=dec)
g.create_dataset("redshift", data=z)
g.create_dataset("redshift_err", data=z_err)
g.create_dataset("richness", data=richness)
g.create_dataset("richness_err", data=richness_err)
g.create_dataset("scaleval", data=scale)

g.attrs["n_clusters"] = 1
23 changes: 23 additions & 0 deletions examples/mock_shear/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
global:
chunk_rows: 100000
pixelization: healpix
nside: 64
sparse: true


TXSourceSelectorSimple:
input_pz: false
true_z: true
bands: riz
T_cut: 0.5
s2n_cut: 10.0
max_rows: 1000
delta_gamma: 0.02
source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0]
shear_prefix: ""
verbose: true

TXMockTruthPZ:
mock_sigma_z: 0.001
aliases:
photoz_pdfs: source_photoz_pdfs
53 changes: 53 additions & 0 deletions examples/mock_shear/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

# Stages to run
stages:
- name: TXSimpleMock # Convert a text file mock catalog to HDF5
- name: TXSourceSelectorSimple # select and split objects into source bins
- name: TXShearCalibration # Calibrate and split the source sample tomographically
- name: TXMockTruthPZ # Generate PDFs as narrow gaussian centered on the true redshifts
- name: CLClusterShearCatalogs # Extract and weight the shear catalog around every cluster



# modules and packages to import that have pipeline
# stages defined in them
modules: >
txpipe

# where to find any modules that are not in this repo,
# and any other code we need.
python_paths:
- submodules/WLMassMap/python/desc/

# Where to put outputs
output_dir: data/example/outputs_mock

# How to run the pipeline: mini, parsl, or cwl
launcher:
name: mini
interval: 1.0

# Where to run the pipeline: cori-interactive, cori-batch, or local
site:
name: local
max_threads: 2

# configuration settings
config: examples/mock_shear/config.yml

# These are overall inputs to the whole pipeline, not generated within it
inputs:
mock_shear_catalog: data/example/inputs/mock_nfw_shear_catalog.txt
calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat
cluster_catalog: data/example/inputs/mock_single_cluster_catalog.hdf5
fiducial_cosmology: data/fiducial_cosmology.yml



# if supported by the launcher, restart the pipeline where it left off
# if interrupted
resume: True
# where to put output logs for individual stages
log_dir: data/example/logs_mock
# where to put an overall parsl pipeline log
pipeline_log: data/example/mock_shear_log.txt
1 change: 1 addition & 0 deletions txpipe/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def run(self):
# is needed
tomo_file = self.get_input("shear_tomography_catalog")
cals, cal2d = Calibrator.load(tomo_file, null=use_true)
print("Using calibration method:", cal2d.__class__.__name__)

# The catalog columns are named differently in different cases
#  Get the correct shear catalogs
Expand Down
102 changes: 101 additions & 1 deletion txpipe/ingest/mocks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ..base_stage import PipelineStage
from ..data_types import ShearCatalog, HDFFile
from ..data_types import ShearCatalog, HDFFile, TextFile, QPPDFFile
from ..utils import (
band_variants,
metacal_variants,
Expand Down Expand Up @@ -1216,6 +1216,104 @@ def generate_mock_metacal_mag_responses(bands, nobj):
return mag_responses


class TXSimpleMock(PipelineStage):
"""
Load an ascii astropy table and put it in shear catalog format.
"""
name = "TXSimpleMock"
inputs = [("mock_shear_catalog", TextFile)]
outputs = [("shear_catalog", ShearCatalog)]
config_options = {
"mock_size_snr": False,
}
def run(self):
from astropy.table import Table
import numpy as np

# Load the data. We are assuming here it is small enough to fit in memory
input_filename = self.get_input("mock_shear_catalog")
input_data = Table.read(input_filename, format="ascii")
n = len(input_data)

data = {}
# required columns
for col in ["ra", "dec", "g1", "g2", "s2n", "T"]:
data[col] = input_data[col]

# It's most likely we will have a redshift column.
# Check for both that and "redshift_true"
if "redshift" in input_data.colnames:
data["redshift_true"] = input_data["redshift"]
elif "redshift_true" in input_data.colnames:
data["redshift_true"] = input_data["redshift_true"]

# If there is an ID column then use it, but otherwise just use
# sequential IDs
if "id" in input_data.colnames:
data["galaxy_id"] = input_data["id"]
else:
data["galaxy_id"] = np.arange(len(input_data))

# if these catalogs are not present then we fake them.
defaults = {
"T_err": 0.0,
"psf_g1": 0.0,
"psf_g2": 0.0,
"psf_T_mean": 0.202, # this corresponds to a FWHM of 0.75 arcsec
"weight": 1.0,
"flags": 0,
}

for key, value in defaults.items():
if key in input_data.colnames:
data[key] = input_data[key]
else:
data[key] = np.full(n, value)

self.save_catalog(data)

def save_catalog(self, data):
with self.open_output("shear_catalog") as f:
g = f.create_group("shear")
g.attrs["catalog_type"] = "simple"
for key, value in data.items():
g.create_dataset(key, data=value)


class TXMockTruthPZ(PipelineStage):
name = "TXMockTruthPZ"
inputs = [("shear_catalog", ShearCatalog)]
outputs = [("photoz_pdfs", QPPDFFile)]
config_options = {
"mock_sigma_z": 0.001,
}
def run(self):
import qp
import numpy as np
sigma_z = self.config["mock_sigma_z"]


# read the input truth redshifts
with self.open_input("shear_catalog", wrapper=True) as f:
group = f.file[f.get_primary_catalog_group()]
n = group["ra"].size
redshifts = group["redshift_true"][:]

zgrid = np.linspace(0, 3, 301)
pdfs = np.zeros((n, len(zgrid)))

spread_z = sigma_z * (1 + redshifts)
# make a gaussian PDF for each object
delta = zgrid[np.newaxis, :] - redshifts[:, np.newaxis]
pdfs = np.exp(-0.5 * (delta / spread_z[:, np.newaxis])**2) / np.sqrt(2 * np.pi) / spread_z[:, np.newaxis]

q = qp.Ensemble(qp.interp, data=dict(xvals=zgrid, yvals=pdfs))
q.set_ancil(dict(zmode=redshifts, zmean=redshifts, zmedian=redshifts))
q.write_to(self.get_output("photoz_pdfs"))




def test():
import pylab

Expand All @@ -1232,3 +1330,5 @@ def test():
results = make_mock_photometry(n_visit, bands, data, True)
pylab.hist(results["snr_r"], bins=50, histtype="step")
pylab.savefig("snr_r.png")


Loading
Loading