
added the IO functions removed from PR #141 #150

Open · wants to merge 8 commits into base: main
204 changes: 204 additions & 0 deletions pyirf/io/gadf.py
@@ -4,6 +4,7 @@
import numpy as np
from astropy.time import Time
import pyirf.binning as binning
from pyirf.cuts import compare_irf_cuts

from ..version import __version__

@@ -13,6 +14,11 @@
"create_energy_dispersion_hdu",
"create_psf_table_hdu",
"create_rad_max_hdu",
"compare_irf_cuts_in_files",
"read_fits_bins_lo_hi",
"read_irf_grid",
"read_aeff2d_hdu",
"read_energy_dispersion_hdu",
]


@@ -303,3 +309,201 @@ def create_rad_max_hdu(
    _add_header_cards(header, **header_cards)

    return BinTableHDU(rad_max_table, header=header, name=extname)


def compare_irf_cuts_in_files(files, extname='THETA_CUTS'):
    """
    Reads in a list of IRF files and checks if the same cuts have been applied in all of them.

    Parameters
    ----------
    files: list of strings
        files to be read
    extname: string
        name of the FITS extension to read the cut values from

    Returns
    -------
    match: boolean
        True if the cuts are the same in all the files
    """
    cuts = read_irf_cuts(files, extname=extname)
    return compare_irf_cuts(cuts)


def read_irf_cuts(files, extname='THETA_CUTS'):
    """
    Reads in a list of IRF files, extracts the cuts from them and returns them as a list.

    Parameters
    ----------
    files: string or list of strings
        files to be read
    extname: string
        name of the FITS extension to read the cut values from

    Returns
    -------
    cuts: list
        list of cut tables, one per file
    """
    if isinstance(files, str):
        files = [files]

    cuts = list()
    for file_name in files:
        table = QTable.read(file_name, hdu=extname)
        cuts.append(table)
    return cuts


def read_fits_bins_lo_hi(file_name, extname, tags):
    """
    Reads from a FITS file two arrays, tag_LO and tag_HI, for each tag.
    If more than one file is given, it checks that the bins in all of them are consistent before returning the values.

    Parameters
    ----------
    file_name: string or list of strings
        file(s) to be read; if a list is given, all files must contain the same bins
    extname: string
        name of the FITS extension to read the data from
    tags: string or list of strings
        base names of the fields in the extension to extract; '_LO' and '_HI' are appended

    Returns
    -------
    bins: list of astropy.units.Quantity
        list of (LO, HI) pairs of bin edges, one pair per tag
    """

    # to allow the function to work on either a single file or a list of files,
    # convert a single string into a list
    if isinstance(file_name, str):
        file_name = [file_name]

    # likewise, allow running over a single tag
    if isinstance(tags, str):
        tags = [tags]

    tags_lo_hi = [(tag + '_LO', tag + '_HI') for tag in tags]

    old_table = None
    bins = list()
    for this_file in file_name:
        table = QTable.read(this_file, hdu=extname)
        for tag_lo, tag_hi in tags_lo_hi:
            if old_table is not None:
                # every file after the first one is checked for consistency with the previous one
                if not old_table[tag_lo][0].shape == table[tag_lo][0].shape:
                    raise ValueError('Non matching bins in ' + extname)
                if not ((old_table[tag_lo][0] == table[tag_lo][0]).all() and (old_table[tag_hi][0] == table[tag_hi][0]).all()):
                    raise ValueError('Non matching bins in ' + extname)
            else:
                bins.append([table[tag_lo][0], table[tag_hi][0]])
        old_table = table

    return bins


def read_irf_grid(files, extname, field_name):
    """
    Reads the same IRF from a set of files (e.g. produced for different observation parameters) and stacks them.

    Parameters
    ----------
    files: string or list of strings
        files to be read
    extname: string
        name of the FITS extension to read the data from
    field_name: string
        name of the field in the extension to extract

    Returns
    -------
    irfs_all: numpy.ndarray
        array of IRFs; the first axis is the file number (if more than one file is given),
        the second axis is the offset angle, the remaining axes are IRF-specific
    """

    # to allow the function to work on either a single file or a list of files,
    # convert a single string into a list
    if isinstance(files, str):
        files = [files]

    n_files = len(files)

    irfs_all = list()

    for this_file in files:
        # [0] because the IRFs are written as a single row of the table
        irfs_all.append(QTable.read(this_file, hdu=extname)[field_name][0])
maxnoe (Member):

The transpose is missing here, this is why you have to do the non-trivial transposes in edisp.

This goes back to the point that I really don't understand why we need this read_irf_grid function. Simple functions that read a table once and return the arrays, and stacking can then just be edisp = np.stack(edisps).
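A minimal sketch of the pattern suggested above, assuming a hypothetical per-file helper (read_one_edisp is not part of pyirf; QTable.read and np.stack are the only real APIs used):

import numpy as np
from astropy.table import QTable

def read_one_edisp(file_name, extname="EDISP"):
    # each IRF is stored as a single row of the table, hence the [0]
    return QTable.read(file_name, hdu=extname)["MATRIX"][0]

# read each file separately, then stack along a new first axis
edisps = [read_one_edisp(f) for f in files]  # files: a list of FITS paths
edisp = np.stack(edisps)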

maxnoe (Member):

Also, keeping the table around is good, since it contains the metadata. So this function is just not as general as you might think it is.
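For context, a small sketch of what keeping the table preserves (the file name is a placeholder): the FITS header keywords survive the read in the table's meta attribute.

from astropy.table import QTable

table = QTable.read("irf_file.fits", hdu="EFFECTIVE AREA")  # placeholder file name
print(table.meta)  # header keywords such as HDUCLAS1..4 are kept here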

jsitarek (Contributor, Author):

I switched to np.stack.
Please note @maxnoe that the function also performs the required transposition that is there for all the IRFs and extracts the 0-th row: all the operations needed to convert between the FITS format and the one used inside pyirf. And it works fine on either individual files or lists of files, so I think it is very useful and it makes the custom functions that read individual IRFs simpler.

I agree that the table includes additional important information via metadata, but I think it would be more useful to extract this metadata and return such concrete information. Nevertheless, first we would need to agree (in #126) what information should be put there, with what names, etc.
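For illustration, a sketch of the two conversion steps described here, assuming the usual FITS-to-numpy axis reversal (the file name is a placeholder):

import numpy as np
from astropy.table import QTable

table = QTable.read("edisp.fits", hdu="EDISP")  # placeholder file name
matrix = table["MATRIX"][0]                     # extract the 0-th (only) row
# reading reverses the FITS axis order, giving (offset, migra, energy);
# pyirf expects (energy, migra, offset), so swap the outer axes
matrix = np.swapaxes(matrix, 0, 2)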

maxnoe (Member), Apr 28, 2021:

@jsitarek

> Nevertheless, first we would need to agree (in #126) what information should be put there, with what names, etc.

Actually no. Since at no point will pyirf use this metadata, users of pyirf can fill and read any metadata they like and use it for whatever purpose.

That's also the problem with this read_irf_grid function: you would have to open all the files again to also read the metadata.

So, please, write functions that read each IRF type, validating that it is the correct type by looking at the HDUCLASx header keywords, and returning also the metadata.

These functions can then easily be called for multiple files and the results stacked / compared.

I think the advantages of this far outweigh the little code repetition you avoid by using this read_irf_grid function.
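A sketch of such a per-type reader with header validation; read_aeff2d_checked is a hypothetical name, and the keyword value follows the GADF convention (HDUCLAS2 = 'EFF_AREA' for effective area HDUs):

from astropy.io import fits
from astropy.table import QTable

def read_aeff2d_checked(file_name, extname="EFFECTIVE AREA"):
    header = fits.getheader(file_name, extname=extname)
    # validate the HDU type before interpreting the data
    if header.get("HDUCLAS2") != "EFF_AREA":
        raise ValueError(f"HDU {extname} does not contain an effective area IRF")
    table = QTable.read(file_name, hdu=extname)
    # return both the IRF and the metadata, so files need to be opened only once
    return table["EFFAREA"][0], table.meta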

jsitarek (Contributor, Author):

@maxnoe

> > Nevertheless, first we would need to agree (in #126) what information should be put there, with what names, etc.
>
> Actually no. Since at no point will pyirf use this metadata, users of pyirf can fill and read any metadata they like and use it for whatever purpose.

Well, I would not be so sure about this. If we really have standardized metadata, we can use it inside pyirf to do e.g. standardized interpolation over those values.

> That's also the problem with this read_irf_grid function: you would have to open all the files again to also read the metadata.

As I explained above, the metadata processing can also be added to this function. Please note that the metadata should likely be the same for all the types of IRFs (no matter whether you want to read Aeff or energy migration, you still want to know at which zenith it was produced), so it makes perfect sense to read it in a standardized way.

> So, please, write functions that read each IRF type, validating that it is the correct type by looking at the HDUCLASx header keywords, and returning also the metadata.

I just implemented checking of all the HDUCLASx headers. Again, because of using one universal function it was pretty easy, especially since many of those fields are the same for different IRFs.

> These functions can then easily be called for multiple files and the results stacked / compared.
>
> I think the advantages of this far outweigh the little code repetition you avoid by using this read_irf_grid function.

With everything that we discuss in this thread, the amount of code repetition in the approach that you suggested would grow larger and larger. Already now we have 3 issues that are repeated for all IRFs:

  • swapping of the axis sequence
  • checks of the HDUCLASx headers
  • combining IRFs produced for different parameters

and we have two IRFs already implemented, and a few more that can be added easily. So in the end I think we are avoiding a lot of code repetition.


    # if the function is run on a single file, the first (file) axis is not needed
    if n_files == 1:
        irfs_all = irfs_all[0]
    # the last operation converts the list into a multidimensional array
    return np.array(irfs_all)


def read_aeff2d_hdu(file_name, extname="EFFECTIVE AREA"):
    """
    Reads the effective area from a FITS file.

    Parameters
    ----------
    file_name: string or list of strings
        file(s) to be read
    extname: string
        name of the BinTableHDU to read

    Returns
    -------
    effective_area: astropy.units.Quantity[area]
        Effective area array of shape (n_energy_bins, n_fov_offset_bins) or (n_files, n_energy_bins, n_fov_offset_bins)
    true_energy_bins: astropy.units.Quantity[energy]
        Bin edges in true energy
    fov_offset_bins: astropy.units.Quantity[angle]
        Bin edges in the field of view offset
    """

    field_name = "EFFAREA"
    effective_area = read_irf_grid(file_name, extname, field_name)
    true_energy_bins, fov_offset_bins = read_fits_bins_lo_hi(file_name, extname, ["ENERG", "THETA"])
    true_energy_bins = binning.join_bin_lo_hi(*true_energy_bins)
    fov_offset_bins = binning.join_bin_lo_hi(*fov_offset_bins)
    return effective_area, true_energy_bins, fov_offset_bins


def read_energy_dispersion_hdu(file_name, extname="EDISP"):
    """
    Reads the energy dispersion table from a FITS file.

    Parameters
    ----------
    file_name: string or list of strings
        file(s) to be read
    extname: string
        name of the BinTableHDU to read

    Returns
    -------
    energy_dispersion: numpy.ndarray
        Energy dispersion array, with shape
        (n_energy_bins, n_migra_bins, n_source_offset_bins)
        or (n_files, n_energy_bins, n_migra_bins, n_source_offset_bins)
    true_energy_bins: astropy.units.Quantity[energy]
        Bin edges in true energy
    migration_bins: numpy.ndarray
        Bin edges for the relative energy migration (``reco_energy / true_energy``)
    fov_offset_bins: astropy.units.Quantity[angle]
        Bin edges in the field of view offset
    """

    field_name = "MATRIX"
    energy_dispersion = read_irf_grid(file_name, extname, field_name)
    # swap the energy and offset axes: reading gives (..., offset, migra, energy),
    # pyirf uses (..., energy, migra, offset)
    last_axis = len(energy_dispersion.shape) - 1
    energy_dispersion = np.swapaxes(energy_dispersion, last_axis - 2, last_axis)
    true_energy_bins, migration_bins, fov_offset_bins = read_fits_bins_lo_hi(file_name, extname, ["ENERG", "MIGRA", "THETA"])
    true_energy_bins = binning.join_bin_lo_hi(*true_energy_bins)
    migration_bins = binning.join_bin_lo_hi(*migration_bins)
    fov_offset_bins = binning.join_bin_lo_hi(*fov_offset_bins)

    return energy_dispersion, true_energy_bins, migration_bins, fov_offset_bins
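Taken together, the new readers are meant to be used like this (the paths are placeholders; the tests below exercise the same calls on real files):

from pyirf.io.gadf import read_aeff2d_hdu, read_energy_dispersion_hdu

files = ["irf_run1.fits", "irf_run2.fits"]  # placeholder paths
aeff, e_bins, offset_bins = read_aeff2d_hdu(files, extname="EFFECTIVE AREA")
edisp, e_bins, migra_bins, offset_bins = read_energy_dispersion_hdu(
    files, extname="ENERGY DISPERSION"
)
# aeff.shape  == (n_files, n_energy_bins, n_offset_bins)
# edisp.shape == (n_files, n_energy_bins, n_migra_bins, n_offset_bins)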
79 changes: 78 additions & 1 deletion pyirf/io/tests/test_gadf.py
@@ -1,9 +1,10 @@
'''
Test export to / import from GADF format
'''
import astropy.units as u
import numpy as np
from astropy.io import fits
from pyirf.io import gadf
import pytest
import tempfile

@@ -181,3 +182,79 @@ def test_rad_max_schema(rad_max_hdu):

    _, hdu = rad_max_hdu
    RAD_MAX.validate_hdu(hdu)


def test_compare_irf_cuts_files():
    """Test of cut consistency using real files: two identical ones and one different."""
    from pyirf.io.gadf import compare_irf_cuts_in_files
    file1a = '../../../data/interp_test_data/pyirf_eventdisplay_68.fits.gz'
    file1b = '../../../data/interp_test_data/pyirf_eventdisplay_68_copy.fits.gz'
    file2 = '../../../data/interp_test_data/pyirf_eventdisplay_80.fits.gz'

    assert compare_irf_cuts_in_files([file1a, file1b], 'THETA_CUTS')

    assert compare_irf_cuts_in_files([file1a, file1b, file2], 'THETA_CUTS') is False


def test_read_irf_cuts():
    """Simple test of reading cuts from a file."""
    from pyirf.io.gadf import read_irf_cuts
    file1 = '../../../data/interp_test_data/pyirf_eventdisplay_68.fits.gz'
    cuts = read_irf_cuts(file1)
    # check that you get one set of cuts and that the number of rows matches
    assert len(cuts) == 1
    assert cuts[0].as_array().shape == (212,)

    # now read two files
    cuts = read_irf_cuts([file1, file1])
    assert len(cuts) == 2
    assert cuts[1].as_array().shape == (212,)


def test_read_fits_bins_lo_hi():
    """Tests read_fits_bins_lo_hi on a sample file."""
    file_name = '../../../data/interp_test_data/pyirf_eventdisplay_68.fits.gz'
    bin_lo, bin_hi = gadf.read_fits_bins_lo_hi(file_name, 'EFFECTIVE_AREA', 'ENERG')[0]

    # check that the bins are not empty
    assert len(bin_lo) > 0

    # check that the right edge of each bin matches the start of the next one
    # (allow for a numerical precision of 1.e-5)
    assert np.allclose(bin_lo[1:], bin_hi[:-1], rtol=1.e-5)


def test_read_irf_grid():
    """Tests read_irf_grid on a single file and on a list of files."""
    file_name = '../../../data/interp_test_data/irf_file_prod3b-v2_North_z20_N_50h.fits'
    extname = "EFFECTIVE AREA"
    fname = "EFFAREA"
    # check on a single file
    aeff = gadf.read_irf_grid(file_name, extname=extname, field_name=fname)
    assert aeff.shape == (6, 42)

    # check on a list of files
    aeff = gadf.read_irf_grid([file_name, file_name], extname=extname, field_name=fname)
    assert aeff.shape == (2, 6, 42)


def test_read_aeff2d_hdu():
    """Test the read_aeff2d_hdu function."""
    file_name = '../../../data/interp_test_data/irf_file_prod3b-v2_North_z20_N_50h.fits'
    aeff, e_bins, th_bins = gadf.read_aeff2d_hdu([file_name, file_name], extname="EFFECTIVE AREA")

    # check that the correct shapes are recovered from the file
    assert aeff.shape == (2, 6, 42)
    assert e_bins.shape == (43,)
    assert th_bins.shape == (7,)


def test_read_energy_dispersion_hdu():
    """Test the read_energy_dispersion_hdu function."""
    file_name = '../../../data/interp_test_data/irf_file_prod3b-v2_North_z20_N_50h.fits'
    edisp, e_bins, mig_bins, th_bins = gadf.read_energy_dispersion_hdu(file_name, extname="ENERGY DISPERSION")

    # check that the correct shapes are recovered from the file
    assert edisp.shape == (500, 300, 6)
    assert mig_bins.shape == (301,)
    assert e_bins.shape == (501,)
    assert th_bins.shape == (7,)