Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New readers from MM satellite group #112

Merged
merged 50 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
82d5565
Develop satellite (#29)
rrbuchholz May 4, 2023
f423e27
format
zmoon May 30, 2023
2ca5fc2
please flake8
zmoon May 30, 2023
db5c69a
No top-level h5py import; some formatting
zmoon May 30, 2023
85b0a00
Initial mop test
zmoon Jun 5, 2023
e0778ac
Test da extractor
zmoon Jun 5, 2023
c3b917c
better msg
zmoon Jun 5, 2023
793e923
Add h5py to dev env
zmoon Jun 5, 2023
a04f468
Merge branch 'noaa-oar-arl:develop' into develop
rrbuchholz Aug 10, 2023
2df1d76
Update monetio/sat/_tropomi_l2_no2_mm.py
rrbuchholz Aug 10, 2023
0fda080
Update monetio/sat/_omps_l3_mm.py
rrbuchholz Aug 10, 2023
38fc2f1
Update monetio/sat/_omps_l3_mm.py
rrbuchholz Aug 10, 2023
9598318
Update monetio/sat/_mopitt_l3_mm.py
rrbuchholz Aug 10, 2023
eb82711
Update monetio/sat/_mopitt_l3_mm.py
rrbuchholz Aug 10, 2023
fece70d
Update monetio/sat/_mopitt_l3_mm.py
rrbuchholz Aug 10, 2023
07eaf82
Adding units and long_name
rrbuchholz Aug 10, 2023
f90380f
Style; fix file list/str
zmoon Sep 11, 2023
961554f
Test MOPITT `open_dataset` too
zmoon Sep 11, 2023
86f8b15
Let exceptions be raised; check `varname`
zmoon Sep 11, 2023
be7a9a7
File lock for MOPITT test file
zmoon Sep 11, 2023
1e0bfad
Add filelock to dev deps
zmoon Sep 11, 2023
e643eb2
Alternate lock method
zmoon Sep 11, 2023
d74f9f8
Revert somewhat
zmoon Sep 11, 2023
0877a44
Merge remote-tracking branch 'noaa/develop' into ncar-develop
zmoon Sep 11, 2023
6d2cb70
cleanup
zmoon Sep 11, 2023
3655f4e
Merge branch 'noaa-oar-arl:develop' into develop
rrbuchholz Feb 22, 2024
7ba5a77
Merge branch 'noaa-oar-arl:develop' into develop
rrbuchholz Mar 7, 2024
f097e22
Modifications to Mopitt L3 reader (#31)
mebruckner Mar 7, 2024
3cc8a91
Merge branch 'noaa-oar-arl:develop' into develop
rrbuchholz Mar 21, 2024
afc51e3
Merge branch 'develop' into develop
zmoon May 3, 2024
2789f2e
Auto format
zmoon May 3, 2024
72febb9
No ##
zmoon May 3, 2024
be143a3
Allow single string varname
zmoon May 3, 2024
dc7a2de
Variable attrs instead
zmoon May 3, 2024
0648ffe
Sanity check
zmoon May 3, 2024
95cee34
sp
zmoon May 3, 2024
d6b5d59
Allow passing Path
zmoon May 5, 2024
9a141eb
dims and such; get time from data
zmoon May 5, 2024
bcb5b3c
Initial tropomi test
zmoon May 5, 2024
24c6ec9
Apply dataset fill value
zmoon May 5, 2024
b6a3f17
Check min thing works
zmoon May 5, 2024
198c51e
Only modify logging config in debug case
zmoon May 5, 2024
dabc84f
Update docstring
zmoon May 5, 2024
952b7ee
Switch y and x
zmoon May 5, 2024
33c7734
Add attrs; time as coord
zmoon May 5, 2024
f4f4580
Initial OMPS test using local files
zmoon May 5, 2024
aeb9296
Use uploaded data
zmoon May 5, 2024
10cd0fc
Back to single quality flag var
zmoon May 6, 2024
90bb5a8
Allow passing vn(s); doc variable_dict a bit
zmoon May 7, 2024
899e530
Merge branch 'noaa-oar-arl:develop' into develop
rrbuchholz May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
._.DS_Store

docs/api/

tests/data/MOP*.he5

# Default GitHub .gitignore for Python below:

Expand Down
6 changes: 6 additions & 0 deletions monetio/sat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from . import (
_gridded_eos_mm,
_modis_l2_mm,
_mopitt_l3_mm,
_omps_l3_mm,
_omps_nadir_mm,
_tropomi_l2_no2_mm,
goes,
modis_ornl,
nesdis_edr_viirs,
Expand All @@ -10,9 +13,12 @@
)

__all__ = [
"_tropomi_l2_no2_mm",
"_gridded_eos_mm",
"_modis_l2_mm",
"_omps_nadir_mm",
"_omps_l3_mm",
"_mopitt_l3_mm",
"nesdis_edr_viirs",
"nesdis_eps_viirs",
"nesdis_frp",
Expand Down
147 changes: 147 additions & 0 deletions monetio/sat/_mopitt_l3_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
""" MOPITT gridded data File reader
updated 2022-10 rrb
* DataSet instead of DataArray
created 2021-12 rrb
"""
import glob

import pandas as pd
import xarray as xr


def getStartTime(filename):
    """Read the start time from a MOPITT level 3 HDF-EOS5 file.

    Parameters
    ----------
    filename : str
        Path to the file.

    Returns
    -------
    pandas.Timestamp or int
        The file start time, or ``0`` if the file could not be opened or
        has no ``StartTime`` attribute (the ``0`` sentinel is kept for
        backward compatibility with existing callers).
    """
    import h5py

    structure = "/HDFEOS/ADDITIONAL/FILE_ATTRIBUTES"

    try:
        inFile = h5py.File(filename, "r")
    except Exception:
        print("ERROR: CANNOT OPEN " + filename)
        return 0

    try:
        attrs = inFile[structure].attrs
        startTimeBytes = attrs.get("StartTime", default=None)
        if startTimeBytes is None:
            # Previously this crashed with a TypeError on the subscript
            # below; report the problem and return the error sentinel.
            print("ERROR: NO StartTime ATTRIBUTE IN " + filename)
            return 0
        # MOPITT start times are seconds since the TAI93 epoch.
        startTime = pd.to_datetime(startTimeBytes[0], unit="s", origin="1993-01-01 00:00:00")
    finally:
        # try/finally guarantees the handle is closed even if reading the
        # attributes raises (the original leaked the handle in that case).
        inFile.close()

    return startTime


def loadAndExtractGriddedHDF(filename, varname):
    """Open a MOPITT gridded level 3 HDF-EOS5 file and extract one variable.

    Missing values (-9999.0) in the 2-D fields are converted to NaN.

    Parameters
    ----------
    filename : str
        Path to the file.
    varname : str
        The variable to load from the MOPITT file; one of "column",
        "apriori_col", "apriori_surf", "pressure_surf", "ak_col", "ak_prof".

    Returns
    -------
    xarray.Dataset or int
        Dataset holding ``varname``, or ``0`` if the variable could not be
        loaded (the ``0`` sentinel is kept for backward compatibility).
    """
    import h5py

    # initialize into dataset
    ds = xr.Dataset()

    # 2D or 3D variables to choose from
    variable_dict = {
        "column": "/HDFEOS/GRIDS/MOP03/Data Fields/RetrievedCOTotalColumnDay",
        "apriori_col": "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOTotalColumnDay",
        "apriori_surf": "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOSurfaceMixingRatioDay",
        "pressure_surf": "/HDFEOS/GRIDS/MOP03/Data Fields/SurfacePressureDay",
        "ak_col": "/HDFEOS/GRIDS/MOP03/Data Fields/TotalColumnAveragingKernelDay",
        "ak_prof": "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOMixingRatioProfileDay",
    }

    he5_load = h5py.File(filename, mode="r")
    try:
        # load the dimensions
        lat = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Latitude"][:]
        lon = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Longitude"][:]
        alt = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure2"][:]
        alt_short = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure"][:]

        try:
            data_loaded = he5_load[variable_dict[varname]][:]
        except Exception:
            print("ERROR: Cannot load " + varname + " from " + filename)
            return 0
    finally:
        # try/finally: the original leaked the file handle on the
        # error-return path above.
        he5_load.close()

    # create xarray DataArray
    if varname in ("column", "apriori_col", "apriori_surf", "pressure_surf"):
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat"], coords=[lon, lat])
        # missing value -> nan
        ds[varname] = ds[varname].where(ds[varname] != -9999.0)
    elif varname == "ak_col":
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat", "alt"], coords=[lon, lat, alt])
    elif varname == "ak_prof":
        # BUG FIX: this branch previously tested varname == "apriori_prof",
        # which is not a key of variable_dict, so "ak_prof" data was loaded
        # but silently dropped and an empty Dataset was returned.
        ds[varname] = xr.DataArray(
            data_loaded, dims=["lon", "lat", "alt"], coords=[lon, lat, alt_short]
        )

    return ds
zmoon marked this conversation as resolved.
Show resolved Hide resolved


def read_mopittdataset(files, varname):
    """Loop through files to open the MOPITT level 3 data.

    Parameters
    ----------
    files : str or list of str
        The full path to the file or files. Can take a file template
        with wildcard (*) symbol.
    varname : str
        The variable to load from the MOPITT file.

    Returns
    -------
    xarray.Dataset
        All files concatenated along a new "time" dimension.

    Raises
    ------
    ValueError
        If no files match ``files``.
    """
    if isinstance(files, str):
        filelist = sorted(glob.glob(files, recursive=False))
    else:
        # The docstring promised list support, but glob.glob only accepts
        # a string; expand each entry and sort the combined result.
        filelist = sorted(
            fn for template in files for fn in glob.glob(str(template), recursive=False)
        )
    if not filelist:
        # Previously fell through to an UnboundLocalError on full_dataset.
        raise ValueError(f"no files found matching {files!r}")

    full_dataset = None
    for filename in filelist:
        print(filename)
        data = loadAndExtractGriddedHDF(filename, varname)
        time = getStartTime(filename)
        data = data.expand_dims(axis=0, time=[time])
        if full_dataset is None:
            full_dataset = data
        else:
            full_dataset = xr.concat([full_dataset, data], "time")

    return full_dataset
72 changes: 72 additions & 0 deletions monetio/sat/_omps_l3_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
def read_OMPS_l3(files):
    """Loop to open OMPS nadir mapper Total Column Ozone L3 files.

    Parameters
    ----------
    files : str or list of str
        File path(s); a string may contain glob wildcards.

    Returns
    -------
    xarray.Dataset
        All files concatenated along a new "time" dimension.

    Raises
    ------
    ValueError
        If no files match ``files``.
    """
    from glob import glob

    import numpy as np
    import xarray as xr

    if isinstance(files, str):
        filelist = sorted(glob(files, recursive=False))
    else:
        # Docstring says "string or list of strings" but glob only takes a
        # string; expand each entry and sort the combined result.
        filelist = sorted(fn for template in files for fn in glob(str(template), recursive=False))
    if not filelist:
        # Previously fell through to an UnboundLocalError on data_array.
        raise ValueError(f"no files found matching {files!r}")

    data_array = None
    times = []
    for filename in filelist:
        data = extract_OMPS_l3(filename)
        # Collect per-file timestamps; "time" is moved from attrs to a
        # proper coordinate after concatenation.
        times.append(data.attrs["time"])
        del data.attrs["time"]
        if data_array is None:
            data_array = data
        else:
            data_array = xr.concat([data_array, data], "time")
    data_array["time"] = (("time"), np.array(times))
    data_array = data_array.reset_coords().set_coords(["latitude", "longitude", "time"])
    return data_array


def extract_OMPS_l3(fname):
    """Read a locally stored NASA Suomi NPP OMPS Level 3 Nadir Mapper TO3 file.

    Cloudy scenes (radiative cloud fraction > 0.3) and negative column
    values (e.g. the polar dark zone) are set to NaN.

    Parameters
    ----------
    fname : str
        Local path to the h5 file.

    Returns
    -------
    xarray.Dataset
        Dataset with an "ozone_column" variable on a (time, x, y) grid;
        the file date is stored in ``ds.attrs["time"]``.
    """
    import h5py
    import numpy as np
    import pandas as pd
    import xarray as xr

    with h5py.File(fname, "r") as f:
        lat = f["Latitude"][:]
        lon = f["Longitude"][:]
        column = f["ColumnAmountO3"][:]
        cloud_fraction = f["RadiativeCloudFraction"][:]
        # plain indexing instead of the __getitem__ dunder call
        time = pd.to_datetime(f.attrs["Date"].decode("UTF-8"), format="%Y-%m-%d")

    # remove cloudy scenes and points with no data (e.g. polar dark zone)
    column[column < 0] = np.nan
    column[cloud_fraction > 0.3] = np.nan
    lon_2d, lat_2d = np.meshgrid(lon, lat)

    ds = xr.Dataset(
        {
            "ozone_column": (["time", "x", "y"], column[None, :, :]),
        },
        coords={
            "longitude": (["x", "y"], lon_2d),
            "latitude": (["x", "y"], lat_2d),
        },
        attrs={"time": time},
    )

    return ds
138 changes: 138 additions & 0 deletions monetio/sat/_tropomi_l2_no2_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Reading TROPOMI L2 NO2 data

import logging
import os
import sys
from collections import OrderedDict
from glob import glob

import numpy as np
import xarray as xr
from netCDF4 import Dataset

# from monetio.hdf.hdfio import hdf_open, hdf_close, hdf_list, hdf_read


def read_dataset(fname, variable_dict):
    """Read one TROPOMI L2 NO2 granule.

    Parameters
    ----------
    fname : str
        Input file path.
    variable_dict : dict
        Mapping of variable name (within the file's "PRODUCT" group) to a
        dict of options. Recognized options: "fillvalue", "scale",
        "minimum", "maximum", "quality_flag_min".

    Returns
    -------
    xarray.Dataset
    """
    print("reading " + fname)

    ds = xr.Dataset()

    dso = Dataset(fname, "r")

    longitude = dso.groups["PRODUCT"]["longitude"]
    latitude = dso.groups["PRODUCT"]["latitude"]
    start_time = dso.groups["PRODUCT"]["time"]

    # squeeze 1-dimension
    longitude = np.squeeze(longitude)
    latitude = np.squeeze(latitude)
    start_time = np.squeeze(start_time)

    ds["lon"] = xr.DataArray(longitude)
    ds["lat"] = xr.DataArray(latitude)

    for varname in variable_dict:
        print(varname)
        values = dso.groups["PRODUCT"][varname]
        # squeeze out 1-dimension
        values = np.squeeze(values)

        # apply fill value, scale factor, and min/max bounds if configured
        if "fillvalue" in variable_dict[varname]:
            fillvalue = variable_dict[varname]["fillvalue"]
            values[:][values[:] == fillvalue] = np.nan

        if "scale" in variable_dict[varname]:
            values[:] = variable_dict[varname]["scale"] * values[:]

        if "minimum" in variable_dict[varname]:
            minimum = variable_dict[varname]["minimum"]
            values[:][values[:] < minimum] = np.nan

        if "maximum" in variable_dict[varname]:
            maximum = variable_dict[varname]["maximum"]
            values[:][values[:] > maximum] = np.nan

        ds[varname] = xr.DataArray(values)

        if "quality_flag_min" in variable_dict[varname]:
            # NOTE(review): these are dataset-level attrs, so if more than
            # one entry in variable_dict sets "quality_flag_min", the later
            # one silently overwrites the earlier one. apply_quality_flag
            # reads these attrs, so only the last flag is applied.
            ds.attrs["quality_flag"] = varname
            ds.attrs["quality_thresh_min"] = variable_dict[varname]["quality_flag_min"]

    dso.close()

    return ds


def apply_quality_flag(ds):
    """Mask all variables in *ds* (in place) where the quality flag is low.

    Uses the dataset attrs set by ``read_dataset``: "quality_flag" names
    the flag variable and "quality_thresh_min" gives the threshold. Every
    other variable is set to NaN where ``flag <= quality_thresh_min``.
    No-op if "quality_flag" is not in ``ds.attrs``.

    Parameters
    ----------
    ds : xarray.Dataset
    """
    if "quality_flag" in ds.attrs:
        quality_flag = ds[ds.attrs["quality_flag"]]
        quality_thresh_min = ds.attrs["quality_thresh_min"]

        # apply the quality thresh minimum to all variables in ds
        for varname in ds:
            if varname != ds.attrs["quality_flag"]:
                # stray debug print(varname) removed; logging.debug
                # already reports the variable name
                logging.debug(varname)
                values = ds[varname].values

                values[quality_flag <= quality_thresh_min] = np.nan
                ds[varname].values = values


def read_trpdataset(fnames, variable_dict, debug=False):
    """Read multiple TROPOMI L2 NO2 granules.

    Parameters
    ----------
    fnames : str
        Glob expression for input file paths; path components of the form
        ``$ENVVAR`` are expanded from the environment.
    variable_dict : dict
        Variables to load and their options; see :func:`read_dataset`.
    debug : bool, default False
        Configure logging at DEBUG level instead of INFO.

    Returns
    -------
    collections.OrderedDict
        Mapping of date string (YYYYMMDD, parsed from each granule file
        name) to the granule xarray.Dataset.

    Raises
    ------
    SystemExit
        If a ``$ENVVAR`` path component is not defined in the environment.
    """
    if debug:
        logging_level = logging.DEBUG
    else:
        logging_level = logging.INFO
    logging.basicConfig(stream=sys.stdout, level=logging_level)

    # Expand $ENVVAR components in the path template.
    for subpath in fnames.split("/"):
        if "$" in subpath:
            envvar = subpath.replace("$", "")
            envval = os.getenv(envvar)
            if envval is None:
                print("environment variable not defined: " + subpath)
                # `exit` is the site-module interactive builtin and may not
                # exist (e.g. under `python -S`); raise the same exception
                # explicitly so callers see identical behavior.
                raise SystemExit(1)
            else:
                fnames = fnames.replace(subpath, envval)

    print(fnames)
    files = sorted(glob(fnames))
    granules = OrderedDict()
    for file in files:
        granule = read_dataset(file, variable_dict)
        apply_quality_flag(granule)
        # Granule date comes from the file name,
        # e.g. S5P_OFFL_L2__NO2____20190701T...
        granule_str = os.path.basename(file)
        granule_info = granule_str.split("____")

        datetime_str = granule_info[1][0:8]
        granules[datetime_str] = granule

    return granules
Loading