Remove PR old association #63
41 fail, 547 pass in 35m 57s
588 tests: 547 ✅ passed, 0 💤 skipped, 41 ❌ failed
1 suite, 1 file
Duration: 35m 57s ⏱️
Results for commit facdaab.
Annotations
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2832224417-POCLOUD] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 1m 22s]
Raw output
Failed: Unable to find latitude and longitude variables.
collection_concept_id = 'C2832224417-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2832224417-POCLOUD', 'concept-id': 'G3215912666-POCLOUD', 'concept-type': 'granul...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2832224417-POCLOUD'}]}, 'meta': {'association-details': {'collecti...me': 'look', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C2832224410')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
> lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
verify_collection.py:406:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dataset = <xarray.Dataset> Size: 232B
Dimensions: (ydim_grid: 1, xdim_grid: 1, look: 1,
... -0.43
history_json: [{"date_time": "2024-...
file_to_subset = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C2832224410/76528933_RSS_SMAP_SSS_L2C_r51135_20240827T223619_2024240_NRT_V06.0_001.nc4')
collection_variable_list = [{'associations': {'collections': [{'concept-id': 'C2832224417-POCLOUD'}]}, 'meta': {'association-details': {'collecti...me': 'look', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
collection_concept_id = 'C2832224417-POCLOUD'
def get_lat_lon_var_names(dataset: xarray.Dataset, file_to_subset: str, collection_variable_list: List[Dict], collection_concept_id: str):
# Try getting it from UMM-Var first
lat_var_json, lon_var_json, _ = get_coordinate_vars_from_umm(collection_variable_list)
lat_var_name = get_variable_name_from_umm_json(lat_var_json)
lon_var_name = get_variable_name_from_umm_json(lon_var_json)
if lat_var_name and lon_var_name:
return lat_var_name, lon_var_name
logging.warning("Unable to find lat/lon vars in UMM-Var")
# If that doesn't work, try using cf-xarray to infer lat/lon variable names
try:
latitude = [lat for lat in dataset.cf.coordinates['latitude']
if lat.lower() in VALID_LATITUDE_VARIABLE_NAMES][0]
longitude = [lon for lon in dataset.cf.coordinates['longitude']
if lon.lower() in VALID_LONGITUDE_VARIABLE_NAMES][0]
return latitude, longitude
except:
logging.warning("Unable to find lat/lon vars using cf_xarray")
# If that still doesn't work, try using l2ss-py directly
try:
# file not able to be flattened unless locally downloaded
filename = f'my_copy_file_{collection_concept_id}.nc'
shutil.copy(file_to_subset, filename)
nc_dataset = netCDF4.Dataset(filename, mode='r+')
# flatten the dataset
nc_dataset_flattened = podaac.subsetter.group_handling.transform_grouped_dataset(nc_dataset, filename)
args = {
'decode_coords': False,
'mask_and_scale': False,
'decode_times': False
}
with xarray.open_dataset(
xarray.backends.NetCDF4DataStore(nc_dataset_flattened),
**args
) as flat_dataset:
# use l2ss-py to find lat and lon names
lat_var_names, lon_var_names = podaac.subsetter.subset.compute_coordinate_variable_names(flat_dataset)
os.remove(filename)
if lat_var_names and lon_var_names:
lat_var_name = lat_var_names.split('__')[-1] if isinstance(lat_var_names, str) else lat_var_names[0].split('__')[-1]
lon_var_name = lon_var_names.split('__')[-1] if isinstance(lon_var_names, str) else lon_var_names[0].split('__')[-1]
return lat_var_name, lon_var_name
except ValueError:
logging.warning("Unable to find lat/lon vars using l2ss-py")
# Still no dice, try using the 'units' variable attribute
for coord_name, coord in dataset.coords.items():
if 'units' not in coord.attrs:
continue
if coord.attrs['units'] == 'degrees_north' and lat_var_name is None:
lat_var_name = coord_name
if coord.attrs['units'] == 'degrees_east' and lon_var_name is None:
lon_var_name = coord_name
if lat_var_name and lon_var_name:
return lat_var_name, lon_var_name
else:
logging.warning("Unable to find lat/lon vars using 'units' attribute")
# Out of options, fail the test because we couldn't determine lat/lon variables
> pytest.fail(f"Unable to find latitude and longitude variables.")
E Failed: Unable to find latitude and longitude variables.
verify_collection.py:359: Failed
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3215912666-POCLOUD for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2832224417-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-83.83352500000001%3A21.368524999999998%29&subset=lon%284.518525000000011%3A175.500475%29&granuleId=G3215912666-POCLOUD
INFO root:verify_collection.py:393 Submitted harmony job 142ca2cf-667d-4050-a728-fb6ddaf8aebf
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C2832224410/76528933_RSS_SMAP_SSS_L2C_r51135_20240827T223619_2024240_NRT_V06.0_001.nc4
WARNING root:verify_collection.py:302 Unable to find lat/lon vars in UMM-Var
WARNING root:verify_collection.py:312 Unable to find lat/lon vars using cf_xarray
WARNING root:verify_collection.py:343 Unable to find lat/lon vars using l2ss-py
WARNING root:verify_collection.py:356 Unable to find lat/lon vars using 'units' attribute
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1918210023-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 1m 52s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1918210023-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918210023-GES_DISC', 'concept-id': 'G3216208137-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918210023-GES_DISC'}]}, 'meta': {'association-details': {'collect...RL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/qa_value', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1918210020')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9dabd29640>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9dabd28f40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f9dabd28e40>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216208137-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918210023-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.242425%3A-67.474575%29&subset=lon%28-98.62950000000001%3A78.69750000000002%29&granuleId=G3216208137-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 14c6efeb-56f0-4edd-9d02-9bbf38181197
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1918210020/76528935_S5P_OFFL_L2_HCHO_20240826T133706_20240826T151836_35599_03_020601_20240828T060359_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1627516298-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 40s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1627516298-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516298-GES_DISC', 'concept-id': 'G2087797426-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516298-GES_DISC'}]}, 'meta': {'association-details': {'collect..., 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/nitrogendioxide_tropospheric_column_histogram_axis', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1627516290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f2141399c40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f2141399740>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f2141399540>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G2087797426-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516298-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.82889999999999%3A-59.7251%29&subset=lon%28-77.22455%3A-1.6634499999999974%29&granuleId=G2087797426-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job a89bddb0-0cf2-462d-a31c-76b5fe45e780
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1627516290/76528955_S5P_OFFL_L2_NO2_20210701T170324_20210701T184453_19257_01_010400_20210703T102341_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2936721448-POCLOUD] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 27s]
Raw output
Failed: Unable to find latitude and longitude variables.
collection_concept_id = 'C2936721448-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2936721448-POCLOUD', 'concept-id': 'G3062447313-POCLOUD', 'concept-type': 'granul...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2936721448-POCLOUD'}]}, 'meta': {'association-details': {'collecti...rization_2', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C2936721440')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
> lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
verify_collection.py:406:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dataset = <xarray.Dataset> Size: 240B
Dimensions: (ydim_grid: 1, xdim_grid: 1, look: 1,
... -0.43
history_json: [{"date_time": "2...
file_to_subset = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C2936721440/76528960_RSS_SMAP_SSS_L2C_r47700_20240106T014035_2024006_FNL_V05.3.nc4')
collection_variable_list = [{'associations': {'collections': [{'concept-id': 'C2936721448-POCLOUD'}]}, 'meta': {'association-details': {'collecti...rization_2', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
collection_concept_id = 'C2936721448-POCLOUD'
def get_lat_lon_var_names(dataset: xarray.Dataset, file_to_subset: str, collection_variable_list: List[Dict], collection_concept_id: str):
# Try getting it from UMM-Var first
lat_var_json, lon_var_json, _ = get_coordinate_vars_from_umm(collection_variable_list)
lat_var_name = get_variable_name_from_umm_json(lat_var_json)
lon_var_name = get_variable_name_from_umm_json(lon_var_json)
if lat_var_name and lon_var_name:
return lat_var_name, lon_var_name
logging.warning("Unable to find lat/lon vars in UMM-Var")
# If that doesn't work, try using cf-xarray to infer lat/lon variable names
try:
latitude = [lat for lat in dataset.cf.coordinates['latitude']
if lat.lower() in VALID_LATITUDE_VARIABLE_NAMES][0]
longitude = [lon for lon in dataset.cf.coordinates['longitude']
if lon.lower() in VALID_LONGITUDE_VARIABLE_NAMES][0]
return latitude, longitude
except:
logging.warning("Unable to find lat/lon vars using cf_xarray")
# If that still doesn't work, try using l2ss-py directly
try:
# file not able to be flattened unless locally downloaded
filename = f'my_copy_file_{collection_concept_id}.nc'
shutil.copy(file_to_subset, filename)
nc_dataset = netCDF4.Dataset(filename, mode='r+')
# flatten the dataset
nc_dataset_flattened = podaac.subsetter.group_handling.transform_grouped_dataset(nc_dataset, filename)
args = {
'decode_coords': False,
'mask_and_scale': False,
'decode_times': False
}
with xarray.open_dataset(
xarray.backends.NetCDF4DataStore(nc_dataset_flattened),
**args
) as flat_dataset:
# use l2ss-py to find lat and lon names
lat_var_names, lon_var_names = podaac.subsetter.subset.compute_coordinate_variable_names(flat_dataset)
os.remove(filename)
if lat_var_names and lon_var_names:
lat_var_name = lat_var_names.split('__')[-1] if isinstance(lat_var_names, str) else lat_var_names[0].split('__')[-1]
lon_var_name = lon_var_names.split('__')[-1] if isinstance(lon_var_names, str) else lon_var_names[0].split('__')[-1]
return lat_var_name, lon_var_name
except ValueError:
logging.warning("Unable to find lat/lon vars using l2ss-py")
# Still no dice, try using the 'units' variable attribute
for coord_name, coord in dataset.coords.items():
if 'units' not in coord.attrs:
continue
if coord.attrs['units'] == 'degrees_north' and lat_var_name is None:
lat_var_name = coord_name
if coord.attrs['units'] == 'degrees_east' and lon_var_name is None:
lon_var_name = coord_name
if lat_var_name and lon_var_name:
return lat_var_name, lon_var_name
else:
logging.warning("Unable to find lat/lon vars using 'units' attribute")
# Out of options, fail the test because we couldn't determine lat/lon variables
> pytest.fail(f"Unable to find latitude and longitude variables.")
E Failed: Unable to find latitude and longitude variables.
verify_collection.py:359: Failed
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3062447313-POCLOUD for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2936721448-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.29044999999999%3A82.08044999999998%29&subset=lon%284.51755%3A175.50045%29&granuleId=G3062447313-POCLOUD
INFO root:verify_collection.py:393 Submitted harmony job 35feb2aa-62d7-4c34-ba18-6ef786fbce93
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C2936721440/76528960_RSS_SMAP_SSS_L2C_r47700_20240106T014035_2024006_FNL_V05.3.nc4
WARNING root:verify_collection.py:302 Unable to find lat/lon vars in UMM-Var
WARNING root:verify_collection.py:312 Unable to find lat/lon vars using cf_xarray
WARNING root:verify_collection.py:343 Unable to find lat/lon vars using l2ss-py
WARNING root:verify_collection.py:356 Unable to find lat/lon vars using 'units' attribute
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1627516285-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 31s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1627516285-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516285-GES_DISC', 'concept-id': 'G2084435970-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516285-GES_DISC'}]}, 'meta': {'association-details': {'collect.../variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/aerosol_index_354_388_histogram_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1627516280')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fe2889b6540>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fe2889b6b40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fe2889b6d40>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G2084435970-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516285-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.82889999999999%3A-59.7251%29&subset=lon%28-77.22455%3A-1.6634499999999974%29&granuleId=G2084435970-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 80b76585-b0ee-4d19-87a5-1ba0116a6299
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1627516280/76528967_S5P_OFFL_L2_AER_AI_20210701T170324_20210701T184453_19257_01_010400_20210703T065109_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1627516292-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 53s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1627516292-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516292-GES_DISC', 'concept-id': 'G1898261144-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516292-GES_DISC'}]}, 'meta': {'association-details': {'collect... 'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/layer', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1627516290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f095ee7c540>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f095f127d40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f095f127c40>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1898261144-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516292-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-78.0453%3A-60.6907%29&subset=lon%28-164.82465%3A-84.66935000000001%29&granuleId=G1898261144-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job d785df56-6384-4ff1-87e5-8bc4fd76a95f
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1627516290/76528992_S5P_OFFL_L2_HCHO_20200712T224601_20200713T002730_14238_01_010108_20200715T122623_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2087131083-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 36s]
Raw output
OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f188f970640>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f1898308b80>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
> ds = ds.groups[key]
E KeyError: 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError
During handling of the above exception, another exception occurred:
collection_concept_id = 'C2087131083-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2087131083-GES_DISC', 'concept-id': 'G3216208296-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2087131083-GES_DISC'}]}, 'meta': {'association-details': {'collect.../variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/aerosol_index_354_388_histogram_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw3/test_spatial_subset_C2087131080')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
verify_collection.py:431: in group_walk
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f188f970640>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f1898308b80>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
ds = ds.groups[key]
except KeyError as e:
if mode != "r":
ds = create_group(ds, key)
else:
# wrap error to provide slightly more helpful message
> raise OSError(f"group not found: {key}", e)
E OSError: [Errno group not found: PRODUCT] 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216208296-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2087131083-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.23204999999999%3A-67.18395000000001%29&subset=lon%28-149.695425%3A27.938424999999995%29&granuleId=G3216208296-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 822e17b9-1869-4431-8608-f1716b23f777
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw3/test_spatial_subset_C2087131080/76528998_S5P_OFFL_L2_AER_AI_20240826T170005_20240826T184134_35601_03_020600_20240828T064747_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068493-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 25s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1442068493-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068493-GES_DISC', 'concept-id': 'G1628685465-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068493-GES_DISC'}]}, 'meta': {'association-details': {'collect...arthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/carbonmonoxide_total_column', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1442068490')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f5a1d70aa40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f5a1d708b40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f5a1d708840>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1628685465-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068493-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.468625%3A-64.100375%29&subset=lon%28-112.00605%3A163.26405%29&granuleId=G1628685465-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 98e0c0c3-7fb1-4c24-aeb4-408103a6aee1
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1442068490/76529003_S5P_OFFL_L2_CO_20190806T003836_20190806T022006_09387_01_010302_20190811T235959_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068508-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 54s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1442068508-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068508-GES_DISC', 'concept-id': 'G1628710396-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068508-GES_DISC'}]}, 'meta': {'association-details': {'collect...v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/SUPPORT_DATA/DETAILED_RESULTS/fitted_radiance_squeeze_win3', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1442068500')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f2141399540>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f2141399b40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f214139b540>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1628710396-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068508-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1628710396-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job ed989006-374b-4cc6-af55-cb9e9bc7573f
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1442068500/76529007_S5P_OFFL_L2_SO2_20190806T003836_20190806T022006_09387_01_010107_20190812T085130_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2179081549-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 39s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C2179081549-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2179081549-GES_DISC', 'concept-id': 'G3215100934-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2179081549-GES_DISC'}]}, 'meta': {'association-details': {'collect...escription': 'Extracted from _FillValue metadata attribute', 'Type': 'SCIENCE_FILLVALUE', 'Value': -9999}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C2179081540')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'Swath': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd751800740>}
nc_d = <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fd751800640>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
> data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
E IndexError: list index out of range
verify_collection.py:435: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3215100934-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2179081549-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-66.2483695%3A-60.4639905%29&subset=lon%280.4232307500000019%3A30.694059249999995%29&granuleId=G3215100934-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 803e08a6-5281-4610-9ba8-096f7953cec8
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C2179081540/76529033_2A.GPM.DPR.GPM-SLH.20240826-S192542-E205853.059600.V07C_subsetted.nc4
WARNING root:verify_collection.py:302 Unable to find lat/lon vars in UMM-Var
WARNING root:verify_collection.py:312 Unable to find lat/lon vars using cf_xarray
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1918209846-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 38s]
Raw output
OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9da967de40>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f9daf262560>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
> ds = ds.groups[key]
E KeyError: 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError
During handling of the above exception, another exception occurred:
collection_concept_id = 'C1918209846-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918209846-GES_DISC', 'concept-id': 'G3216208540-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918209846-GES_DISC'}]}, 'meta': {'association-details': {'collect...tracted from _FillValue metadata attribute', 'Type': 'SCIENCE_FILLVALUE', 'Value': 9.969209968386869e+36}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1918209840')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
verify_collection.py:431: in group_walk
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9da967de40>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f9daf262560>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
ds = ds.groups[key]
except KeyError as e:
if mode != "r":
ds = create_group(ds, key)
else:
# wrap error to provide slightly more helpful message
> raise OSError(f"group not found: {key}", e)
E OSError: [Errno group not found: PRODUCT] 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216208540-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918209846-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.24002499999999%3A-67.15297500000001%29&subset=lon%28-21.706575%3A154.59157499999998%29&granuleId=G3216208540-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job a6f15ea9-88e8-4d85-a634-ce01d410c8dd
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1918209840/76529051_S5P_OFFL_L2_O3_20240826T083238_20240826T101408_35596_03_020601_20240828T004333_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1918209669-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 50s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1918209669-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918209669-GES_DISC', 'concept-id': 'G3216192512-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918209669-GES_DISC'}]}, 'meta': {'association-details': {'collect...tracted from _FillValue metadata attribute', 'Type': 'SCIENCE_FILLVALUE', 'Value': 9.969209968386869e+36}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1918209660')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f5a17d8eb40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f5a17d8e440>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f5a17d8e340>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216192512-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918209669-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.242425%3A-67.474575%29&subset=lon%28-98.62950000000001%3A78.69750000000002%29&granuleId=G3216192512-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job fdac00cd-551a-49eb-ad9c-0d0983114a14
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1918209660/76529074_S5P_OFFL_L2_CLOUD_20240826T133706_20240826T151836_35599_03_020601_20240828T054121_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2832221740-POCLOUD] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 41s]
Raw output
harmony.harmony.ProcessingFailedException: WorkItem failed: podaac/l2ss-py:2.11.0: Service request failed with an unknown error
collection_concept_id = 'C2832221740-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2832221740-POCLOUD', 'concept-id': 'G3215246926-POCLOUD', 'concept-type': 'granul...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2832221740-POCLOUD'}]}, 'meta': {'association-details': {'collecti...rization_2', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C2832221740')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
> harmony_client.wait_for_processing(job_id, show_progress=True)
verify_collection.py:394:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <harmony.harmony.Client object at 0x7f9dabba4fd0>
job_id = 'aabe0fda-0c81-4011-8179-8a09a4a0916d', show_progress = True
def wait_for_processing(self, job_id: str, show_progress: bool = False) -> None:
"""Retrieve a submitted job's completion status in percent.
Args:
job_id: UUID string for the job you wish to interrogate.
Returns:
The job's processing progress as a percentage.
:raises
Exception: This can happen if an invalid job_id is provided or Harmony services
can't be reached.
"""
# How often to refresh the screen for progress updates and animating spinners.
ui_update_interval = 0.33 # in seconds
running_w_errors_logged = False
intervals = round(self.check_interval / ui_update_interval)
if show_progress:
with progressbar.ProgressBar(max_value=100, widgets=progressbar_widgets) as bar:
progress = 0
while progress < 100:
progress, status, message = self.progress(job_id)
if status == 'failed':
> raise ProcessingFailedException(job_id, message)
E harmony.harmony.ProcessingFailedException: WorkItem failed: podaac/l2ss-py:2.11.0: Service request failed with an unknown error
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:986: ProcessingFailedException
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3215246926-POCLOUD for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2832221740-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.29166699999999%3A82.04666699999999%29&subset=lon%28-171.0%3A171.0%29&granuleId=G3215246926-POCLOUD
INFO root:verify_collection.py:393 Submitted harmony job aabe0fda-0c81-4011-8179-8a09a4a0916d
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068505-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 24s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1442068505-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068505-GES_DISC', 'concept-id': 'G1628685470-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068505-GES_DISC'}]}, 'meta': {'association-details': {'collect...hdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/methane_mixing_ratio_precision', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C1442068500')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd751803b40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd751803640>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fd751803540>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1628685470-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068505-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.468625%3A-64.100375%29&subset=lon%28-112.00605%3A163.26405%29&granuleId=G1628685470-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job bc93ebb4-0d25-4053-81c7-d8ce87fa4482
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C1442068500/76529094_S5P_OFFL_L2_CH4_20190806T003836_20190806T022006_09387_01_010302_20190812T015759_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068491-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 23s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1442068491-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068491-GES_DISC', 'concept-id': 'G1642673899-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068491-GES_DISC'}]}, 'meta': {'association-details': {'collect...s://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/aerosol_mid_height', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1442068490')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9da958cb40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9da958c640>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f9da958c540>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1642673899-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068491-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.32457500000001%3A-63.840424999999996%29&subset=lon%28-112.0001%3A163.1541%29&granuleId=G1642673899-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 00c8fbde-afdb-42a6-8307-125d8e9e1ab0
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1442068490/76529117_S5P_OFFL_L2_AER_LH_20190806T003836_20190806T022006_09387_01_010302_20190812T015801_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1627516287-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 26s]
Raw output
OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f21413e5940>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f2149ba6f80>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
> ds = ds.groups[key]
E KeyError: 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError
During handling of the above exception, another exception occurred:
collection_concept_id = 'C1627516287-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516287-GES_DISC', 'concept-id': 'G2084463561-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516287-GES_DISC'}]}, 'meta': {'association-details': {'collect...'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/corner', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1627516280')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
verify_collection.py:431: in group_walk
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f21413e5940>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f2149ba6f80>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
ds = ds.groups[key]
except KeyError as e:
if mode != "r":
ds = create_group(ds, key)
else:
# wrap error to provide slightly more helpful message
> raise OSError(f"group not found: {key}", e)
E OSError: [Errno group not found: PRODUCT] 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G2084463561-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516287-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.99937499999999%3A-59.951625%29&subset=lon%28-76.6214%3A-1.5866000000000042%29&granuleId=G2084463561-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 8f56d2df-3cc6-4d4b-a6bd-4a2ff203e51c
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1627516280/76529147_S5P_OFFL_L2_CO_20210701T170324_20210701T184453_19257_01_010400_20210703T065107_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1627516300-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 31s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1627516300-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516300-GES_DISC', 'concept-id': 'G1902371249-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516300-GES_DISC'}]}, 'meta': {'association-details': {'collect...asa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/ozone_total_vertical_column_precision', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1627516300')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f21412fa040>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f21412fb040>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f21412fb140>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1902371249-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516300-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-78.0453%3A-60.6907%29&subset=lon%28-164.82465%3A-84.66935000000001%29&granuleId=G1902371249-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 49f49174-1a35-4eb0-99d7-97dfd70ea211
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1627516300/76529167_S5P_OFFL_L2_O3_20200712T224601_20200713T002730_14238_01_010108_20200715T122623_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068510-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 39s]
Raw output
OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f095ef10540>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f09639b5d80>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
> ds = ds.groups[key]
E KeyError: 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError
During handling of the above exception, another exception occurred:
collection_concept_id = 'C1442068510-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068510-GES_DISC', 'concept-id': 'G1628685468-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068510-GES_DISC'}]}, 'meta': {'association-details': {'collect...mm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/SUPPORT_DATA/DETAILED_RESULTS/averaging_kernel', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068510')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
verify_collection.py:431: in group_walk
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f095ef10540>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f09639b5d80>
def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
if group in {None, "", "/"}:
# use the root group
return ds
else:
# make sure it's a string
if not isinstance(group, str):
raise ValueError("group must be a string or None")
# support path-like syntax
path = group.strip("/").split("/")
for key in path:
try:
ds = ds.groups[key]
except KeyError as e:
if mode != "r":
ds = create_group(ds, key)
else:
# wrap error to provide slightly more helpful message
> raise OSError(f"group not found: {key}", e)
E OSError: [Errno group not found: PRODUCT] 'PRODUCT'
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1628685468-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068510-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1628685468-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 5be4994e-d80b-44e1-92cf-350e728d3488
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068510/76529175_S5P_OFFL_L2_HCHO_20190806T003836_20190806T022006_09387_01_010107_20190812T015759_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068490-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 29s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1442068490-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068490-GES_DISC', 'concept-id': 'G1628672811-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068490-GES_DISC'}]}, 'meta': {'association-details': {'collect...ov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/aerosol_index_354_388_pdf_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1442068490')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fe2888c8b40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fe288a39f40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fe288a3bb40>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1628672811-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068490-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1628672811-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job bb106916-f09b-4684-9bed-73bc85029a8d
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1442068490/76529186_S5P_OFFL_L2_AER_AI_20190806T003836_20190806T022006_09387_01_010302_20190812T000004_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1442068509-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 29s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1442068509-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068509-GES_DISC', 'concept-id': 'G1628706233-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068509-GES_DISC'}]}, 'meta': {'association-details': {'collect... 'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/level', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1442068500')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f5a17c7bb40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f5a17c7b540>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f5a17c7b440>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G1628706233-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068509-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1628706233-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 712c2aa7-f8e3-4fb4-987b-eab5f7fd53ab
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1442068500/76529187_S5P_OFFL_L2_O3_20190806T003836_20190806T022006_09387_01_010107_20190812T015759_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2646932894-POCLOUD] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 10m 0s]
Raw output
Failed: Timeout >600.0s
collection_concept_id = 'C2646932894-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2646932894-POCLOUD', 'concept-id': 'G3215135157-POCLOUD', 'concept-type': 'granul...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2646932894-POCLOUD'}]}, 'meta': {'association-details': {'collecti...ample', 'Size': 585310, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw3/test_spatial_subset_C2646932890')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
> harmony_client.wait_for_processing(job_id, show_progress=True)
verify_collection.py:394:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <harmony.harmony.Client object at 0x7f188f8d8520>
job_id = 'bf6ee1e0-e7dc-4e16-86c8-2827b7662d6d', show_progress = True
def wait_for_processing(self, job_id: str, show_progress: bool = False) -> None:
"""Retrieve a submitted job's completion status in percent.
Args:
job_id: UUID string for the job you wish to interrogate.
Returns:
The job's processing progress as a percentage.
:raises
Exception: This can happen if an invalid job_id is provided or Harmony services
can't be reached.
"""
# How often to refresh the screen for progress updates and animating spinners.
ui_update_interval = 0.33 # in seconds
running_w_errors_logged = False
intervals = round(self.check_interval / ui_update_interval)
if show_progress:
with progressbar.ProgressBar(max_value=100, widgets=progressbar_widgets) as bar:
progress = 0
while progress < 100:
progress, status, message = self.progress(job_id)
if status == 'failed':
raise ProcessingFailedException(job_id, message)
if status == 'canceled':
print('Job has been canceled.')
break
if status == 'paused':
print('\nJob has been paused. Call `resume()` to resume.', file=sys.stderr)
break
if (not running_w_errors_logged and status == 'running_with_errors'):
print('\nJob is running with errors.', file=sys.stderr)
running_w_errors_logged = True
# This gets around an issue with progressbar. If we update() with 0, the
# output shows up as "N/A". If we update with, e.g. 0.1, it rounds down or
# truncates to 0 but, importantly, actually displays that.
if progress == 0:
progress = 0.1
for _ in range(intervals):
bar.update(progress) # causes spinner to rotate even when no data change
sys.stdout.flush() # ensures correct behavior in Jupyter notebooks
if progress >= 100:
break
else:
> time.sleep(ui_update_interval)
E Failed: Timeout >600.0s
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:1009: Failed
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3215135157-POCLOUD for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2646932894-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-57.0%3A57.0%29&subset=lon%284.5%3A175.5%29&granuleId=G3215135157-POCLOUD
INFO root:verify_collection.py:393 Submitted harmony job bf6ee1e0-e7dc-4e16-86c8-2827b7662d6d
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2089270961-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 1m 6s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C2089270961-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2089270961-GES_DISC', 'concept-id': 'G3215164205-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2089270961-GES_DISC'}]}, 'meta': {'association-details': {'collect....0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/nitrogendioxide_tropospheric_column_histogram_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C2089270960')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f214133e140>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f214133da40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f214133d940>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3215164205-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2089270961-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-84.93455%3A-66.25945%29&subset=lon%28-157.250275%3A141.03927500000003%29&granuleId=G3215164205-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 1bab2824-fae2-483d-b6ad-9264bc17c6ea
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C2089270960/76529253_S5P_OFFL_L2_NO2_20240819T223705_20240820T001834_35505_03_020600_20240821T143225_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C2087216100-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 34s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C2087216100-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2087216100-GES_DISC', 'concept-id': 'G3216207998-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2087216100-GES_DISC'}]}, 'meta': {'association-details': {'collect...m/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/aerosol_mid_pressure_histogram_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C2087216100')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f095ce60d40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f095ce61c40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f095ce61d40>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216207998-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2087216100-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.34975%3A-67.13825%29&subset=lon%28-73.32692499999999%3A103.983925%29&granuleId=G3216207998-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 0e5c34cf-9a3e-47d9-925f-d6af77924228
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C2087216100/76529265_S5P_OFFL_L2_AER_LH_20240826T115537_20240826T133706_35598_03_020600_20240828T041109_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1918210292-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 1m 14s]
Raw output
IndexError: list index out of range
collection_concept_id = 'C1918210292-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918210292-GES_DISC', 'concept-id': 'G3216209050-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918210292-GES_DISC'}]}, 'meta': {'association-details': {'collect...v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/SUPPORT_DATA/DETAILED_RESULTS/number_of_slant_columns_win2', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C1918210290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
> group_walk(f.groups, f, '')
verify_collection.py:448:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd751800a40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd751676340>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fd751675f40>
current_group = ''
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
> sci_var = list(subsetted_ds_data.variables.keys())[0]
E IndexError: list index out of range
verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216209050-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918210292-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.24002499999999%3A-67.15297500000001%29&subset=lon%28-21.706575%3A154.59157499999998%29&granuleId=G3216209050-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 1acaa7d7-2ebf-4454-9e40-a63af71a31b9
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C1918210290/76529413_S5P_OFFL_L2_SO2_20240826T083238_20240826T101408_35596_03_020601_20240828T064741_subsetted.nc4
Check warning on line 0 in tests.verify_collection
github-actions / Regression test results for ops
test_spatial_subset[C1729926922-GES_DISC] (tests.verify_collection) failed
test-results/ops_test_report.xml [took 23s]
Raw output
assert False
collection_concept_id = 'C1729926922-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1729926922-GES_DISC', 'concept-id': 'G3216182085-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1729926922-GES_DISC'}]}, 'meta': {'association-details': {'collect...9.989990234375}], 'LongName': 'HDFEOS/SWATHS/Temperature-APriori/Data Fields/Temperature-APrioriPrecision', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1729926920')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Jb1BnM7KDdEqo_3EoDdVhQCL6YyPeFO5phn_VtkdRRgce7fIpgz79Xenaj8C_tgpF1XCp4JT2t834vHhVXrf_lRaldMpl7WxjRzMfWMdoxY5ZFahb_B-Yw'
@pytest.mark.timeout(600)
def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
harmony_env, tmp_path: pathlib.Path, bearer_token):
test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
# Compute a box that is smaller than the granule extent bounding box
north, south, east, west = get_bounding_box(granule_json)
east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
# Build harmony request
harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
request_collection = harmony.Collection(id=collection_concept_id)
harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
granule_id=[granule_json['meta']['concept-id']])
logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
# Submit harmony request and download result
job_id = harmony_client.submit(harmony_request)
logging.info("Submitted harmony job %s", job_id)
harmony_client.wait_for_processing(job_id, show_progress=True)
subsetted_filepath = None
for filename in [file_future.result()
for file_future
in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
logging.info(f'Downloaded: %s', filename)
subsetted_filepath = pathlib.Path(filename)
# Verify spatial subset worked
subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
group = None
# Try to read group in file
lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
lat_var_name = lat_var_name.split('/')[-1]
lon_var_name = lon_var_name.split('/')[-1]
with netCDF4.Dataset(subsetted_filepath) as f:
group_list = []
def group_walk(groups, nc_d, current_group):
global subsetted_ds_new
subsetted_ds_new = None
# check if the top group has lat or lon variable
if lat_var_name in list(nc_d.variables.keys()):
subsetted_ds_new = subsetted_ds
else:
# if not then we'll need to keep track of the group layers
group_list.append(current_group)
# loop through the groups in the current layer
for g in groups:
# end the loop if we've already found latitude
if subsetted_ds_new:
break
# check if the groups have latitude, define the dataset and end the loop if found
if lat_var_name in list(nc_d.groups[g].variables.keys()):
group_list.append(g)
lat_group = '/'.join(group_list)
subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
# add a science variable to the dataset if other groups are in the lat/lon group
# some GPM collections won't have any other variables in the same group as lat/lon
if len(list(nc_d.groups[g].groups.keys())) > 0:
data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
g_data = lat_group+'/'+data_group
subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
sci_var = list(subsetted_ds_data.variables.keys())[0]
subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
break
# recall the function on a group that has groups in it and didn't find latitude
# this is going 'deeper' into the groups
if len(list(nc_d.groups[g].groups.keys())) > 0:
group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
else:
continue
group_walk(f.groups, f, '')
assert lat_var_name and lon_var_name
var_ds = None
msk = None
science_vars = get_science_vars(collection_variables)
if science_vars:
for var in science_vars:
science_var_name = var['umm']['Name']
var_ds = find_variable(subsetted_ds_new, science_var_name)
if var_ds is not None:
try:
msk = np.logical_not(np.isnan(var_ds.data.squeeze()))
break
except Exception:
continue
else:
var_ds, msk = None, None
else:
for science_var_name in subsetted_ds_new.variables:
if (str(science_var_name) not in lat_var_name and
str(science_var_name) not in lon_var_name and
'time' not in str(science_var_name)):
var_ds = find_variable(subsetted_ds_new, science_var_name)
if var_ds is not None:
try:
msk = np.logical_not(np.isnan(var_ds.data.squeeze()))
break
except Exception:
continue
else:
var_ds, msk = None, None
if var_ds is None or msk is None:
pytest.fail("Unable to find variable from umm-v to use as science variable.")
try:
msk = np.logical_not(np.isnan(var_ds.data.squeeze()))
llat = subsetted_ds_new[lat_var_name].where(msk)
llon = subsetted_ds_new[lon_var_name].where(msk)
except ValueError:
llat = subsetted_ds_new[lat_var_name]
llon = subsetted_ds_new[lon_var_name]
lat_max = llat.max()
lat_min = llat.min()
lon_min = llon.min()
lon_max = llon.max()
lon_min = (lon_min + 180) % 360 - 180
lon_max = (lon_max + 180) % 360 - 180
lat_var_fill_value = subsetted_ds_new[lat_var_name].encoding.get('_FillValue')
lon_var_fill_value = subsetted_ds_new[lon_var_name].encoding.get('_FillValue')
if lat_var_fill_value:
if (lat_max <= north or np.isclose(lat_max, north)) and (lat_min >= south or np.isclose(lat_min, south)):
logging.info("Successful Latitude subsetting")
elif np.isnan(lat_max) and np.isnan(lat_min):
logging.info("Partial Lat Success - no Data")
else:
assert False
if lon_var_fill_value:
if (lon_max <= east or np.isclose(lon_max, east)) and (lon_min >= west or np.isclose(lon_min, west)):
logging.info("Successful Longitude subsetting")
elif np.isnan(lon_max) and np.isnan(lon_min):
logging.info("Partial Lon Success - no Data")
else:
> assert False
E assert False
verify_collection.py:522: AssertionError
--------------------------------- Captured Log ---------------------------------
INFO root:verify_collection.py:373 Using granule G3216182085-GES_DISC for test
INFO root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1729926922-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.5%3A85.5%29&subset=lon%28-171.0%3A171.0%29&granuleId=G3216182085-GES_DISC
INFO root:verify_collection.py:393 Submitted harmony job 5b09e222-fb0a-44c1-a505-aa180ea87cd7
INFO root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1729926920/76529453_MLS-Aura_L2GP-Temperature_v05-03-c01_2024d239_subsetted.nc4
INFO root:verify_collection.py:510 Successful Latitude subsetting