Remove PR old association

Remove PR old association #63

GitHub Actions / Regression test results for ops failed Sep 25, 2024 in 0s

8 errors, 50 fail, 530 pass in 47m 29s

588 tests 530 ✅ 47m 29s ⏱️
1 suites 0 💤
1 files 50 ❌ 8 🔥

Results for commit facdaab.

Annotations

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1918209846-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 47s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1918209846-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918209846-GES_DISC', 'concept-id': 'G3248176862-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918209846-GES_DISC'}]}, 'meta': {'association-details': {'collect...tracted from _FillValue metadata attribute', 'Type': 'SCIENCE_FILLVALUE', 'Value': 9.969209968386869e+36}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw0/test_spatial_subset_C1918209840')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9ed8460740>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f9ed8460140>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f9ed8460040>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248176862-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918209846-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.08309999999999%3A-66.46690000000001%29&subset=lon%28-135.778675%3A16.423675000000003%29&granuleId=G3248176862-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job f1aab317-27a8-4cd6-a0e8-ea741443808b
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw0/test_spatial_subset_C1918209840/78002372_S5P_OFFL_L2_O3_20240923T044126_20240923T062256_35991_03_020601_20240924T203403_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516298-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 56s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1627516298-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516298-GES_DISC', 'concept-id': 'G2087797426-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516298-GES_DISC'}]}, 'meta': {'association-details': {'collect..., 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/nitrogendioxide_tropospheric_column_histogram_axis', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1627516290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd29d323d40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd29d323540>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fd29d323440>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2087797426-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516298-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.82889999999999%3A-59.7251%29&subset=lon%28-77.22455%3A-1.6634499999999974%29&granuleId=G2087797426-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 69eeaa8d-b2a8-4425-91cd-91ac22cc43dc
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C1627516290/78002382_S5P_OFFL_L2_NO2_20210701T170324_20210701T184453_19257_01_010400_20210703T102341_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1729925806-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 29s]

Raw output


            assert False
collection_concept_id = 'C1729925806-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1729925806-GES_DISC', 'concept-id': 'G3248349908-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1729925806-GES_DISC'}]}, 'meta': {'association-details': {'collect...': [{'Type': 'SCIENCE_FILLVALUE', 'Value': 513}], 'LongName': 'HDFEOS/SWATHS/O3 column/Data Fields/Status', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C1729925800')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
            group_walk(f.groups, f, '')
    
        assert lat_var_name and lon_var_name
    
        var_ds = None
        msk = None
    
        science_vars = get_science_vars(collection_variables)
        if science_vars:
            for var in science_vars:
                science_var_name = var['umm']['Name']
                var_ds = find_variable(subsetted_ds_new, science_var_name)
                if var_ds is not None:
                    try:
                        msk = np.logical_not(np.isnan(var_ds.data.squeeze()))
                        break
                    except Exception:
                        continue
            else:
                var_ds, msk = None, None
        else:
            for science_var_name in subsetted_ds_new.variables:
                if (str(science_var_name) not in lat_var_name and
                    str(science_var_name) not in lon_var_name and
                    'time' not in str(science_var_name)):
    
                    var_ds = find_variable(subsetted_ds_new, science_var_name)
                    if var_ds is not None:
                        try:
                            msk = np.logical_not(np.isnan(var_ds.data.squeeze()))
                            break
                        except Exception:
                            continue
            else:
                var_ds, msk = None, None
    
        if var_ds is None or msk is None:
            pytest.fail("Unable to find variable from umm-v to use as science variable.")
    
        try:
            msk = np.logical_not(np.isnan(var_ds.data.squeeze()))
            llat = subsetted_ds_new[lat_var_name].where(msk)
            llon = subsetted_ds_new[lon_var_name].where(msk)
        except ValueError:
    
            llat = subsetted_ds_new[lat_var_name]
            llon = subsetted_ds_new[lon_var_name]
    
        lat_max = llat.max()
        lat_min = llat.min()
    
        lon_min = llon.min()
        lon_max = llon.max()
    
        lon_min = (lon_min + 180) % 360 - 180
        lon_max = (lon_max + 180) % 360 - 180
    
        lat_var_fill_value = subsetted_ds_new[lat_var_name].encoding.get('_FillValue')
        lon_var_fill_value = subsetted_ds_new[lon_var_name].encoding.get('_FillValue')
    
        if lat_var_fill_value:
            if (lat_max <= north or np.isclose(lat_max, north)) and (lat_min >= south or np.isclose(lat_min, south)):
                logging.info("Successful Latitude subsetting")
            elif np.isnan(lat_max) and np.isnan(lat_min):
                logging.info("Partial Lat Success - no Data")
            else:
                assert False
    
        if lon_var_fill_value:
            if (lon_max <= east or np.isclose(lon_max, east)) and (lon_min >= west or np.isclose(lon_min, west)):
                logging.info("Successful Longitude subsetting")
            elif np.isnan(lon_max) and np.isnan(lon_min):
                logging.info("Partial Lon Success - no Data")
            else:
>               assert False
E               assert False

verify_collection.py:522: AssertionError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248349908-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1729925806-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.5%3A85.5%29&subset=lon%28-171.0%3A171.0%29&granuleId=G3248349908-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 1c69daa2-da72-47e5-a8e5-9eb0d49e4a4e
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C1729925800/78002387_MLS-Aura_L2GP-O3_v05-03-c01_2024d267_subsetted.nc4
INFO     root:verify_collection.py:510 Successful Latitude subsetting

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2832224417-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 31s]

Raw output


            Failed: Unable to find latitude and longitude variables.
collection_concept_id = 'C2832224417-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2832224417-POCLOUD', 'concept-id': 'G3245436870-POCLOUD', 'concept-type': 'granul...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2832224417-POCLOUD'}]}, 'meta': {'association-details': {'collecti...me': 'look', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C2832224410')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
>       lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)

verify_collection.py:406: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

dataset = <xarray.Dataset> Size: 232B
Dimensions:                 (ydim_grid: 1, xdim_grid: 1, look: 1,
                        ...                                 -0.43
    history_json:                                       [{"date_time": "2024-...
file_to_subset = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C2832224410/78002398_RSS_SMAP_SSS_L2C_r51501_20240921T231358_2024265_NRT_V06.0_001.nc4')
collection_variable_list = [{'associations': {'collections': [{'concept-id': 'C2832224417-POCLOUD'}]}, 'meta': {'association-details': {'collecti...me': 'look', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
collection_concept_id = 'C2832224417-POCLOUD'

    def get_lat_lon_var_names(dataset: xarray.Dataset, file_to_subset: str, collection_variable_list: List[Dict], collection_concept_id: str):
        # Try getting it from UMM-Var first
        lat_var_json, lon_var_json, _ = get_coordinate_vars_from_umm(collection_variable_list)
        lat_var_name = get_variable_name_from_umm_json(lat_var_json)
        lon_var_name = get_variable_name_from_umm_json(lon_var_json)
    
        if lat_var_name and lon_var_name:
            return lat_var_name, lon_var_name
    
        logging.warning("Unable to find lat/lon vars in UMM-Var")
    
        # If that doesn't work, try using cf-xarray to infer lat/lon variable names
        try:
            latitude = [lat for lat in dataset.cf.coordinates['latitude']
                             if lat.lower() in VALID_LATITUDE_VARIABLE_NAMES][0]
            longitude = [lon for lon in dataset.cf.coordinates['longitude']
                             if lon.lower() in VALID_LONGITUDE_VARIABLE_NAMES][0]
            return latitude, longitude
        except:
            logging.warning("Unable to find lat/lon vars using cf_xarray")
    
        # If that still doesn't work, try using l2ss-py directly
        try:
            # file not able to be flattened unless locally downloaded
            filename = f'my_copy_file_{collection_concept_id}.nc'
            shutil.copy(file_to_subset, filename)
            nc_dataset = netCDF4.Dataset(filename, mode='r+')
            # flatten the dataset
            nc_dataset_flattened = podaac.subsetter.group_handling.transform_grouped_dataset(nc_dataset, filename)
    
            args = {
                    'decode_coords': False,
                    'mask_and_scale': False,
                    'decode_times': False
                    }
    
            with xarray.open_dataset(
                xarray.backends.NetCDF4DataStore(nc_dataset_flattened),
                **args
                ) as flat_dataset:
                    # use l2ss-py to find lat and lon names
                    lat_var_names, lon_var_names = podaac.subsetter.subset.compute_coordinate_variable_names(flat_dataset)
    
            os.remove(filename)
            if lat_var_names and lon_var_names:
                lat_var_name = lat_var_names.split('__')[-1] if isinstance(lat_var_names, str) else lat_var_names[0].split('__')[-1]
                lon_var_name = lon_var_names.split('__')[-1] if isinstance(lon_var_names, str) else lon_var_names[0].split('__')[-1]
                return lat_var_name, lon_var_name
    
        except ValueError:
            logging.warning("Unable to find lat/lon vars using l2ss-py")
    
        # Still no dice, try using the 'units' variable attribute
        for coord_name, coord in dataset.coords.items():
            if 'units' not in coord.attrs:
                continue
            if coord.attrs['units'] == 'degrees_north' and lat_var_name is None:
                lat_var_name = coord_name
            if coord.attrs['units'] == 'degrees_east' and lon_var_name is None:
                lon_var_name = coord_name
        if lat_var_name and lon_var_name:
            return lat_var_name, lon_var_name
        else:
            logging.warning("Unable to find lat/lon vars using 'units' attribute")
    
        # Out of options, fail the test because we couldn't determine lat/lon variables
>       pytest.fail(f"Unable to find latitude and longitude variables.")
E       Failed: Unable to find latitude and longitude variables.

verify_collection.py:359: Failed
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3245436870-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2832224417-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.75125%3A63.53925%29&subset=lon%284.500975000000011%3A175.500025%29&granuleId=G3245436870-POCLOUD
INFO     root:verify_collection.py:393 Submitted harmony job ee3907dc-d334-4adc-a43f-6b086c55f9ce
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C2832224410/78002398_RSS_SMAP_SSS_L2C_r51501_20240921T231358_2024265_NRT_V06.0_001.nc4
WARNING  root:verify_collection.py:302 Unable to find lat/lon vars in UMM-Var
WARNING  root:verify_collection.py:312 Unable to find lat/lon vars using cf_xarray
WARNING  root:verify_collection.py:343 Unable to find lat/lon vars using l2ss-py
WARNING  root:verify_collection.py:356 Unable to find lat/lon vars using 'units' attribute

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2601583089-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 4s]

Raw output


            Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2601583089-POCLOUD is unsupported')
collection_concept_id = 'C2601583089-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2601583089-POCLOUD', 'concept-id': 'G2816589876-POCLOUD', 'concept-type': 'granul...122T003749_PIC0_02.nc', 'SWOT_L2_LR_PreCalSSH_WindWave_006_545_20231121T234622_20231122T003750_PIC0_02.nc', ...], ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2601583089-POCLOUD'}]}, 'meta': {'association-details': {'collecti...pixels', 'Size': 71, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': 2147483647}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw5/test_spatial_subset_C2601583080')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
>       job_id = harmony_client.submit(harmony_request)

verify_collection.py:392: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:851: in submit
    self._handle_error_response(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <harmony.harmony.Client object at 0x7f0af0877490>
response = <Response [422]>

    def _handle_error_response(self, response: Response):
        """Raises the appropriate exception based on the response
        received from Harmony. Tries to pull out an error message
        from a Harmony JSON response when possible.
    
        Args:
            response: The Response from Harmony
    
        Raises:
            Exception with a Harmony error message or a more generic
            HTTPError
        """
        if 'application/json' in response.headers.get('Content-Type', ''):
            exception_message = None
            try:
                response_json = response.json()
                if hasattr(response_json, 'get'):
                    exception_message = response_json.get('description')
                    if not exception_message:
                        exception_message = response_json.get('error')
            except JSONDecodeError:
                pass
            if exception_message:
>               raise Exception(response.reason, exception_message)
E               Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2601583089-POCLOUD is unsupported')

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:784: Exception
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2816589876-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2601583089-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.5%3A85.5%29&subset=lon%28-171.0%3A171.0%29&granuleId=G2816589876-POCLOUD

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2601584109-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 10s]

Raw output


            Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2601584109-POCLOUD is unsupported')
collection_concept_id = 'C2601584109-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2601584109-POCLOUD', 'concept-id': 'G2816589757-POCLOUD', 'concept-type': 'granul...122T003749_PIC0_02.nc', 'SWOT_L2_LR_PreCalSSH_WindWave_006_545_20231121T234622_20231122T003750_PIC0_02.nc', ...], ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2601584109-POCLOUD'}]}, 'meta': {'association-details': {'collecti...'num_pixels', 'Size': 71, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': 32767}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C2601584100')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
>       job_id = harmony_client.submit(harmony_request)

verify_collection.py:392: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:851: in submit
    self._handle_error_response(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <harmony.harmony.Client object at 0x7f4c04f75060>
response = <Response [422]>

    def _handle_error_response(self, response: Response):
        """Raises the appropriate exception based on the response
        received from Harmony. Tries to pull out an error message
        from a Harmony JSON response when possible.
    
        Args:
            response: The Response from Harmony
    
        Raises:
            Exception with a Harmony error message or a more generic
            HTTPError
        """
        if 'application/json' in response.headers.get('Content-Type', ''):
            exception_message = None
            try:
                response_json = response.json()
                if hasattr(response_json, 'get'):
                    exception_message = response_json.get('description')
                    if not exception_message:
                        exception_message = response_json.get('error')
            except JSONDecodeError:
                pass
            if exception_message:
>               raise Exception(response.reason, exception_message)
E               Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2601584109-POCLOUD is unsupported')

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:784: Exception
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2816589757-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2601584109-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.5%3A85.5%29&subset=lon%28-171.0%3A171.0%29&granuleId=G2816589757-POCLOUD

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2601581863-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 3s]

Raw output


            Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2601581863-POCLOUD is unsupported')
collection_concept_id = 'C2601581863-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2601581863-POCLOUD', 'concept-id': 'G2816589710-POCLOUD', 'concept-type': 'granul...122T003749_PIC0_02.nc', 'SWOT_L2_LR_PreCalSSH_WindWave_006_545_20231121T234622_20231122T003750_PIC0_02.nc', ...], ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2601581863-POCLOUD'}]}, 'meta': {'association-details': {'collecti...pixels', 'Size': 71, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': 2147483647}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw3/test_spatial_subset_C2601581860')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
>       job_id = harmony_client.submit(harmony_request)

verify_collection.py:392: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:851: in submit
    self._handle_error_response(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <harmony.harmony.Client object at 0x7f2b8b2c3df0>
response = <Response [422]>

    def _handle_error_response(self, response: Response):
        """Raises the appropriate exception based on the response
        received from Harmony. Tries to pull out an error message
        from a Harmony JSON response when possible.
    
        Args:
            response: The Response from Harmony
    
        Raises:
            Exception with a Harmony error message or a more generic
            HTTPError
        """
        if 'application/json' in response.headers.get('Content-Type', ''):
            exception_message = None
            try:
                response_json = response.json()
                if hasattr(response_json, 'get'):
                    exception_message = response_json.get('description')
                    if not exception_message:
                        exception_message = response_json.get('error')
            except JSONDecodeError:
                pass
            if exception_message:
>               raise Exception(response.reason, exception_message)
E               Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2601581863-POCLOUD is unsupported')

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:784: Exception
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2816589710-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2601581863-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.5%3A85.5%29&subset=lon%28-171.0%3A171.0%29&granuleId=G2816589710-POCLOUD

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516292-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 56s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1627516292-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516292-GES_DISC', 'concept-id': 'G1898261144-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516292-GES_DISC'}]}, 'meta': {'association-details': {'collect... 'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/layer', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw3/test_spatial_subset_C1627516290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f2b8b0f5140>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f2b8b0f4240>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f2b8b0f4340>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G1898261144-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516292-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-78.0453%3A-60.6907%29&subset=lon%28-164.82465%3A-84.66935000000001%29&granuleId=G1898261144-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 666d842a-1f08-4487-a2cb-886e5b2c0a82
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw3/test_spatial_subset_C1627516290/78002432_S5P_OFFL_L2_HCHO_20200712T224601_20200713T002730_14238_01_010108_20200715T122623_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2936721448-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 31s]

Raw output


            Failed: Unable to find latitude and longitude variables.
collection_concept_id = 'C2936721448-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2936721448-POCLOUD', 'concept-id': 'G3062447313-POCLOUD', 'concept-type': 'granul...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2936721448-POCLOUD'}]}, 'meta': {'association-details': {'collecti...rization_2', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C2936721440')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
>       lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)

verify_collection.py:406: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

dataset = <xarray.Dataset> Size: 240B
Dimensions:                 (ydim_grid: 1, xdim_grid: 1, look: 1,
                        ...                                 -0.43
    history_json:                                           [{"date_time": "2...
file_to_subset = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C2936721440/78002448_RSS_SMAP_SSS_L2C_r47700_20240106T014035_2024006_FNL_V05.3.nc4')
collection_variable_list = [{'associations': {'collections': [{'concept-id': 'C2936721448-POCLOUD'}]}, 'meta': {'association-details': {'collecti...rization_2', 'Size': 2, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': -9999.0}], ...}}, ...]
collection_concept_id = 'C2936721448-POCLOUD'

    def get_lat_lon_var_names(dataset: xarray.Dataset, file_to_subset: str, collection_variable_list: List[Dict], collection_concept_id: str):
        # Try getting it from UMM-Var first
        lat_var_json, lon_var_json, _ = get_coordinate_vars_from_umm(collection_variable_list)
        lat_var_name = get_variable_name_from_umm_json(lat_var_json)
        lon_var_name = get_variable_name_from_umm_json(lon_var_json)
    
        if lat_var_name and lon_var_name:
            return lat_var_name, lon_var_name
    
        logging.warning("Unable to find lat/lon vars in UMM-Var")
    
        # If that doesn't work, try using cf-xarray to infer lat/lon variable names
        try:
            latitude = [lat for lat in dataset.cf.coordinates['latitude']
                             if lat.lower() in VALID_LATITUDE_VARIABLE_NAMES][0]
            longitude = [lon for lon in dataset.cf.coordinates['longitude']
                             if lon.lower() in VALID_LONGITUDE_VARIABLE_NAMES][0]
            return latitude, longitude
        except:
            logging.warning("Unable to find lat/lon vars using cf_xarray")
    
        # If that still doesn't work, try using l2ss-py directly
        try:
            # file not able to be flattened unless locally downloaded
            filename = f'my_copy_file_{collection_concept_id}.nc'
            shutil.copy(file_to_subset, filename)
            nc_dataset = netCDF4.Dataset(filename, mode='r+')
            # flatten the dataset
            nc_dataset_flattened = podaac.subsetter.group_handling.transform_grouped_dataset(nc_dataset, filename)
    
            args = {
                    'decode_coords': False,
                    'mask_and_scale': False,
                    'decode_times': False
                    }
    
            with xarray.open_dataset(
                xarray.backends.NetCDF4DataStore(nc_dataset_flattened),
                **args
                ) as flat_dataset:
                    # use l2ss-py to find lat and lon names
                    lat_var_names, lon_var_names = podaac.subsetter.subset.compute_coordinate_variable_names(flat_dataset)
    
            os.remove(filename)
            if lat_var_names and lon_var_names:
                lat_var_name = lat_var_names.split('__')[-1] if isinstance(lat_var_names, str) else lat_var_names[0].split('__')[-1]
                lon_var_name = lon_var_names.split('__')[-1] if isinstance(lon_var_names, str) else lon_var_names[0].split('__')[-1]
                return lat_var_name, lon_var_name
    
        except ValueError:
            logging.warning("Unable to find lat/lon vars using l2ss-py")
    
        # Still no dice, try using the 'units' variable attribute
        for coord_name, coord in dataset.coords.items():
            if 'units' not in coord.attrs:
                continue
            if coord.attrs['units'] == 'degrees_north' and lat_var_name is None:
                lat_var_name = coord_name
            if coord.attrs['units'] == 'degrees_east' and lon_var_name is None:
                lon_var_name = coord_name
        if lat_var_name and lon_var_name:
            return lat_var_name, lon_var_name
        else:
            logging.warning("Unable to find lat/lon vars using 'units' attribute")
    
        # Out of options, fail the test because we couldn't determine lat/lon variables
>       pytest.fail(f"Unable to find latitude and longitude variables.")
E       Failed: Unable to find latitude and longitude variables.

verify_collection.py:359: Failed
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3062447313-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2936721448-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.29044999999999%3A82.08044999999998%29&subset=lon%284.51755%3A175.50045%29&granuleId=G3062447313-POCLOUD
INFO     root:verify_collection.py:393 Submitted harmony job 67a85aa2-93a2-47aa-9393-7a19d13cbc06
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw6/test_spatial_subset_C2936721440/78002448_RSS_SMAP_SSS_L2C_r47700_20240106T014035_2024006_FNL_V05.3.nc4
WARNING  root:verify_collection.py:302 Unable to find lat/lon vars in UMM-Var
WARNING  root:verify_collection.py:312 Unable to find lat/lon vars using cf_xarray
WARNING  root:verify_collection.py:343 Unable to find lat/lon vars using l2ss-py
WARNING  root:verify_collection.py:356 Unable to find lat/lon vars using 'units' attribute

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1918209669-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 1m 18s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1918209669-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918209669-GES_DISC', 'concept-id': 'G3248278202-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918209669-GES_DISC'}]}, 'meta': {'association-details': {'collect...tracted from _FillValue metadata attribute', 'Type': 'SCIENCE_FILLVALUE', 'Value': 9.969209968386869e+36}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1918209660')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fa22912c440>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fa22912cb40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7fa22912cc40>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248278202-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918209669-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.029225%3A-66.379775%29&subset=lon%28-150.570875%3A109.885875%29&granuleId=G3248278202-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 55205d9f-b972-4328-80bb-90906d93c26b
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw8/test_spatial_subset_C1918209660/78002467_S5P_OFFL_L2_CLOUD_20240923T130854_20240923T145023_35996_03_020601_20240925T043638_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2799465503-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 10m 0s]

Raw output


            Failed: Timeout >600.0s
collection_concept_id = 'C2799465503-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2799465503-POCLOUD', 'concept-id': 'G3247158046-POCLOUD', 'concept-type': 'granul...920T234923_PIC0_01.nc', 'SWOT_L2_LR_PreCalSSH_WindWave_021_293_20240920T225756_20240920T234924_PIC0_01.nc', ...], ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2799465503-POCLOUD'}]}, 'meta': {'association-details': {'collecti...ixels', 'Size': 240, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': 2147483647}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_spatial_subset_C2799465500')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
>       harmony_client.wait_for_processing(job_id, show_progress=True)

verify_collection.py:394: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <harmony.harmony.Client object at 0x7fe3c965ac50>
job_id = '5a90d53e-74c1-4413-b446-cbf9de42b27a', show_progress = True

    def wait_for_processing(self, job_id: str, show_progress: bool = False) -> None:
        """Retrieve a submitted job's completion status in percent.
    
        Args:
            job_id: UUID string for the job you wish to interrogate.
    
        Returns:
            The job's processing progress as a percentage.
    
        :raises
            Exception: This can happen if an invalid job_id is provided or Harmony services
            can't be reached.
        """
        # How often to refresh the screen for progress updates and animating spinners.
        ui_update_interval = 0.33  # in seconds
        running_w_errors_logged = False
    
        intervals = round(self.check_interval / ui_update_interval)
        if show_progress:
            with progressbar.ProgressBar(max_value=100, widgets=progressbar_widgets) as bar:
                progress = 0
                while progress < 100:
                    progress, status, message = self.progress(job_id)
                    if status == 'failed':
                        raise ProcessingFailedException(job_id, message)
                    if status == 'canceled':
                        print('Job has been canceled.')
                        break
                    if status == 'paused':
                        print('\nJob has been paused. Call `resume()` to resume.', file=sys.stderr)
                        break
                    if (not running_w_errors_logged and status == 'running_with_errors'):
                        print('\nJob is running with errors.', file=sys.stderr)
                        running_w_errors_logged = True
    
                    # This gets around an issue with progressbar. If we update() with 0, the
                    # output shows up as "N/A". If we update with, e.g. 0.1, it rounds down or
                    # truncates to 0 but, importantly, actually displays that.
                    if progress == 0:
                        progress = 0.1
    
                    for _ in range(intervals):
                        bar.update(progress)  # causes spinner to rotate even when no data change
                        sys.stdout.flush()  # ensures correct behavior in Jupyter notebooks
                        if progress >= 100:
                            break
                        else:
>                           time.sleep(ui_update_interval)
E                           Failed: Timeout >600.0s

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:1009: Failed
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3247158046-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2799465503-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.2280053778625%3A0.080195033362493%29&subset=lon%2898.7363388551%3A177.9163163809%29&granuleId=G3247158046-POCLOUD
INFO     root:verify_collection.py:393 Submitted harmony job 5a90d53e-74c1-4413-b446-cbf9de42b27a

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516300-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 47s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1627516300-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516300-GES_DISC', 'concept-id': 'G1902371249-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516300-GES_DISC'}]}, 'meta': {'association-details': {'collect...asa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/ozone_total_vertical_column_precision', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw5/test_spatial_subset_C1627516300')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f0aef771340>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f0aef771940>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f0aef771640>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G1902371249-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516300-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-78.0453%3A-60.6907%29&subset=lon%28-164.82465%3A-84.66935000000001%29&granuleId=G1902371249-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job fd09b7f0-c321-4c2a-ad0a-630b85e0c03e
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw5/test_spatial_subset_C1627516300/78002480_S5P_OFFL_L2_O3_20200712T224601_20200713T002730_14238_01_010108_20200715T122623_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516285-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 45s]

Raw output


            OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd72a9c6640>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7fd72f508310>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
>                   ds = ds.groups[key]
E                   KeyError: 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError

During handling of the above exception, another exception occurred:

collection_concept_id = 'C1627516285-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516285-GES_DISC', 'concept-id': 'G2084435970-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516285-GES_DISC'}]}, 'meta': {'association-details': {'collect.../variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/aerosol_index_354_388_histogram_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1627516280')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
verify_collection.py:431: in group_walk
    subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
    backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
    store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
    return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
    self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
    return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
    ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7fd72a9c6640>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7fd72f508310>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
                    ds = ds.groups[key]
                except KeyError as e:
                    if mode != "r":
                        ds = create_group(ds, key)
                    else:
                        # wrap error to provide slightly more helpful message
>                       raise OSError(f"group not found: {key}", e)
E                       OSError: [Errno group not found: PRODUCT] 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2084435970-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516285-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.82889999999999%3A-59.7251%29&subset=lon%28-77.22455%3A-1.6634499999999974%29&granuleId=G2084435970-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 19d2a915-7fe6-4702-b9f3-8b4d39735d97
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C1627516280/78002484_S5P_OFFL_L2_AER_AI_20210701T170324_20210701T184453_19257_01_010400_20210703T065109_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516296-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 1m 5s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1627516296-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516296-GES_DISC', 'concept-id': 'G1902371245-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516296-GES_DISC'}]}, 'meta': {'association-details': {'collect...umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/sulfurdioxide_total_vertical_column_precision', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1627516290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4ab4164b40>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4ab4166640>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f4ab4166740>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G1902371245-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516296-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-78.0453%3A-60.6907%29&subset=lon%28-164.82465%3A-84.66935000000001%29&granuleId=G1902371245-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job b60b41b3-fdf9-42c7-9215-4599aa448ebf
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1627516290/78002522_S5P_OFFL_L2_SO2_20200712T224601_20200713T002730_14238_01_010108_20200715T211427_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516287-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 34s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1627516287-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516287-GES_DISC', 'concept-id': 'G2084463561-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516287-GES_DISC'}]}, 'meta': {'association-details': {'collect...'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/corner', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1627516280')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4c04c91840>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4c04c91340>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f4c04c91240>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2084463561-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516287-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.99937499999999%3A-59.951625%29&subset=lon%28-76.6214%3A-1.5866000000000042%29&granuleId=G2084463561-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 8e58e151-7884-48c2-a1d5-eef218362b57
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1627516280/78002544_S5P_OFFL_L2_CO_20210701T170324_20210701T184453_19257_01_010400_20210703T065107_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1918210023-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 1m 16s]

Raw output


            OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4c04f85f40>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f4c0d792c20>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
>                   ds = ds.groups[key]
E                   KeyError: 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError

During handling of the above exception, another exception occurred:

collection_concept_id = 'C1918210023-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918210023-GES_DISC', 'concept-id': 'G3248340445-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918210023-GES_DISC'}]}, 'meta': {'association-details': {'collect...RL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/qa_value', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1918210020')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
verify_collection.py:431: in group_walk
    subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
    backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
    store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
    return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
    self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
    return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
    ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4c04f85f40>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f4c0d792c20>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
                    ds = ds.groups[key]
                except KeyError as e:
                    if mode != "r":
                        ds = create_group(ds, key)
                    else:
                        # wrap error to provide slightly more helpful message
>                       raise OSError(f"group not found: {key}", e)
E                       OSError: [Errno group not found: PRODUCT] 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248340445-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918210023-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.029225%3A-66.379775%29&subset=lon%28-150.570875%3A109.885875%29&granuleId=G3248340445-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 0dc550be-d430-4ccc-8685-886c8b0285e9
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1918210020/78002561_S5P_OFFL_L2_HCHO_20240923T130854_20240923T145023_35996_03_020601_20240925T045948_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2087216530-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 43s]

Raw output


            OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a18ce640>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f83a8d2f1c0>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
>                   ds = ds.groups[key]
E                   KeyError: 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError

During handling of the above exception, another exception occurred:

collection_concept_id = 'C2087216530-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2087216530-GES_DISC', 'concept-id': 'G3248244839-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2087216530-GES_DISC'}]}, 'meta': {'association-details': {'collect...Var', 'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/layer'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C2087216530')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
verify_collection.py:431: in group_walk
    subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
    backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
    store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
    return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
    self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
    return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
    ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a18ce640>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f83a8d2f1c0>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
                    ds = ds.groups[key]
                except KeyError as e:
                    if mode != "r":
                        ds = create_group(ds, key)
                    else:
                        # wrap error to provide slightly more helpful message
>                       raise OSError(f"group not found: {key}", e)
E                       OSError: [Errno group not found: PRODUCT] 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248244839-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2087216530-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.210325%3A-66.564675%29&subset=lon%28-135.414975%3A14.001974999999987%29&granuleId=G3248244839-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 24ddc334-0fe2-4ec5-b8ad-a417048ea398
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C2087216530/78002570_S5P_OFFL_L2_CH4_20240923T044126_20240923T062256_35991_03_020701_20240924T203403_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1442068509-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 51s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1442068509-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068509-GES_DISC', 'concept-id': 'G1628706233-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068509-GES_DISC'}]}, 'meta': {'association-details': {'collect... 'URL': 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/level', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068500')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4ab4192940>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4ab4192340>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f4ab4192240>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G1628706233-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068509-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1628706233-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 6b67b755-9126-45ad-816e-d229df8ae471
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068500/78002574_S5P_OFFL_L2_O3_20190806T003836_20190806T022006_09387_01_010107_20190812T015759_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2746966926-POCLOUD] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 2s]

Raw output


            Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2746966926-POCLOUD is unsupported')
collection_concept_id = 'C2746966926-POCLOUD', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2746966926-POCLOUD', 'concept-id': 'G2816914994-POCLOUD', 'concept-type': 'granul..._XOverCal_20230709T115434_20230710T082110_PIB0_01.nc', 'SWOT_GranulePolygons_Cal_20230213T142800_v05.json', ...], ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2746966926-POCLOUD'}]}, 'meta': {'association-details': {'collecti...pixels', 'Size': 69, 'Type': 'OTHER'}], 'FillValues': [{'Type': 'SCIENCE_FILLVALUE', 'Value': 2147483647}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw9/test_spatial_subset_C2746966920')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
>       job_id = harmony_client.submit(harmony_request)

verify_collection.py:392: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:851: in submit
    self._handle_error_response(response)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <harmony.harmony.Client object at 0x7fd72abf4a30>
response = <Response [422]>

    def _handle_error_response(self, response: Response):
        """Raises the appropriate exception based on the response
        received from Harmony. Tries to pull out an error message
        from a Harmony JSON response when possible.
    
        Args:
            response: The Response from Harmony
    
        Raises:
            Exception with a Harmony error message or a more generic
            HTTPError
        """
        if 'application/json' in response.headers.get('Content-Type', ''):
            exception_message = None
            try:
                response_json = response.json()
                if hasattr(response_json, 'get'):
                    exception_message = response_json.get('description')
                    if not exception_message:
                        exception_message = response_json.get('error')
            except JSONDecodeError:
                pass
            if exception_message:
>               raise Exception(response.reason, exception_message)
E               Exception: ('Unprocessable Entity', 'Error: the requested combination of operations: spatial subsetting on C2746966926-POCLOUD is unsupported')

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/harmony/harmony.py:784: Exception
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2816914994-POCLOUD for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2746966926-POCLOUD/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.5%3A85.5%29&subset=lon%28-171.0%3A171.0%29&granuleId=G2816914994-POCLOUD

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1627516288-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 45s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1627516288-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1627516288-GES_DISC', 'concept-id': 'G2085128317-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1627516288-GES_DISC'}]}, 'meta': {'association-details': {'collect... 'https://cdn.earthdata.nasa.gov/umm/variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/ground_pixel', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C1627516280')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a18cd340>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a18ccc40>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f83a18ccd40>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G2085128317-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1627516288-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-76.99937499999999%3A-59.951625%29&subset=lon%28-76.6214%3A-1.5866000000000042%29&granuleId=G2085128317-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job d5d0e6a1-50e0-41b3-a019-e5d1e293fb14
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C1627516280/78002592_S5P_OFFL_L2_CH4_20210701T170324_20210701T184453_19257_01_010400_20210703T102338_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1918210292-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 1m 19s]

Raw output


            OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4c04e6e840>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f4c0d792c20>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
>                   ds = ds.groups[key]
E                   KeyError: 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError

During handling of the above exception, another exception occurred:

collection_concept_id = 'C1918210292-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1918210292-GES_DISC', 'concept-id': 'G3248340452-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1918210292-GES_DISC'}]}, 'meta': {'association-details': {'collect...v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/SUPPORT_DATA/DETAILED_RESULTS/number_of_slant_columns_win2', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1918210290')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
verify_collection.py:431: in group_walk
    subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
    backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
    store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
    return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
    self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
    return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
    ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4c04e6e840>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f4c0d792c20>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
                    ds = ds.groups[key]
                except KeyError as e:
                    if mode != "r":
                        ds = create_group(ds, key)
                    else:
                        # wrap error to provide slightly more helpful message
>                       raise OSError(f"group not found: {key}", e)
E                       OSError: [Errno group not found: PRODUCT] 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248340452-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1918210292-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.066%3A-66.446%29&subset=lon%28-159.00975%3A161.18775%29&granuleId=G3248340452-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 7bfe1b1a-2ebc-4a41-8ec1-d6dc1b9800f5
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw7/test_spatial_subset_C1918210290/78002599_S5P_OFFL_L2_SO2_20240923T080425_20240923T094555_35993_03_020601_20240925T055934_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1442068508-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 1m 2s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1442068508-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068508-GES_DISC', 'concept-id': 'G1628710396-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068508-GES_DISC'}]}, 'meta': {'association-details': {'collect...v1.9.0', 'Version': '1.9.0'}, 'Name': 'PRODUCT/SUPPORT_DATA/DETAILED_RESULTS/fitted_radiance_squeeze_win3', ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068501')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4aaffae540>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4aaffad440>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f4aaffad140>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G1628710396-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068508-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1628710396-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job ad2778b1-0e24-4783-abfb-fdff2022c2c5
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068501/78002602_S5P_OFFL_L2_SO2_20190806T003836_20190806T022006_09387_01_010107_20190812T085130_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2179081549-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 46s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C2179081549-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2179081549-GES_DISC', 'concept-id': 'G3247892495-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2179081549-GES_DISC'}]}, 'meta': {'association-details': {'collect...escription': 'Extracted from _FillValue metadata attribute', 'Type': 'SCIENCE_FILLVALUE', 'Value': -9999}], ...}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C2179081540')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'Swath': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a012d940>}
nc_d = <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f83a012d840>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
>                   data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
E                   IndexError: list index out of range

verify_collection.py:435: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3247892495-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2179081549-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-66.24214475000001%3A-60.47316525%29&subset=lon%28-156.89747825%3A-126.62547175%29&granuleId=G3247892495-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 0b4cbfc9-be3b-4fa0-b268-3d26ae72bf38
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C2179081540/78002613_2A.GPM.DPR.GPM-SLH.20240923-S214728-E232040.060034.V07C_subsetted.nc4
WARNING  root:verify_collection.py:302 Unable to find lat/lon vars in UMM-Var
WARNING  root:verify_collection.py:312 Unable to find lat/lon vars using cf_xarray

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C1442068511-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 44s]

Raw output


            IndexError: list index out of range
collection_concept_id = 'C1442068511-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C1442068511-GES_DISC', 'concept-id': 'G1629705055-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C1442068511-GES_DISC'}]}, 'meta': {'association-details': {'collect...e/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/nitrogendioxide_tropospheric_column_pdf_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068510')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

groups = {'METADATA': <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4aaedfd840>, 'PRODUCT': <[RuntimeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f4aaedfd340>}
nc_d = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Dataset object at 0x7f4aaedfd240>
current_group = ''

    def group_walk(groups, nc_d, current_group):
        global subsetted_ds_new
        subsetted_ds_new = None
        # check if the top group has lat or lon variable
        if lat_var_name in list(nc_d.variables.keys()):
            subsetted_ds_new = subsetted_ds
        else:
            # if not then we'll need to keep track of the group layers
            group_list.append(current_group)
    
        # loop through the groups in the current layer
        for g in groups:
            # end the loop if we've already found latitude
            if subsetted_ds_new:
                break
            # check if the groups have latitude, define the dataset and end the loop if found
            if lat_var_name in list(nc_d.groups[g].variables.keys()):
                group_list.append(g)
                lat_group = '/'.join(group_list)
                subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                # add a science variable to the dataset if other groups are in the lat/lon group
                # some GPM collections won't have any other variables in the same group as lat/lon
                if len(list(nc_d.groups[g].groups.keys())) > 0:
                    data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                    g_data = lat_group+'/'+data_group
                    subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
>                   sci_var = list(subsetted_ds_data.variables.keys())[0]
E                   IndexError: list index out of range

verify_collection.py:438: IndexError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G1629705055-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C1442068511-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-82.265975%3A-63.873025000000005%29&subset=lon%28-112.057275%3A162.74827499999998%29&granuleId=G1629705055-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job fbf5a43f-88f8-4a84-86b0-29b743ad9d52
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw4/test_spatial_subset_C1442068510/78002636_S5P_OFFL_L2_NO2_20190806T003836_20190806T022006_09387_01_010302_20190812T015802_subsetted.nc4

Check warning on line 0 in tests.verify_collection

github-actions / Regression test results for ops

test_spatial_subset[C2087131083-GES_DISC] (tests.verify_collection) failed

test-results/ops_test_report.xml [took 58s]

Raw output


            OSError: [Errno group not found: PRODUCT] 'PRODUCT'
ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a012e940>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f83a8d2f1c0>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
>                   ds = ds.groups[key]
E                   KeyError: 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:190: KeyError

During handling of the above exception, another exception occurred:

collection_concept_id = 'C2087131083-GES_DISC', env = 'ops'
granule_json = {'meta': {'collection-concept-id': 'C2087131083-GES_DISC', 'concept-id': 'G3248340660-GES_DISC', 'concept-type': 'gran...pecification': {'Name': 'UMM-G', 'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.6', 'Version': '1.6.6'}, ...}}
collection_variables = [{'associations': {'collections': [{'concept-id': 'C2087131083-GES_DISC'}]}, 'meta': {'association-details': {'collect.../variable/v1.9.0', 'Version': '1.9.0'}, 'Name': 'METADATA/QA_STATISTICS/aerosol_index_354_388_histogram_bounds'}}, ...]
harmony_env = <Environment.PROD: 4>
tmp_path = PosixPath('/tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C2087131080')
bearer_token = 'eyJ0eXAiOiJKV1QiLCJvcmlnaW4iOiJFYXJ0aGRhdGEgTG9naW4iLCJzaWciOiJlZGxqd3RwdWJrZXlfb3BzIiwiYWxnIjoiUlMyNTYifQ.eyJ0eXBlIj...Hcf0QWqtHsCuvOtj5tczYDaCn691RlCxRjaMlZBPYm2O9z5cTN31ynn1hy4h8lXYRR_I6DfCAdmdtrIdlLaMNL-ZbKOjYgx5kEqU8ClqAQnFPDVYJL29Hw'

    @pytest.mark.timeout(600)
    def test_spatial_subset(collection_concept_id, env, granule_json, collection_variables,
                            harmony_env, tmp_path: pathlib.Path, bearer_token):
        test_spatial_subset.__doc__ = f"Verify spatial subset for {collection_concept_id} in {env}"
    
        logging.info("Using granule %s for test", granule_json['meta']['concept-id'])
    
        # Compute a box that is smaller than the granule extent bounding box
        north, south, east, west = get_bounding_box(granule_json)
        east, west, north, south = create_smaller_bounding_box(east, west, north, south, .95)
    
        start_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        end_time = granule_json['umm']["TemporalExtent"]["RangeDateTime"]["EndingDateTime"]
    
        # Build harmony request
        harmony_client = harmony.Client(env=harmony_env, token=bearer_token)
        request_bbox = harmony.BBox(w=west, s=south, e=east, n=north)
        request_collection = harmony.Collection(id=collection_concept_id)
        harmony_request = harmony.Request(collection=request_collection, spatial=request_bbox,
                                          granule_id=[granule_json['meta']['concept-id']])
    
        logging.info("Sending harmony request %s", harmony_client.request_as_url(harmony_request))
    
        # Submit harmony request and download result
        job_id = harmony_client.submit(harmony_request)
        logging.info("Submitted harmony job %s", job_id)
        harmony_client.wait_for_processing(job_id, show_progress=True)
        subsetted_filepath = None
        for filename in [file_future.result()
                         for file_future
                         in harmony_client.download_all(job_id, directory=f'{tmp_path}', overwrite=True)]:
            logging.info(f'Downloaded: %s', filename)
            subsetted_filepath = pathlib.Path(filename)
    
        # Verify spatial subset worked
        subsetted_ds = xarray.open_dataset(subsetted_filepath, decode_times=False)
        group = None
        # Try to read group in file
        lat_var_name, lon_var_name = get_lat_lon_var_names(subsetted_ds, subsetted_filepath, collection_variables, collection_concept_id)
        lat_var_name = lat_var_name.split('/')[-1]
        lon_var_name = lon_var_name.split('/')[-1]
    
        with netCDF4.Dataset(subsetted_filepath) as f:
            group_list = []
            def group_walk(groups, nc_d, current_group):
                global subsetted_ds_new
                subsetted_ds_new = None
                # check if the top group has lat or lon variable
                if lat_var_name in list(nc_d.variables.keys()):
                    subsetted_ds_new = subsetted_ds
                else:
                    # if not then we'll need to keep track of the group layers
                    group_list.append(current_group)
    
                # loop through the groups in the current layer
                for g in groups:
                    # end the loop if we've already found latitude
                    if subsetted_ds_new:
                        break
                    # check if the groups have latitude, define the dataset and end the loop if found
                    if lat_var_name in list(nc_d.groups[g].variables.keys()):
                        group_list.append(g)
                        lat_group = '/'.join(group_list)
                        subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
                        # add a science variable to the dataset if other groups are in the lat/lon group
                        # some GPM collections won't have any other variables in the same group as lat/lon
                        if len(list(nc_d.groups[g].groups.keys())) > 0:
                            data_group = [v for v in list(nc_d.groups[g].groups.keys()) if 'time' not in str(v).lower()][0]
                            g_data = lat_group+'/'+data_group
                            subsetted_ds_data = xarray.open_dataset(subsetted_filepath, group=g_data, decode_times=False)
                            sci_var = list(subsetted_ds_data.variables.keys())[0]
                            subsetted_ds_new['science_test'] = subsetted_ds_data[sci_var]
                        break
                    # recall the function on a group that has groups in it and didn't find latitude
                    # this is going 'deeper' into the groups
                    if len(list(nc_d.groups[g].groups.keys())) > 0:
                        group_walk(nc_d.groups[g].groups, nc_d.groups[g], g)
                    else:
                        continue
    
>           group_walk(f.groups, f, '')

verify_collection.py:448: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
verify_collection.py:431: in group_walk
    subsetted_ds_new = xarray.open_dataset(subsetted_filepath, group=lat_group, decode_times=False)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/api.py:571: in open_dataset
    backend_ds = backend.open_dataset(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:646: in open_dataset
    store = NetCDF4DataStore.open(
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:409: in open
    return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:356: in __init__
    self.format = self.ds.data_model
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:418: in ds
    return self._acquire()
../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:413: in _acquire
    ds = _nc4_require_group(root, self._group, self._mode)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

ds = <[AttributeError('NetCDF: Not a valid ID') raised in repr()] Group object at 0x7f83a012e940>
group = '/METADATA/PRODUCT', mode = 'r'
create_group = <function _netcdf4_create_group at 0x7f83a8d2f1c0>

    def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group):
        if group in {None, "", "/"}:
            # use the root group
            return ds
        else:
            # make sure it's a string
            if not isinstance(group, str):
                raise ValueError("group must be a string or None")
            # support path-like syntax
            path = group.strip("/").split("/")
            for key in path:
                try:
                    ds = ds.groups[key]
                except KeyError as e:
                    if mode != "r":
                        ds = create_group(ds, key)
                    else:
                        # wrap error to provide slightly more helpful message
>                       raise OSError(f"group not found: {key}", e)
E                       OSError: [Errno group not found: PRODUCT] 'PRODUCT'

../../../../.cache/pypoetry/virtualenvs/l2ss-py-autotest-iYz8Sff2-py3.10/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:196: OSError
--------------------------------- Captured Log ---------------------------------
INFO     root:verify_collection.py:373 Using granule G3248340660-GES_DISC for test
INFO     root:verify_collection.py:389 Sending harmony request https://harmony.earthdata.nasa.gov/C2087131083-GES_DISC/ogc-api-coverages/1.0.0/collections/all/coverage/rangeset?forceAsync=true&subset=lat%28-85.038375%3A-66.39462499999999%29&subset=lon%28-125.88400000000001%3A169.49%29&granuleId=G3248340660-GES_DISC
INFO     root:verify_collection.py:393 Submitted harmony job 3f6bbf67-08cf-4ba1-aea1-0e01059b8370
INFO     root:verify_collection.py:399 Downloaded: /tmp/pytest-of-runner/pytest-0/popen-gw2/test_spatial_subset_C2087131080/78002644_S5P_OFFL_L2_AER_AI_20240923T145023_20240923T163152_35997_03_020701_20240925T043203_subsetted.nc4

View more details on GitHub Actions

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Remove PR old association #63

Regression test results for ops