From ffee8b20ccc44f06dca2b8376d09a9384f13c25e Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Wed, 6 Mar 2024 14:40:04 +0100 Subject: [PATCH 1/6] make earthdata normalizer more generic --- .../normalizers/geospaas/earthdata_cmr.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/metanorm/normalizers/geospaas/earthdata_cmr.py b/metanorm/normalizers/geospaas/earthdata_cmr.py index d65c603..bfc31c7 100644 --- a/metanorm/normalizers/geospaas/earthdata_cmr.py +++ b/metanorm/normalizers/geospaas/earthdata_cmr.py @@ -25,7 +25,10 @@ def get_entry_title(self, raw_metadata): @utils.raises((KeyError, IndexError)) def get_entry_id(self, raw_metadata): - return raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier'].rstrip('.nc') + try: + return raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier'].rstrip('.nc') + except KeyError: + return raw_metadata['umm']['GranuleUR'] @utils.raises((KeyError, IndexError)) def get_summary(self, raw_metadata): @@ -33,10 +36,13 @@ def get_summary(self, raw_metadata): description = '' umm = raw_metadata['umm'] - for platform in umm['Platforms']: - description += ( - f"Platform={platform['ShortName']}, " + - ', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments'])) + try: + for platform in umm['Platforms']: + description += ( + f"Platform={platform['ShortName']}, " + + ', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments'])) + except KeyError: + pass description += ( f", Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}") @@ -66,7 +72,10 @@ def get_platform(self, raw_metadata): """Only get the first platform from the raw metadata, because GeoSPaaS does not support more than one platform per dataset """ - return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName']) + try: + return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName']) + except KeyError: + return utils.get_gcmd_platform('UNKNOWN') @utils.raises((KeyError, IndexError)) def get_instrument(self, raw_metadata): From 28acf46bfa7e61b361523415cbfd2f16c00ab9d6 Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Wed, 17 Apr 2024 10:29:23 +0200 Subject: [PATCH 2/6] add PODAAC pythesint alias --- metanorm/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metanorm/utils.py b/metanorm/utils.py index 8d156cc..938d801 100644 --- a/metanorm/utils.py +++ b/metanorm/utils.py @@ -80,6 +80,7 @@ def export_subclasses(package__all__, package_name, package_dir, base_class): # providers 'ESA/EO': ('ESA',), 'OB.DAAC': ('OB_DAAC',), + 'NASA/JPL/PODAAC': ('POCLOUD',), 'C-SAR': ('SAR-C', 'SAR-C SAR'), 'EUMETSAT/OSISAF': ('EUMETSAT OSI SAF',), 'NSIDC': ('NSIDC_ECS',), From 258cb5abe53bd2f21e07f15f2b4f10287e18862d Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Wed, 17 Apr 2024 09:42:14 +0000 Subject: [PATCH 3/6] earthdataCMR normalizer: make unknown platform more consistent --- metanorm/normalizers/geospaas/earthdata_cmr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metanorm/normalizers/geospaas/earthdata_cmr.py b/metanorm/normalizers/geospaas/earthdata_cmr.py index bfc31c7..07db74f 100644 --- a/metanorm/normalizers/geospaas/earthdata_cmr.py +++ b/metanorm/normalizers/geospaas/earthdata_cmr.py @@ -75,7 +75,7 @@ def get_platform(self, raw_metadata): try: return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName']) except KeyError: - return utils.get_gcmd_platform('UNKNOWN') + return utils.get_gcmd_platform(utils.UNKNOWN) @utils.raises((KeyError, IndexError)) def get_instrument(self, raw_metadata): From 57e0308c873d779779cbc32dbbbb0839298bac81 Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Wed, 17 Apr 2024 09:42:26 +0000 Subject: [PATCH 4/6] fix existing earthdata normalizer test --- tests/normalizers/test_earthdata_cmr.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/normalizers/test_earthdata_cmr.py b/tests/normalizers/test_earthdata_cmr.py index 3235449..da89a84 100644 --- a/tests/normalizers/test_earthdata_cmr.py +++ b/tests/normalizers/test_earthdata_cmr.py @@ -1,6 +1,7 @@ """Tests for the ACDD metadata normalizer""" import unittest import unittest.mock as mock +from collections import OrderedDict from datetime import datetime from dateutil.tz import tzutc @@ -189,10 +190,15 @@ def test_platform_missing_attribute(self): """A MetadataNormalizationError must be raised if the raw attribute is missing """ - with self.assertRaises(MetadataNormalizationError): - self.normalizer.get_platform({}) - with self.assertRaises(MetadataNormalizationError): - self.normalizer.get_platform({'umm': {'foo': 'bar'}}) + unknown_platform = OrderedDict([ + ('Category', 'Unknown'), + ('Series_Entity', 'Unknown'), + ('Short_Name', 'Unknown'), + ('Long_Name', 'Unknown') + ]) + self.assertDictEqual(self.normalizer.get_platform({}), unknown_platform) + self.assertDictEqual(self.normalizer.get_platform({'umm': {'foo': 'bar'}}), + unknown_platform) def test_instrument(self): """Test getting the instrument""" From ebc4c853e831b73f61eb54d4417ad4d74188d7ca Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Wed, 17 Apr 2024 10:08:56 +0000 Subject: [PATCH 5/6] fix earthdata CMR summary without platform --- metanorm/normalizers/geospaas/earthdata_cmr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metanorm/normalizers/geospaas/earthdata_cmr.py b/metanorm/normalizers/geospaas/earthdata_cmr.py index 07db74f..7b44b6a 100644 --- a/metanorm/normalizers/geospaas/earthdata_cmr.py +++ b/metanorm/normalizers/geospaas/earthdata_cmr.py @@ -40,12 +40,12 @@ def get_summary(self, raw_metadata): for platform in umm['Platforms']: description += ( f"Platform={platform['ShortName']}, " + - ', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments'])) + ', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']) + + ', ') except KeyError: pass + description += f"Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}" - description += ( - f", Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}") summary_fields[utils.SUMMARY_FIELDS['description']] = description processing_level_match = re.match( From a469ab48f39b75905432b94c5d60f8637e0e1a5a Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Wed, 17 Apr 2024 10:09:17 +0000 Subject: [PATCH 6/6] test earthdata CMR summary without platform --- tests/normalizers/test_earthdata_cmr.py | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/normalizers/test_earthdata_cmr.py b/tests/normalizers/test_earthdata_cmr.py index da89a84..a6cae65 100644 --- a/tests/normalizers/test_earthdata_cmr.py +++ b/tests/normalizers/test_earthdata_cmr.py @@ -39,6 +39,11 @@ def test_entry_id(self): } self.assertEqual(self.normalizer.get_entry_id(attributes), 'V2020245000600.L2_SNPP_OC') + def test_entry_id_from_granuleUR(self): + """Test getting the ID from the GranuleUR field""" + attributes = {'umm': {'GranuleUR': 'foo'}} + self.assertEqual(self.normalizer.get_entry_id(attributes), 'foo') + def test_entry_id_missing_attribute(self): """A MetadataNormalizationError must be raised if the raw attribute is missing @@ -110,6 +115,27 @@ def test_summary(self): 'Description: Platform=SUOMI-NPP, ' + 'Instrument=VIIRS, Start date=2020-09-01T00:06:00Z') + def test_summary_no_platform(self): + """Test getting a summary when no platform info is available + """ + attributes = { + "umm": { + "TemporalExtent": { + "RangeDateTime": { + "BeginningDateTime": "2020-09-01T00:06:00Z", + "EndingDateTime": "2020-09-01T00:11:59Z" + } + }, + "CollectionReference": { + "ShortName": "VIIRSN_L2_OC", + "Version": "2018" + } + } + } + self.assertEqual( + self.normalizer.get_summary(attributes), + 'Description: Start date=2020-09-01T00:06:00Z;Processing level: 2') + def test_summary_missing_attribute(self): """A MetadataNormalizationError must be raised if the raw attribute is missing