Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Earthdata CMR normalizer more generic #144

Merged
merged 6 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions metanorm/normalizers/geospaas/earthdata_cmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,27 @@ def get_entry_title(self, raw_metadata):

@utils.raises((KeyError, IndexError))
def get_entry_id(self, raw_metadata):
    """Return the identifier of the granule described by raw_metadata.

    The first entry of umm -> DataGranule -> Identifiers is used, with a
    trailing '.nc' extension removed. If a key is missing on that path,
    the value of umm -> GranuleUR is returned unchanged instead.

    KeyError and IndexError escaping this method are handled by the
    utils.raises decorator (the accompanying tests expect a
    MetadataNormalizationError in that case).
    """
    try:
        identifier = raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier']
    except KeyError:
        # No usable DataGranule identifier: fall back to the granule UR
        return raw_metadata['umm']['GranuleUR']

    # str.rstrip('.nc') treats its argument as a set of characters and
    # would also eat trailing 'n', 'c' and '.' characters belonging to the
    # name itself (e.g. 'scan.nc' -> 'sca'); only drop the literal suffix.
    extension = '.nc'
    if identifier.endswith(extension):
        return identifier[:-len(extension)]
    return identifier

@utils.raises((KeyError, IndexError))
def get_summary(self, raw_metadata):
summary_fields = {}
description = ''
umm = raw_metadata['umm']

for platform in umm['Platforms']:
description += (
f"Platform={platform['ShortName']}, " +
', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']))
try:
for platform in umm['Platforms']:
description += (
f"Platform={platform['ShortName']}, " +
', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']) +
', ')
except KeyError:
pass
description += f"Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}"

description += (
f", Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}")
summary_fields[utils.SUMMARY_FIELDS['description']] = description

processing_level_match = re.match(
Expand All @@ -66,7 +72,10 @@ def get_platform(self, raw_metadata):
"""Only get the first platform from the raw metadata, because
GeoSPaaS does not support more than one platform per dataset
"""
return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName'])
try:
return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName'])
except KeyError:
return utils.get_gcmd_platform(utils.UNKNOWN)

@utils.raises((KeyError, IndexError))
def get_instrument(self, raw_metadata):
Expand Down
1 change: 1 addition & 0 deletions metanorm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def export_subclasses(package__all__, package_name, package_dir, base_class):
# providers
'ESA/EO': ('ESA',),
'OB.DAAC': ('OB_DAAC',),
'NASA/JPL/PODAAC': ('POCLOUD',),
'C-SAR': ('SAR-C', 'SAR-C SAR'),
'EUMETSAT/OSISAF': ('EUMETSAT OSI SAF',),
'NSIDC': ('NSIDC_ECS',),
Expand Down
40 changes: 36 additions & 4 deletions tests/normalizers/test_earthdata_cmr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for the Earthdata CMR metadata normalizer"""
import unittest
import unittest.mock as mock
from collections import OrderedDict
from datetime import datetime

from dateutil.tz import tzutc
Expand Down Expand Up @@ -38,6 +39,11 @@ def test_entry_id(self):
}
self.assertEqual(self.normalizer.get_entry_id(attributes), 'V2020245000600.L2_SNPP_OC')

def test_entry_id_from_granuleUR(self):
    """The entry ID must be read from the GranuleUR field when it
    is the only identifier present in the raw metadata
    """
    raw_metadata = {'umm': {'GranuleUR': 'foo'}}
    entry_id = self.normalizer.get_entry_id(raw_metadata)
    self.assertEqual(entry_id, 'foo')

def test_entry_id_missing_attribute(self):
"""A MetadataNormalizationError must be raised if the raw
attribute is missing
Expand Down Expand Up @@ -109,6 +115,27 @@ def test_summary(self):
'Description: Platform=SUOMI-NPP, ' +
'Instrument=VIIRS, Start date=2020-09-01T00:06:00Z')

def test_summary_no_platform(self):
    """The summary must still be built when the raw metadata
    contains no platform information
    """
    temporal_extent = {
        "RangeDateTime": {
            "BeginningDateTime": "2020-09-01T00:06:00Z",
            "EndingDateTime": "2020-09-01T00:11:59Z",
        },
    }
    collection_reference = {
        "ShortName": "VIIRSN_L2_OC",
        "Version": "2018",
    }
    raw_metadata = {
        "umm": {
            "TemporalExtent": temporal_extent,
            "CollectionReference": collection_reference,
        },
    }
    expected_summary = 'Description: Start date=2020-09-01T00:06:00Z;Processing level: 2'

    self.assertEqual(self.normalizer.get_summary(raw_metadata), expected_summary)

def test_summary_missing_attribute(self):
"""A MetadataNormalizationError must be raised if the raw
attribute is missing
Expand Down Expand Up @@ -189,10 +216,15 @@ def test_platform_missing_attribute(self):
"""The 'Unknown' platform must be returned if the raw
attribute is missing
"""
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_platform({})
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_platform({'umm': {'foo': 'bar'}})
unknown_platform = OrderedDict([
('Category', 'Unknown'),
('Series_Entity', 'Unknown'),
('Short_Name', 'Unknown'),
('Long_Name', 'Unknown')
])
self.assertDictEqual(self.normalizer.get_platform({}), unknown_platform)
self.assertDictEqual(self.normalizer.get_platform({'umm': {'foo': 'bar'}}),
unknown_platform)

def test_instrument(self):
"""Test getting the instrument"""
Expand Down
Loading