-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #143 from nansencenter/issue142_tabledap_normalizer
TableDAP normalizer
- Loading branch information
Showing
3 changed files
with
272 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
"""Normalizer for ERDDAP's tabledap data""" | ||
import dateutil.parser | ||
from collections import OrderedDict | ||
|
||
from shapely.geometry import LineString | ||
|
||
import metanorm.utils as utils | ||
from .base import GeoSPaaSMetadataNormalizer | ||
from ...errors import MetadataNormalizationError | ||
|
||
|
||
class TableDAPMetadataNormalizer(GeoSPaaSMetadataNormalizer):
    """Generate the properties of a GeoSPaaS Dataset using tabledap
    attributes.

    The methods of this class read the following keys of the raw
    metadata dictionary: 'url', 'entry_id', 'temporal_coverage',
    'trajectory' and 'product_metadata'. The latter is expected to
    contain the product's attributes under ['table']['rows'].
    """

    @staticmethod
    def get_product_attribute(product_metadata, attribute):
        """Extract the value of an attribute from tabledap product
        metadata.

        Each row of product_metadata['table']['rows'] is indexed
        positionally: the attribute name is read at index 2 and its
        value at index 4. The value of the first matching row is
        returned.

        Raises a MetadataNormalizationError if no row matches.
        """
        for row in product_metadata['table']['rows']:
            if row[2] == attribute:
                return row[4]
        raise MetadataNormalizationError(f'Could not find product attribute {attribute}')

    @staticmethod
    def _parse_temporal_bound(raw_metadata, index):
        """Parse one of the bounds of raw_metadata['temporal_coverage']
        (index 0 is the start, index 1 is the end).

        A missing 'temporal_coverage' key is deliberately not handled
        here: the KeyError propagates to the calling method.
        Any other problem (not enough bounds, bound which is not a
        string or which cannot be parsed as a date) is reported as a
        MetadataNormalizationError instead of leaking a low-level
        exception.
        """
        try:
            return dateutil.parser.parse(raw_metadata['temporal_coverage'][index])
        except (IndexError, TypeError, ValueError, OverflowError) as error:
            raise MetadataNormalizationError(
                f'Invalid temporal coverage bound at index {index}') from error

    @utils.raises(KeyError)
    def check(self, raw_metadata):
        """Return True if the 'url' of the raw metadata contains
        'tabledap', False otherwise (including when there is no URL).
        """
        # `or ''` also covers an explicit None value, for which the
        # `in` operator would raise a TypeError
        url = raw_metadata.get('url') or ''
        return 'tabledap' in url

    @utils.raises(KeyError)
    def get_entry_title(self, raw_metadata):
        """Return the 'title' attribute of the product"""
        return self.get_product_attribute(raw_metadata['product_metadata'], 'title')

    @utils.raises(KeyError)
    def get_entry_id(self, raw_metadata):
        """Return the 'entry_id' found in the raw metadata"""
        return raw_metadata['entry_id']

    @utils.raises(KeyError)
    def get_summary(self, raw_metadata):
        """Return the 'summary' attribute of the product"""
        return self.get_product_attribute(raw_metadata['product_metadata'], 'summary')

    @utils.raises(KeyError)
    def get_time_coverage_start(self, raw_metadata):
        """Return the first element of 'temporal_coverage' parsed as
        a datetime
        """
        return self._parse_temporal_bound(raw_metadata, 0)

    @utils.raises(KeyError)
    def get_time_coverage_end(self, raw_metadata):
        """Return the second element of 'temporal_coverage' parsed as
        a datetime
        """
        return self._parse_temporal_bound(raw_metadata, 1)

    @utils.raises(KeyError)
    def get_platform(self, raw_metadata):
        """Return the GCMD platform matching the 'source' attribute
        of the product
        """
        source = self.get_product_attribute(raw_metadata['product_metadata'], 'source')
        platform = utils.get_gcmd_platform(source)
        # backwards compatibility with older GCMD versions
        if platform['Short_Name'] == utils.UNKNOWN and source == 'Argo float':
            return OrderedDict([
                ('Basis', 'Water-based Platforms'),
                ('Category', 'Buoys'),
                ('Sub_Category', 'Unmoored'),
                ('Short_Name', 'Argo-Float'),
                ('Long_Name', '')])
        return platform

    def get_instrument(self, raw_metadata):
        """Return the result of the GCMD instrument search for
        'Unknown': the raw metadata is not used to determine the
        instrument
        """
        return utils.get_gcmd_instrument('Unknown')

    @utils.raises(KeyError)
    def get_location_geometry(self, raw_metadata):
        """Return the 'trajectory' found in the raw metadata, as is"""
        return raw_metadata['trajectory']

    @utils.raises(KeyError)
    def get_provider(self, raw_metadata):
        """Returns a GCMD-like provider data structure"""
        institution = self.get_product_attribute(raw_metadata['product_metadata'], 'institution')
        provider = utils.get_gcmd_provider([institution])
        if provider:
            return provider
        # no GCMD match: build a provider from the institution name,
        # truncated to 100 (short name) and 250 (long name) characters
        return OrderedDict([
            ('Bucket_Level0', 'CONSORTIA/INSTITUTIONS'),
            ('Bucket_Level1', ''),
            ('Bucket_Level2', ''),
            ('Bucket_Level3', ''),
            ('Short_Name', institution[:100]),
            ('Long_Name', institution[:250]),
            ('Data_Center_URL', '')])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
"""Tests for the tabledap normalizer""" | ||
|
||
import unittest | ||
import unittest.mock as mock | ||
from collections import OrderedDict | ||
from datetime import datetime, timezone | ||
|
||
import metanorm.normalizers as normalizers | ||
import metanorm.utils as utils | ||
from metanorm.errors import MetadataNormalizationError | ||
|
||
|
||
class TableDAPMetadataNormalizerTests(unittest.TestCase):
    """Tests for TableDAPMetadataNormalizer"""

    def setUp(self):
        self.normalizer = normalizers.TableDAPMetadataNormalizer()
        # metadata without any usable attribute, used to test error cases
        self.empty_raw_metadata = {'product_metadata': {'table': {'rows': []}}}
        self.raw_metadata = {
            'entry_id': '123456',
            'url': 'http://foo/tabledap/bar.json',
            'temporal_coverage': ('2023-01-01T00:00:00Z', '2023-01-01T12:47:13Z'),
            'trajectory': 'LINESTRING (1 2, 3 4)',
            'product_metadata': {
                'table': {
                    'columnNames': [
                        "Row Type", "Variable Name", "Attribute Name", "Data Type", "Value"],
                    'rows': [
                        ["attribute", "NC_GLOBAL", "cdm_altitude_proxy", "String", "pres"],
                        ["attribute", "NC_GLOBAL", "cdm_data_type", "String", "TrajectoryProfile"],
                        ["attribute", "NC_GLOBAL", "time_coverage_end", "String",
                         "2026-12-27T14:48:20Z"],
                        ["attribute", "NC_GLOBAL", "time_coverage_start", "String",
                         "1997-07-28T20:26:20Z"],
                        ["attribute", "NC_GLOBAL", "title", "String", "Argo Float Measurements"],
                        ["attribute", "NC_GLOBAL", "summary", "String",
                         "Argo float vertical profiles from Coriolis Global Data Assembly Centres"],
                        ["attribute", "NC_GLOBAL", "source", "String", "Argo float"],
                        ["attribute", "NC_GLOBAL", "institution", "String", "Argo"],
                    ]
                }
            }
        }

    def test_get_product_attribute(self):
        """Test getting the value of an attribute from a tabledap
        product's metadata
        """
        self.assertEqual(
            normalizers.TableDAPMetadataNormalizer.get_product_attribute(
                self.raw_metadata['product_metadata'], 'cdm_data_type'),
            'TrajectoryProfile')
        with self.assertRaises(MetadataNormalizationError):
            normalizers.TableDAPMetadataNormalizer.get_product_attribute(
                self.raw_metadata['product_metadata'], 'foo')

    def test_check(self):
        """Test the checking condition"""
        self.assertTrue(self.normalizer.check(self.raw_metadata))

        self.assertFalse(self.normalizer.check({}))
        self.assertFalse(self.normalizer.check({'url': ''}))
        self.assertFalse(self.normalizer.check({'url': '/foo/bar/baz.nc'}))

    def test_get_entry_title(self):
        """Test getting the title"""
        self.assertEqual(self.normalizer.get_entry_title(self.raw_metadata),
                         'Argo Float Measurements')

    def test_missing_title(self):
        """A MetadataNormalizationError should be raised if the raw title
        is missing
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_entry_title(self.empty_raw_metadata)

    def test_get_entry_id(self):
        """Test getting the ID"""
        self.assertEqual(self.normalizer.get_entry_id(self.raw_metadata), '123456')

    def test_entry_id_error(self):
        """A MetadataNormalizationError should be raised if ID is not found
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_entry_id(self.empty_raw_metadata)

    def test_summary(self):
        """Test getting the summary"""
        self.assertEqual(
            self.normalizer.get_summary(self.raw_metadata),
            'Argo float vertical profiles from Coriolis Global Data Assembly Centres')

    def test_missing_summary(self):
        """A MetadataNormalizationError should be raised if the raw
        summary is missing
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_summary(self.empty_raw_metadata)

    def test_get_time_coverage_start(self):
        """Test getting the start of the time coverage"""
        self.assertEqual(
            self.normalizer.get_time_coverage_start(self.raw_metadata),
            datetime(year=2023, month=1, day=1, tzinfo=timezone.utc))

    def test_missing_time_coverage_start(self):
        """A MetadataNormalizationError must be raised when the
        time_coverage_start raw attribute is missing
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_time_coverage_start(self.empty_raw_metadata)

    def test_get_time_coverage_end(self):
        """Test getting the end of the time coverage"""
        self.assertEqual(
            self.normalizer.get_time_coverage_end(self.raw_metadata),
            datetime(year=2023, month=1, day=1, hour=12, minute=47, second=13, tzinfo=timezone.utc))

    def test_missing_time_coverage_end(self):
        """A MetadataNormalizationError must be raised when the
        time_coverage_end raw attribute is missing
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_time_coverage_end(self.empty_raw_metadata)

    def test_gcmd_platform(self):
        """Test getting the platform"""
        with mock.patch('metanorm.utils.get_gcmd_platform') as mock_get_gcmd_method:
            self.assertEqual(
                self.normalizer.get_platform(self.raw_metadata),
                mock_get_gcmd_method.return_value)
        mock_get_gcmd_method.assert_called_with('Argo float')

    def test_gcmd_platform_unknow(self):
        """Test getting the platform with GCMD versions that don't
        support ARGO floats
        """
        with mock.patch('metanorm.utils.get_gcmd_platform') as mock_get_gcmd_method:
            mock_get_gcmd_method.return_value = {'Short_Name': utils.UNKNOWN}
            self.assertEqual(
                self.normalizer.get_platform(self.raw_metadata),
                OrderedDict([
                    ('Basis', 'Water-based Platforms'),
                    ('Category', 'Buoys'),
                    ('Sub_Category', 'Unmoored'),
                    ('Short_Name', 'Argo-Float'),
                    ('Long_Name', '')]))

    def test_gcmd_instrument(self):
        """Test getting the instrument"""
        self.assertEqual(
            self.normalizer.get_instrument(self.raw_metadata),
            OrderedDict([
                ('Category', utils.UNKNOWN),
                ('Class', utils.UNKNOWN),
                ('Type', utils.UNKNOWN),
                ('Subtype', utils.UNKNOWN),
                ('Short_Name', 'Unknown'),
                ('Long_Name', 'Unknown')]))

    def test_gcmd_provider(self):
        """Test getting the provider"""
        with mock.patch('metanorm.utils.get_gcmd_provider') as mock_get_gcmd_method:
            self.assertEqual(
                self.normalizer.get_provider(self.raw_metadata),
                mock_get_gcmd_method.return_value)
        # the institution must be passed to the GCMD search as a
        # one-element list
        mock_get_gcmd_method.assert_called_with(['Argo'])

    def test_gcmd_provider_unknow(self):
        """Test getting the provider when the GCMD search does not
        return anything for the institution: a provider must be built
        from the institution name
        """
        with mock.patch('metanorm.utils.get_gcmd_provider', return_value=None):
            self.assertEqual(
                self.normalizer.get_provider(self.raw_metadata),
                OrderedDict([
                    ('Bucket_Level0', 'CONSORTIA/INSTITUTIONS'),
                    ('Bucket_Level1', ''),
                    ('Bucket_Level2', ''),
                    ('Bucket_Level3', ''),
                    ('Short_Name', 'Argo'),
                    ('Long_Name', 'Argo'),
                    ('Data_Center_URL', '')]))

    def test_get_location_geometry(self):
        """get_location_geometry() should return the location
        of the dataset
        """
        self.assertEqual(
            self.normalizer.get_location_geometry(self.raw_metadata),
            'LINESTRING (1 2, 3 4)')

    def test_missing_location_geometry(self):
        """A MetadataNormalizationError should be raised if the
        trajectory is missing from the raw metadata
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_location_geometry(self.empty_raw_metadata)