Skip to content

Commit

Permalink
Merge pull request #143 from nansencenter/issue142_tabledap_normalizer
Browse files Browse the repository at this point in the history
TableDAP normalizer
  • Loading branch information
aperrin66 authored Mar 7, 2024
2 parents e541dd4 + dc7776a commit aa68a1d
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 1 deletion.
87 changes: 87 additions & 0 deletions metanorm/normalizers/geospaas/tabledap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Normalizer for ERDDAP's tabledap data"""
import dateutil.parser
from collections import OrderedDict

from shapely.geometry import LineString

import metanorm.utils as utils
from .base import GeoSPaaSMetadataNormalizer
from ...errors import MetadataNormalizationError


class TableDAPMetadataNormalizer(GeoSPaaSMetadataNormalizer):
    """Normalizer which builds the properties of a GeoSPaaS Dataset
    from the metadata of an ERDDAP tabledap dataset.

    Most getters are decorated with ``utils.raises(KeyError)``;
    presumably this reports a missing key in the raw metadata as a
    MetadataNormalizationError (verify against ``metanorm.utils``).
    """

    @staticmethod
    def get_product_attribute(product_metadata, attribute):
        """Return the value of a tabledap product attribute.

        The rows found in ``product_metadata['table']['rows']`` are
        scanned in order. For the first row whose element at index 2
        equals `attribute`, the element at index 4 is returned.
        A MetadataNormalizationError is raised when no row matches.
        """
        # a private sentinel lets a legitimately falsy or None value
        # be told apart from "no matching row"
        not_found = object()
        value = next(
            (row[4] for row in product_metadata['table']['rows'] if row[2] == attribute),
            not_found)
        if value is not_found:
            raise MetadataNormalizationError(f'Could not find product attribute {attribute}')
        return value

    @utils.raises(KeyError)
    def check(self, raw_metadata):
        """Tell whether the 'url' entry of the raw metadata contains
        'tabledap'. A missing URL is treated as an empty string.
        """
        url = raw_metadata.get('url', '')
        return 'tabledap' in url

    @utils.raises(KeyError)
    def get_entry_title(self, raw_metadata):
        """Return the 'title' attribute of the product metadata"""
        product_metadata = raw_metadata['product_metadata']
        return self.get_product_attribute(product_metadata, 'title')

    @utils.raises(KeyError)
    def get_entry_id(self, raw_metadata):
        """Return the 'entry_id' entry of the raw metadata"""
        return raw_metadata['entry_id']

    @utils.raises(KeyError)
    def get_summary(self, raw_metadata):
        """Return the 'summary' attribute of the product metadata"""
        product_metadata = raw_metadata['product_metadata']
        return self.get_product_attribute(product_metadata, 'summary')

    @utils.raises(KeyError)
    def get_time_coverage_start(self, raw_metadata):
        """Parse the first element of 'temporal_coverage' as a date"""
        raw_start = raw_metadata['temporal_coverage'][0]
        return dateutil.parser.parse(raw_start)

    @utils.raises(KeyError)
    def get_time_coverage_end(self, raw_metadata):
        """Parse the second element of 'temporal_coverage' as a date"""
        raw_end = raw_metadata['temporal_coverage'][1]
        return dateutil.parser.parse(raw_end)

    @utils.raises(KeyError)
    def get_platform(self, raw_metadata):
        """Return the GCMD platform matching the 'source' attribute of
        the product metadata.

        If the GCMD lookup gives an unknown platform for the
        'Argo float' source, a hard-coded Argo float platform is
        returned instead.
        """
        source = self.get_product_attribute(raw_metadata['product_metadata'], 'source')
        gcmd_platform = utils.get_gcmd_platform(source)
        is_unknown = gcmd_platform['Short_Name'] == utils.UNKNOWN
        if not (is_unknown and source == 'Argo float'):
            return gcmd_platform
        # backwards compatibility with older GCMD versions
        return OrderedDict((
            ('Basis', 'Water-based Platforms'),
            ('Category', 'Buoys'),
            ('Sub_Category', 'Unmoored'),
            ('Short_Name', 'Argo-Float'),
            ('Long_Name', ''),
        ))

    def get_instrument(self, raw_metadata):
        """Return the result of the GCMD instrument lookup for
        'Unknown'; the raw metadata is not used.
        """
        return utils.get_gcmd_instrument('Unknown')

    @utils.raises(KeyError)
    def get_location_geometry(self, raw_metadata):
        """Return the 'trajectory' entry of the raw metadata, as is"""
        return raw_metadata['trajectory']

    @utils.raises(KeyError)
    def get_provider(self, raw_metadata):
        """Returns a GCMD-like provider data structure

        The 'institution' attribute of the product metadata is looked
        up as a GCMD provider. If the lookup result is falsy, a
        provider structure is built from the institution name,
        truncated to 100 characters for the short name and 250 for
        the long name.
        """
        institution = self.get_product_attribute(raw_metadata['product_metadata'], 'institution')
        gcmd_provider = utils.get_gcmd_provider([institution])
        if gcmd_provider:
            return gcmd_provider
        short_name = institution[:100]
        long_name = institution[:250]
        return OrderedDict((
            ('Bucket_Level0', 'CONSORTIA/INSTITUTIONS'),
            ('Bucket_Level1', ''),
            ('Bucket_Level2', ''),
            ('Bucket_Level3', ''),
            ('Short_Name', short_name),
            ('Long_Name', long_name),
            ('Data_Center_URL', ''),
        ))
3 changes: 2 additions & 1 deletion metanorm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,13 @@ def export_subclasses(package__all__, package_name, package_dir, base_class):
'Sentinel-2B': ('S2B',),
'Sentinel-3A': ('S3A',),
'Sentinel-3B': ('S3B',),
'argo-float': ('Argo float',),
# providers
'ESA/EO': ('ESA',),
'OB.DAAC': ('OB_DAAC',),
'C-SAR': ('SAR-C', 'SAR-C SAR'),
'EUMETSAT/OSISAF': ('EUMETSAT OSI SAF',),
'NSIDC': ('NSIDC_ECS',)
'NSIDC': ('NSIDC_ECS',),
}

def translate_pythesint_keyword(translation_dict, alias):
Expand Down
183 changes: 183 additions & 0 deletions tests/normalizers/test_tabledap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Tests for the tabledap normalizer"""

import unittest
import unittest.mock as mock
from collections import OrderedDict
from datetime import datetime, timezone

import metanorm.normalizers as normalizers
import metanorm.utils as utils
from metanorm.errors import MetadataNormalizationError


class TableDAPMetadataNormalizerTests(unittest.TestCase):
    """Unit tests for the TableDAPMetadataNormalizer class"""

    def setUp(self):
        """Create a normalizer, a complete raw metadata sample and a
        sample whose product metadata has no rows
        """
        # (attribute name, value) pairs used to build the rows of the
        # product metadata table
        global_attributes = (
            ("cdm_altitude_proxy", "pres"),
            ("cdm_data_type", "TrajectoryProfile"),
            ("time_coverage_end", "2026-12-27T14:48:20Z"),
            ("time_coverage_start", "1997-07-28T20:26:20Z"),
            ("title", "Argo Float Measurements"),
            ("summary",
             "Argo float vertical profiles from Coriolis Global Data Assembly Centres"),
            ("source", "Argo float"),
            ("institution", "Argo"),
        )
        rows = [
            ["attribute", "NC_GLOBAL", name, "String", value]
            for name, value in global_attributes
        ]
        column_names = ["Row Type", "Variable Name", "Attribute Name", "Data Type", "Value"]

        self.raw_metadata = {
            'entry_id': '123456',
            'url': 'http://foo/tabledap/bar.json',
            'temporal_coverage': ('2023-01-01T00:00:00Z', '2023-01-01T12:47:13Z'),
            'trajectory': 'LINESTRING (1 2, 3 4)',
            'product_metadata': {'table': {'columnNames': column_names, 'rows': rows}},
        }
        self.empty_raw_metadata = {'product_metadata': {'table': {'rows': []}}}
        self.normalizer = normalizers.TableDAPMetadataNormalizer()

    def test_get_product_attribute(self):
        """get_product_attribute() should return the value of an
        existing attribute and raise a MetadataNormalizationError for
        an attribute which is not in the product metadata
        """
        lookup = normalizers.TableDAPMetadataNormalizer.get_product_attribute
        product_metadata = self.raw_metadata['product_metadata']

        self.assertEqual(lookup(product_metadata, 'cdm_data_type'), 'TrajectoryProfile')
        with self.assertRaises(MetadataNormalizationError):
            lookup(product_metadata, 'foo')

    def test_check(self):
        """check() should only accept metadata whose URL contains
        'tabledap'
        """
        self.assertTrue(self.normalizer.check(self.raw_metadata))

        for rejected_metadata in ({}, {'url': ''}, {'url': '/foo/bar/baz.nc'}):
            self.assertFalse(self.normalizer.check(rejected_metadata))

    def test_get_entry_title(self):
        """The title should come from the product metadata"""
        title = self.normalizer.get_entry_title(self.raw_metadata)
        self.assertEqual(title, 'Argo Float Measurements')

    def test_missing_title(self):
        """A MetadataNormalizationError should be raised when the
        product metadata has no title
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_entry_title(self.empty_raw_metadata)

    def test_get_entry_id(self):
        """The entry ID should be read from the raw metadata"""
        entry_id = self.normalizer.get_entry_id(self.raw_metadata)
        self.assertEqual(entry_id, '123456')

    def test_entry_id_error(self):
        """A MetadataNormalizationError should be raised when the raw
        metadata has no entry ID
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_entry_id(self.empty_raw_metadata)

    def test_summary(self):
        """The summary should come from the product metadata"""
        summary = self.normalizer.get_summary(self.raw_metadata)
        self.assertEqual(
            summary,
            'Argo float vertical profiles from Coriolis Global Data Assembly Centres')

    def test_get_time_coverage_start(self):
        """The start of the time coverage should be parsed from the
        first element of the raw temporal coverage
        """
        start = self.normalizer.get_time_coverage_start(self.raw_metadata)
        self.assertEqual(start, datetime(2023, 1, 1, tzinfo=timezone.utc))

    def test_missing_time_coverage_start(self):
        """A MetadataNormalizationError must be raised when the raw
        metadata has no temporal coverage (start)
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_time_coverage_start(self.empty_raw_metadata)

    def test_get_time_coverage_end(self):
        """The end of the time coverage should be parsed from the
        second element of the raw temporal coverage
        """
        end = self.normalizer.get_time_coverage_end(self.raw_metadata)
        self.assertEqual(end, datetime(2023, 1, 1, 12, 47, 13, tzinfo=timezone.utc))

    def test_missing_time_coverage_end(self):
        """A MetadataNormalizationError must be raised when the raw
        metadata has no temporal coverage (end)
        """
        with self.assertRaises(MetadataNormalizationError):
            self.normalizer.get_time_coverage_end(self.empty_raw_metadata)

    def test_gcmd_platform(self):
        """The platform should be the result of the GCMD lookup of
        the product's source
        """
        with mock.patch('metanorm.utils.get_gcmd_platform') as patched_lookup:
            platform = self.normalizer.get_platform(self.raw_metadata)
            self.assertEqual(platform, patched_lookup.return_value)
            patched_lookup.assert_called_with('Argo float')

    def test_gcmd_platform_unknow(self):
        """A hard-coded Argo float platform should be returned when
        the GCMD lookup gives an unknown platform (GCMD versions
        which don't support Argo floats)
        """
        expected_platform = OrderedDict((
            ('Basis', 'Water-based Platforms'),
            ('Category', 'Buoys'),
            ('Sub_Category', 'Unmoored'),
            ('Short_Name', 'Argo-Float'),
            ('Long_Name', ''),
        ))
        with mock.patch('metanorm.utils.get_gcmd_platform',
                        return_value={'Short_Name': utils.UNKNOWN}):
            platform = self.normalizer.get_platform(self.raw_metadata)
            self.assertEqual(platform, expected_platform)

    def test_gcmd_instrument(self):
        """The instrument should be the unknown instrument"""
        expected_instrument = OrderedDict((
            ('Category', utils.UNKNOWN),
            ('Class', utils.UNKNOWN),
            ('Type', utils.UNKNOWN),
            ('Subtype', utils.UNKNOWN),
            ('Short_Name', 'Unknown'),
            ('Long_Name', 'Unknown'),
        ))
        instrument = self.normalizer.get_instrument(self.raw_metadata)
        self.assertEqual(instrument, expected_instrument)

    def test_gcmd_provider(self):
        """The provider should be the result of the GCMD lookup when
        it finds something
        """
        with mock.patch('metanorm.utils.get_gcmd_provider') as patched_lookup:
            provider = self.normalizer.get_provider(self.raw_metadata)
            self.assertEqual(provider, patched_lookup.return_value)

    def test_gcmd_provider_unknow(self):
        """A provider built from the institution name should be
        returned when the GCMD lookup finds no provider
        """
        expected_provider = OrderedDict((
            ('Bucket_Level0', 'CONSORTIA/INSTITUTIONS'),
            ('Bucket_Level1', ''),
            ('Bucket_Level2', ''),
            ('Bucket_Level3', ''),
            ('Short_Name', 'Argo'),
            ('Long_Name', 'Argo'),
            ('Data_Center_URL', ''),
        ))
        with mock.patch('metanorm.utils.get_gcmd_provider', return_value=None):
            provider = self.normalizer.get_provider(self.raw_metadata)
            self.assertEqual(provider, expected_provider)

    def test_get_location_geometry(self):
        """get_location_geometry() should return the trajectory
        found in the raw metadata
        """
        geometry = self.normalizer.get_location_geometry(self.raw_metadata)
        self.assertEqual(geometry, 'LINESTRING (1 2, 3 4)')

0 comments on commit aa68a1d

Please sign in to comment.