Skip to content

Commit

Permalink
Refine system AVUs check and unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
claravox committed Oct 11, 2024
1 parent d453d43 commit 8567eb4
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 21 deletions.
2 changes: 1 addition & 1 deletion folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def precheck_folder_secure(ctx, coll):
found, last_run = get_last_run_time(ctx, coll)
if (not correct_copytovault_start_status(ctx, coll)
or not correct_copytovault_start_location(coll)
or not misc.last_run_time_acceptable(coll, found, last_run, config.vault_copy_backoff_time)):
or not misc.last_run_time_acceptable(found, last_run, config.vault_copy_backoff_time)):
return False

return True
Expand Down
100 changes: 97 additions & 3 deletions unit-tests/test_util_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from misc import check_data_package_system_avus, human_readable_size, last_run_time_acceptable, remove_empty_objects

# AVs of a successfully published data package that is the first version of the package
avs_success_data_package = {
"org_publication_accessRestriction": "Open - freely retrievable",
"org_publication_anonymousAccess": "yes",
Expand All @@ -35,6 +36,60 @@
"org_publication_versionDOI": "10.00012/UU01-ICGVFV",
"org_publication_versionDOIMinted": "yes",
}

avs_success_data_package_multiversion = {
"org_publication_accessRestriction": "Open - freely retrievable",
"org_publication_anonymousAccess": "yes",
"org_publication_approval_actor": "datamanager#tempZone",
"org_publication_baseDOI": "10.00012/UU01-X0GU3S",
"org_publication_baseDOIMinted": "yes",
"org_publication_baseRandomId": "X0GU3S",
"org_publication_combiJsonPath": "/tempZone/yoda/publication/YU0JDH-combi.json",
"org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/YU0JDH-dataCite.json",
"org_publication_dataCiteMetadataPosted": "yes",
"org_publication_landingPagePath": "/tempZone/yoda/publication/YU0JDH.html",
"org_publication_landingPageUploaded": "yes",
"org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/YU0JDH.html",
"org_publication_lastModifiedDateTime": "2024-10-11T08:49:17.000000",
"org_publication_license": "Custom",
"org_publication_oaiUploaded": "yes",
"org_publication_previous_version": "/tempZone/home/vault-initial1/new-group01[1728550839]",
"org_publication_publicationDate": "2024-10-11T08:50:01.812220",
"org_publication_randomId": "YU0JDH",
"org_publication_status": "OK",
"org_publication_submission_actor": "datamanager#tempZone",
"org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728629336]",
"org_publication_versionDOI": "10.00012/UU01-YU0JDH",
"org_publication_versionDOIMinted": "yes"
}

avs_success_data_package_multiversion_first = {
"org_publication_accessRestriction": "Open - freely retrievable",
"org_publication_anonymousAccess": "yes",
"org_publication_approval_actor": "datamanager#tempZone",
"org_publication_baseDOI": "10.00012/UU01-X0GU3S",
"org_publication_baseRandomId": "X0GU3S",
"org_publication_combiJsonPath": "/tempZone/yoda/publication/T8D8QU-combi.json",
"org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/T8D8QU-dataCite.json",
"org_publication_dataCiteMetadataPosted": "yes",
"org_publication_landingPagePath": "/tempZone/yoda/publication/T8D8QU.html",
"org_publication_landingPageUploaded": "yes",
"org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/T8D8QU.html",
"org_publication_lastModifiedDateTime": "2024-10-10T09:06:05.000000",
"org_publication_license": "Creative Commons Attribution 4.0 International Public License",
"org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
"org_publication_next_version": "/tempZone/home/vault-initial1/new-group01[1728545387]",
"org_publication_oaiUploaded": "yes",
"org_publication_publicationDate": "2024-10-10T09:06:02.177810",
"org_publication_randomId": "T8D8QU",
"org_publication_status": "OK",
"org_publication_submission_actor": "datamanager#tempZone",
"org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728543897]",
"org_publication_versionDOI": "10.00012/UU01-T8D8QU",
"org_publication_versionDOIMinted": "yes",
}

# From avu.py
Avu = namedtuple('Avu', list('avu'))
Avu.attr = Avu.a
Avu.value = Avu.v
Expand All @@ -44,6 +99,9 @@
class UtilMiscTest(TestCase):

def test_check_data_package_system_avus(self):
# TODO switch to dictionary?
# TODO should I be concerned when there are multiple versions of avus

# Success
avs = avs_success_data_package
avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
Expand All @@ -53,6 +111,24 @@ def test_check_data_package_system_avus(self):
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Missing license Uri for non-custom license
del avs['org_publication_licenseUri']
avus_missing_license_uri = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_missing_license_uri)
self.assertFalse(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 1)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Custom license, no license Uri (happy flow)
avs['org_publication_license'] = "Custom"
avus_custom_license = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_custom_license)
self.assertTrue(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Unexpected
avs['org_publication_userAddedSomethingWeird'] = "yodayoda:)"
avus_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
Expand Down Expand Up @@ -80,25 +156,43 @@ def test_check_data_package_system_avus(self):
self.assertTrue(len(result['missing_avus']) == 1)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Success, latest version of a publication
avs = avs_success_data_package_multiversion
avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_success)
self.assertTrue(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Success, first version of a publication that has had other versions
avs = avs_success_data_package_multiversion_first
avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_success)
self.assertTrue(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

def test_last_run_time_acceptable(self):
"""Test the last run time for copy to vault"""
# No last run time (job hasn't been tried before)
found = False
last_run = 1
self.assertEqual(last_run_time_acceptable("b", found, last_run, 300), True)
self.assertEqual(last_run_time_acceptable(found, last_run, 300), True)

# Last run time greater than the backoff, so can run
now = int(time.time())
found = True
copy_backoff_time = 300
last_run = now - copy_backoff_time - 1
self.assertEqual(last_run_time_acceptable("b", found, last_run, copy_backoff_time), True)
self.assertEqual(last_run_time_acceptable(found, last_run, copy_backoff_time), True)

# Last run time more recent than the backoff, so should not run
found = True
copy_backoff_time = 300
last_run = now
self.assertEqual(last_run_time_acceptable("b", found, int(time.time()), copy_backoff_time), False)
self.assertEqual(last_run_time_acceptable(found, int(time.time()), copy_backoff_time), False)

def test_human_readable_size(self):
output = human_readable_size(0)
Expand Down
55 changes: 38 additions & 17 deletions util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,62 @@ def check_data_package_system_avus(extracted_avus):
This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from
a successfully published data package.
:param extracted_avus: AVUs of the data package
:param extracted_avus: AVUs of the data package in AVU form
:returns: Dictionary of the results of the check
"""
# Filter those starting with 'org_'
extracted_avus = {m.attr for m in extracted_avus if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_')}
# Filter those starting with 'org_publication'
extracted_avs = {}
for m in extracted_avus:
if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_'):
extracted_avs[m.attr] = m.value
extracted_attrs = set(extracted_avs.keys())

# Define the set of ground truth AVUs
avu_names_suffix = [
avu_names_suffix = {
'publication_approval_actor', 'publication_randomId',
'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license',
'publication_anonymousAccess', 'publication_versionDOIMinted',
'publication_accessRestriction', 'publication_landingPagePath',
'publication_licenseUri', 'publication_publicationDate',
'publication_publicationDate',
'publication_vaultPackage', 'publication_submission_actor', 'publication_status',
'publication_lastModifiedDateTime', 'publication_combiJsonPath',
'publication_landingPageUploaded', 'publication_oaiUploaded',
'publication_landingPageUrl', 'publication_dataCiteMetadataPosted'
]
}

# If the license is not Custom, it must have a licenseUri
if constants.UUORGMETADATAPREFIX + 'publication_license' in extracted_attrs:
if extracted_avs[constants.UUORGMETADATAPREFIX + 'publication_license'] != "Custom":
avu_names_suffix.add('publication_licenseUri')

# Define set of AVUs with more than one version of publication
avu_names_base_suffix = [
# Define additional set of AVUs expected when the data package has a previous version
avu_names_version_suffix = {
'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId',
'publication_baseDOIMinted'
]
}

if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_avus:
combined_avu_names_suffix = avu_names_base_suffix + avu_names_suffix
ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
else:
ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in avu_names_suffix}
# Define additional set of AVUs expected for the first version of a publication, when there are multiple versions
avu_names_first_version_suffix = {
'publication_baseRandomId', 'publication_baseDOI', 'publication_next_version'
}

# For a version that has both a previous and a next version, next_version is expected in addition to avu_names_version_suffix
avu_names_previous_version_suffix = {'publication_next_version'}

combined_avu_names_suffix = avu_names_suffix

if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_attrs:
combined_avu_names_suffix.update(avu_names_version_suffix)
if constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs:
combined_avu_names_suffix.update(avu_names_previous_version_suffix)
elif constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs:
combined_avu_names_suffix.update(avu_names_first_version_suffix)

ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
# Find missing and unexpected AVUs
missing_avus = ground_truth_avus - extracted_avus
unexpected_avus = extracted_avus - ground_truth_avus
missing_avus = ground_truth_avus - extracted_attrs
unexpected_avus = extracted_attrs - ground_truth_avus

results = {
'no_missing_avus': not bool(missing_avus),
Expand All @@ -63,7 +84,7 @@ def check_data_package_system_avus(extracted_avus):
return results


def last_run_time_acceptable(coll, found, last_run, config_backoff_time):
def last_run_time_acceptable(found, last_run, config_backoff_time):
"""Return whether the last run time is acceptable to continue with task."""
now = int(time.time())

Expand Down

0 comments on commit 8567eb4

Please sign in to comment.