Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/development' into YDA-5721-add-t…
Browse files Browse the repository at this point in the history
…ests
  • Loading branch information
leonidastri committed Oct 10, 2024
2 parents b0f5577 + 4536c69 commit cdf3a7f
Show file tree
Hide file tree
Showing 12 changed files with 277 additions and 79 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/api-and-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- development
- release-1.9
- release-1.10
- "**-atr"
# We can force an integration/API test run without opening a PR by pushing to a branch name that ends with "-atr"
pull_request:
Expand Down Expand Up @@ -38,6 +39,8 @@ jobs:
run: |
if [ "${{ steps.extract_branch.outputs.branch }}" = "release-1.9" ]; then
echo "branch=release-1.9" >> $GITHUB_OUTPUT
elif [ "${{ steps.extract_branch.outputs.branch }}" = "release-1.10" ]; then
echo "branch=release-1.10" >> $GITHUB_OUTPUT
else
echo "branch=development" >> $GITHUB_OUTPUT
fi
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-push-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
branches:
- 'development'
- 'release-1.9'
- 'release-1.10'

jobs:
push-image:
Expand Down
53 changes: 20 additions & 33 deletions groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,13 @@ def getGroupsData(ctx):
attr = row[1]
value = row[2]

# Create/update group with this information.
try:
group = groups[name]
except Exception:
group = {
"name": name,
"managers": [],
"members": [],
"read": [],
"invited": []
}
groups[name] = group
group = groups.setdefault(name, {
"name": name,
"managers": [],
"members": [],
"read": [],
"invited": []
})

if attr in ["schema_id", "data_classification", "category", "subcategory"]:
group[attr] = value
Expand Down Expand Up @@ -95,26 +90,17 @@ def getGroupsData(ctx):
if name.startswith("read-"):
# Match read-* group with research-* or initial-* group.
name = name[5:]
try:
# Attempt to add to read list of research group.
group = groups["research-" + name]
group["read"].append(user)
except Exception:
try:
# Attempt to add to read list of initial group.
group = groups["initial-" + name]
for prefix in ("research-", "initial-"):
group = groups.get(prefix + name)
if group:
group["read"].append(user)
except Exception:
pass
break
elif not name.startswith("vault-"):
try:
# Ordinary group.
group = groups[name]
group = groups.get(name)
if group:
group["members"].append(user)
except KeyError:
pass

# Third query: obtain list of invited SRAM users
# Third query: obtain list of invited SRAM users.
if config.enable_sram:
iter = genquery.row_iterator(
"META_USER_ATTR_VALUE, USER_NAME, USER_ZONE",
Expand All @@ -124,11 +110,9 @@ def getGroupsData(ctx):
for row in iter:
name = row[0]
user = row[1] + "#" + row[2]
try:
group = groups[name]
group = groups.get(name)
if group:
group["invited"].append(user)
except KeyError:
pass

return groups.values()

Expand Down Expand Up @@ -553,7 +537,7 @@ def validate_data(ctx, data, allow_update):
for (category, subcategory, groupname, _managers, _members, _viewers, _schema_id, _expiration_date) in data:

if group.exists(ctx, groupname) and not allow_update:
errors.append('Group "{}" already exists'.format(groupname))
errors.append('Group "{}" already exists. It has not been updated.'.format(groupname))

# Is user admin or has category add privileges?
if not (is_admin or can_add_category):
Expand Down Expand Up @@ -988,6 +972,9 @@ def group_create(ctx, group_name, category, subcategory, schema_id, expiration_d
if not sram.sram_connect_service_collaboration(ctx, short_name):
return api.Error('sram_error', 'Something went wrong connecting service to group "{}" in SRAM'.format(group_name))

if group.exists(ctx, group_name):
return api.Error('group_exists', "Group {} not created, it already exists".format(group_name))

response = ctx.uuGroupAdd(group_name, category, subcategory, schema_id, expiration_date, description, data_classification, co_identifier, '', '')['arguments']
status = response[8]
message = response[9]
Expand Down
29 changes: 29 additions & 0 deletions revisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,10 @@ def rule_revision_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size

minimum_timestamp = int(time.time() - config.async_revision_delay_time)

# Remove revision creation AVUs from deleted data objects.
# This makes it easier to monitor the number of data objects waiting for revision creation.
remove_revision_creation_avu_from_deleted_data_objects(ctx, print_verbose)

# Get list of up to batch size limit of data objects (in research space) scheduled for revision, taking into account
# modification time.
log.write(ctx, "verbose = {}".format(verbose))
Expand Down Expand Up @@ -1054,3 +1058,28 @@ def memory_limit_exceeded(rss_limit):
"""
rss_limit = int(rss_limit)
return rss_limit and memory_rss_usage() > rss_limit


def remove_revision_creation_avu_from_deleted_data_objects(ctx, print_verbose):
    """Remove revision creation AVUs from deleted data objects.

    Data objects scheduled for revision carry an 'org_revision_scheduled' AVU.
    When such an object is moved to the trash before the revision job handles
    it, the AVU lingers; clearing it keeps the count of data objects waiting
    for revision creation accurate and easier to monitor.

    :param ctx:           Combined type of a callback and rei struct
    :param print_verbose: Whether to log verbose messages for troubleshooting (Boolean)
    """
    revision_avu_name = constants.UUORGMETADATAPREFIX + "revision_scheduled"

    # Find all trashed data objects that still carry the revision-scheduled AVU.
    trashed_objects = genquery.row_iterator(
        "COLL_NAME, DATA_NAME",
        "COLL_NAME like '%{}/trash/home/%' AND META_DATA_ATTR_NAME = '{}'".format(user.zone(ctx), revision_avu_name),
        genquery.AS_LIST, ctx
    )

    for coll_name, data_name in trashed_objects:
        path = coll_name + '/' + data_name
        try:
            # Remove with a wildcard value; exact-value removal via
            # rm_from_data causes problems here.
            avu.rmw_from_data(ctx, path, revision_avu_name, "%")
            if print_verbose:
                log.write(ctx, 'Removed revision creation AVUs from data object: {}'.format(path))
        except Exception as e:
            # Best effort: log and continue with the remaining objects.
            log.write(ctx, "Error processing data object {}: {}".format(path, str(e)))
89 changes: 52 additions & 37 deletions schema_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import re

from schema_transformations_utils import correctify_isni, correctify_orcid, correctify_researcher_id, correctify_scopus

import meta
from util import *

Expand Down Expand Up @@ -128,21 +130,44 @@ def _default2_default3(ctx, m):

person_identifiers = []
for person_identifier in creator.get('Person_Identifier', []):
# Check ORCID
if person_identifier.get('Name_Identifier_Scheme', None) == 'ORCID':
# Check for incorrect ORCID format.
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_orcid = correctify_orcid(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_orcid is None:
log.write(ctx, "Warning: could not correct ORCID %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_orcid != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_orcid
# Check Scopus
elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)':
# Check for incorrect Scopus format.
if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)):
corrected_scopus = correctify_scopus(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_scopus is None:
log.write(ctx, "Warning: could not correct Scopus %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_scopus != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_scopus
# Check ISNI
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ISNI':
# Check for incorrect ISNI format.
if not re.search("^(https://isni.org/isni/)[0-9]{15}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_isni = correctify_isni(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_isni is None:
log.write(ctx, "Warning: could not correct ISNI %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_isni != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_isni
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ResearcherID (Web of Science)':
# Check for incorrect ResearcherID format.
if not re.search("^(https://www.researcherid.com/rid/)[A-Z]-[0-9]{4}-[0-9]{4}$", person_identifier.get('Name_Identifier', None)):
corrected_researcher_id = correctify_researcher_id(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_researcher_id != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_researcher_id
elif 'Name_Identifier_Scheme' not in person_identifier:
Expand All @@ -164,21 +189,44 @@ def _default2_default3(ctx, m):

person_identifiers = []
for person_identifier in contributor.get('Person_Identifier', []):
# Check ORCID
if person_identifier.get('Name_Identifier_Scheme', None) == 'ORCID':
# Check for incorrect ORCID format.
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_orcid = correctify_orcid(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_orcid is None:
log.write(ctx, "Warning: could not correct ORCID %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_orcid != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_orcid
# Check Scopus
elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)':
# Check for incorrect Scopus format.
if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)):
corrected_scopus = correctify_scopus(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_scopus is None:
log.write(ctx, "Warning: could not correct Scopus %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_scopus != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_scopus
# Check ISNI
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ISNI':
# Check for incorrect ISNI format.
if not re.search("^(https://isni.org/isni/)[0-9]{15}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_isni = correctify_isni(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_isni is None:
log.write(ctx, "Warning: could not correct ISNI %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_isni != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_isni
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ResearcherID (Web of Science)':
# Check for incorrect ResearcherID format.
if not re.search("^(https://www.researcherid.com/rid/)[A-Z]-[0-9]{4}-[0-9]{4}$", person_identifier.get('Name_Identifier', None)):
corrected_researcher_id = correctify_researcher_id(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_researcher_id != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_researcher_id
elif 'Name_Identifier_Scheme' not in person_identifier:
Expand Down Expand Up @@ -702,36 +750,3 @@ def get(src_id, dst_id):

x = transformations.get(src_id)
return None if x is None else x.get(dst_id)


def correctify_orcid(org_orcid):
    """Correct ill-formatted ORCID.

    :param org_orcid: Original ORCID value, e.g. containing spaces or a
                      lower-case checksum character 'x'

    :returns: Corrected ORCID URL, or the original value unchanged when it
              cannot be corrected to a valid ORCID
    """
    # Get rid of all spaces.
    orcid = org_orcid.replace(' ', '')

    # Upper-case X. (Bug fix: this previously re-read org_orcid instead of
    # orcid, silently discarding the space removal above.)
    orcid = orcid.replace('x', 'X')

    # The last part should hold a valid id like eg: 1234-1234-1234-123X.
    # If not, it is impossible to correct it to the valid orcid format.
    orcs = orcid.split('/')
    if not re.search("^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", orcs[-1]):
        # Return original value.
        return org_orcid

    return "https://orcid.org/{}".format(orcs[-1])


def correctify_researcher_id(org_researcher_id):
    """Correct ill-formatted ResearcherID.

    :param org_researcher_id: Original ResearcherID, possibly containing spaces

    :returns: Canonical ResearcherID URL, or the original value unchanged
              when it cannot be corrected
    """
    # Spaces never belong in a ResearcherID; drop them all.
    candidate = org_researcher_id.replace(' ', '')

    # The identifier proper is the last path segment, e.g. A-1234-1234.
    identifier = candidate.split('/')[-1]
    if re.search("^[A-Z]-[0-9]{4}-[0-9]{4}$", identifier):
        return "https://www.researcherid.com/rid/{}".format(identifier)

    # Not correctable to the valid ResearcherID format:
    # hand the original value back unchanged.
    return org_researcher_id
67 changes: 67 additions & 0 deletions schema_transformations_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""JSON schema transformation utility functions."""

__copyright__ = 'Copyright (c) 2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

import re


def correctify_orcid(org_orcid):
    """Correct ill-formatted ORCID.

    :param org_orcid: Original ORCID value, possibly containing spaces or a
                      lower-case checksum character 'x'

    :returns: Canonical ORCID URL, or None when the value cannot be
              corrected to a valid ORCID
    """
    # Normalize: strip every space and upper-case a lower-case checksum 'x'.
    candidate = org_orcid.replace(' ', '').replace('x', 'X')

    # The identifier proper is the final path segment,
    # e.g. 1234-1234-1234-123X; anything else is not correctable.
    identifier = candidate.split('/')[-1]
    if re.search("^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", identifier) is None:
        return None

    return "https://orcid.org/{}".format(identifier)


def correctify_scopus(org_scopus):
    """Correct ill-formatted Scopus author identifier.

    :param org_scopus: Original Scopus value, possibly containing spaces

    :returns: Cleaned Scopus id (1 to 11 digits), or None when the value
              cannot be corrected
    """
    # Strip every space; a valid Scopus author id is purely numeric.
    candidate = org_scopus.replace(' ', '')

    # Anything other than 1-11 digits is not correctable.
    return candidate if re.search(r"^\d{1,11}$", candidate) else None


def correctify_isni(org_isni):
    """Correct ill-formatted ISNI.

    :param org_isni: Original ISNI value, possibly containing spaces or a
                     lower-case checksum character 'x'

    :returns: Canonical ISNI URL, or None when the value cannot be
              corrected to a valid ISNI
    """
    # Normalize: strip every space and upper-case a lower-case checksum 'x'.
    candidate = org_isni.replace(' ', '').replace('x', 'X')

    # The identifier proper is the final path segment,
    # e.g. 123412341234123X; anything else is not correctable.
    identifier = candidate.split('/')[-1]
    if re.search("^[0-9]{15}[0-9X]$", identifier) is None:
        return None

    return "https://isni.org/isni/{}".format(identifier)


def correctify_researcher_id(org_researcher_id):
    """Correct ill-formatted ResearcherID.

    Note: unlike the other correctify_* helpers in this module, this one
    returns the ORIGINAL value (not None) when correction is impossible,
    so callers comparing old and new values see it as unchanged.

    :param org_researcher_id: Original ResearcherID, possibly containing spaces

    :returns: Canonical ResearcherID URL, or the original value unchanged
              when it cannot be corrected
    """
    # Remove every space before validating.
    cleaned = org_researcher_id.replace(' ', '')

    # Only the last path segment matters; it must look like A-1234-1234.
    last_segment = cleaned.split('/')[-1]
    if re.search("^[A-Z]-[0-9]{4}-[0-9]{4}$", last_segment) is None:
        # Impossible to correct to the valid ResearcherID format.
        return org_researcher_id

    return "https://www.researcherid.com/rid/{}".format(last_segment)
2 changes: 1 addition & 1 deletion tests/features/api/api_deposit_open.feature
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Feature: Deposit API (open)
And deposit exists
And deposit is archived
And user viewer is authenticated
And as viewer the Yoda browse collections API is queried with <collection> # Workaround for https://github.com/pytest-dev/pytest-bdd/issues/689
And the Yoda browse collections API is queried with <collection>
Then the response status code is "200"
And the browse result contains deposit

Expand Down
2 changes: 1 addition & 1 deletion tests/features/api/api_deposit_restricted.feature
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Feature: Deposit API (restricted)
And deposit exists
And deposit is archived
And user viewer is authenticated
And as viewer the Yoda browse collections API is queried with <collection> # Workaround for https://github.com/pytest-dev/pytest-bdd/issues/689
And the Yoda browse collections API is queried with <collection>
Then the response status code is "200"
And the browse result does not contain deposit

Expand Down
2 changes: 1 addition & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requests==2.32.2
selenium==4.21.0
splinter==0.21.0
pytest-splinter==3.3.2
pytest_bdd==7.2.0
pytest_bdd==7.3.0
pytest==8.2.2
deepdiff==6.6.1
pyperclip==1.9.0
Loading

0 comments on commit cdf3a7f

Please sign in to comment.