Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/development' into YDA-5721-add-t…
Browse files Browse the repository at this point in the history
…ests
  • Loading branch information
leonidastri committed Oct 10, 2024
2 parents b0f5577 + 4536c69 commit cdf3a7f
Show file tree
Hide file tree
Showing 12 changed files with 277 additions and 79 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/api-and-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- development
- release-1.9
- release-1.10
- "**-atr"
# We can force an integration/API test run without opening a PR by pushing to a branch name that ends with "-atr"
pull_request:
Expand Down Expand Up @@ -38,6 +39,8 @@ jobs:
run: |
if [ "${{ steps.extract_branch.outputs.branch }}" = "release-1.9" ]; then
echo "branch=release-1.9" >> $GITHUB_OUTPUT
elif [ "${{ steps.extract_branch.outputs.branch }}" = "release-1.10" ]; then
echo "branch=release-1.10" >> $GITHUB_OUTPUT
else
echo "branch=development" >> $GITHUB_OUTPUT
fi
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-push-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
branches:
- 'development'
- 'release-1.9'
- 'release-1.10'

jobs:
push-image:
Expand Down
53 changes: 20 additions & 33 deletions groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,13 @@ def getGroupsData(ctx):
attr = row[1]
value = row[2]

# Create/update group with this information.
try:
group = groups[name]
except Exception:
group = {
"name": name,
"managers": [],
"members": [],
"read": [],
"invited": []
}
groups[name] = group
group = groups.setdefault(name, {
"name": name,
"managers": [],
"members": [],
"read": [],
"invited": []
})

if attr in ["schema_id", "data_classification", "category", "subcategory"]:
group[attr] = value
Expand Down Expand Up @@ -95,26 +90,17 @@ def getGroupsData(ctx):
if name.startswith("read-"):
# Match read-* group with research-* or initial-* group.
name = name[5:]
try:
# Attempt to add to read list of research group.
group = groups["research-" + name]
group["read"].append(user)
except Exception:
try:
# Attempt to add to read list of initial group.
group = groups["initial-" + name]
for prefix in ("research-", "initial-"):
group = groups.get(prefix + name)
if group:
group["read"].append(user)
except Exception:
pass
break
elif not name.startswith("vault-"):
try:
# Ordinary group.
group = groups[name]
group = groups.get(name)
if group:
group["members"].append(user)
except KeyError:
pass

# Third query: obtain list of invited SRAM users
# Third query: obtain list of invited SRAM users.
if config.enable_sram:
iter = genquery.row_iterator(
"META_USER_ATTR_VALUE, USER_NAME, USER_ZONE",
Expand All @@ -124,11 +110,9 @@ def getGroupsData(ctx):
for row in iter:
name = row[0]
user = row[1] + "#" + row[2]
try:
group = groups[name]
group = groups.get(name)
if group:
group["invited"].append(user)
except KeyError:
pass

return groups.values()

Expand Down Expand Up @@ -553,7 +537,7 @@ def validate_data(ctx, data, allow_update):
for (category, subcategory, groupname, _managers, _members, _viewers, _schema_id, _expiration_date) in data:

if group.exists(ctx, groupname) and not allow_update:
errors.append('Group "{}" already exists'.format(groupname))
errors.append('Group "{}" already exists. It has not been updated.'.format(groupname))

# Is user admin or has category add privileges?
if not (is_admin or can_add_category):
Expand Down Expand Up @@ -988,6 +972,9 @@ def group_create(ctx, group_name, category, subcategory, schema_id, expiration_d
if not sram.sram_connect_service_collaboration(ctx, short_name):
return api.Error('sram_error', 'Something went wrong connecting service to group "{}" in SRAM'.format(group_name))

if group.exists(ctx, group_name):
return api.Error('group_exists', "Group {} not created, it already exists".format(group_name))

response = ctx.uuGroupAdd(group_name, category, subcategory, schema_id, expiration_date, description, data_classification, co_identifier, '', '')['arguments']
status = response[8]
message = response[9]
Expand Down
29 changes: 29 additions & 0 deletions revisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,10 @@ def rule_revision_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size

minimum_timestamp = int(time.time() - config.async_revision_delay_time)

# Remove revision creation AVUs from deleted data objects.
# This makes it easier to monitor the number of data objects waiting for revision creation.
remove_revision_creation_avu_from_deleted_data_objects(ctx, print_verbose)

# Get list of up to batch size limit of data objects (in research space) scheduled for revision, taking into account
# modification time.
log.write(ctx, "verbose = {}".format(verbose))
Expand Down Expand Up @@ -1054,3 +1058,28 @@ def memory_limit_exceeded(rss_limit):
"""
rss_limit = int(rss_limit)
return rss_limit and memory_rss_usage() > rss_limit


def remove_revision_creation_avu_from_deleted_data_objects(ctx, print_verbose):
    """Remove revision creation AVUs from deleted data objects.

    Data objects scheduled for revision carry an 'org_revision_scheduled' AVU.
    When such an object is moved to the trash before the revision job handles
    it, the AVU lingers; clearing it keeps the count of data objects waiting
    for revision creation accurate and easier to monitor.

    :param ctx:           Combined type of a callback and rei struct
    :param print_verbose: Whether to log verbose messages for troubleshooting (Boolean)
    """
    revision_avu_name = constants.UUORGMETADATAPREFIX + "revision_scheduled"

    # Find all trashed data objects that still carry the revision-scheduled AVU.
    trashed_objects = genquery.row_iterator(
        "COLL_NAME, DATA_NAME",
        "COLL_NAME like '%{}/trash/home/%' AND META_DATA_ATTR_NAME = '{}'".format(user.zone(ctx), revision_avu_name),
        genquery.AS_LIST, ctx
    )

    for coll_name, data_name in trashed_objects:
        path = coll_name + '/' + data_name
        try:
            # Remove with a wildcard value; exact-value removal via
            # rm_from_data causes problems here.
            avu.rmw_from_data(ctx, path, revision_avu_name, "%")
            if print_verbose:
                log.write(ctx, 'Removed revision creation AVUs from data object: {}'.format(path))
        except Exception as e:
            # Best effort: log and continue with the remaining objects.
            log.write(ctx, "Error processing data object {}: {}".format(path, str(e)))
89 changes: 52 additions & 37 deletions schema_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import re

from schema_transformations_utils import correctify_isni, correctify_orcid, correctify_researcher_id, correctify_scopus

import meta
from util import *

Expand Down Expand Up @@ -128,21 +130,44 @@ def _default2_default3(ctx, m):

person_identifiers = []
for person_identifier in creator.get('Person_Identifier', []):
# Check ORCID
if person_identifier.get('Name_Identifier_Scheme', None) == 'ORCID':
# Check for incorrect ORCID format.
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_orcid = correctify_orcid(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_orcid is None:
log.write(ctx, "Warning: could not correct ORCID %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_orcid != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_orcid
# Check Scopus
elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)':
# Check for incorrect Scopus format.
if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)):
corrected_scopus = correctify_scopus(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_scopus is None:
log.write(ctx, "Warning: could not correct Scopus %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_scopus != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_scopus
# Check ISNI
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ISNI':
# Check for incorrect ISNI format.
if not re.search("^(https://isni.org/isni/)[0-9]{15}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_isni = correctify_isni(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_isni is None:
log.write(ctx, "Warning: could not correct ISNI %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_isni != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_isni
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ResearcherID (Web of Science)':
# Check for incorrect ResearcherID format.
if not re.search("^(https://www.researcherid.com/rid/)[A-Z]-[0-9]{4}-[0-9]{4}$", person_identifier.get('Name_Identifier', None)):
corrected_researcher_id = correctify_researcher_id(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_researcher_id != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_researcher_id
elif 'Name_Identifier_Scheme' not in person_identifier:
Expand All @@ -164,21 +189,44 @@ def _default2_default3(ctx, m):

person_identifiers = []
for person_identifier in contributor.get('Person_Identifier', []):
# Check ORCID
if person_identifier.get('Name_Identifier_Scheme', None) == 'ORCID':
# Check for incorrect ORCID format.
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_orcid = correctify_orcid(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_orcid is None:
log.write(ctx, "Warning: could not correct ORCID %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_orcid != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_orcid
# Check Scopus
elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)':
# Check for incorrect Scopus format.
if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)):
corrected_scopus = correctify_scopus(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_scopus is None:
log.write(ctx, "Warning: could not correct Scopus %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_scopus != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_scopus
# Check ISNI
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ISNI':
# Check for incorrect ISNI format.
if not re.search("^(https://isni.org/isni/)[0-9]{15}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_isni = correctify_isni(person_identifier['Name_Identifier'])
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_isni is None:
log.write(ctx, "Warning: could not correct ISNI %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_isni != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_isni
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ResearcherID (Web of Science)':
# Check for incorrect ResearcherID format.
if not re.search("^(https://www.researcherid.com/rid/)[A-Z]-[0-9]{4}-[0-9]{4}$", person_identifier.get('Name_Identifier', None)):
corrected_researcher_id = correctify_researcher_id(person_identifier['Name_Identifier'])
# Only it an actual correction took place change the value and mark this data as 'changed'.
# Only if an actual correction took place change the value and mark this data as 'changed'.
if corrected_researcher_id != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_researcher_id
elif 'Name_Identifier_Scheme' not in person_identifier:
Expand Down Expand Up @@ -702,36 +750,3 @@ def get(src_id, dst_id):

x = transformations.get(src_id)
return None if x is None else x.get(dst_id)


def correctify_orcid(org_orcid):
    """Correct ill-formatted ORCID.

    :param org_orcid: Original ORCID value, e.g. containing spaces or a
                      lower-case checksum character 'x'

    :returns: Corrected ORCID URL, or the original value unchanged when it
              cannot be corrected to a valid ORCID
    """
    # Get rid of all spaces.
    orcid = org_orcid.replace(' ', '')

    # Upper-case X. (Bug fix: this previously re-read org_orcid instead of
    # orcid, silently discarding the space removal above.)
    orcid = orcid.replace('x', 'X')

    # The last part should hold a valid id like eg: 1234-1234-1234-123X.
    # If not, it is impossible to correct it to the valid orcid format.
    orcs = orcid.split('/')
    if not re.search("^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", orcs[-1]):
        # Return original value.
        return org_orcid

    return "https://orcid.org/{}".format(orcs[-1])


def correctify_researcher_id(org_researcher_id):
    """Correct ill-formatted ResearcherID.

    :param org_researcher_id: Original ResearcherID, possibly containing spaces

    :returns: Canonical ResearcherID URL, or the original value unchanged
              when it cannot be corrected
    """
    # Spaces never belong in a ResearcherID; drop them all.
    candidate = org_researcher_id.replace(' ', '')

    # The identifier proper is the last path segment, e.g. A-1234-1234.
    identifier = candidate.split('/')[-1]
    if re.search("^[A-Z]-[0-9]{4}-[0-9]{4}$", identifier):
        return "https://www.researcherid.com/rid/{}".format(identifier)

    # Not correctable to the valid ResearcherID format:
    # hand the original value back unchanged.
    return org_researcher_id
67 changes: 67 additions & 0 deletions schema_transformations_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""JSON schema transformation utility functions."""

__copyright__ = 'Copyright (c) 2024, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

import re


def correctify_orcid(org_orcid):
    """Correct ill-formatted ORCID.

    :param org_orcid: Original ORCID value, possibly containing spaces or a
                      lower-case checksum character 'x'

    :returns: Canonical ORCID URL, or None when the value cannot be
              corrected to a valid ORCID
    """
    # Normalize: strip every space and upper-case a lower-case checksum 'x'.
    candidate = org_orcid.replace(' ', '').replace('x', 'X')

    # The identifier proper is the final path segment,
    # e.g. 1234-1234-1234-123X; anything else is not correctable.
    identifier = candidate.split('/')[-1]
    if re.search("^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", identifier) is None:
        return None

    return "https://orcid.org/{}".format(identifier)


def correctify_scopus(org_scopus):
    """Correct ill-formatted Scopus author identifier.

    :param org_scopus: Original Scopus value, possibly containing spaces

    :returns: Cleaned Scopus id (1 to 11 digits), or None when the value
              cannot be corrected
    """
    # Strip every space; a valid Scopus author id is purely numeric.
    candidate = org_scopus.replace(' ', '')

    # Anything other than 1-11 digits is not correctable.
    return candidate if re.search(r"^\d{1,11}$", candidate) else None


def correctify_isni(org_isni):
    """Correct ill-formatted ISNI.

    :param org_isni: Original ISNI value, possibly containing spaces or a
                     lower-case checksum character 'x'

    :returns: Canonical ISNI URL, or None when the value cannot be
              corrected to a valid ISNI
    """
    # Normalize: strip every space and upper-case a lower-case checksum 'x'.
    candidate = org_isni.replace(' ', '').replace('x', 'X')

    # The identifier proper is the final path segment,
    # e.g. 123412341234123X; anything else is not correctable.
    identifier = candidate.split('/')[-1]
    if re.search("^[0-9]{15}[0-9X]$", identifier) is None:
        return None

    return "https://isni.org/isni/{}".format(identifier)


def correctify_researcher_id(org_researcher_id):
    """Correct ill-formatted ResearcherID.

    Note: unlike the other correctify_* helpers in this module, this one
    returns the ORIGINAL value (not None) when correction is impossible,
    so callers comparing old and new values see it as unchanged.

    :param org_researcher_id: Original ResearcherID, possibly containing spaces

    :returns: Canonical ResearcherID URL, or the original value unchanged
              when it cannot be corrected
    """
    # Remove every space before validating.
    cleaned = org_researcher_id.replace(' ', '')

    # Only the last path segment matters; it must look like A-1234-1234.
    last_segment = cleaned.split('/')[-1]
    if re.search("^[A-Z]-[0-9]{4}-[0-9]{4}$", last_segment) is None:
        # Impossible to correct to the valid ResearcherID format.
        return org_researcher_id

    return "https://www.researcherid.com/rid/{}".format(last_segment)
2 changes: 1 addition & 1 deletion tests/features/api/api_deposit_open.feature
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Feature: Deposit API (open)
And deposit exists
And deposit is archived
And user viewer is authenticated
And as viewer the Yoda browse collections API is queried with <collection> # Workaround for https://github.com/pytest-dev/pytest-bdd/issues/689
And the Yoda browse collections API is queried with <collection>
Then the response status code is "200"
And the browse result contains deposit

Expand Down
2 changes: 1 addition & 1 deletion tests/features/api/api_deposit_restricted.feature
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Feature: Deposit API (restricted)
And deposit exists
And deposit is archived
And user viewer is authenticated
And as viewer the Yoda browse collections API is queried with <collection> # Workaround for https://github.com/pytest-dev/pytest-bdd/issues/689
And the Yoda browse collections API is queried with <collection>
Then the response status code is "200"
And the browse result does not contain deposit

Expand Down
2 changes: 1 addition & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requests==2.32.2
selenium==4.21.0
splinter==0.21.0
pytest-splinter==3.3.2
pytest_bdd==7.2.0
pytest_bdd==7.3.0
pytest==8.2.2
deepdiff==6.6.1
pyperclip==1.9.0
Loading

0 comments on commit cdf3a7f

Please sign in to comment.