Skip to content

Commit

Permalink
Fix vault check script, get latest metadata file.
Browse files Browse the repository at this point in the history
  • Loading branch information
lwesterhof committed Jul 13, 2023
1 parent ab6ac1b commit e46e18d
Showing 1 changed file with 25 additions and 14 deletions.
39 changes: 25 additions & 14 deletions tools/metadata/vault-check-orcid-format.r
Original file line number Diff line number Diff line change
Expand Up @@ -33,41 +33,52 @@ def main(rule_args, callback, rei):
schemas_to_be_checked = ['core-1', 'default-1', 'default-2', 'hptlab-1', 'teclab-1', 'dag-0', 'vollmer-0']

for schema in schemas_to_be_checked:

callback.writeLine("stdout", "")
callback.writeLine("stdout", "SCHEMA: {}".format(schema))
packages = genquery.row_iterator(
"COLL_NAME, META_COLL_ATTR_VALUE, DATA_NAME",

data_packages = genquery.row_iterator(
"COLL_NAME",
"META_COLL_ATTR_NAME = 'href' AND META_COLL_ATTR_VALUE like '%/{}/metadata.json' "
"AND COLL_NAME not like '%/original' AND COLL_NAME like '/%/home/vault-%' "
"AND DATA_NAME like 'yoda-metadata%.json'".format(schema),
genquery.AS_TUPLE,
callback)

for (coll, href, data) in packages:
# New package so when reporting orcid errors must be mentioned only once
metadata_files = genquery.row_iterator(
"COLL_NAME, ORDER_DESC(DATA_NAME)",
"DATA_NAME like 'yoda-metadata[%].json' "
"AND COLL_NAME not like '%/original'",
genquery.AS_TUPLE,
callback)

for coll in data_packages:
json_file = None

for (coll_, metadata_file) in metadata_files:
if coll == coll_:
json_file = metadata_file
break

wrote_package_line = False

md = get_metadata_as_dict(callback, coll + '/' + data)
md = get_metadata_as_dict(callback, coll + '/' + json_file)

for pi_holder in ['Creator', 'Contributor']:
if pi_holder in md:
for holder in md[pi_holder]:
for pi in holder.get('Person_Identifier', []):
if pi.get('Name_Identifier_Scheme', None) == 'ORCID':
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9xX]$", pi['Name_Identifier']):
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", pi['Name_Identifier']):
if not wrote_package_line:
# Only write this line once
callback.writeLine("stdout", '----------------------------------')
callback.writeLine("stdout", "Package: {}".format(coll))
wrote_package_line = True

try:
callback.writeLine("stdout", "{}: {} {}".format(pi_holder, holder['Name']['Given_Name'], holder['Name']['Family_Name']))
except TypeError:
callback.writeLine("stdout", "{}: {}".format(pi_holder, holder['Name'].encode('utf-8')))
except UnicodeEncodeError:
callback.writeLine("stdout", "Warning: could not process " + coll + " due to encoding error.")
callback.writeLine("stdout", " Erroneous ORCID: \"{}\"".format(pi['Name_Identifier']))
callback.writeLine("stdout", '----------------------------------')
callback.writeLine("stdout", "Invalid ORCID: \"{}\"".format(pi['Name_Identifier']))
if wrote_package_line:
callback.writeLine("stdout", '----------------------------------')


INPUT null
Expand Down

0 comments on commit e46e18d

Please sign in to comment.