Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yda 5951 add transformation scopus isni #524

Merged
merged 4 commits into from
Sep 26, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions schema_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def _default2_default3(ctx, m):

person_identifiers = []
for person_identifier in creator.get('Person_Identifier', []):
# Check ORCID
if person_identifier.get('Name_Identifier_Scheme', None) == 'ORCID':
# Check for incorrect ORCID format.
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", person_identifier.get('Name_Identifier', None)):
Expand All @@ -138,6 +139,28 @@ def _default2_default3(ctx, m):
% (person_identifier['Name_Identifier']))
elif corrected_orcid != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_orcid
# Check Scopus
elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)':
# Check for incorrect Scopus format.
if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)):
corrected_scopus = correctify_scopus(person_identifier['Name_Identifier'])
# Only if an actual correction took place, change the value and mark this data as 'changed'.
leonidastri marked this conversation as resolved.
Show resolved Hide resolved
if corrected_scopus is None:
log.write(ctx, "Warning: could not correct Scopus %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_scopus != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_scopus
# Check ISNI
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ISNI':
# Check for incorrect ISNI format.
if not re.search("^(https://isni.org/isni/)[0-9]{15}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_isni = correctify_isni(person_identifier['Name_Identifier'])
# Only if an actual correction took place, change the value and mark this data as 'changed'.
leonidastri marked this conversation as resolved.
Show resolved Hide resolved
if corrected_isni is None:
log.write(ctx, "Warning: could not correct ISNI %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_isni != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_isni
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ResearcherID (Web of Science)':
# Check for incorrect ResearcherID format.
if not re.search("^(https://www.researcherid.com/rid/)[A-Z]-[0-9]{4}-[0-9]{4}$", person_identifier.get('Name_Identifier', None)):
Expand All @@ -164,6 +187,7 @@ def _default2_default3(ctx, m):

person_identifiers = []
for person_identifier in contributor.get('Person_Identifier', []):
# Check ORCID
if person_identifier.get('Name_Identifier_Scheme', None) == 'ORCID':
# Check for incorrect ORCID format.
if not re.search("^(https://orcid.org/)[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", person_identifier.get('Name_Identifier', None)):
Expand All @@ -174,6 +198,28 @@ def _default2_default3(ctx, m):
% (person_identifier['Name_Identifier']))
elif corrected_orcid != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_orcid
# Check Scopus
elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)':
# Check for incorrect Scopus format.
if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)):
corrected_scopus = correctify_scopus(person_identifier['Name_Identifier'])
# Only if an actual correction took place, change the value and mark this data as 'changed'.
if corrected_scopus is None:
log.write(ctx, "Warning: could not correct Scopus %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_scopus != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_scopus
# Check ISNI
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ISNI':
# Check for incorrect ISNI format.
if not re.search("^(https://isni.org/isni/)[0-9]{15}[0-9X]$", person_identifier.get('Name_Identifier', None)):
corrected_isni = correctify_isni(person_identifier['Name_Identifier'])
# Only if an actual correction took place, change the value and mark this data as 'changed'.
if corrected_isni is None:
log.write(ctx, "Warning: could not correct ISNI %s during schema transformation. It needs to be fixed manually."
% (person_identifier['Name_Identifier']))
elif corrected_isni != person_identifier['Name_Identifier']:
person_identifier['Name_Identifier'] = corrected_isni
elif person_identifier.get('Name_Identifier_Scheme', None) == 'ResearcherID (Web of Science)':
# Check for incorrect ResearcherID format.
if not re.search("^(https://www.researcherid.com/rid/)[A-Z]-[0-9]{4}-[0-9]{4}$", person_identifier.get('Name_Identifier', None)):
Expand Down Expand Up @@ -722,6 +768,36 @@ def correctify_orcid(org_orcid):
return "https://orcid.org/{}".format(orcs[-1])


def correctify_scopus(org_scopus):
    """Correct ill-formatted Scopus author identifier.

    All whitespace is removed from the given value; the result is accepted
    when it consists of 1 to 11 digits.

    :param org_scopus: Original Scopus author identifier as found in the metadata

    :returns: Corrected Scopus author identifier, or None if it cannot be corrected
    """
    # Get rid of all whitespace (spaces, tabs, newlines), not only plain spaces.
    new_scopus = "".join(org_scopus.split())

    # Raw string, so that \d is not interpreted as an (invalid) string escape sequence.
    if not re.search(r"^\d{1,11}$", new_scopus):
        # Impossible to correct. Signal this with None: the schema transformation
        # checks for None to log that the value needs to be fixed manually.
        return None

    return new_scopus


def correctify_isni(org_isni):
    """Correct ill-formatted ISNI.

    All whitespace is removed and a lower-case check character 'x' is
    upper-cased. The part after the last '/' must then be 15 digits followed
    by a digit or 'X'; it is returned prefixed with https://isni.org/isni/.

    :param org_isni: Original ISNI as found in the metadata

    :returns: ISNI in the form https://isni.org/isni/<16 characters>,
              or None if it cannot be corrected
    """
    # Remove all whitespace (spaces, tabs, newlines), not only plain spaces.
    compact_isni = "".join(org_isni.split())

    # Upper-case X.
    compact_isni = compact_isni.replace('x', 'X')

    # The last part should hold a valid id like eg: 123412341234123X.
    # If not, it is impossible to correct it to the valid isni format.
    isni_id = compact_isni.split('/')[-1]
    if not re.search(r"^[0-9]{15}[0-9X]$", isni_id):
        # Signal this with None: the schema transformation checks for None
        # to log that the value needs to be fixed manually.
        return None

    return "https://isni.org/isni/{}".format(isni_id)


def correctify_researcher_id(org_researcher_id):
"""Correct illformatted ResearcherID."""
# Get rid of all spaces.
Expand Down
Loading