Skip to content

Commit

Permalink
Merge pull request #77 from sul-dlss-labs/openalex-doi-lookup
Browse files Browse the repository at this point in the history
OpenAlex DOI lookups with invalid parameters
  • Loading branch information
edsu authored Jul 8, 2024
2 parents 1ef8969 + bb28524 commit 82286d1
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
20 changes: 16 additions & 4 deletions rialto_airflow/harvest/openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from urllib.parse import quote

from more_itertools import batched
from pyalex import Authors, Works, config
from pyalex import Authors, Works, config, api

from rialto_airflow.utils import invert_dict

Expand Down Expand Up @@ -89,9 +89,21 @@ def publications_from_dois(dois: list):
time.sleep(1)

doi_list = quote("|".join([doi for doi in doi_batch]))
for page in Works().filter(doi=doi_list).paginate(per_page=200):
for pub in page:
yield normalize_publication(pub)
try:
for page in Works().filter(doi=doi_list).paginate(per_page=200):
for pub in page:
yield normalize_publication(pub)
except api.QueryError:
# try dois individually
for doi in doi_batch:
try:
pubs = Works().filter(doi=doi).get()
if len(pubs) > 1:
logging.warn(f"Found multiple publications for DOI {doi}")
yield normalize_publication(pubs[0])
except api.QueryError as e:
logging.error(f"OpenAlex QueryError for {doi}: {e}")
continue


def normalize_publication(pub) -> dict:
Expand Down
10 changes: 10 additions & 0 deletions test/harvest/test_openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ def test_publications_from_dois():
assert len(pubs[1].keys()) == 51, "second publication has 51 columns"


def test_publications_from_invalid_dois(caplog):
    """Check that a DOI rejected by the OpenAlex query is logged and skipped.

    Two DOIs are passed to ``openalex.publications_from_dois``: one
    containing a comma and one ordinary DOI. The test expects exactly one
    publication to be yielded and the QueryError log message for the
    comma-containing DOI to appear in the captured log text.
    """
    # Error may change if OpenAlex API or pyalex changes
    dois = ["doi-with-comma,a", "10.1145/3442188.3445922"]

    harvested = list(openalex.publications_from_dois(dois))
    assert len(harvested) == 1

    expected_log = (
        "OpenAlex QueryError for doi-with-comma,a: Invalid query parameter"
    )
    assert expected_log in caplog.text, "logs error message"


def test_publications_csv(tmp_path):
pubs_csv = tmp_path / "openalex-pubs.csv"
openalex.publications_csv(
Expand Down

0 comments on commit 82286d1

Please sign in to comment.