From 2dcf8fa0a215565aff8dfbef89ecce9f435a26c8 Mon Sep 17 00:00:00 2001
From: Laura Wrubel
Date: Mon, 29 Jul 2024 12:33:13 -0400
Subject: [PATCH] Symlink output files to latest dir

---
Review note: publish() was amended after the original commit. It now
creates the "latest" directory when missing and removes existing links
unconditionally, because Path.exists() is False for a dangling symlink
and the stale link would make symlink_to() raise FileExistsError. The
post-image blob id on the index line below predates this amendment.

 rialto_airflow/dags/harvest.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/rialto_airflow/dags/harvest.py b/rialto_airflow/dags/harvest.py
index 62e6812..13601c1 100644
--- a/rialto_airflow/dags/harvest.py
+++ b/rialto_airflow/dags/harvest.py
@@ -126,11 +126,30 @@ def pubs_to_contribs(pubs, doi_sunet_pickle, authors_csv, snapshot_dir):
         return str(output)
 
     @task()
-    def publish(dataset):
+    def publish(pubs_to_contribs, merge_publications):
         """
         Publish aggregate data to JupyterHub environment.
+
+        Symlink the contributions and publications parquet files into the
+        "latest" directory under data_dir, replacing any links already
+        there, and return the two link paths as strings.
         """
-        return True
+        latest_dir = Path(data_dir) / "latest"
+        latest_dir.mkdir(exist_ok=True)
+
+        contribs_path = latest_dir / "contributions.parquet"
+        pubs_path = latest_dir / "publications.parquet"
+
+        # Path.exists() follows symlinks and is False for a dangling link, so
+        # unlink unconditionally; otherwise a stale link to a removed snapshot
+        # would make symlink_to() raise FileExistsError.
+        contribs_path.unlink(missing_ok=True)
+        pubs_path.unlink(missing_ok=True)
+
+        contribs_path.symlink_to(pubs_to_contribs)
+        pubs_path.symlink_to(merge_publications)
+
+        return str(contribs_path), str(pubs_path)
 
     snapshot_dir = setup()
 
@@ -156,7 +175,7 @@ def publish(dataset):
 
     contribs = pubs_to_contribs(pubs, doi_sunet, authors_csv, snapshot_dir)
 
-    publish(contribs)
+    publish(contribs, pubs)
 
 
 harvest()