From 7a020cbc5150316e25b8a2a7b94cd3e34e71cec2 Mon Sep 17 00:00:00 2001
From: Mandlin Sarah
Date: Sun, 1 Sep 2024 23:18:19 -0700
Subject: [PATCH] Improve file handling with context managers for robustness

---
 finetune/seqcls/preprocess_blurb_seqcls.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/finetune/seqcls/preprocess_blurb_seqcls.py b/finetune/seqcls/preprocess_blurb_seqcls.py
index 71a8f77..0579dca 100644
--- a/finetune/seqcls/preprocess_blurb_seqcls.py
+++ b/finetune/seqcls/preprocess_blurb_seqcls.py
@@ -23,11 +23,15 @@ def process_pubmedqa(fname):
     dname = "pubmedqa"
     print (dname, fname)
     if fname in ["train", "dev"]:
-        data = json.load(open(f"raw_data/blurb/data_generation/data/pubmedqa/pqal_fold0/{fname}_set.json"))
+        file_path = f"raw_data/blurb/data_generation/data/pubmedqa/pqal_fold0/{fname}_set.json"
     elif fname == "test":
-        data = json.load(open(f"raw_data/blurb/data_generation/data/pubmedqa/{fname}_set.json"))
+        file_path = f"raw_data/blurb/data_generation/data/pubmedqa/{fname}_set.json"
     else:
         assert False
+
+    with open(file_path, "r") as f:
+        data = json.load(f)
+
     outs, lens = [], []
     for id in data:
         obj = data[id]
@@ -50,7 +54,8 @@ def process_pubmedqa(fname):
 def process_bioasq(fname):
     dname = "bioasq"
     print (dname, fname)
-    df = pd.read_csv(open(f"raw_data/blurb/data_generation/data/BioASQ/{fname}.tsv"), sep="\t", header=None)
+    with open(f"raw_data/blurb/data_generation/data/BioASQ/{fname}.tsv", "r") as f:
+        df = pd.read_csv(f, sep="\t", header=None)
     outs, lens = [], []
     for _, row in df.iterrows():
         id = row[0].strip()
@@ -68,3 +73,4 @@ def process_bioasq(fname):
 process_bioasq("test")
 process_bioasq("dev")
 process_bioasq("train")
+