From 81bcccd52b69cf3b8cc9e9fb411e104dd3bfe546 Mon Sep 17 00:00:00 2001 From: Kcreek Dev Date: Wed, 28 Feb 2024 21:54:48 -0500 Subject: [PATCH] Read PUF file location from env variable --- cs-config/cs_config/functions.py | 3 ++- cs-config/cs_config/helpers.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cs-config/cs_config/functions.py b/cs-config/cs_config/functions.py index 0e7eb12..dd3e46e 100644 --- a/cs-config/cs_config/functions.py +++ b/cs-config/cs_config/functions.py @@ -18,6 +18,7 @@ AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY") +PUF_S3_FILE_LOCATION = os.environ.get("PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz") CUR_PATH = os.path.abspath(os.path.dirname(__file__)) @@ -105,7 +106,7 @@ def run_model(meta_params_dict, adjustment): start_year = int(meta_params.year) use_cps = meta_params.data_source == "CPS" if meta_params.data_source == "PUF": - puf_df = retrieve_puf(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + puf_df = retrieve_puf(PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) if puf_df is not None: if not isinstance(puf_df, pd.DataFrame): raise TypeError("'puf_df' must be a Pandas DataFrame.") diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py index 61dc552..3b02366 100644 --- a/cs-config/cs_config/helpers.py +++ b/cs-config/cs_config/helpers.py @@ -38,7 +38,7 @@ AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None) AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None) -PUF_S3_FILE_NAME = "puf.20210720.csv.gz" +PUF_S3_FILE_LOCATION = os.environ.get("PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz") def random_seed(user_mods, year): @@ -343,7 +343,7 @@ def pdf_to_clean_html(pdf): def retrieve_puf( - aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY + puf_s3_file_location=PUF_S3_FILE_LOCATION, 
aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY ): """ Function for retrieving the PUF from the OSPC S3 bucket @@ -352,10 +352,10 @@ def retrieve_puf( has_credentials = ( aws_access_key_id is not None and aws_secret_access_key is not None ) - if has_credentials and s3_reader_installed: - print("Reading puf from S3 bucket.") + if puf_s3_file_location and has_credentials and s3_reader_installed: + print("Reading puf from S3 bucket.", puf_s3_file_location) fs = S3FileSystem(key=AWS_ACCESS_KEY_ID, secret=AWS_SECRET_ACCESS_KEY,) - with fs.open(f"s3://ospc-data-files/{PUF_S3_FILE_NAME}") as f: + with fs.open(puf_s3_file_location) as f: # Skips over header from top of file. puf_df = pd.read_csv(f, compression="gzip") return puf_df