Skip to content

Commit

Permalink
Read PUF file location from env variable
Browse files: browse the repository at this point in the history
  • Loading branch information
kcreekdev committed Feb 29, 2024
1 parent d3367c8 commit 81bcccd
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
3 changes: 2 additions & 1 deletion cs-config/cs_config/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
PUF_S3_FILE_LOCATION = os.environ.get("PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz")

CUR_PATH = os.path.abspath(os.path.dirname(__file__))

Expand Down Expand Up @@ -105,7 +106,7 @@ def run_model(meta_params_dict, adjustment):
start_year = int(meta_params.year)
use_cps = meta_params.data_source == "CPS"
if meta_params.data_source == "PUF":
puf_df = retrieve_puf(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
puf_df = retrieve_puf(PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
if puf_df is not None:
if not isinstance(puf_df, pd.DataFrame):
raise TypeError("'puf_df' must be a Pandas DataFrame.")
Expand Down
10 changes: 5 additions & 5 deletions cs-config/cs_config/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None)
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None)

PUF_S3_FILE_NAME = "puf.20210720.csv.gz"
PUF_S3_FILE_LOCATION = os.environ.get("PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz")


def random_seed(user_mods, year):
Expand Down Expand Up @@ -343,7 +343,7 @@ def pdf_to_clean_html(pdf):


def retrieve_puf(
aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY
puf_s3_file_location=PUF_S3_FILE_LOCATION, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY
):
"""
Function for retrieving the PUF from the OSPC S3 bucket
Expand All @@ -352,10 +352,10 @@ def retrieve_puf(
has_credentials = (
aws_access_key_id is not None and aws_secret_access_key is not None
)
if has_credentials and s3_reader_installed:
print("Reading puf from S3 bucket.")
if puf_s3_file_location and has_credentials and s3_reader_installed:
print("Reading puf from S3 bucket.", puf_s3_file_location)
fs = S3FileSystem(key=AWS_ACCESS_KEY_ID, secret=AWS_SECRET_ACCESS_KEY,)
with fs.open(f"s3://ospc-data-files/{PUF_S3_FILE_NAME}") as f:
with fs.open(PUF_S3_FILE_NAME) as f:
# Skips over header from top of file.
puf_df = pd.read_csv(f, compression="gzip")
return puf_df
Expand Down

0 comments on commit 81bcccd

Please sign in to comment.