diff --git a/modules/rag_description_generation/main.py b/modules/rag_description_generation/main.py index 1895078..43154a0 100644 --- a/modules/rag_description_generation/main.py +++ b/modules/rag_description_generation/main.py @@ -128,25 +128,27 @@ def process_single_game( all_games_df: pd.DataFrame, generate_prompt: str, ): - # if not self.dynamodb_client.check_dynamo_db_key(game_id=game_id): - df, game_name, game_mean = get_single_game_entries( - df=all_games_df, game_id=game_id, sample_pct=0.05 - ) - reviews = df["combined_review"].to_list() - weaviate_client.add_collection_batch(game_id=game_id, reviews=reviews) - current_prompt = weaviate_client.prompt_replacement( - current_prompt=generate_prompt, - overall_stats=self.overall_stats, - game_name=game_name, - game_mean=game_mean, - ) - summary = weaviate_client.generate_aggregated_review(game_id, current_prompt) - self.dynamodb_client.divide_and_process_generated_summary( - game_id, summary=summary.generated - ) - # print(f"\n{summary.generated}") - # weaviate_client.remove_collection_items(game_id=game_id, reviews=reviews) - return + if not self.dynamodb_client.check_dynamo_db_key(game_id=game_id): + df, game_name, game_mean = get_single_game_entries( + df=all_games_df, game_id=game_id, sample_pct=0.05 + ) + reviews = df["combined_review"].to_list() + weaviate_client.add_collection_batch(game_id=game_id, reviews=reviews) + current_prompt = weaviate_client.prompt_replacement( + current_prompt=generate_prompt, + overall_stats=self.overall_stats, + game_name=game_name, + game_mean=game_mean, + ) + summary = weaviate_client.generate_aggregated_review( + game_id, current_prompt + ) + self.dynamodb_client.divide_and_process_generated_summary( + game_id, summary=summary.generated + ) + # print(f"\n{summary.generated}") + # weaviate_client.remove_collection_items(game_id=game_id, reviews=reviews) + return print(f"Game {game_id} already processed") @@ -183,7 +185,5 @@ def rag_description_generation_chain(self): 
def check_dynamo_db_key(self, game_id: str) -> bool:
    """Report whether ``game_id`` was processed recently enough to be skipped.

    Looks the game up in the ``game_generated_descriptions`` table and
    compares its stored processing date with the current local date.

    Args:
        game_id: Value of the ``game_id`` partition key to look up.

    Returns:
        True when the item exists and its stored date is fewer than three
        days old. False when the item is missing, has no usable date, is
        older than that, or the lookup itself fails.
    """
    date_format = "%Y%m%d"
    # An item without a stored date is treated as processed at the Unix
    # epoch, which always counts as stale.
    default_timestamp = "19700101"
    days_since_last_process = 3

    try:
        response = self.dynamodb_client.get_item(
            TableName="game_generated_descriptions",
            Key={"game_id": {"S": game_id}},
        )
    except Exception as err:
        # Best effort, as before: a failed lookup still yields False, but the
        # failure is reported as such instead of being labelled "not found".
        print(f"Game {game_id} lookup in DynamoDB failed: {err}")
        return False

    # get_item omits the "Item" key entirely when no row matches.
    item = response.get("Item")
    if not item:
        print(f"Game {game_id} not found in DynamoDB")
        return False

    # divide_and_process_generated_summary stores the date under
    # "date_updated"; "timestamp" is kept only as a fallback for the
    # attribute name this method used to read.
    stored = item.get("date_updated") or item.get("timestamp") or {}
    db_timestamp_str = stored.get("S", default_timestamp)

    try:
        db_timestamp = datetime.strptime(db_timestamp_str, date_format)
    except (TypeError, ValueError):
        print(f"Game {game_id} has an unreadable date {db_timestamp_str!r}")
        return False

    if (datetime.now() - db_timestamp).days < days_since_last_process:
        print(
            f"Game {game_id} already processed within last {days_since_last_process} days"
        )
        return True

    print(
        f"Game {game_id} found but not processed within last {days_since_last_process} days"
    )
    return False