Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add updated_date to dynamodb entries #77

Merged
merged 3 commits into from
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions modules/rag_description_generation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,25 +128,27 @@ def process_single_game(
all_games_df: pd.DataFrame,
generate_prompt: str,
):
# if not self.dynamodb_client.check_dynamo_db_key(game_id=game_id):
df, game_name, game_mean = get_single_game_entries(
df=all_games_df, game_id=game_id, sample_pct=0.05
)
reviews = df["combined_review"].to_list()
weaviate_client.add_collection_batch(game_id=game_id, reviews=reviews)
current_prompt = weaviate_client.prompt_replacement(
current_prompt=generate_prompt,
overall_stats=self.overall_stats,
game_name=game_name,
game_mean=game_mean,
)
summary = weaviate_client.generate_aggregated_review(game_id, current_prompt)
self.dynamodb_client.divide_and_process_generated_summary(
game_id, summary=summary.generated
)
# print(f"\n{summary.generated}")
# weaviate_client.remove_collection_items(game_id=game_id, reviews=reviews)
return
if not self.dynamodb_client.check_dynamo_db_key(game_id=game_id):
df, game_name, game_mean = get_single_game_entries(
df=all_games_df, game_id=game_id, sample_pct=0.05
)
reviews = df["combined_review"].to_list()
weaviate_client.add_collection_batch(game_id=game_id, reviews=reviews)
current_prompt = weaviate_client.prompt_replacement(
current_prompt=generate_prompt,
overall_stats=self.overall_stats,
game_name=game_name,
game_mean=game_mean,
)
summary = weaviate_client.generate_aggregated_review(
game_id, current_prompt
)
self.dynamodb_client.divide_and_process_generated_summary(
game_id, summary=summary.generated
)
# print(f"\n{summary.generated}")
# weaviate_client.remove_collection_items(game_id=game_id, reviews=reviews)
return

print(f"Game {game_id} already processed")

Expand Down Expand Up @@ -183,7 +185,5 @@ def rag_description_generation_chain(self):

print(start_block, end_block)

# time.sleep(48000)

rag_description = RagDescription(start_block=start_block, end_block=end_block)
rag_description.rag_description_generation_chain()
26 changes: 23 additions & 3 deletions modules/rag_description_generation/rag_dynamodb.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import boto3
from pydantic import BaseModel
from datetime import datetime


class DynamoDB(BaseModel):
dynamodb_client: boto3.client = boto3.client("dynamodb")
today_timestring: str = datetime.now().strftime("%Y%m%d")

def divide_and_process_generated_summary(self, game_id: str, summary: str) -> None:
summary = summary.replace("**", "")
Expand All @@ -25,17 +27,35 @@ def divide_and_process_generated_summary(self, game_id: str, summary: str) -> No
"generated_description": {"S": description},
"generated_pros": {"S": pros},
"generated_cons": {"S": cons},
"date_updated": {"S": self.today_timestring},
},
# ConditionExpression="attribute_not_exists(game_id)",
)
print(f"Game {game_id} processed and added to DynamoDB")

def check_dynamo_db_key(self, game_id: str) -> bool:
    """Report whether ``game_id`` was processed recently enough to be skipped.

    Looks the game up in the ``game_generated_descriptions`` table and
    compares the stored ``date_updated`` attribute (``%Y%m%d`` string, the
    same attribute written by ``divide_and_process_generated_summary``)
    against the current date.

    Args:
        game_id: Partition-key value of the game to look up.

    Returns:
        True when the item exists and was updated within the last
        ``days_since_last_process`` days; False when the item is missing,
        stale, has no usable date, or the lookup itself failed.
    """
    # Epoch date used when an item carries no usable date, so that such
    # items always count as stale and get reprocessed.
    default_timestamp = "19700101"
    days_since_last_process = 3

    try:
        response = self.dynamodb_client.get_item(
            TableName="game_generated_descriptions", Key={"game_id": {"S": game_id}}
        )
        # get_item omits "Item" from the response when the key does not exist.
        item = response["Item"]
    except KeyError:
        print(f"Game {game_id} not found in DynamoDB")
        return False
    except Exception as err:
        # Best-effort: a failed lookup is treated like a missing item so the
        # caller reprocesses the game, but the real cause is reported.
        print(f"Game {game_id} lookup in DynamoDB failed: {err}")
        return False

    # Read the attribute name that the writer actually stores; reading a
    # different name would make every item look like it dates from 1970.
    db_timestamp_str = item.get("date_updated", {}).get("S", default_timestamp)
    try:
        db_timestamp = datetime.strptime(db_timestamp_str, "%Y%m%d")
    except (TypeError, ValueError):
        # Malformed stored date: treat the item as stale.
        db_timestamp = datetime.strptime(default_timestamp, "%Y%m%d")

    if (datetime.now() - db_timestamp).days < days_since_last_process:
        print(
            f"Game {game_id} already processed within last {days_since_last_process} days"
        )
        return True

    print(
        f"Game {game_id} found but not processed within last {days_since_last_process} days"
    )
    return False
Loading