Skip to content

Commit

Permalink
If there are no updates to the knowledge in the bot console, skip emb…
Browse files Browse the repository at this point in the history
…edding (aws-samples#273)

* botの更新の際にknowledgeに変更がなければdynamodbの更新だけにする

* CIのエラー修正

* CIのエラー修正

* レビューコメント反映

* CIエラー修正

* BotModelのtypeを追加、条件判定をわかりやすくする

* CIエラー修正

* current_bot_modelはhas_knowledgeを使用する
  • Loading branch information
fsatsuki authored Apr 26, 2024
1 parent a124943 commit ba72df2
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 3 deletions.
43 changes: 41 additions & 2 deletions backend/app/routes/schemas/bot.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import Literal

from __future__ import annotations
from typing import Literal, TYPE_CHECKING
from app.routes.schemas.base import BaseSchema
from pydantic import Field

if TYPE_CHECKING:
from app.repositories.models.custom_bot import BotModel

# Knowledge sync status type
# NOTE: `ORIGINAL_NOT_FOUND` is used when the original bot is removed.
type_sync_status = Literal[
Expand Down Expand Up @@ -46,6 +49,42 @@ class BotModifyInput(BaseSchema):
embedding_params: EmbeddingParams | None
knowledge: KnowledgeDiffInput | None

def has_update_files(self) -> bool:
    """Report whether this request adds or deletes any knowledge files.

    Returns:
        False when no knowledge diff is attached; otherwise True if either
        ``added_filenames`` or ``deleted_filenames`` is non-empty.
    """
    knowledge_diff = self.knowledge
    if knowledge_diff is None:
        return False

    # Additions are checked first; deletions are only consulted when nothing
    # was added.
    if len(knowledge_diff.added_filenames) > 0:
        return True
    return len(knowledge_diff.deleted_filenames) > 0

def is_embedding_required(self, current_bot_model: "BotModel") -> bool:
    """Decide whether applying this modification needs the embedding process.

    The request is compared against the currently stored bot. Embedding is
    required when any of the following holds:

    * files are added or deleted (``has_update_files()``);
    * the requested source URLs or sitemap URLs differ, as sets, from the
      stored ones (order and duplicates are ignored);
    * both sides carry embedding params and ``chunk_size`` or
      ``chunk_overlap`` differs.

    Args:
        current_bot_model: The bot as currently stored, before this
            modification is applied.

    Returns:
        True if the embedding process must run, False if nothing relevant
        to embedding changed.
    """
    if self.has_update_files():
        return True

    if self.knowledge is not None:
        # When the stored bot reports no knowledge, compare against empty
        # URL sets instead of skipping the comparison; otherwise URLs that
        # are supplied for the first time would never be embedded.
        if current_bot_model.has_knowledge():
            stored_source_urls = set(current_bot_model.knowledge.source_urls)
            stored_sitemap_urls = set(current_bot_model.knowledge.sitemap_urls)
        else:
            stored_source_urls = set()
            stored_sitemap_urls = set()

        if set(self.knowledge.source_urls) != stored_source_urls:
            return True
        if set(self.knowledge.sitemap_urls) != stored_sitemap_urls:
            return True

    requested_params = self.embedding_params
    stored_params = current_bot_model.embedding_params
    # NOTE(review): when exactly one side is None the params are not
    # compared at all; confirm that the defaults applied by the caller can
    # never differ from the stored values in that case.
    if requested_params is not None and stored_params is not None:
        if requested_params.chunk_size != stored_params.chunk_size:
            return True
        if requested_params.chunk_overlap != stored_params.chunk_overlap:
            return True

    return False


class BotModifyOutput(BaseSchema):
id: str
Expand Down
10 changes: 9 additions & 1 deletion backend/app/usecases/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def modify_owned_bot(
source_urls = []
sitemap_urls = []
filenames = []
sync_status: type_sync_status = "QUEUED"

if modify_input.knowledge:
source_urls = modify_input.knowledge.source_urls
Expand Down Expand Up @@ -206,6 +207,12 @@ def modify_owned_bot(
else DEFAULT_EMBEDDING_CONFIG["chunk_overlap"]
)

# If neither knowledge nor embedding_params was updated, skip the embedding process.
# 'sync_status = "QUEUED"' runs the embedding process and updates the DynamoDB record.
# 'sync_status = "SUCCEEDED"' updates only the DynamoDB record.
bot = find_private_bot_by_id(user_id, bot_id)
sync_status = "QUEUED" if modify_input.is_embedding_required(bot) else "SUCCEEDED"

update_bot(
user_id,
bot_id,
Expand All @@ -221,9 +228,10 @@ def modify_owned_bot(
sitemap_urls=sitemap_urls,
filenames=filenames,
),
sync_status="QUEUED",
sync_status=sync_status,
sync_status_reason="",
)

return BotModifyOutput(
id=bot_id,
title=modify_input.title,
Expand Down

0 comments on commit ba72df2

Please sign in to comment.