From ba72df2cd171966b0b29a1edfeb6a5c970e27a08 Mon Sep 17 00:00:00 2001 From: fsatsuki <98732283+fsatsuki@users.noreply.github.com> Date: Fri, 26 Apr 2024 09:41:21 +0900 Subject: [PATCH] If there are no updates to the knowledge in the bot console, skip embedding (#273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * botの更新の際にknowledgeに変更がなければdynamodbの更新だけにする * CIのエラー修正 * CIのエラー修正 * レビューコメント反映 * CIエラー修正 * BotModelのtypeを追加、条件判定をわかりやすくする * CIエラー修正 * current_bot_modelはhas_knowledgeを使用する --- backend/app/routes/schemas/bot.py | 43 +++++++++++++++++++++++++++++-- backend/app/usecases/bot.py | 10 ++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/backend/app/routes/schemas/bot.py b/backend/app/routes/schemas/bot.py index 7514c385f..45c62d2ff 100644 --- a/backend/app/routes/schemas/bot.py +++ b/backend/app/routes/schemas/bot.py @@ -1,8 +1,11 @@ -from typing import Literal - +from __future__ import annotations +from typing import Literal, TYPE_CHECKING from app.routes.schemas.base import BaseSchema from pydantic import Field +if TYPE_CHECKING: + from app.repositories.models.custom_bot import BotModel + # Knowledge sync status type # NOTE: `ORIGINAL_NOT_FOUND` is used when the original bot is removed. 
type_sync_status = Literal[ @@ -46,6 +49,42 @@ class BotModifyInput(BaseSchema): embedding_params: EmbeddingParams | None knowledge: KnowledgeDiffInput | None + def has_update_files(self) -> bool: + return self.knowledge is not None and ( + len(self.knowledge.added_filenames) > 0 + or len(self.knowledge.deleted_filenames) > 0 + ) + + def is_embedding_required(self, current_bot_model: BotModel) -> bool: + if self.has_update_files(): + return True + + if self.knowledge is not None and current_bot_model.has_knowledge(): + if set(self.knowledge.source_urls) == set( + current_bot_model.knowledge.source_urls + ) and set(self.knowledge.sitemap_urls) == set( + current_bot_model.knowledge.sitemap_urls + ): + pass + else: + return True + + if ( + self.embedding_params is not None + and current_bot_model.embedding_params is not None + ): + if ( + self.embedding_params.chunk_size + == current_bot_model.embedding_params.chunk_size + and self.embedding_params.chunk_overlap + == current_bot_model.embedding_params.chunk_overlap + ): + pass + else: + return True + + return False + class BotModifyOutput(BaseSchema): id: str diff --git a/backend/app/usecases/bot.py b/backend/app/usecases/bot.py index 650fe26ad..2bdb7c3cc 100644 --- a/backend/app/usecases/bot.py +++ b/backend/app/usecases/bot.py @@ -172,6 +172,7 @@ def modify_owned_bot( source_urls = [] sitemap_urls = [] filenames = [] + sync_status: type_sync_status = "QUEUED" if modify_input.knowledge: source_urls = modify_input.knowledge.source_urls @@ -206,6 +207,12 @@ def modify_owned_bot( else DEFAULT_EMBEDDING_CONFIG["chunk_overlap"] ) + # if knowledge and embedding_params are not updated, skip embedding process. + # 'sync_status = "QUEUED"' will execute embedding process and update dynamodb record. + # 'sync_status= "SUCCEEDED"' will update only dynamodb record. 
+ bot = find_private_bot_by_id(user_id, bot_id) + sync_status = "QUEUED" if modify_input.is_embedding_required(bot) else "SUCCEEDED" + update_bot( user_id, bot_id, @@ -221,9 +228,10 @@ def modify_owned_bot( sitemap_urls=sitemap_urls, filenames=filenames, ), - sync_status="QUEUED", + sync_status=sync_status, sync_status_reason="", ) + return BotModifyOutput( id=bot_id, title=modify_input.title,