diff --git a/README.md b/README.md index e2795f949..741baad6c 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,32 @@ ![sample](docs/about/assets/chabot-sample.gif "AWS GenAI Chatbot") + +## 🚀 NEW! Support for new Amazon Nova Models 🚀 +### Deploy this chatbot to use the recently announced [Amazon Nova models](https://aws.amazon.com/blogs/aws/introducing-amazon-nova-frontier-intelligence-and-industry-leading-price-performance/)! +### These powerful models can __understand__ and __generate__ images and videos. + +Deploy this chatbot to experiment with: +- `Amazon Nova Micro` +- `Amazon Nova Lite` +- `Amazon Nova Pro` +- `Amazon Nova Canvas` +- `Amazon Nova Reel` + + + +Make sure to request access to the new models [here](https://aws-samples.github.io/aws-genai-llm-chatbot/documentation/model-requirements.html#amazon-bedrock-requirements). + +Read more about the new models [here](https://www.aboutamazon.com/news/aws/amazon-nova-artificial-intelligence-bedrock-aws). + +--- + + This solution provides ready-to-use code so you can start **experimenting with a variety of Large Language Models and Multimodal Language Models, settings and prompts** in your own AWS account. Supported model providers: -- [Amazon Bedrock](https://aws.amazon.com/bedrock/) +- [Amazon Bedrock](https://aws.amazon.com/bedrock/), which supports a wide range of models from Amazon, Anthropic, Cohere, and Mistral, including the latest Amazon Nova models. See [Recent announcements](#) for more details. - [Amazon SageMaker](https://aws.amazon.com/sagemaker/) self-hosted models from Foundation, Jumpstart and HuggingFace. - Third-party providers via API such as Anthropic, Cohere, AI21 Labs, OpenAI, etc. [See available langchain integrations](https://python.langchain.com/docs/integrations/llms/) for a comprehensive list. @@ -42,6 +63,8 @@ Roadmap is available through the [GitHub Project](https://github.com/orgs/aws-sa # License This library is licensed under the MIT-0 License. See the LICENSE file. + +- [Changelog](CHANGELOG.md) of the project. - [License](LICENSE) of the project. - [Code of Conduct](CODE_OF_CONDUCT.md) of the project. - [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
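The request-handler changes in this patch route Nova and Claude text chat through Bedrock's Converse API. For context, a minimal standalone sketch of that call, assuming model access has already been granted; the model ID `us.amazon.nova-lite-v1:0` is illustrative, and availability varies by account and region:

```python
import boto3

# Bedrock Runtime client; assumes a region where Nova models are offered.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

# A single-turn request in the same Converse message shape the adapters below build.
response = client.converse(
    modelId="us.amazon.nova-lite-v1:0",  # illustrative ID; check Bedrock for your region
    messages=[
        {"role": "user", "content": [{"text": "Summarize Amazon Nova in one sentence."}]}
    ],
    inferenceConfig={"maxTokens": 256, "temperature": 0.3, "topP": 0.9},
)
print(response["output"]["message"]["content"][0]["text"])
```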
diff --git a/boto3-1.35.58-py3-none-any.whl b/boto3-1.35.58-py3-none-any.whl new file mode 100644 index 000000000..24bf65ea4 Binary files /dev/null and b/boto3-1.35.58-py3-none-any.whl differ diff --git a/botocore-1.35.58-py3-none-any.whl b/botocore-1.35.58-py3-none-any.whl new file mode 100644 index 000000000..1af6e9b3f Binary files /dev/null and b/botocore-1.35.58-py3-none-any.whl differ diff --git a/docs/guide/deploy.md b/docs/guide/deploy.md index a3f0d997a..36d803a83 100644 --- a/docs/guide/deploy.md +++ b/docs/guide/deploy.md @@ -23,7 +23,7 @@ To do this, run the following command from the Cloud9 terminal: ``` git clone https://github.com/aws-samples/aws-genai-llm-chatbot.git cd aws-genai-llm-chatbot/ -chmod +x scripts/cloud9-resize.sh +chmod +x scripts/cloud9-resize.sh ./scripts/cloud9-resize.sh ``` diff --git a/integtests/chatbot-api/multi_modal_test.py b/integtests/chatbot-api/multi_modal_test.py index 99464d5f3..8771f2408 100644 --- a/integtests/chatbot-api/multi_modal_test.py +++ b/integtests/chatbot-api/multi_modal_test.py @@ -10,7 +10,6 @@ def test_multi_modal(client, default_multimodal_model, default_provider): - key = "INTEG_TEST" + str(uuid.uuid4()) + ".jpeg" result = client.add_file( input={ diff --git a/langchain_aws-0.2.2-py3-none-any.whl b/langchain_aws-0.2.2-py3-none-any.whl new file mode 100644 index 000000000..eb37e0794 Binary files /dev/null and b/langchain_aws-0.2.2-py3-none-any.whl differ diff --git a/lib/chatbot-api/functions/api-handler/common/constant.py b/lib/chatbot-api/functions/api-handler/common/constant.py index 8ab799622..58232dae0 100644 --- a/lib/chatbot-api/functions/api-handler/common/constant.py +++ b/lib/chatbot-api/functions/api-handler/common/constant.py @@ -12,3 +12,4 @@ SAFE_SHORT_STR_VALIDATION_OPTIONAL = Field( min_length=1, max_length=100, pattern=SAFE_STR_REGEX, default=None ) +SAFE_FILE_NAME_REGEX = r"^[A-Za-z0-9-_./\\ ]*$" diff --git a/lib/chatbot-api/functions/api-handler/routes/documents.py b/lib/chatbot-api/functions/api-handler/routes/documents.py index d69e2b2d2..ad3cad90c 100644 --- a/lib/chatbot-api/functions/api-handler/routes/documents.py +++ b/lib/chatbot-api/functions/api-handler/routes/documents.py @@ -137,6 +137,7 @@ class DocumentSubscriptionStatusRequest(BaseModel): ".jpg", ".jpeg", ".png", + ".mp4", ] ) @@ -149,14 +150,20 @@ def file_upload(input: dict): if "workspaceId" in input: if extension not in allowed_workspace_extensions: - raise genai_core.types.CommonError("Invalid file extension") + raise genai_core.types.CommonError( + f"""Invalid file extension {extension}. + Allowed extensions: {allowed_workspace_extensions}.""" + ) result = genai_core.presign.generate_workspace_presigned_post( request.workspaceId, request.fileName ) else: if extension not in allowed_session_extensions: - raise genai_core.types.CommonError("Invalid file extension") + raise genai_core.types.CommonError( + f"""Invalid file extension {extension}. 
+ Allowed extensions: {allowed_session_extensions}.""" + ) user_id = genai_core.auth.get_user_id(router) result = genai_core.presign.generate_user_presigned_post( diff --git a/lib/chatbot-api/functions/api-handler/routes/sessions.py b/lib/chatbot-api/functions/api-handler/routes/sessions.py index c98bd60fc..5ae901fd2 100644 --- a/lib/chatbot-api/functions/api-handler/routes/sessions.py +++ b/lib/chatbot-api/functions/api-handler/routes/sessions.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, Field -from common.constant import SAFE_STR_REGEX +from common.constant import SAFE_FILE_NAME_REGEX from common.validation import WorkspaceIdValidation import genai_core.presign import genai_core.sessions @@ -16,7 +16,7 @@ class FileURequestValidation(BaseModel): - fileName: str = Field(min_length=1, max_length=500, pattern=SAFE_STR_REGEX) + fileName: str = Field(min_length=1, max_length=500, pattern=SAFE_FILE_NAME_REGEX) @router.resolver(field_name="getFileURL") diff --git a/lib/layer/index.ts b/lib/layer/index.ts index 3d8566547..248392b42 100644 --- a/lib/layer/index.ts +++ b/lib/layer/index.ts @@ -34,9 +34,8 @@ export class Layer extends Construct { [ `pip install -r requirements.txt ${args.join(" ")}`, `cd /asset-output/python`, - // Remove boto3 since it's already part of the lambda runtime // Remove sqlalchemy, used by Langchain when storing the memory using sql - `rm -rf boto3* botocore* sqlalchemy*`, + `rm -rf sqlalchemy*`, // Main impact of cold start is the file size. (faster to have the lambda regenerate them) `find . -name "*.pyc" -type f -delete`, `cd -`, diff --git a/lib/model-interfaces/idefics/functions/request-handler/adapters/__init__.py b/lib/model-interfaces/idefics/functions/request-handler/adapters/__init__.py index 8d2a134d2..118cdd2d2 100644 --- a/lib/model-interfaces/idefics/functions/request-handler/adapters/__init__.py +++ b/lib/model-interfaces/idefics/functions/request-handler/adapters/__init__.py @@ -1,3 +1,4 @@ # flake8: noqa -from .idefics import Idefics from .claude import Claude3 +from .idefics import Idefics +from .nova import Nova diff --git a/lib/model-interfaces/idefics/functions/request-handler/adapters/base.py b/lib/model-interfaces/idefics/functions/request-handler/adapters/base.py index 0bb5d1709..2db7e2f97 100644 --- a/lib/model-interfaces/idefics/functions/request-handler/adapters/base.py +++ b/lib/model-interfaces/idefics/functions/request-handler/adapters/base.py @@ -1,12 +1,194 @@ +import json +import mimetypes +import os +import uuid from abc import abstractmethod +from dataclasses import dataclass, field +from typing import Optional +import boto3 +from aws_lambda_powertools import Logger +from genai_core.clients import get_bedrock_client +from genai_core.types import ChatbotMessageType +logger = Logger() +s3 = boto3.resource("s3") + + +@dataclass class MultiModalModelBase: + model_id: str + session_id: Optional[str] + user_id: Optional[str] + disable_streaming: Optional[bool] = False + model_kwargs: Optional[dict] = field(default_factory=dict) + mode: Optional[str] = None + client: Optional[any] = get_bedrock_client() + @abstractmethod - def handle_run(self, prompt: str, model_kwargs: dict) -> str: ... + def handle_run( + self, input: dict, model_kwargs: dict, files: Optional[list] = None + ) -> str: + ... @abstractmethod - def format_prompt(self, prompt: str, messages: list, files: list) -> str: ... + def on_llm_new_token(self, user_id: str, session_id: str, chunk: str) -> None: + ... 
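+ # Store generated media under the user's private S3 prefix; the returned descriptor (provider/key/type) is saved in chat history and rendered by the UI.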
+ + def upload_file_message(self, content: bytes, file_type: str): + key = str(uuid.uuid4()) + s3_path = "private/" + self.user_id + "/" + key + s3.Object(os.environ["CHATBOT_FILES_BUCKET_NAME"], s3_path).put(Body=content) + return { + "provider": "s3", + "key": key, + "type": file_type, + } + + def get_file_message(self, file: dict, use_s3_path: Optional[bool] = False): + if file["key"] is None: + raise Exception("Invalid S3 key: missing file key") + + key = "private/" + self.user_id + "/" + file["key"] + logger.info( + "Fetching file", bucket=os.environ["CHATBOT_FILES_BUCKET_NAME"], key=key + ) + # mimetypes.guess_extension expects a MIME type, so derive the extension from the key + extension = file["key"].split(".")[-1] + mime_type = mimetypes.guess_type(file["key"])[0] or "" + file_type = mime_type.split("/")[0] + logger.info("File type", file_type=file_type) + logger.info("File extension", extension=extension) + logger.info("File mime type", mime_type=mime_type) + format = mime_type.split("/")[-1] or extension + + response = s3.Object(os.environ["CHATBOT_FILES_BUCKET_NAME"], key) + logger.info("File response", response=response) + media_bytes = response.get()["Body"].read() + + source = {} + if use_s3_path: + source["s3Location"] = { + "uri": f"s3://{os.environ['CHATBOT_FILES_BUCKET_NAME']}/{key}", + } + else: + source["bytes"] = media_bytes + + return { + file_type: { + "format": format, + "source": source, + } + } + + def format_prompt(self, prompt: str, messages: list, files: list) -> str: + prompts = [] + + # Chat history + for message in messages: + if message.type.lower() == ChatbotMessageType.Human.value.lower(): + user_msg = { + "role": "user", + "content": [], + } + prompts.append(user_msg) + message_files = message.additional_kwargs.get("files", []) + + for message_file in message_files: + user_msg["content"].append(self.get_file_message(message_file)) + + user_msg["content"].append({"text": message.content}) + + if message.type.lower() == ChatbotMessageType.AI.value.lower(): + prompts.append( + { + "role": "assistant", + "content": [{"text": message.content or ""}], + } + ) + + # User prompt + user_msg = { + "role": "user", + "content": [], + } + prompts.append(user_msg) + for file in files: + user_msg["content"].append(self.get_file_message(file)) + + user_msg["content"].append({"text": prompt}) + + return { + "messages": prompts, + "last_message": prompt, + } + + def clean_prompt(self, input: dict) -> str: + for m in input["messages"]: + if m["role"] == "user" and type(m["content"]) == type([]): # noqa: E721 + for c in m["content"]: + if "video" in c: + c["video"]["source"]["bytes"] = "" + if "image" in c: + c["image"]["source"]["bytes"] = "" + return json.dumps(input) + + @abstractmethod + def generate_image(self, input: dict, model_kwargs: dict): + ... + + @abstractmethod + def generate_video(self, input: dict, model_kwargs: dict): + ...
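+ # Shared Bedrock Converse code path: Claude 3 chat and Nova text chat both route here; when streaming is enabled, tokens are forwarded to the websocket via on_llm_new_token.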
+ + def converse(self, input: dict, model_kwargs: dict): + logger.info("Incoming request for nova", model_kwargs=model_kwargs) + logger.info("Mode", mode=self.mode) + streaming = model_kwargs.get("streaming", False) + + complete_response = "" + inf_params = {} + + if "temperature" in model_kwargs: + inf_params["temperature"] = model_kwargs["temperature"] + if "topP" in model_kwargs: + inf_params["topP"] = model_kwargs["topP"] + if "maxTokens" in model_kwargs: + inf_params["maxTokens"] = model_kwargs["maxTokens"] + + stream_params = { + "modelId": self.model_id, + "messages": input["messages"], + "inferenceConfig": inf_params, + } + logger.info("Stream params", stream_params=stream_params) + + if streaming: + logger.info("Calling converse_stream") + mlm_response = self.client.converse_stream( + **stream_params, + ) + logger.info("Stream response", mlm_response=mlm_response) + stream = mlm_response.get("stream") + if stream: + logger.info("Sending stream events to on_llm_new_token") + for event in stream: + if "contentBlockDelta" in event: + chunk = event["contentBlockDelta"]["delta"]["text"] + complete_response += chunk + self.on_llm_new_token(chunk) + + logger.info("Complete response", complete_response=complete_response) + return { + "content": complete_response, + } + + logger.info("Calling converse") + mlm_response = self.client.converse( + **stream_params, + ) + logger.info("Response from nova", mlm_response=mlm_response) + content = mlm_response["output"]["message"]["content"][0]["text"] - def clean_prompt(self, prompt: str) -> str: - return prompt + return { + "content": content, + } diff --git a/lib/model-interfaces/idefics/functions/request-handler/adapters/claude.py b/lib/model-interfaces/idefics/functions/request-handler/adapters/claude.py index cc525a36a..87c8fa1f6 100644 --- a/lib/model-interfaces/idefics/functions/request-handler/adapters/claude.py +++ b/lib/model-interfaces/idefics/functions/request-handler/adapters/claude.py @@ -1,114 +1,16 @@ +from typing import Optional + from aws_lambda_powertools import Logger -import boto3 -from .base import MultiModalModelBase -from genai_core.types import ChatbotMessageType -import os -from genai_core.clients import get_bedrock_client -import json -from base64 import b64encode from genai_core.registry import registry -logger = Logger() -s3 = boto3.resource("s3") - - -def get_image_message(file: dict, user_id: str): - if file["key"] is None: - raise Exception("Invalid S3 Key " + file["key"]) - - key = "private/" + user_id + "/" + file["key"] - logger.info( - "Fetching image", bucket=os.environ["CHATBOT_FILES_BUCKET_NAME"], key=key - ) +from .base import MultiModalModelBase - response = s3.Object(os.environ["CHATBOT_FILES_BUCKET_NAME"], key) - img = str(b64encode(response.get()["Body"].read()), "ascii") - return { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": img, - }, - } +logger = Logger() class Claude3(MultiModalModelBase): - model_id: str - client: any - - def __init__(self, model_id: str): - self.model_id = model_id - self.client = get_bedrock_client() - - def format_prompt( - self, prompt: str, messages: list, files: list, user_id: str - ) -> str: - prompts = [] - - # Chat history - for message in messages: - if message.type.lower() == ChatbotMessageType.Human.value.lower(): - user_msg = { - "role": "user", - "content": [{"type": "text", "text": message.content}], - } - prompts.append(user_msg) - message_files = message.additional_kwargs.get("files", []) - for message_file in 
message_files: - user_msg["content"].append(get_image_message(message_file, user_id)) - if message.type.lower() == ChatbotMessageType.AI.value.lower(): - prompts.append({"role": "assistant", "content": message.content}) - - # User prompt - user_msg = { - "role": "user", - "content": [{"type": "text", "text": prompt}], - } - prompts.append(user_msg) - for file in files: - user_msg["content"].append(get_image_message(file, user_id)) - - return json.dumps( - { - "anthropic_version": "bedrock-2023-05-31", - "max_tokens": 512, - "messages": prompts, - "temperature": 0.3, - } - ) - - def handle_run(self, prompt: str, model_kwargs: dict): - logger.info("Incoming request for claude", model_kwargs=model_kwargs) - body = json.loads(prompt) - - if "temperature" in model_kwargs: - body["temperature"] = model_kwargs["temperature"] - if "topP" in model_kwargs: - body["top_p"] = model_kwargs["topP"] - if "maxTokens" in model_kwargs: - body["max_tokens"] = model_kwargs["maxTokens"] - if "topK" in model_kwargs: - body["top_k"] = model_kwargs["topK"] - - body_str = json.dumps(body) - mlm_response = self.client.invoke_model( - modelId=self.model_id, - body=body_str, - accept="application/json", - contentType="application/json", - ) - - return json.loads(mlm_response["body"].read())["content"][0]["text"] - - def clean_prompt(self, prompt: str) -> str: - p = json.loads(prompt) - for m in p["messages"]: - if m["role"] == "user" and type(m["content"]) == type([]): # noqa: E721 - for c in m["content"]: - if c["type"] == "image": - c["source"]["data"] = "" - return json.dumps(p) + def handle_run(self, input: dict, model_kwargs: dict, files: Optional[list] = None): + return self.converse(input, model_kwargs) registry.register(r"^bedrock.anthropic.claude-3.*", Claude3) diff --git a/lib/model-interfaces/idefics/functions/request-handler/adapters/idefics.py b/lib/model-interfaces/idefics/functions/request-handler/adapters/idefics.py index 7b110069e..8cba7359e 100644 --- a/lib/model-interfaces/idefics/functions/request-handler/adapters/idefics.py +++ b/lib/model-interfaces/idefics/functions/request-handler/adapters/idefics.py @@ -1,25 +1,21 @@ -from aws_lambda_powertools import Logger -from .base import MultiModalModelBase -from genai_core.types import ChatbotMessageType -from urllib.parse import urljoin import os -from langchain.llms import SagemakerEndpoint +from urllib.parse import urljoin + +from aws_lambda_powertools import Logger from content_handler import ContentHandler from genai_core.registry import registry +from genai_core.types import ChatbotMessageType +from langchain.llms import SagemakerEndpoint + +from .base import MultiModalModelBase logger = Logger() class Idefics(MultiModalModelBase): - model_id: str - - def __init__(self, model_id: str): - self.model_id = model_id - def format_prompt( self, prompt: str, messages: list, files: list, user_id: str ) -> str: - human_prompt_template = "User:{prompt}" human_prompt_with_image = "User:{prompt}![]({image})" ai_prompt_template = "Assistant:{prompt}" @@ -87,7 +83,9 @@ def handle_run(self, prompt: str, model_kwargs: dict): ) mlm_response = mlm.predict(prompt) - return mlm_response + return { + "content": mlm_response, + } registry.register(r"^sagemaker.*idefics*", Idefics) diff --git a/lib/model-interfaces/idefics/functions/request-handler/adapters/nova.py b/lib/model-interfaces/idefics/functions/request-handler/adapters/nova.py new file mode 100644 index 000000000..544a3e712 --- /dev/null +++ 
b/lib/model-interfaces/idefics/functions/request-handler/adapters/nova.py @@ -0,0 +1,278 @@ +import base64 +import json +import os +from random import randint +from typing import Optional +from aws_lambda_powertools import Logger +from genai_core.registry import registry +from genai_core.types import ChatbotMessageType, Modality + +from .base import MultiModalModelBase + +logger = Logger() + + +class Nova(MultiModalModelBase): + def format_prompt(self, prompt: str, messages: list, files: list) -> str: + prompts = [] + + # Chat history + for message in messages: + if message.type.lower() == ChatbotMessageType.Human.value.lower(): + user_msg = { + "role": "user", + "content": [], + } + prompts.append(user_msg) + message_files = message.additional_kwargs.get("files", []) + + for message_file in message_files: + use_s3_path = False + if message_file["type"] == Modality.VIDEO.value: + use_s3_path = True + + user_msg["content"].append( + self.get_file_message(message_file, use_s3_path=use_s3_path) + ) + + user_msg["content"].append({"text": message.content}) + + if message.type.lower() == ChatbotMessageType.AI.value.lower(): + prompts.append( + { + "role": "assistant", + "content": [{"text": message.content or ""}], + } + ) + + # User prompt + user_msg = { + "role": "user", + "content": [], + } + prompts.append(user_msg) + for file in files: + use_s3_path = False + if file["type"] == Modality.VIDEO.value: + use_s3_path = True + + user_msg["content"].append( + self.get_file_message(file, use_s3_path=use_s3_path) + ) + + user_msg["content"].append({"text": prompt}) + + return { + "messages": prompts, + "last_message": prompt, + } + + def handle_run(self, input: dict, model_kwargs: dict, files: Optional[list] = None): + if self.mode == Modality.IMAGE.value: + return self.generate_image(input, model_kwargs, files) + if self.mode == Modality.VIDEO.value: + return self.generate_video(input, model_kwargs, files) + + return self._run(input, model_kwargs, files) + + def _run(self, input: dict, model_kwargs: dict, files: Optional[list] = None): + # TODO:video understanding not yet supported by converse API + # check if any of input["message"][N]["content"][N] has a video + # can be removed once video understanding is supported by converse API + logger.info( + "Checking for video in messages in history", messages=input["messages"] + ) + for message in input["messages"]: + for content in message["content"]: + if Modality.VIDEO.value.lower() in content: + logger.info("Found video in messages, invoking native") + return self._invoke_native(input, model_kwargs, files) + + logger.info("No video found in messages history, invoking converse") + return self.converse(input, model_kwargs) + + def _invoke_native( + self, input: dict, model_kwargs: dict, files: Optional[list] = None + ): + logger.info("Incoming request for nova", model_kwargs=model_kwargs) + logger.info("Mode", mode=self.mode) + streaming = model_kwargs.get("streaming", False) + + complete_response = "" + inf_params = {} + + if "temperature" in model_kwargs: + inf_params["temperature"] = model_kwargs["temperature"] + if "topP" in model_kwargs: + inf_params["top_p"] = model_kwargs["topP"] + if "maxTokens" in model_kwargs: + inf_params["max_new_tokens"] = model_kwargs["maxTokens"] + + native_request = { + "schemaVersion": "messages-v1", + "messages": input["messages"], + "inferenceConfig": inf_params, + } + logger.info("Native request", native_request=native_request) + + if streaming: + logger.info("Calling invoke_model_with_response_stream") + response 
= self.client.invoke_model_with_response_stream( + modelId=self.model_id, body=json.dumps(native_request) + ) + request_id = response.get("ResponseMetadata").get("RequestId") + logger.info(f"Request ID: {request_id}") + logger.info("Awaiting first token...") + # Process the response stream + stream = response.get("body") + if stream: + for event in stream: + chunk = event.get("chunk") + if chunk: + chunk_json = json.loads(chunk.get("bytes").decode()) + content_block_delta = chunk_json.get("contentBlockDelta") + if content_block_delta: + chunk_text = content_block_delta.get("delta").get("text") + self.on_llm_new_token(chunk_text) + complete_response += chunk_text + + return { + "content": complete_response, + } + + logger.info("Calling invoke_model") + response = self.client.invoke_model( + modelId=self.model_id, body=json.dumps(native_request) + ) + model_response = json.loads(response["body"].read()) + content_text = model_response["output"]["message"]["content"][0]["text"] + return { + "content": content_text, + } + + def generate_image( + self, input: dict, model_kwargs: dict, files: Optional[list] = None + ): + logger.info( + "Incoming request for nova image generation", + model_kwargs=model_kwargs, + input=input, + files=files, + ) + logger.info("Mode", mode=self.mode) + + inference_params = { + "taskType": "TEXT_IMAGE", + "textToImageParams": {"text": input["last_message"]}, + "imageGenerationConfig": { + "numberOfImages": 1, + "width": 1280, + "height": 768, + "cfgScale": 7.0, + "seed": model_kwargs.get("seed", randint(0, 2147483646)), # nosec B311 + }, + } + logger.info( + f"Generating with seed: {inference_params['imageGenerationConfig']['seed']}" + ) + response = self.client.invoke_model( + modelId=self.model_id, + body=json.dumps(inference_params), + ) + logger.info("Response from nova", response=response) + logger.info(f"Request ID: {response['ResponseMetadata']['RequestId']}") + + response_body = json.loads(response["body"].read()) + images = response_body["images"] + + if "error" in response_body: + if not images: + logger.error("Error: No images generated.") + logger.error(response_body["error"]) + + file_upload = self.upload_file_message( + base64.b64decode(images[0]), Modality.IMAGE.value + ) + response = { + "files": [file_upload], + "content": "", + } + return response + + def generate_video( + self, input: dict, model_kwargs: dict, files: Optional[list] = None + ): + logger.info( + "Incoming request for nova video generation", + model_kwargs=model_kwargs, + input=input, + files=files, + ) + + text_to_video_params = { + "text": input["last_message"], + } + + images = [] + for file in files: + if file["type"] == Modality.IMAGE.value: + image_data = self.get_file_message(file) + media_bytes = image_data["image"]["source"]["bytes"] + images.append( + { + "format": image_data["image"]["format"], + "source": { + "bytes": base64.b64encode(media_bytes).decode("utf-8"), + }, + } + ) + + if images: + text_to_video_params["images"] = images + model_input = { + "taskType": "TEXT_VIDEO", + "textToVideoParams": text_to_video_params, + "videoGenerationConfig": { + "durationSeconds": 6, + "fps": 24, + "dimension": "1280x720", + "seed": model_kwargs.get("seed", randint(0, 2147483646)), # nosec B311 + }, + } + logger.info("Model input", model_input=model_input) + s3_path = f"private/{self.user_id}" + output_data_config = { + "s3OutputDataConfig": { + "s3Uri": f"s3://{os.environ['CHATBOT_FILES_BUCKET_NAME']}/{s3_path}/" + } + } + logger.info("Output data config", 
output_data_config=output_data_config) + + # Start the asynchronous video generation job. + invocation_jobs = self.client.start_async_invoke( + modelId=self.model_id, + modelInput=model_input, + outputDataConfig=output_data_config, + ) + + logger.info( + "Response:", + invocation_jobs=json.dumps(invocation_jobs, indent=2, default=str), + ) + + invocation_arn = invocation_jobs["invocationArn"] + video_id = invocation_arn.split("/")[-1] + video_path = f"{video_id}/output.mp4" + return { + "files": [ + { + "provider": "s3", + "key": video_path, + "type": Modality.VIDEO.value, + } + ], + "content": "", + } + + +registry.register(r"^bedrock.amazon.nova*", Nova) diff --git a/lib/model-interfaces/idefics/functions/request-handler/index.py b/lib/model-interfaces/idefics/functions/request-handler/index.py index 3527dbbe4..0dd9a3945 100644 --- a/lib/model-interfaces/idefics/functions/request-handler/index.py +++ b/lib/model-interfaces/idefics/functions/request-handler/index.py @@ -1,24 +1,52 @@ -import os import json +import os import uuid from datetime import datetime +import adapters # noqa: F401 Needed to register the adapters +import boto3 from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType from aws_lambda_powertools.utilities.batch.exceptions import BatchProcessingError from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord from aws_lambda_powertools.utilities.typing import LambdaContext - -import adapters # noqa: F401 Needed to register the adapters from genai_core.langchain import DynamoDBChatMessageHistory -from genai_core.utils.websocket import send_to_client -from genai_core.types import ChatbotAction from genai_core.registry import registry +from genai_core.types import ChatbotAction +from genai_core.utils.websocket import send_to_client + +print(boto3.__version__) processor = BatchProcessor(event_type=EventType.SQS) tracer = Tracer() logger = Logger() +sequence_number = 0 + + +def on_llm_new_token(user_id, session_id, self, *args, **kwargs): + chunk = args[0] + if chunk is None or len(chunk) == 0: + return + global sequence_number + sequence_number += 1 + + send_to_client( + { + "type": "text", + "action": ChatbotAction.LLM_NEW_TOKEN.value, + "userId": user_id, + "timestamp": str(int(round(datetime.now().timestamp()))), + "data": { + "sessionId": session_id, + "token": { + "sequenceNumber": sequence_number, + "value": chunk, + }, + }, + } + ) + def handle_run(record): logger.info("Incoming request", record=record) @@ -47,38 +75,61 @@ def handle_run(record): messages = chat_history.messages adapter = registry.get_adapter(f"{provider}.{model_id}") - model = adapter(model_id=model_id) + adapter.on_llm_new_token = lambda *args, **kwargs: on_llm_new_token( + user_id, session_id, *args, **kwargs + ) + model = adapter( + model_id=model_id, + session_id=session_id, + user_id=user_id, + model_kwargs=model_kwargs, + mode=mode, + ) - prompt_template = model.format_prompt( + run_input = model.format_prompt( prompt=prompt, messages=messages, files=files, - user_id=user_id, ) - mlm_response = model.handle_run(prompt=prompt_template, model_kwargs=model_kwargs) + ai_response = model.handle_run( + input=run_input, model_kwargs=model_kwargs, files=files + ) - metadata = { + # Add user files and message to chat history + user_message_metadata = { "provider": provider, "modelId": model_id, "modelKwargs": model_kwargs, "mode": mode, "sessionId": session_id, "userId": user_id, - "prompts": 
[model.clean_prompt(prompt_template)], + "prompts": [model.clean_prompt(run_input)], + "files": files or [], } - if files: - metadata["files"] = files - chat_history.add_user_message(prompt) - chat_history.add_metadata(metadata) - chat_history.add_ai_message(mlm_response) + chat_history.add_metadata(user_message_metadata) + + # Add AI files and message to chat history + ai_response_metadata = { + "provider": provider, + "modelId": model_id, + "modelKwargs": model_kwargs, + "mode": mode, + "sessionId": session_id, + "userId": user_id, + "prompts": [model.clean_prompt(run_input)], + "files": ai_response.get("files", []), + } + ai_text_response = ai_response.get("content", "") + chat_history.add_ai_message(ai_text_response) + chat_history.add_metadata(ai_response_metadata) response = { "sessionId": session_id, "type": "text", - "content": mlm_response, - "metadata": metadata, + "content": ai_text_response, + "metadata": ai_response_metadata, } send_to_client( @@ -114,6 +165,25 @@ def handle_failed_records(records): data = detail.get("data", {}) session_id = data.get("sessionId", "") + message = "⚠️ *Something went wrong*" + if ( + "An error occurred (ValidationException)" in error + and "The provided image must have dimensions in set [1280x720]" in error + ): + # At this time only one input size is supported by the Nova reel model. + message = "⚠️ *The provided image must have dimensions of 1280x720.*" + + elif ( + "An error occurred (AccessDeniedException)" in error + and "You don't have access to the model with the specified model ID" + in error + ): + message = ( + "*This model is not enabled. " + "Please try again later or contact " + "an administrator*" + ) + send_to_client( { "type": "text", @@ -122,7 +192,7 @@ def handle_failed_records(records): "timestamp": str(int(round(datetime.now().timestamp()))), "data": { "sessionId": session_id, - "content": "Something went wrong.", + "content": message, "type": "text", }, } diff --git a/lib/model-interfaces/idefics/index.ts b/lib/model-interfaces/idefics/index.ts index 8e52bc3b4..7ac9ee0f1 100644 --- a/lib/model-interfaces/idefics/index.ts +++ b/lib/model-interfaces/idefics/index.ts @@ -75,8 +75,16 @@ export class IdeficsInterface extends Construct { }, } ); + if (props.config.bedrock?.roleArn) { + requestHandler.addToRolePolicy( + new iam.PolicyStatement({ + actions: ["sts:AssumeRole"], + resources: [props.config.bedrock.roleArn], + }) + ); + } - props.chatbotFilesBucket.grantRead(requestHandler); + props.chatbotFilesBucket.grantReadWrite(requestHandler); props.sessionsTable.grantReadWriteData(requestHandler); props.messagesTopic.grantPublish(requestHandler); if (props.shared.kmsKey && requestHandler.role) { @@ -85,7 +93,10 @@ export class IdeficsInterface extends Construct { props.shared.configParameter.grantRead(requestHandler); requestHandler.addToRolePolicy( new iam.PolicyStatement({ - actions: ["bedrock:InvokeModel"], + actions: [ + "bedrock:InvokeModel", + "bedrock:InvokeModelWithResponseStream", + ], resources: ["*"], effect: iam.Effect.ALLOW, }) diff --git a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py index e036b4461..32c0b8b99 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py +++ b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py @@ -256,6 +256,7 @@ def __init__(self, *args, **kwargs): registry.register(r"^bedrock.mistral.mistral-7b-*", 
BedrockChatNoSystemPromptAdapter) registry.register(r"^bedrock.mistral.mixtral-*", BedrockChatNoSystemPromptAdapter) registry.register(r"^bedrock.amazon.titan-t*", BedrockChatNoSystemPromptAdapter) +registry.register(r"^bedrock.amazon.nova*", BedrockChatAdapter) class PromptTemplateWithHistory(PromptTemplate): diff --git a/lib/shared/layers/common/boto3-1.35.58-py3-none-any.whl b/lib/shared/layers/common/boto3-1.35.58-py3-none-any.whl new file mode 100644 index 000000000..24bf65ea4 Binary files /dev/null and b/lib/shared/layers/common/boto3-1.35.58-py3-none-any.whl differ diff --git a/lib/shared/layers/common/botocore-1.35.58-py3-none-any.whl b/lib/shared/layers/common/botocore-1.35.58-py3-none-any.whl new file mode 100644 index 000000000..1af6e9b3f Binary files /dev/null and b/lib/shared/layers/common/botocore-1.35.58-py3-none-any.whl differ diff --git a/lib/shared/layers/common/langchain_aws-0.2.2-py3-none-any.whl b/lib/shared/layers/common/langchain_aws-0.2.2-py3-none-any.whl new file mode 100644 index 000000000..eb37e0794 Binary files /dev/null and b/lib/shared/layers/common/langchain_aws-0.2.2-py3-none-any.whl differ diff --git a/lib/shared/layers/common/requirements.txt b/lib/shared/layers/common/requirements.txt index 9f7ca2502..db45f6654 100644 --- a/lib/shared/layers/common/requirements.txt +++ b/lib/shared/layers/common/requirements.txt @@ -1,3 +1,6 @@ +boto3-1.35.58-py3-none-any.whl +botocore-1.35.58-py3-none-any.whl +langchain_aws-0.2.2-py3-none-any.whl aws_xray_sdk==2.14.0 numpy==1.26.0 cfnresponse==1.1.2 @@ -6,7 +9,6 @@ requests-aws4auth==1.2.3 langchain==0.3.7 langchain-core==0.3.15 langchain-community==0.3.3 -langchain-aws==0.2.4 langchain-openai==0.2.4 langchain-text-splitters==0.3.1 opensearch-py==2.4.2 diff --git a/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/client.py b/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/client.py index d968d3351..31a81bf55 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/client.py +++ b/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/client.py @@ -6,7 +6,6 @@ def get_kb_runtime_client_for_id(knowledge_base_id: str): - config = genai_core.parameters.get_config() kb_config = config.get("rag", {}).get("engines", {}).get("knowledgeBase", {}) external = kb_config.get("external", []) diff --git a/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/query.py b/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/query.py index 9a9b5345d..bae74360e 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/query.py +++ b/lib/shared/layers/python-sdk/python/genai_core/bedrock_kb/query.py @@ -47,7 +47,6 @@ def _convert_records(source: str, workspace_id: str, records: List[dict]): converted_records = [] _id = 0 for record in records: - path = record.get("location", {}).get("s3Location", {}).get("uri", "") content = record.get("content", {}).get("text", "") score = record.get("score", 0) diff --git a/lib/shared/layers/python-sdk/python/genai_core/models.py b/lib/shared/layers/python-sdk/python/genai_core/models.py index 0b500457b..978ef66c1 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/models.py +++ b/lib/shared/layers/python-sdk/python/genai_core/models.py @@ -85,7 +85,6 @@ def list_bedrock_models(): response = bedrock.list_foundation_models( byInferenceType=genai_core.types.InferenceType.ON_DEMAND.value, - byOutputModality=genai_core.types.Modality.TEXT.value, ) bedrock_models = [ m @@ -109,7 +108,6 @@ def list_bedrock_models(): if "inputModalities" in model 
and "outputModalities" in model and Modality.EMBEDDING.value not in model.get("outputModalities", []) - and Modality.IMAGE.value not in model.get("outputModalities", []) ] return models diff --git a/lib/shared/layers/python-sdk/python/genai_core/presign.py b/lib/shared/layers/python-sdk/python/genai_core/presign.py index 018c1105c..8930f5d17 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/presign.py +++ b/lib/shared/layers/python-sdk/python/genai_core/presign.py @@ -8,7 +8,7 @@ UPLOAD_BUCKET_NAME = os.environ.get("UPLOAD_BUCKET_NAME") CHATBOT_FILES_BUCKET_NAME = os.environ.get("CHATBOT_FILES_BUCKET_NAME") -MAX_FILE_SIZE = 100 * 1000 * 1000 # 100Mb +MAX_FILE_SIZE = 10 * 1000 * 1000 # 10Mb s3_client = boto3.client( "s3", @@ -79,7 +79,6 @@ def generate_user_presigned_get(user_id: str, file_name: str, expiration=3600): if not user_id or len(user_id) < 10: raise genai_core.types.CommonError("User not set") - file_name = os.path.basename(file_name) object_name = f"private/{user_id}/{file_name}" try: s3_client.head_object(Bucket=CHATBOT_FILES_BUCKET_NAME, Key=object_name) diff --git a/lib/shared/layers/python-sdk/python/genai_core/types.py b/lib/shared/layers/python-sdk/python/genai_core/types.py index 51ff26074..60440e25a 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/types.py +++ b/lib/shared/layers/python-sdk/python/genai_core/types.py @@ -46,6 +46,7 @@ class Modality(Enum): TEXT = "TEXT" IMAGE = "IMAGE" EMBEDDING = "EMBEDDING" + VIDEO = "VIDEO" class InferenceType(Enum): diff --git a/lib/shared/web-crawler-batch-job/index.py b/lib/shared/web-crawler-batch-job/index.py index 3c6e973a6..a7a6d91dd 100644 --- a/lib/shared/web-crawler-batch-job/index.py +++ b/lib/shared/web-crawler-batch-job/index.py @@ -12,7 +12,6 @@ def main(): - response = s3_client.get_object(Bucket=PROCESSING_BUCKET_NAME, Key=OBJECT_KEY) file_content = response["Body"].read().decode("utf-8") data = json.loads(file_content) diff --git a/lib/user-interface/public-website.ts b/lib/user-interface/public-website.ts index 7067f8d91..3d3d829bc 100644 --- a/lib/user-interface/public-website.ts +++ b/lib/user-interface/public-website.ts @@ -93,6 +93,7 @@ export class PublicWebsite extends Construct { };` + "font-src 'self' data:; " + // Fonts are inline in the CSS files `img-src 'self' ${fileBucketURLs.join(" ")} blob:; ` + + `media-src 'self' ${fileBucketURLs.join(" ")} blob:; ` + "style-src 'self' 'unsafe-inline';", // React uses inline style override: true, }, diff --git a/lib/user-interface/react-app/src/common/types.ts b/lib/user-interface/react-app/src/common/types.ts index 6de940cfb..e48428991 100644 --- a/lib/user-interface/react-app/src/common/types.ts +++ b/lib/user-interface/react-app/src/common/types.ts @@ -45,7 +45,7 @@ export type RagDocumentType = | "website" | "rssfeed" | "rsspost"; -export type Modality = "TEXT" | "IMAGE"; +export type Modality = "TEXT" | "IMAGE" | "VIDEO"; export type ModelInterface = "langchain" | "multimodal"; export interface DocumentSubscriptionToggleResult { diff --git a/lib/user-interface/react-app/src/components/chatbot/chat-input-panel.tsx b/lib/user-interface/react-app/src/components/chatbot/chat-input-panel.tsx index d2ec9611b..2f68ba2b8 100644 --- a/lib/user-interface/react-app/src/components/chatbot/chat-input-panel.tsx +++ b/lib/user-interface/react-app/src/components/chatbot/chat-input-panel.tsx @@ -34,6 +34,8 @@ import { LoadingStatus, ModelInterface } from "../../common/types"; import styles from "../../styles/chat.module.scss"; import ConfigDialog from 
"./config-dialog"; import ImageDialog from "./image-dialog"; +import VideoDialog from "./video-dialog"; + import { ChabotInputModality, ChatBotHeartbeatRequest, @@ -45,9 +47,10 @@ import { ChatBotMode, ChatBotRunRequest, ChatInputState, - ImageFile, + MediaFile, ChatBotModelInterface, ChatBotToken, + ChabotOutputModality, } from "./types"; import { sendQuery } from "../../graphql/mutations"; import { getSelectedModelMetadata, updateMessageHistoryRef } from "./utils"; @@ -99,7 +102,12 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { }); const [configDialogVisible, setConfigDialogVisible] = useState(false); const [imageDialogVisible, setImageDialogVisible] = useState(false); - const [files, setFiles] = useState([]); + const [videoDialogVisible, setVideoDialogVisible] = useState(false); + const [files, setFiles] = useState([]); + const [outputModality, setOutputModality] = useState( + ChabotOutputModality.Text + ); + const [readyState, setReadyState] = useState( ReadyState.UNINSTANTIATED ); @@ -309,8 +317,8 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { const apiClient = new ApiClient(appContext); const getSignedUrls = async () => { - if (props.configuration?.files as ImageFile[]) { - const files: ImageFile[] = []; + if (props.configuration?.files as MediaFile[]) { + const files: MediaFile[] = []; for await (const file of props.configuration?.files ?? []) { const signedUrl = ( await apiClient.sessions.getFileSignedUrl(file.key) @@ -334,7 +342,16 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { } }, [appContext, props.configuration]); - const hasImagesInChatHistory = function (): boolean { + // when a model is selected, set the default mode to first output modality + useEffect(() => { + if ((state.selectedModelMetadata?.outputModalities?.length || 0) > 0) { + setOutputModality( + state.selectedModelMetadata!.outputModalities[0] as ChabotOutputModality + ); + } + }, [state.selectedModelMetadata]); + + const hasFilesInChatHistory = function (): boolean { return ( messageHistoryRef.current.filter( (x) => @@ -354,20 +371,28 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { const { name, provider } = OptionsHelper.parseValue( state.selectedModel.value ); - + const mode = + outputModality === ChabotOutputModality.Text + ? ChatBotMode.Chain + : outputModality; const value = state.value.trim(); const request: ChatBotRunRequest = { action: ChatBotAction.Run, modelInterface: (props.configuration.files && props.configuration.files.length > 0) || - (hasImagesInChatHistory() && + (hasFilesInChatHistory() && state.selectedModelMetadata?.inputModalities.includes( ChabotInputModality.Image - )) - ? "multimodal" + )) || + state.selectedModelMetadata?.inputModalities.includes( + ChabotInputModality.Video + ) || + outputModality === ChabotOutputModality.Video || + outputModality === ChabotOutputModality.Image + ? ChatBotModelInterface.Multimodal : (state.selectedModelMetadata!.interface as ModelInterface), data: { - mode: ChatBotMode.Chain, + mode, text: value, files: props.configuration.files ?? 
[], modelName: name, @@ -379,6 +404,7 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { maxTokens: props.configuration.maxTokens, temperature: props.configuration.temperature, topP: props.configuration.topP, + seed: props.configuration.seed, }, }, }; @@ -488,6 +514,22 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { } > )} + {state.selectedModelMetadata?.inputModalities.includes( + ChabotInputModality.Video + ) && ( + + )} +
{state.selectedModelMetadata?.inputModalities.includes( @@ -521,19 +578,144 @@ export default function ChatInputPanel(props: ChatInputPanelProps) { ) && files.length > 0 && files.map((file, idx) => ( - setImageDialogVisible(true)} - src={file.url} - style={{ - borderRadius: "4px", - cursor: "pointer", - maxHeight: "30px", - float: "left", - marginRight: "8px", - }} - /> +
+ {file.type === ChabotInputModality.Image ? ( + setImageDialogVisible(true)} + src={file.url} + style={{ + borderRadius: "4px", + cursor: "pointer", + maxHeight: "30px", + float: "left", + marginRight: "8px", + }} + /> + ) : ( +
))} + {state.selectedModelMetadata?.outputModalities.includes( + ChabotInputModality.Text + ) && ( +
setOutputModality(ChabotOutputModality.Text)} + > + + + + + + +
+ )} + {state.selectedModelMetadata?.outputModalities.includes( + ChabotInputModality.Image + ) && ( +
setOutputModality(ChabotOutputModality.Image)} + > + + + + + + +
+ )} + {state.selectedModelMetadata?.outputModalities.includes( + ChabotInputModality.Video + ) && ( +
setOutputModality(ChabotOutputModality.Video)} + > + + + + + + + + +
+ )}
+ ); +} diff --git a/lib/user-interface/react-app/src/components/chatbot/chat-message-media-display.tsx b/lib/user-interface/react-app/src/components/chatbot/chat-message-media-display.tsx new file mode 100644 index 000000000..e77974c12 --- /dev/null +++ b/lib/user-interface/react-app/src/components/chatbot/chat-message-media-display.tsx @@ -0,0 +1,55 @@ +import { MediaFile, ChabotOutputModality, ChabotInputModality } from "./types"; +import styles from "../../styles/chat.module.scss"; + +interface ChatMessageMediaDisplayProps { + files: MediaFile[]; + isAIMessage?: boolean; +} + +export function ChatMessageMediaDisplay({ + files, + isAIMessage = false, +}: ChatMessageMediaDisplayProps) { + if (files.length === 0) return null; + + return ( +
+ {files.map((file: MediaFile) => ( +
+ {(isAIMessage && file.type === ChabotOutputModality.Image) || + (!isAIMessage && file.type === ChabotInputModality.Image) ? ( + + { + + ) : ( + + )} +
+ ))} +
+ ); +} diff --git a/lib/user-interface/react-app/src/components/chatbot/chat-message-metadata.tsx b/lib/user-interface/react-app/src/components/chatbot/chat-message-metadata.tsx new file mode 100644 index 000000000..9682d03bd --- /dev/null +++ b/lib/user-interface/react-app/src/components/chatbot/chat-message-metadata.tsx @@ -0,0 +1,135 @@ +import { + ExpandableSection, + Button, + Popover, + StatusIndicator, + Tabs, + Textarea, +} from "@cloudscape-design/components"; +import { JsonView, darkStyles } from "react-json-view-lite"; +import { RagDocument } from "./types"; +import styles from "../../styles/chat.module.scss"; + +interface ChatMessageMetadataSectionProps { + metadata: any; // eslint-disable-line @typescript-eslint/no-explicit-any + showMetadata: boolean; + documentIndex: string; + promptIndex: string; + setDocumentIndex: (index: string) => void; + setPromptIndex: (index: string) => void; +} + +export function ChatMessageMetadata({ + metadata, + showMetadata, + documentIndex, + promptIndex, + setDocumentIndex, + setPromptIndex, +}: ChatMessageMetadataSectionProps) { + if (!showMetadata) return null; + + return ( + + level < 2} + data={JSON.parse(JSON.stringify(metadata).replace(/\\n/g, "\\\\n"))} + style={{ + ...darkStyles, + stringValue: "jsonStrings", + numberValue: "jsonNumbers", + booleanValue: "jsonBool", + nullValue: "jsonNull", + container: "jsonContainer", + }} + /> + {metadata.documents && metadata.documents.length > 0 && ( + <> +
+ + Copied to clipboard + + } + > +
+ ({ + id: `${i}`, + label: + p.metadata.path?.split("/").at(-1) ?? + p.metadata.title ?? + p.metadata.document_id.slice(-8), + href: p.metadata.path, + content: ( +