From dcec9d7bb7b530a01607f8edb7defe808f22b599 Mon Sep 17 00:00:00 2001 From: Xiao Ley Date: Mon, 24 Jun 2024 01:06:26 +0800 Subject: [PATCH] feat: add new features to enhance image and link handling in Jina tool (#5517) --- .../builtin/jina/tools/jina_reader.py | 19 ++++++--- .../builtin/jina/tools/jina_reader.yaml | 42 +++++++++++++++++++ .../builtin/jina/tools/jina_search.py | 9 ++++ .../builtin/jina/tools/jina_search.yaml | 42 +++++++++++++++++++ 4 files changed, 107 insertions(+), 5 deletions(-) diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.py b/api/core/tools/provider/builtin/jina/tools/jina_reader.py index b0bd4788466132..0d0eaef25b1005 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_reader.py +++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.py @@ -10,10 +10,10 @@ class JinaReaderTool(BuiltinTool): _jina_reader_endpoint = 'https://r.jina.ai/' - def _invoke(self, + def _invoke(self, user_id: str, - tool_parameters: dict[str, Any], - ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + tool_parameters: dict[str, Any], + ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: """ invoke tools """ @@ -34,6 +34,15 @@ def _invoke(self, if wait_for_selector is not None and wait_for_selector != '': headers['X-Wait-For-Selector'] = wait_for_selector + if tool_parameters.get('image_caption', False): + headers['X-With-Generated-Alt'] = 'true' + + if tool_parameters.get('gather_all_links_at_the_end', False): + headers['X-With-Links-Summary'] = 'true' + + if tool_parameters.get('gather_all_images_at_the_end', False): + headers['X-With-Images-Summary'] = 'true' + proxy_server = tool_parameters.get('proxy_server', None) if proxy_server is not None and proxy_server != '': headers['X-Proxy-Url'] = proxy_server @@ -42,12 +51,12 @@ def _invoke(self, headers['X-No-Cache'] = 'true' response = ssrf_proxy.get( - str(URL(self._jina_reader_endpoint + url)), + str(URL(self._jina_reader_endpoint + url)), headers=headers, timeout=(10, 60) ) if tool_parameters.get('summary', False): return self.create_text_message(self.summary(user_id, response.text)) - + return self.create_text_message(response.text) diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml index 703fa3d389ad75..5eb2692ea555da 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml +++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml @@ -51,6 +51,48 @@ parameters: pt_BR: css selector for waiting for specific elements llm_description: css selector of the target element to wait for form: form + - name: image_caption + type: boolean + required: false + default: false + label: + en_US: Image caption + zh_Hans: 图片说明 + pt_BR: Legenda da imagem + human_description: + en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." + zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。" + pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." + llm_description: Captions all images at the specified URL + form: form + - name: gather_all_links_at_the_end + type: boolean + required: false + default: false + label: + en_US: Gather all links at the end + zh_Hans: 将所有链接集中到最后 + pt_BR: Coletar todos os links ao final + human_description: + en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. + zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。 + pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. + llm_description: Gather all links at the end + form: form + - name: gather_all_images_at_the_end + type: boolean + required: false + default: false + label: + en_US: Gather all images at the end + zh_Hans: 将所有图片集中到最后 + pt_BR: Coletar todas as imagens ao final + human_description: + en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. + zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。 + pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. + llm_description: Gather all images at the end + form: form - name: proxy_server type: string required: false diff --git a/api/core/tools/provider/builtin/jina/tools/jina_search.py b/api/core/tools/provider/builtin/jina/tools/jina_search.py index c13f58d0cd163c..3eda2c5a22e47b 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_search.py +++ b/api/core/tools/provider/builtin/jina/tools/jina_search.py @@ -24,6 +24,15 @@ def _invoke( if 'api_key' in self.runtime.credentials and self.runtime.credentials.get('api_key'): headers['Authorization'] = "Bearer " + self.runtime.credentials.get('api_key') + if tool_parameters.get('image_caption', False): + headers['X-With-Generated-Alt'] = 'true' + + if tool_parameters.get('gather_all_links_at_the_end', False): + headers['X-With-Links-Summary'] = 'true' + + if tool_parameters.get('gather_all_images_at_the_end', False): + headers['X-With-Images-Summary'] = 'true' + proxy_server = tool_parameters.get('proxy_server', None) if proxy_server is not None and proxy_server != '': headers['X-Proxy-Url'] = proxy_server diff --git a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml index f3b6c0737a9699..da0a300c6c7520 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml +++ b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml @@ -22,6 +22,48 @@ parameters: zh_Hans: 在网络上搜索信息 llm_description: simple question to ask on the web form: llm + - name: image_caption + type: boolean + required: false + default: false + label: + en_US: Image caption + zh_Hans: 图片说明 + pt_BR: Legenda da imagem + human_description: + en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." + zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。" + pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." + llm_description: Captions all images at the specified URL + form: form + - name: gather_all_links_at_the_end + type: boolean + required: false + default: false + label: + en_US: Gather all links at the end + zh_Hans: 将所有链接集中到最后 + pt_BR: Coletar todos os links ao final + human_description: + en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. + zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。 + pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. + llm_description: Gather all links at the end + form: form + - name: gather_all_images_at_the_end + type: boolean + required: false + default: false + label: + en_US: Gather all images at the end + zh_Hans: 将所有图片集中到最后 + pt_BR: Coletar todas as imagens ao final + human_description: + en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. + zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。 + pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. + llm_description: Gather all images at the end + form: form - name: proxy_server type: string required: false