From 39cc9bd51a7d75253ecc9cce9fbe94fbbb36a151 Mon Sep 17 00:00:00 2001 From: johnlanni Date: Thu, 16 Jan 2025 14:30:47 +0800 Subject: [PATCH] update ai plugins doc --- .../ai/api-consumer/ai-token-ratelimit.md | 4 - .../en/plugins/ai/api-provider/ai-proxy.md | 1522 ++++++++++++++-- .../ai/api-consumer/ai-token-ratelimit.md | 4 - .../zh-cn/plugins/ai/api-provider/ai-proxy.md | 1538 +++++++++++++++-- 4 files changed, 2778 insertions(+), 290 deletions(-) diff --git a/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md b/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md index 7e8d74471c..0650bbf1b0 100644 --- a/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md +++ b/src/content/docs/latest/en/plugins/ai/api-consumer/ai-token-ratelimit.md @@ -6,10 +6,6 @@ description: AI Token Rate Limiting Plugin Configuration Reference ## Function Description The `ai-token-ratelimit` plugin implements token rate limiting based on specific key values. The key values can come from URL parameters, HTTP request headers, client IP addresses, consumer names, or key names in cookies. -**Notice** - -For this plugin to function, the [AI Observability Plugin](../api-o11y/ai-statistics.md) must also be enabled to achieve token count statistics. 
- ## Runtime Attributes Plugin execution phase: `default phase` Plugin execution priority: `600` diff --git a/src/content/docs/latest/en/plugins/ai/api-provider/ai-proxy.md b/src/content/docs/latest/en/plugins/ai/api-provider/ai-proxy.md index c9435dcffa..4400e248d0 100644 --- a/src/content/docs/latest/en/plugins/ai/api-provider/ai-proxy.md +++ b/src/content/docs/latest/en/plugins/ai/api-provider/ai-proxy.md @@ -1,203 +1,318 @@ --- title: AI Proxy keywords: [AI Gateway, AI Proxy] -description: AI Proxy plugin configuration reference +description: Reference for configuring the AI Proxy plugin --- + ## Function Description -`AI Proxy` plugin implements AI proxy functionality based on OpenAI API contracts. It currently supports AI service providers such as OpenAI, Azure OpenAI, Moonshot, and Qwen. + +The `AI Proxy` plugin implements AI proxy functionality based on the OpenAI API contract. It currently supports AI service providers such as OpenAI, Azure OpenAI, Moonshot, and Qwen. > **Note:** -> When the request path suffix matches `/v1/chat/completions`, corresponding to text generation scenarios, the request body will be parsed using OpenAI's text generation protocol and then converted to the corresponding LLM vendor's text generation protocol. -> -> When the request path suffix matches `/v1/embeddings`, corresponding to text vector scenarios, the request body will be parsed using OpenAI's text vector protocol and then converted to the corresponding LLM vendor's text vector protocol. -## Running Attributes -Plugin execution phase: `Default phase` +> When the request path suffix matches `/v1/chat/completions`, it corresponds to text-to-text scenarios. The request body will be parsed using OpenAI's text-to-text protocol and then converted to the corresponding LLM vendor's text-to-text protocol. + +> When the request path suffix matches `/v1/embeddings`, it corresponds to text vector scenarios. 
The request body will be parsed using OpenAI's text vector protocol and then converted to the corresponding LLM vendor's text vector protocol. + +## Execution Properties +Plugin execution phase: `Default Phase` Plugin execution priority: `100` + ## Configuration Fields + ### Basic Configuration -| Name | Data Type | Requirement | Default Value | Description | -|--------------|-------------|-------------|---------------|----------------------------------------| -| `provider` | object | Required | - | Information about the target AI service provider | - -The description of fields in `provider` is as follows: - -| Name | Data Type | Requirement | Default Value | Description | -|-------------------|------------------|-------------|---------------|-----------------------------------------------------------------------------------------------------------| -| `type` | string | Required | - | Name of the AI service provider | -| `apiTokens` | array of string | Optional | - | Tokens for authentication when accessing the AI service. If multiple tokens are provided, the plugin will randomly choose one when making requests. Some service providers only support one token configuration. | -| `timeout` | number | Optional | - | Timeout for accessing the AI service, in milliseconds. The default value is 120000, which is 2 minutes. | -| `modelMapping` | map of string | Optional | - | AI model mapping table for mapping model names in requests to supported model names by the service provider.
1. Supports prefix matching. For example, "gpt-3-*" matches all models whose names start with "gpt-3-";
2. Supports using "*" as a key to configure a general fallback mapping;
3. If the target name in the mapping is an empty string "", it means to retain the original model name. | -| `protocol` | string | Optional | - | The API interface contract provided by the plugin. Currently supports the following values: openai (default, uses OpenAI's interface contract), original (uses the original interface contract of the target service provider) | -| `context` | object | Optional | - | Configuration for AI conversation context information | -| `customSettings` | array of customSetting | Optional | - | Specify override or fill parameters for AI requests | - -The description of fields in `context` is as follows: - -| Name | Data Type | Requirement | Default Value | Description | -|-----------------|-------------|-------------|---------------|--------------------------------------------------| -| `fileUrl` | string | Required | - | URL of the file that stores AI conversation context. Only pure text file content is supported. | -| `serviceName` | string | Required | - | The complete name of the Higress backend service corresponding to the URL. | -| `servicePort` | number | Required | - | The access port of the Higress backend service corresponding to the URL. | - -The description of fields in `customSettings` is as follows: - -| Name | Data Type | Requirement | Default Value | Description | -|-------------|-------------------------|-------------|---------------|--------------------------------------------------------------------------| -| `name` | string | Required | - | Name of the parameter to set, e.g., `max_tokens` | -| `value` | string/int/float/bool | Required | - | Value for the parameter to set, e.g., 0 | -| `mode` | string | Optional | "auto" | Mode for parameter settings, can be set to "auto" or "raw". If "auto", parameter names will be automatically rewritten based on the protocol; if "raw", no rewriting or validation checks will be done. 
| -| `overwrite` | bool | Optional | true | If false, the parameter will only be filled if the user hasn't set it; otherwise, it will overwrite the user's original parameter settings. | - -Custom settings will follow the table below to replace corresponding fields based on `name` and protocol. Users need to fill in values that exist in the `settingName` column of the table. For example, if the user sets `name` to `max_tokens`, it will be replaced by `max_tokens` in the OpenAI protocol, and by `maxOutputTokens` in Gemini. `none` indicates that the protocol does not support this parameter. If `name` is not in this table or the corresponding protocol does not support this parameter, and if raw mode is not set, the configuration will not take effect. - -| settingName | openai | baidu | spark | qwen | gemini | hunyuan | claude | minimax | -|--------------|-------------|-------------------|-------------|-------------|------------------|-------------|-------------|--------------------| -| max_tokens | max_tokens | max_output_tokens | max_tokens | max_tokens | maxOutputTokens | none | max_tokens | tokens_to_generate | -| temperature | temperature | temperature | temperature | temperature | temperature | Temperature | temperature | temperature | -| top_p | top_p | top_p | none | top_p | topP | TopP | top_p | top_p | -| top_k | none | none | top_k | none | topK | none | top_k | none | -| seed | seed | none | none | seed | none | none | none | none | - -If raw mode is enabled, custom settings will directly use the input `name` and `value` to change the JSON content of the request without any restrictions or modifications to the parameter names. - -For most protocols, custom settings will modify or fill parameters at the root path of the JSON content. For the `qwen` protocol, the ai-proxy will configure under the `parameters` sub-path in JSON. For the `gemini` protocol, it will be configured under the `generation_config` sub-path. 
- -### Provider-Specific Configuration + +| Name | Data Type | Requirement | Default | Description | +|------------|--------|------|-----|------------------| +| `provider` | object | Required | - | Configures information for the target AI service provider | + +**Details for the `provider` configuration fields:** + +| Name | Data Type | Requirement | Default | Description | +| -------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `type` | string | Required | - | Name of the AI service provider | +| `apiTokens` | array of string | Optional | - | Tokens used for authentication when accessing AI services. If multiple tokens are configured, the plugin randomly selects one for each request. Some service providers only support configuring a single token. | +| `timeout` | number | Optional | - | Timeout for accessing AI services, in milliseconds. The default value is 120000, which equals 2 minutes. | +| `modelMapping` | map of string | Optional | - | Mapping table for AI models, used to map model names in requests to names supported by the service provider.
1. Supports prefix matching. For example, "gpt-3-*" matches all model names starting with "gpt-3-";
2. Supports using "*" as a key for a general fallback mapping;
3. If the mapped target name is an empty string "", the original model name is preserved. | +| `protocol` | string | Optional | - | API contract provided by the plugin. Currently supports the following values: openai (default, uses OpenAI's interface contract), original (uses the raw interface contract of the target service provider) | +| `context` | object | Optional | - | Configuration for AI conversation context information | +| `customSettings` | array of customSetting | Optional | - | Specifies overrides or fills parameters for AI requests | + +**Details for the `context` configuration fields:** + +| Name | Data Type | Requirement | Default | Description | +|---------------|--------|------|-----|----------------------------------| +| `fileUrl` | string | Required | - | File URL to save AI conversation context. Only supports file content of plain text type | +| `serviceName` | string | Required | - | Full name of the Higress backend service corresponding to the URL | +| `servicePort` | number | Required | - | Port for accessing the Higress backend service corresponding to the URL | + +**Details for the `customSettings` configuration fields:** + +| Name | Data Type | Requirement | Default | Description | +| ----------- | --------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------- | +| `name` | string | Required | - | Name of the parameter to set, e.g., `max_tokens` | +| `value` | string/int/float/bool | Required | - | Value of the parameter to set, e.g., 0 | +| `mode` | string | Optional | "auto" | Mode for setting the parameter, can be set to "auto" or "raw"; if "auto", the parameter name will be automatically rewritten based on the protocol; if "raw", no rewriting or restriction checks will be applied | +| `overwrite` | bool | Optional | true | If false, the parameter is only filled if the user has not set it; otherwise, it directly overrides the user's 
existing parameter settings | + +The `custom-setting` adheres to the following table, replacing the corresponding field based on `name` and protocol. Users need to fill in values from the `settingName` column that exists in the table. For instance, if a user sets `name` to `max_tokens`, in the openai protocol, it replaces `max_tokens`; for gemini, it replaces `maxOutputTokens`. `"none"` indicates that the protocol does not support this parameter. If `name` is not in this table or the corresponding protocol does not support the parameter, and "raw" mode is not set, the configuration will not take effect. + +| settingName | openai | baidu | spark | qwen | gemini | hunyuan | claude | minimax | +| ----------- | ----------- | ----------------- | ----------- | ----------- | --------------- | ----------- | ----------- | ------------------ | +| max_tokens | max_tokens | max_output_tokens | max_tokens | max_tokens | maxOutputTokens | none | max_tokens | tokens_to_generate | +| temperature | temperature | temperature | temperature | temperature | temperature | Temperature | temperature | temperature | +| top_p | top_p | top_p | none | top_p | topP | TopP | top_p | top_p | +| top_k | none | none | top_k | none | topK | none | top_k | none | +| seed | seed | none | none | seed | none | none | none | none | + +If raw mode is enabled, `custom-setting` will directly alter the JSON content using the input `name` and `value`, without any restrictions or modifications to the parameter names. +For most protocols, `custom-setting` modifies or fills parameters at the root path of the JSON content. For the `qwen` protocol, ai-proxy configures under the `parameters` subpath. For the `gemini` protocol, it configures under the `generation_config` subpath. + +### Provider-Specific Configurations + #### OpenAI -The `type` corresponding to OpenAI is `openai`. 
Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|------------------------------------------------------------------------------------| -| `openaiCustomUrl` | string | Optional | - | Custom backend URL based on OpenAI protocol, e.g., www.example.com/myai/v1/chat/completions | -| `responseJsonSchema` | object | Optional | - | Predefined Json Schema that OpenAI responses must satisfy, currently only supported by specific models. | +For OpenAI, the corresponding `type` is `openai`. Its unique configuration fields include: + +| Name | Data Type | Requirement | Default | Description | +|-------------------|----------|----------|--------|-------------------------------------------------------------------------------| +| `openaiCustomUrl` | string | Optional | - | Custom backend URL based on the OpenAI protocol, e.g., www.example.com/myai/v1/chat/completions | +| `responseJsonSchema` | object | Optional | - | Predefined Json Schema that OpenAI responses must adhere to; note that currently only a few specific models support this usage| #### Azure OpenAI -The `type` corresponding to Azure OpenAI is `azure`. Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|-----------------------------------------------------------------------| -| `azureServiceUrl` | string | Required | - | URL of Azure OpenAI service, must include `api-version` query parameter. | -**Note:** Azure OpenAI only supports the configuration of one API Token. +For Azure OpenAI, the corresponding `type` is `azure`. 
Its unique configuration field is: + +| Name | Data Type | Filling Requirements | Default Value | Description | +|---------------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------------| +| `azureServiceUrl` | string | Required | - | The URL of the Azure OpenAI service, must include the `api-version` query parameter. | + +**Note:** Azure OpenAI only supports configuring one API Token. #### Moonshot -The `type` corresponding to Moonshot is `moonshot`. Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|-----------------------------------------------------------------------| -| `moonshotFileId` | string | Optional | - | File ID uploaded to Moonshot via the file interface, its content will be used as the context for AI conversation. Cannot be configured simultaneously with the `context` field. | +For Moonshot, the corresponding `type` is `moonshot`. Its unique configuration field is: + +| Name | Data Type | Filling Requirements | Default Value | Description | +|-------------------|-------------|----------------------|---------------|-----------------------------------------------------------------------------------------------------------------| +| `moonshotFileId` | string | Optional | - | The file ID uploaded via the file interface to Moonshot, whose content will be used as context for AI conversations. Cannot be configured with the `context` field. | + +#### Qwen (Tongyi Qwen) -#### Qwen -The `type` corresponding to Qwen is `qwen`. Its specific configuration fields are as follows: +For Qwen (Tongyi Qwen), the corresponding `type` is `qwen`. 
Its unique configuration fields are: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|----------------------|-------------|---------------|----------------------------------------------------------------------| -| `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in internet search functionality of Qwen. | -| `qwenFileIds` | array of string | Optional | - | File IDs uploaded to Dashscope via the file interface, its contents will be used as the context for AI conversation. Cannot be configured simultaneously with the `context` field. | +| Name | Data Type | Filling Requirements | Default Value | Description | +|--------------------|-----------------|----------------------|---------------|------------------------------------------------------------------------------------------------------------------------| +| `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in Internet search function provided by Qwen. | +| `qwenFileIds` | array of string | Optional | - | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured with the `context` field. | #### Baichuan AI -The `type` corresponding to Baichuan AI is `baichuan`. It has no specific configuration fields. -#### Yi -The `type` corresponding to Yi is `yi`. It has no specific configuration fields. +For Baichuan AI, the corresponding `type` is `baichuan`. It has no unique configuration fields. + +#### Yi (Zero One Universe) + +For Yi (Zero One Universe), the corresponding `type` is `yi`. It has no unique configuration fields. #### Zhipu AI -The `type` corresponding to Zhipu AI is `zhipuai`. It has no specific configuration fields. + +For Zhipu AI, the corresponding `type` is `zhipuai`. It has no unique configuration fields. #### DeepSeek -The `type` corresponding to DeepSeek is `deepseek`. It has no specific configuration fields. 
+ +For DeepSeek, the corresponding `type` is `deepseek`. It has no unique configuration fields. #### Groq -The `type` corresponding to Groq is `groq`. It has no specific configuration fields. -#### Baidu -The `type` corresponding to Baidu is `baidu`. It has no specific configuration fields. +For Groq, the corresponding `type` is `groq`. It has no unique configuration fields. -#### AI360 -The `type` corresponding to AI360 is `ai360`. It has no specific configuration fields. +#### ERNIE Bot -#### Mistral -The `type` corresponding to Mistral is `mistral`. It has no specific configuration fields. +For ERNIE Bot, the corresponding `type` is `baidu`. It has no unique configuration fields. + +### 360 Brain + +For 360 Brain, the corresponding `type` is `ai360`. It has no unique configuration fields. + +### Mistral + +For Mistral, the corresponding `type` is `mistral`. It has no unique configuration fields. #### MiniMax -The `type` corresponding to MiniMax is `minimax`. Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-----------------------------------------|---------------|--------------------------------------------------------------------| -| `minimaxGroupId` | string | Required when using `abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, or `abab5.5-chat` models | - | When using these models, ChatCompletion Pro will be used, and `groupID` needs to be set. | +For MiniMax, the corresponding `type` is `minimax`. 
Its unique configuration field is: + +| Name | Data Type | Filling Requirements | Default Value | Description | +| ---------------- | -------- | --------------------- |---------------|------------------------------------------------------------------------------------------------------------| +| `minimaxGroupId` | string | Required when using models `abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, `abab5.5-chat` | - | When using models `abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, `abab5.5-chat`, Minimax uses ChatCompletion Pro and requires setting the groupID. | #### Anthropic Claude -The `type` corresponding to Anthropic Claude is `claude`. Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|----------------------------------------------------------------------| -| `claudeVersion` | string | Optional | - | The API version for Claude service, defaults to 2023-06-01 | +For Anthropic Claude, the corresponding `type` is `claude`. Its unique configuration field is: + +| Name | Data Type | Filling Requirements | Default Value | Description | +|------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------------| +| `claudeVersion` | string | Optional | - | The version of the Claude service's API, default is 2023-06-01. | #### Ollama -The `type` corresponding to Ollama is `ollama`. 
Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|---------------------------------------------------------------------| -| `ollamaServerHost` | string | Required | - | Host address for the Ollama server | -| `ollamaServerPort` | number | Required | - | Port number for the Ollama server, defaults to 11434 | +For Ollama, the corresponding `type` is `ollama`. Its unique configuration field is: + +| Name | Data Type | Filling Requirements | Default Value | Description | +|-------------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------| +| `ollamaServerHost` | string | Required | - | The host address of the Ollama server. | +| `ollamaServerPort` | number | Required | - | The port number of the Ollama server, defaults to 11434. | #### Hunyuan -The `type` corresponding to Hunyuan is `hunyuan`. Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|---------------------------------------------------------------------| -| `hunyuanAuthId` | string | Required | - | ID used for Hunyuan authentication with version v3 | -| `hunyuanAuthKey` | string | Required | - | Key used for Hunyuan authentication with version v3 | +For Hunyuan, the corresponding `type` is `hunyuan`. Its unique configuration fields are: + +| Name | Data Type | Filling Requirements | Default Value | Description | +|-------------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------| +| `hunyuanAuthId` | string | Required | - | Hunyuan authentication ID for version 3 authentication. 
| +| `hunyuanAuthKey` | string | Required | - | Hunyuan authentication key for version 3 authentication. | #### Stepfun -The `type` corresponding to Stepfun is `stepfun`. It has no specific configuration fields. + +For Stepfun, the corresponding `type` is `stepfun`. It has no unique configuration fields. #### Cloudflare Workers AI -The `type` corresponding to Cloudflare Workers AI is `cloudflare`. Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|---------------------------------------------------------------------| -| `cloudflareAccountId` | string | Required | - | [Cloudflare Account ID](https://developers.cloudflare.com/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id) | +For Cloudflare Workers AI, the corresponding `type` is `cloudflare`. Its unique configuration field is: + +| Name | Data Type | Filling Requirements | Default Value | Description | +|-------------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------| +| `cloudflareAccountId` | string | Required | - | [Cloudflare Account ID](https://developers.cloudflare.com/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id). | #### Spark -The `type` corresponding to Spark is `spark`. It has no specific configuration fields. -The `apiTokens` field value for iFlytek’s Spark cognitive large model is `APIKey:APISecret`. That is, fill in your own APIKey and APISecret, separated by `:`. +For Spark, the corresponding `type` is `spark`. It has no unique configuration fields. + +The `apiTokens` field value for Xunfei Spark (Xunfei Star) is `APIKey:APISecret`. That is, enter your own APIKey and APISecret, separated by `:`. #### Gemini -The `type` corresponding to Gemini is `gemini`. 
Its specific configuration fields are as follows: -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|------------------------------------------------------------------| -| `geminiSafetySetting` | map of string | Optional | - | Gemini AI content filtering and safety level settings. Refer to [Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings). | +For Gemini, the corresponding `type` is `gemini`. Its unique configuration field is: -#### DeepL -The `type` corresponding to DeepL is `deepl`. Its specific configuration fields are as follows: +| Name | Data Type | Filling Requirements | Default Value | Description | +|---------------------|----------|----------------------|---------------|---------------------------------------------------------------------------------------------------------| +| `geminiSafetySetting` | map of string | Optional | - | Gemini AI content filtering and safety level settings. Refer to [Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings). | -| Name | Data Type | Requirement | Default Value | Description | -|------------------------|-----------|-------------|---------------|--------------------------------------------------| -| `targetLang` | string | Required | - | Target language required by DeepL translation service. | +### DeepL -#### Cohere -The `type` corresponding to Cohere is `cohere`. It has no specific configuration fields. +For DeepL, the corresponding `type` is `deepl`. Its unique configuration field is: + +| Name | Data Type | Requirement | Default | Description | +| ------------ | --------- | ----------- | ------- | ------------------------------------ | +| `targetLang` | string | Required | - | The target language required by the DeepL translation service | ## Usage Examples -### Using OpenAI Protocol to Proxy Azure OpenAI Service -Using the most basic Azure OpenAI service with no context configured. 
+ +### Using OpenAI Protocol Proxy for Azure OpenAI Service + +Using the basic Azure OpenAI service without configuring any context. **Configuration Information** + ```yaml provider: type: azure apiTokens: - "YOUR_AZURE_OPENAI_API_TOKEN" - azureServiceUrl: "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview" + azureServiceUrl: "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview", +``` + +**Request Example** + +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "temperature": 0.3 +} +``` + +**Response Example** + +```json +{ + "choices": [ + { + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Hello! I am an AI assistant, here to answer your questions and provide assistance. 
Is there anything I can help you with?", + "role": "assistant" + } + } + ], + "created": 1714807624, + "id": "chatcmpl-abcdefg1234567890", + "model": "gpt-35-turbo-16k", + "object": "chat.completion", + "prompt_filter_results": [ + { + "prompt_index": 0, + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + } + } + ], + "system_fingerprint": null, + "usage": { + "completion_tokens": 40, + "prompt_tokens": 15, + "total_tokens": 55 + } +} ``` -### Using OpenAI Protocol to Proxy Qwen Service -Using Qwen service with a model mapping from OpenAI large models to Qwen. + +### Using OpenAI Protocol Proxy for Qwen Service + +Using Qwen service and configuring the mapping relationship between OpenAI large models and Qwen models. **Configuration Information** + ```yaml provider: type: qwen @@ -212,49 +327,465 @@ provider: 'text-embedding-v1': 'text-embedding-v1' '*': "qwen-turbo" ``` -### Using original protocol to Proxy Baichuan AI proxy application + +**AI Conversation Request Example** + +URL: http://your-domain/v1/chat/completions + +Request Example: + +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "temperature": 0.3 +} +``` + +Response Example: + +```json +{ + "id": "c2518bd3-0f46-97d1-be34-bb5777cb3108", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "I am Qwen, an AI assistant developed by Alibaba Cloud. I can answer various questions, provide information, and engage in conversations with users. How can I assist you?" 
+ }, + "finish_reason": "stop" + } + ], + "created": 1715175072, + "model": "qwen-turbo", + "object": "chat.completion", + "usage": { + "prompt_tokens": 24, + "completion_tokens": 33, + "total_tokens": 57 + } +} +``` + +**Multimodal Model API Request Example (Applicable to `qwen-vl-plus` and `qwen-vl-max` Models)** + +URL: http://your-domain/v1/chat/completions + +Request Example: + +```json +{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg" + } + }, + { + "type": "text", + "text": "Where is this picture from?" + } + ] + } + ], + "temperature": 0.3 +} +``` + +Response Example: + +```json +{ + "id": "17c5955d-af9c-9f28-bbde-293a9c9a3515", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": [ + { + "text": "This photo depicts a woman and a dog on a beach. As I cannot access specific geographical information, I cannot pinpoint the exact location of this beach. However, visually, it appears to be a sandy coastline along a coastal area with waves breaking on the shore. Such scenes can be found in many beautiful seaside locations worldwide. If you need more precise information, please provide additional context or descriptive details." 
+ } + ] + }, + "finish_reason": "stop" + } + ], + "created": 1723949230, + "model": "qwen-vl-plus", + "object": "chat.completion", + "usage": { + "prompt_tokens": 1279, + "completion_tokens": 78 + } +} +``` + +**Text Embedding Request Example** + +URL: http://your-domain/v1/embeddings + +Request Example: + +```json +{ + "model": "text-embedding-v1", + "input": "Hello" +} +``` + +Response Example: + +```json +{ + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [ + -1.0437825918197632, + 5.208984375, + 3.0483806133270264, + -1.7897135019302368, + -2.0107421875, + ..., + 0.8125, + -1.1759847402572632, + 0.8174641728401184, + 1.0432943105697632, + -0.5885213017463684 + ] + } + ], + "model": "text-embedding-v1", + "usage": { + "prompt_tokens": 1, + "total_tokens": 1 + } +} +``` + +### Using Qwen Service with Pure Text Context Information + +Using Qwen service while configuring pure text context information. + **Configuration Information** + ```yaml provider: type: qwen apiTokens: - - "YOUR_DASHSCOPE_API_TOKEN" - protocol: original + - "YOUR_QWEN_API_TOKEN" + modelMapping: + "*": "qwen-turbo" + context: + - fileUrl: "http://file.default.svc.cluster.local/ai/context.txt", + serviceName: "file.dns", + servicePort: 80 ``` -### Using OpenAI Protocol to Proxy Doubao Large Model Service + +**Request Example** + +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "Please summarize the content" + } + ], + "temperature": 0.3 +} +``` + +**Response Example** + +```json +{ + "id": "cmpl-77861a17681f4987ab8270dbf8001936", + "object": "chat.completion", + "created": 9756990, + "model": "moonshot-v1-128k", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The content of this document is about..." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 20181, + "completion_tokens": 439, + "total_tokens": 20620 + } +} +``` + +### Using Qwen Service with Native File Context + +Uploading files to Qwen in advance to use them as context when utilizing its AI service. + **Configuration Information** + ```yaml provider: + type: qwen + apiTokens: + - "YOUR_QWEN_API_TOKEN" + modelMapping: + "*": "qwen-long" # Qwen's file context can only be used in the qwen-long model + qwenFileIds: + - "file-fe-xxx" + - "file-fe-yyy" +``` + +**Request Example** + +```json +{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "Please summarize the content" + } + ], + "temperature": 0.3 +} +``` + +**Response Example** + +```json +{ + "output": { + "choices": [ + { + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "You uploaded two files, `context.txt` and `context_2.txt`, which seem to contain information about..." + } + } + ] + }, + "usage": { + "total_tokens": 2023, + "output_tokens": 530, + "input_tokens": 1493 + }, + "request_id": "187e99ba-5b64-9ffe-8f69-01dafbaf6ed7" +} +``` + +### Forwards requests to AliCloud Bailian with the "original" protocol + +**Configuration Information** + +```yaml +activeProviderId: my-qwen +providers: + - id: my-qwen + type: qwen + apiTokens: + - "YOUR_DASHSCOPE_API_TOKEN" + protocol: original +``` + +**Example Request** + +```json +{ + "input": { + "prompt": "What is Dubbo?" + }, + "parameters": {}, + "debug": {} +} +``` + +**Example Response** + +```json +{ + "output": { + "finish_reason": "stop", + "session_id": "677e7e8fbb874e1b84792b65042e1599", + "text": "Apache Dubbo is a..." 
+ }, + "usage": { + "models": [ + { + "output_tokens": 449, + "model_id": "qwen-max", + "input_tokens": 282 + } + ] + }, + "request_id": "b59e45e3-5af4-91df-b7c6-9d746fd3297c" +} +``` + +### Using OpenAI Protocol Proxy for Doubao Service + +```yaml +activeProviderId: my-doubao +providers: +- id: my-doubao type: doubao apiTokens: - - "YOUR_DOUBAO_API_KEY" + - YOUR_DOUBAO_API_KEY modelMapping: '*': YOUR_DOUBAO_ENDPOINT timeout: 1200000 ``` -### Using Moonshot with its native file context -Pre-upload a file to Moonshot to use its content as context for its AI service. + +### Using original Protocol Proxy for Coze applications + +```yaml +provider: + type: coze + apiTokens: + - YOUR_COZE_API_KEY + protocol: original +``` + +### Utilizing Moonshot with its Native File Context + +Upload files to Moonshot in advance and use its AI services based on file content. **Configuration Information** + ```yaml provider: type: moonshot apiTokens: - "YOUR_MOONSHOT_API_TOKEN" - moonshotFileId: "YOUR_MOONSHOT_FILE_ID" + moonshotFileId: "YOUR_MOONSHOT_FILE_ID", modelMapping: '*': "moonshot-v1-32k" ``` -### Using OpenAI Protocol to Proxy Groq Service + +**Example Request** + +```json +{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "Please summarize the content" + } + ], + "temperature": 0.3 +} +``` + +**Example Response** + +```json +{ + "id": "cmpl-e5ca873642ca4f5d8b178c1742f9a8e8", + "object": "chat.completion", + "created": 1872961, + "model": "moonshot-v1-128k", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "The content of the text is about a payment platform named ‘xxxx’..." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 498, + "total_tokens": 509 + } +} +``` + +### Using OpenAI Protocol Proxy for Groq Service + **Configuration Information** + ```yaml provider: type: groq apiTokens: - "YOUR_GROQ_API_TOKEN" ``` -### Using OpenAI Protocol to Proxy Claude Service + +**Example Request** + +```json +{ + "model": "llama3-8b-8192", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ] +} +``` + +**Example Response** + +```json +{ + "id": "chatcmpl-26733989-6c52-4056-b7a9-5da791bd7102", + "object": "chat.completion", + "created": 1715917967, + "model": "llama3-8b-8192", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "😊 Ni Hao! (That's \"hello\" in Chinese!)\n\nI am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I'm not a human, but a computer program designed to simulate conversations and answer questions to the best of my ability. I'm happy to chat with you in Chinese or help with any questions or topics you'd like to discuss! 😊" + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 16, + "prompt_time": 0.005, + "completion_tokens": 89, + "completion_time": 0.104, + "total_tokens": 105, + "total_time": 0.109 + }, + "system_fingerprint": "fp_dadc9d6142", + "x_groq": { + "id": "req_01hy2awmcxfpwbq56qh6svm7qz" + } +} +``` + +### Using OpenAI Protocol Proxy for Claude Service + **Configuration Information** + ```yaml provider: type: claude @@ -262,8 +793,52 @@ provider: - "YOUR_CLAUDE_API_TOKEN" version: "2023-06-01" ``` -### Using OpenAI Protocol to Proxy Hunyuan Service + +**Example Request** + +```json +{ + "model": "claude-3-opus-20240229", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" 
+ } + ] +} +``` + +**Example Response** + +```json +{ + "id": "msg_01Jt3GzyjuzymnxmZERJguLK", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello, I am a conversation system developed by Anthropic, a company specializing in artificial intelligence. My name is Claude, a friendly and knowledgeable chatbot. Nice to meet you! I can engage in discussions on various topics, answer questions, provide suggestions, and assist you. I'll do my best to give you helpful responses. I hope we have a pleasant exchange!" + }, + "finish_reason": "stop" + } + ], + "created": 1717385918, + "model": "claude-3-opus-20240229", + "object": "chat.completion", + "usage": { + "prompt_tokens": 16, + "completion_tokens": 126, + "total_tokens": 142 + } +} +``` + +### Using OpenAI Protocol Proxy for Hunyuan Service + **Configuration Information** + ```yaml provider: type: "hunyuan" @@ -275,8 +850,61 @@ provider: modelMapping: "*": "hunyuan-lite" ``` -### Using OpenAI Protocol to Proxy Baidu Wenxin Service + +**Example Request** + +Request script: + +```shell +curl --location 'http:///v1/chat/completions' \ +--header 'Content-Type: application/json' \ +--data '{ + "model": "gpt-3", + "messages": [ + { + "role": "system", + "content": "You are a professional developer!" + }, + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "temperature": 0.3, + "stream": false +}' +``` + +**Example Response** + +```json +{ + "id": "fd140c3e-0b69-4b19-849b-d354d32a6162", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "Hello! I am a professional developer." 
+ }, + "finish_reason": "stop" + } + ], + "created": 1717493117, + "model": "hunyuan-lite", + "object": "chat.completion", + "usage": { + "prompt_tokens": 15, + "completion_tokens": 9, + "total_tokens": 24 + } +} +``` + +### Using OpenAI Protocol Proxy for ERNIE Bot Service + **Configuration Information** + ```yaml provider: type: baidu @@ -286,8 +914,52 @@ provider: 'gpt-3': "ERNIE-4.0" '*': "ERNIE-4.0" ``` -### Using OpenAI Protocol to Proxy MiniMax Service + +**Request Example** + +```json +{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "stream": false +} +``` + +**Response Example** + +```json +{ + "id": "as-e90yfg1pk1", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello, I am ERNIE Bot. I can interact with people, answer questions, assist in creation, and efficiently provide information, knowledge, and inspiration." + }, + "finish_reason": "stop" + } + ], + "created": 1717251488, + "model": "ERNIE-4.0", + "object": "chat.completion", + "usage": { + "prompt_tokens": 4, + "completion_tokens": 33, + "total_tokens": 37 + } +} +``` + +### Using OpenAI Protocol Proxy for MiniMax Service + **Configuration Information** + ```yaml provider: type: minimax @@ -299,13 +971,63 @@ provider: "*": "abab6.5g-chat" minimaxGroupId: "YOUR_MINIMAX_GROUP_ID" ``` -### Using OpenAI Protocol to Proxy AI360 Service + +**Request Example** + +```json +{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "stream": false +} +``` + +**Response Example** + +```json +{ + "id": "02b2251f8c6c09d68c1743f07c72afd7", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "Hello! I am MM Intelligent Assistant, a large language model developed by MiniMax. I can help answer questions, provide information, and engage in conversations. 
How can I assist you?", + "role": "assistant" + } + } + ], + "created": 1717760544, + "model": "abab6.5s-chat", + "object": "chat.completion", + "usage": { + "total_tokens": 106 + }, + "input_sensitive": false, + "output_sensitive": false, + "input_sensitive_type": 0, + "output_sensitive_type": 0, + "base_resp": { + "status_code": 0, + "status_msg": "" + } +} +``` + +### Using OpenAI Protocol Proxy for 360 Brain Services + **Configuration Information** + ```yaml provider: type: ai360 apiTokens: - - "YOUR_MINIMAX_API_TOKEN" + - "YOUR_AI360_API_TOKEN" modelMapping: "gpt-4o": "360gpt-turbo-responsibility-8k" "gpt-4": "360gpt2-pro" @@ -313,8 +1035,108 @@ provider: "text-embedding-3-small": "embedding_s1_v1.2" "*": "360gpt-pro" ``` -### Using OpenAI Protocol to Proxy Cloudflare Workers AI Service + +**Request Example** + +```json +{ + "model": "gpt-4o", + "messages": [ + { + "role": "system", + "content": "You are a professional developer!" + }, + { + "role": "user", + "content": "Hello, who are you?" + } + ] +} +``` + +**Response Example** + +```json +{ + "choices": [ + { + "message": { + "role": "assistant", + "content": "Hello, I am 360 Brain, a large language model. I can assist with answering various questions, providing information, engaging in conversations, and more. How can I assist you?" + }, + "finish_reason": "", + "index": 0 + } + ], + "created": 1724257207, + "id": "5e5c94a2-d989-40b5-9965-5b971db941fe", + "model": "360gpt-turbo", + "object": "", + "usage": { + "completion_tokens": 33, + "prompt_tokens": 24, + "total_tokens": 57 + }, + "messages": [ + { + "role": "system", + "content": "You are a professional developer!" + }, + { + "role": "user", + "content": "Hello, who are you?" 
+ } + ], + "context": null +} +``` + +**Text Embedding Request Example** + +**URL**: http://your-domain/v1/embeddings + +**Request Example** + +```json +{ + "input":["Hello"], + "model":"text-embedding-3-small" +} +``` + +**Response Example** + +```json +{ + "data": [ + { + "embedding": [ + -0.011237, + -0.015433, + ..., + -0.028946, + -0.052778, + 0.003768, + -0.007917, + -0.042201 + ], + "index": 0, + "object": "" + } + ], + "model": "embedding_s1_v1.2", + "object": "", + "usage": { + "prompt_tokens": 2, + "total_tokens": 2 + } +} +``` + +### Using OpenAI Protocol Proxy for Cloudflare Workers AI Service + **Configuration Information** + ```yaml provider: type: cloudflare @@ -324,8 +1146,48 @@ provider: modelMapping: "*": "@cf/meta/llama-3-8b-instruct" ``` -### Using OpenAI Protocol to Proxy Spark Service + +**Request Example** + +```json +{ + "model": "gpt-3.5", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} +``` + +**Response Example** + +```json +{ + "id": "id-1720367803430", + "object": "chat.completion", + "created": 1720367803, + "model": "@cf/meta/llama-3-8b-instruct", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I'm not a human, but a computer program designed to simulate conversation and answer questions to the best of my knowledge. I can be used to generate text on a wide range of topics, from science and history to entertainment and culture." 
+ }, + "logprobs": null, + "finish_reason": "stop" + } + ] +} +``` + +### Using OpenAI Protocol Proxy for Spark Service + **Configuration Information** + ```yaml provider: type: spark @@ -336,8 +1198,55 @@ provider: "gpt-4": "generalv3" "*": "general" ``` -### Using OpenAI Protocol to Proxy Gemini Service + +**Request Example** + +```json +{ + "model": "gpt-4o", + "messages": [ + { + "role": "system", + "content": "You are a professional developer!" + }, + { + "role": "user", + "content": "Hello, who are you?" + } + ], + "stream": false +} +``` + +**Response Example** + +```json +{ + "id": "cha000c23c6@dx190ef0b4b96b8f2532", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! I am a professional developer skilled in programming and problem-solving. What can I assist you with?" + } + } + ], + "created": 1721997415, + "model": "generalv3.5", + "object": "chat.completion", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 19, + "total_tokens": 29 + } +} +``` + +### Utilizing OpenAI Protocol Proxy for Gemini Services + **Configuration Information** + ```yaml provider: type: gemini @@ -346,13 +1255,57 @@ provider: modelMapping: "*": "gemini-pro" geminiSafetySetting: - "HARM_CATEGORY_SEXUALLY_EXPLICIT": "BLOCK_NONE" - "HARM_CATEGORY_HATE_SPEECH": "BLOCK_NONE" - "HARM_CATEGORY_HARASSMENT": "BLOCK_NONE" - "HARM_CATEGORY_DANGEROUS_CONTENT": "BLOCK_NONE" + "HARM_CATEGORY_SEXUALLY_EXPLICIT" :"BLOCK_NONE" + "HARM_CATEGORY_HATE_SPEECH" :"BLOCK_NONE" + "HARM_CATEGORY_HARASSMENT" :"BLOCK_NONE" + "HARM_CATEGORY_DANGEROUS_CONTENT" :"BLOCK_NONE" +``` + +**Request Example** + +```json +{ + "model": "gpt-3.5", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ], + "stream": false +} +``` + +**Response Example** + +```json +{ + "id": "chatcmpl-b010867c-0d3f-40ba-95fd-4e8030551aeb", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "I am a large multi-modal model, trained by Google. 
I am designed to provide information and answer questions to the best of my abilities." + }, + "finish_reason": "stop" + } + ], + "created": 1722756984, + "model": "gemini-pro", + "object": "chat.completion", + "usage": { + "prompt_tokens": 5, + "completion_tokens": 29, + "total_tokens": 34 + } +} ``` -### Using OpenAI Protocol to Proxy DeepL Text Translation Service + +### Utilizing OpenAI Protocol Proxy for DeepL Text Translation Service + **Configuration Information** + ```yaml provider: type: deepl @@ -360,8 +1313,10 @@ provider: - "YOUR_DEEPL_API_TOKEN" targetLang: "ZH" ``` + **Request Example** -In this context, `model` indicates the type of DeepL service, which can only be `Free` or `Pro`. The `content` sets the text to be translated; in the `role: system` `content`, context that may affect the translation but itself will not be translated can be included. For example, when translating product names, product descriptions can be passed as context, and this additional context may improve the quality of the translation. +Here, `model` denotes the service tier of DeepL and can only be either `Free` or `Pro`. The `content` field contains the text to be translated; within `role: system`, `content` may include context that influences the translation but isn't translated itself. For instance, when translating product names, including a product description as context could enhance translation quality. 
+ ```json { "model": "Free", @@ -379,17 +1334,19 @@ In this context, `model` indicates the type of DeepL service, which can only be ] } ``` + **Response Example** + ```json { "choices": [ { "index": 0, - "message": { "name": "EN", "role": "assistant", "content": "坐庄" } + "message": { "name": "EN", "role": "assistant", "content": "operate a gambling establishment" } }, { "index": 1, - "message": { "name": "EN", "role": "assistant", "content": "中国银行" } + "message": { "name": "EN", "role": "assistant", "content": "Bank of China" } } ], "created": 1722747752, @@ -398,3 +1355,260 @@ In this context, `model` indicates the type of DeepL service, which can only be "usage": {} } ``` + +### Utilizing OpenAI Protocol Proxy for Together-AI Services + +**Configuration Information** +```yaml +provider: + type: together-ai + apiTokens: + - "YOUR_TOGETHER_AI_API_TOKEN" + modelMapping: + "*": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo" +``` + +**Request Example** +```json +{ + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} +``` + +**Response Example** +```json +{ + "id": "8f5809d54b73efac", + "object": "chat.completion", + "created": 1734785851, + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "prompt": [], + "choices": [ + { + "finish_reason": "eos", + "seed": 12830868308626506000, + "logprobs": null, + "index": 0, + "message": { + "role": "assistant", + "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. 
How can I assist you today?", + "tool_calls": [] + } + } + ], + "usage": { + "prompt_tokens": 33, + "completion_tokens": 61, + "total_tokens": 94 + } +} +``` + +## Full Configuration Example + +### Kubernetes Example + +Here's a full plugin configuration example using the OpenAI protocol proxy for Groq services. + +```yaml +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-proxy-groq + namespace: higress-system +spec: + matchRules: + - config: + provider: + type: groq + apiTokens: + - "YOUR_API_TOKEN" + ingress: + - groq + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + higress.io/backend-protocol: HTTPS + higress.io/destination: groq.dns + higress.io/proxy-ssl-name: api.groq.com + higress.io/proxy-ssl-server-name: "on" + labels: + higress.io/resource-definer: higress + name: groq + namespace: higress-system +spec: + ingressClassName: higress + rules: + - host: + http: + paths: + - backend: + resource: + apiGroup: networking.higress.io + kind: McpBridge + name: default + path: / + pathType: Prefix +--- +apiVersion: networking.higress.io/v1 +kind: McpBridge +metadata: + name: default + namespace: higress-system +spec: + registries: + - domain: api.groq.com + name: groq + port: 443 + type: dns +``` + +Access Example: + +```bash +curl "http:///v1/chat/completions" -H "Content-Type: application/json" -d '{ + "model": "llama3-8b-8192", + "messages": [ + { + "role": "user", + "content": "hello, who are you?" 
+ } + ] +}' +``` + +### Docker-Compose Example + +`docker-compose.yml` configuration file: + +```yaml +version: '3.7' +services: + envoy: + image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/envoy:1.20 + entrypoint: /usr/local/bin/envoy + # Enables debug level logging for easier debugging + command: -c /etc/envoy/envoy.yaml --component-log-level wasm:debug + networks: + - higress-net + ports: + - "10000:10000" + volumes: + - ./envoy.yaml:/etc/envoy/envoy.yaml + - ./plugin.wasm:/etc/envoy/plugin.wasm +networks: + higress-net: {} +``` + +`envoy.yaml` configuration file: + +```yaml +admin: + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: 9901 +static_resources: + listeners: + - name: listener_0 + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: 10000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + scheme_header_transformation: + scheme_to_overwrite: https + stat_prefix: ingress_http + # Outputs envoy logs to stdout + access_log: + - name: envoy.access_loggers.stdout + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog + # Modify as needed + route_config: + name: local_route + virtual_hosts: + - name: local_service + domains: [ "*" ] + routes: + - match: + prefix: "/" + route: + cluster: claude + timeout: 300s + http_filters: + - name: claude + typed_config: + "@type": type.googleapis.com/udpa.type.v1.TypedStruct + type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm + value: + config: + name: claude + vm_config: + runtime: envoy.wasm.runtime.v8 + code: + local: + filename: /etc/envoy/plugin.wasm + configuration: + "@type": "type.googleapis.com/google.protobuf.StringValue" + value: | # Plugin configuration + { + "provider": { + "type": "claude", + "apiTokens": [ + 
"YOUR_API_TOKEN" + ] + } + } + - name: envoy.filters.http.router + clusters: + - name: claude + connect_timeout: 30s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: claude + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.anthropic.com # Service address + port_value: 443 + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + "sni": "api.anthropic.com" +``` + +Access Example: + +```bash +curl "http://localhost:10000/v1/chat/completions" -H "Content-Type: application/json" -d '{ + "model": "claude-3-opus-20240229", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "hello, who are you?" + } + ] +}' +``` diff --git a/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md b/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md index c8c387e55f..239524fb73 100644 --- a/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md +++ b/src/content/docs/latest/zh-cn/plugins/ai/api-consumer/ai-token-ratelimit.md @@ -9,10 +9,6 @@ description: AI Token限流插件配置参考 `ai-token-ratelimit`插件实现了基于特定键值实现token限流,键值来源可以是 URL 参数、HTTP 请求头、客户端 IP 地址、consumer 名称、cookie中 key 名称 -**注意** - -此插件功能生效,需要同时开启[ AI 可观测插件](../api-o11y/ai-statistics.md),实现 token 数的统计 - ## 运行属性 插件执行阶段:`默认阶段` diff --git a/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md b/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md index c519f293ca..1fc7a160ab 100644 --- a/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md +++ b/src/content/docs/latest/zh-cn/plugins/ai/api-provider/ai-proxy.md @@ -31,15 +31,17 @@ description: AI 代理插件配置参考 `provider`的配置字段说明如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -| -------------- | --------------- | -------- | ------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `type` | string | 必填 | - | AI 服务提供商名称 | -| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 | -| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟 | -| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。
1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;
2. 支持使用 "*" 为键来配置通用兜底映射关系;
3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 | -| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) | -| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 | -| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------------------| --------------- | -------- | ------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------| +| `type` | string | 必填 | - | AI 服务提供商名称 | +| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 | +| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟 | +| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。
1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;
2. 支持使用 "*" 为键来配置通用兜底映射关系;
3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 | +| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) | +| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 | +| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 | +| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 | +| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 | `context`的配置字段说明如下: @@ -75,6 +77,24 @@ custom-setting会遵循如下表格,根据`name`和协议来替换对应的字 如果启用了raw模式,custom-setting会直接用输入的`name`和`value`去更改请求中的json内容,而不对参数名称做任何限制和修改。 对于大多数协议,custom-setting都会在json内容的根路径修改或者填充参数。对于`qwen`协议,ai-proxy会在json的`parameters`子路径下做配置。对于`gemini`协议,则会在`generation_config`子路径下做配置。 +`failover` 的配置字段说明如下: + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------------------|--------|-----------------|-------|-----------------------------| +| enabled | bool | 非必填 | false | 是否启用 apiToken 的 failover 机制 | +| failureThreshold | int | 非必填 | 3 | 触发 failover 连续请求失败的阈值(次数) | +| successThreshold | int | 非必填 | 1 | 健康检测的成功阈值(次数) | +| healthCheckInterval | int | 非必填 | 5000 | 健康检测的间隔时间,单位毫秒 | +| healthCheckTimeout | int | 非必填 | 5000 | 健康检测的超时时间,单位毫秒 | +| healthCheckModel | string | 启用 failover 时必填 | | 健康检测使用的模型 | + +`retryOnFailure` 的配置字段说明如下: + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------------------|--------|-----------------|-------|-------------| +| enabled | bool | 非必填 | false | 是否启用失败请求重试 | +| maxRetries | int | 非必填 | 1 | 最大重试次数 | +| retryTimeout | int | 非必填 | 30000 | 重试超时时间,单位毫秒 | ### 提供商特有配置 @@ -137,12 +157,24 @@ Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。 #### 文心一言(Baidu) -文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。 +文心一言所对应的 `type` 为 `baidu`。它特有的配置字段如下: + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|--------------------|-----------------|------|-----|-----------------------------------------------------------| +| `baiduAccessKeyAndSecret` | array of string | 必填 | - | Baidu 的 Access Key 和 Secret Key,中间用 `:` 分隔,用于申请 apiToken。 | +| 
`baiduApiTokenServiceName` | string | 必填 | - | 请求刷新百度 apiToken 服务名称。 | +| `baiduApiTokenServiceHost` | string | 非必填 | - | 请求刷新百度 apiToken 服务域名,默认是 iam.bj.baidubce.com。 | +| `baiduApiTokenServicePort` | int64 | 非必填 | - | 请求刷新百度 apiToken 服务端口,默认是 443。 | + #### 360智脑 360智脑所对应的 `type` 为 `ai360`。它并无特有的配置字段。 +#### GitHub模型 + +GitHub模型所对应的 `type` 为 `github`。它并无特有的配置字段。 + #### Mistral Mistral 所对应的 `type` 为 `mistral`。它并无特有的配置字段。 @@ -151,9 +183,10 @@ Mistral 所对应的 `type` 为 `mistral`。它并无特有的配置字段。 MiniMax所对应的 `type` 为 `minimax`。它特有的配置字段如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -| ---------------- | -------- | ------------------------------------------------------------ | ------ | ------------------------------------------------------------ | -| `minimaxGroupId` | string | 当使用`abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, `abab5.5-chat`四种模型时必填 | - | 当使用`abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, `abab5.5-chat`四种模型时会使用ChatCompletion Pro,需要设置groupID | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +| ---------------- | -------- | ------------------------------ | ------ |----------------------------------------------------------------| +| `minimaxApiType` | string | v2 和 pro 中选填一项 | v2 | v2 代表 ChatCompletion v2 API,pro 代表 ChatCompletion Pro API | +| `minimaxGroupId` | string | `minimaxApiType` 为 pro 时必填 | - | `minimaxApiType` 为 pro 时使用 ChatCompletion Pro API,需要设置 groupID | #### Anthropic Claude @@ -219,6 +252,9 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下: Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。 +#### Together-AI +Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。 + ## 用法示例 ### 使用 OpenAI 协议代理 Azure OpenAI 服务 @@ -235,6 +271,90 @@ provider: azureServiceUrl: "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview", ``` +**请求示例** + +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" 
+ } + ], + "temperature": 0.3 +} +``` + +**响应示例** + +```json +{ + "choices": [ + { + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "你好!我是一个AI助手,可以回答你的问题和提供帮助。有什么我可以帮到你的吗?", + "role": "assistant" + } + } + ], + "created": 1714807624, + "id": "chatcmpl-abcdefg1234567890", + "model": "gpt-35-turbo-16k", + "object": "chat.completion", + "prompt_filter_results": [ + { + "prompt_index": 0, + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + } + } + ], + "system_fingerprint": null, + "usage": { + "completion_tokens": 40, + "prompt_tokens": 15, + "total_tokens": 55 + } +} +``` + ### 使用 OpenAI 协议代理通义千问服务 使用通义千问服务,并配置从 OpenAI 大模型到通义千问的模型映射关系。 @@ -256,204 +376,1109 @@ provider: '*': "qwen-turbo" ``` -### 使用original协议代理百炼智能体应用 -**配置信息** +**AI 对话请求示例** -```yaml -provider: - type: qwen - apiTokens: - - "YOUR_DASHSCOPE_API_TOKEN" - protocol: original -``` +URL: http://your-domain/v1/chat/completions -### 使用 OpenAI 协议代理豆包大模型服务 +请求示例: -**配置信息** +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ], + "temperature": 0.3 +} +``` -```yaml -provider: - type: doubao - apiTokens: - - YOUR_DOUBAO_API_KEY - modelMapping: - '*': YOUR_DOUBAO_ENDPOINT - timeout: 1200000 +响应示例: + +```json +{ + "id": "c2518bd3-0f46-97d1-be34-bb5777cb3108", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "我是通义千问,由阿里云开发的AI助手。我可以回答各种问题、提供信息和与用户进行对话。有什么我可以帮助你的吗?" 
+ }, + "finish_reason": "stop" + } + ], + "created": 1715175072, + "model": "qwen-turbo", + "object": "chat.completion", + "usage": { + "prompt_tokens": 24, + "completion_tokens": 33, + "total_tokens": 57 + } +} ``` -### 使用月之暗面配合其原生的文件上下文 +**多模态模型 API 请求示例(适用于 `qwen-vl-plus` 和 `qwen-vl-max` 模型)** -提前上传文件至月之暗面,以文件内容作为上下文使用其 AI 服务。 +URL: http://your-domain/v1/chat/completions -**配置信息** +请求示例: -```yaml -provider: - type: moonshot - apiTokens: - - "YOUR_MOONSHOT_API_TOKEN" - moonshotFileId: "YOUR_MOONSHOT_FILE_ID", - modelMapping: - '*': "moonshot-v1-32k" +```json +{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg" + } + }, + { + "type": "text", + "text": "这个图片是哪里?" + } + ] + } + ], + "temperature": 0.3 +} ``` -### 使用 OpenAI 协议代理 Groq 服务 - -**配置信息** +响应示例: -```yaml -provider: - type: groq - apiTokens: - - "YOUR_GROQ_API_TOKEN" +```json +{ + "id": "17c5955d-af9c-9f28-bbde-293a9c9a3515", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": [ + { + "text": "这张照片显示的是一位女士和一只狗在海滩上。由于我无法获取具体的地理位置信息,所以不能确定这是哪个地方的海滩。但是从视觉内容来看,它可能是一个位于沿海地区的沙滩海岸线,并且有海浪拍打着岸边。这样的场景在全球许多美丽的海滨地区都可以找到。如果您需要更精确的信息,请提供更多的背景或细节描述。" + } + ] + }, + "finish_reason": "stop" + } + ], + "created": 1723949230, + "model": "qwen-vl-plus", + "object": "chat.completion", + "usage": { + "prompt_tokens": 1279, + "completion_tokens": 78 + } +} ``` -### 使用 OpenAI 协议代理 Claude 服务 +**文本向量请求示例** -**配置信息** +URL: http://your-domain/v1/embeddings -```yaml -provider: - type: claude - apiTokens: - - "YOUR_CLAUDE_API_TOKEN" - version: "2023-06-01" +请求示例: + +```json +{ + "model": "text-embedding-v1", + "input": "Hello" +} ``` -### 使用 OpenAI 协议代理混元服务 +响应示例: + +```json +{ + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [ + -1.0437825918197632, + 5.208984375, + 3.0483806133270264, + -1.7897135019302368, + 
-2.0107421875, + ..., + 0.8125, + -1.1759847402572632, + 0.8174641728401184, + 1.0432943105697632, + -0.5885213017463684 + ] + } + ], + "model": "text-embedding-v1", + "usage": { + "prompt_tokens": 1, + "total_tokens": 1 + } +} +``` + +### 使用通义千问配合纯文本上下文信息 + +使用通义千问服务,同时配置纯文本上下文信息。 **配置信息** ```yaml provider: - type: "hunyuan" - hunyuanAuthKey: "" + type: qwen apiTokens: - - "" - hunyuanAuthId: "" - timeout: 1200000 + - "YOUR_QWEN_API_TOKEN" modelMapping: - "*": "hunyuan-lite" + "*": "qwen-turbo" + context: + - fileUrl: "http://file.default.svc.cluster.local/ai/context.txt", + serviceName: "file.dns", + servicePort: 80 ``` -### 使用 OpenAI 协议代理百度文心一言服务 +**请求示例** -**配置信息** +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "请概述文案内容" + } + ], + "temperature": 0.3 +} +``` -```yaml -provider: - type: baidu - apiTokens: - - "YOUR_BAIDU_API_TOKEN" - modelMapping: - 'gpt-3': "ERNIE-4.0" - '*': "ERNIE-4.0" +**响应示例** + +```json +{ + "id": "cmpl-77861a17681f4987ab8270dbf8001936", + "object": "chat.completion", + "created": 9756990, + "model": "moonshot-v1-128k", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "这份文案是一份关于..." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 20181, + "completion_tokens": 439, + "total_tokens": 20620 + } +} ``` -### 使用 OpenAI 协议代理MiniMax服务 +### 使用通义千问配合其原生的文件上下文 + +提前上传文件至通义千问,以文件内容作为上下文使用其 AI 服务。 **配置信息** ```yaml provider: - type: minimax + type: qwen apiTokens: - - "YOUR_MINIMAX_API_TOKEN" + - "YOUR_QWEN_API_TOKEN" modelMapping: - "gpt-3": "abab6.5g-chat" - "gpt-4": "abab6.5-chat" - "*": "abab6.5g-chat" - minimaxGroupId: "YOUR_MINIMAX_GROUP_ID" + "*": "qwen-long" # 通义千问的文件上下文只能在 qwen-long 模型下使用 + qwenFileIds: + - "file-fe-xxx" + - "file-fe-yyy" ``` -### 使用 OpenAI 协议代理360智脑服务 +**请求示例** -**配置信息** +```json +{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "请概述文案内容" + } + ], + "temperature": 0.3 +} +``` -```yaml -provider: - type: ai360 - apiTokens: - - "YOUR_MINIMAX_API_TOKEN" - modelMapping: - "gpt-4o": "360gpt-turbo-responsibility-8k" - "gpt-4": "360gpt2-pro" - "gpt-3.5": "360gpt-turbo" - "text-embedding-3-small": "embedding_s1_v1.2" - "*": "360gpt-pro" +**响应示例** + +```json +{ + "output": { + "choices": [ + { + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "您上传了两个文件,`context.txt` 和 `context_2.txt`,它们似乎都包含了关于xxxx" + } + } + ] + }, + "usage": { + "total_tokens": 2023, + "output_tokens": 530, + "input_tokens": 1493 + }, + "request_id": "187e99ba-5b64-9ffe-8f69-01dafbaf6ed7" +} ``` -### 使用 OpenAI 协议代理 Cloudflare Workers AI 服务 +### 使用original协议代理百炼智能体应用 **配置信息** ```yaml provider: - type: cloudflare + type: qwen apiTokens: - - "YOUR_WORKERS_AI_API_TOKEN" - cloudflareAccountId: "YOUR_CLOUDFLARE_ACCOUNT_ID" - modelMapping: - "*": "@cf/meta/llama-3-8b-instruct" + - "YOUR_DASHSCOPE_API_TOKEN" + protocol: original ``` -### 使用 OpenAI 协议代理Spark服务 +**请求实例** +```json +{ + "input": { + "prompt": "介绍一下Dubbo" + }, + "parameters": {}, + "debug": {} +} +``` + +**响应实例** + +```json +{ + "output": { + "finish_reason": "stop", + "session_id": "677e7e8fbb874e1b84792b65042e1599", + "text": "Apache 
Dubbo 是一个..." + }, + "usage": { + "models": [ + { + "output_tokens": 449, + "model_id": "qwen-max", + "input_tokens": 282 + } + ] + }, + "request_id": "b59e45e3-5af4-91df-b7c6-9d746fd3297c" +} +``` + +### 使用 OpenAI 协议代理豆包大模型服务 **配置信息** ```yaml provider: - type: spark + type: doubao apiTokens: - - "APIKey:APISecret" + - YOUR_DOUBAO_API_KEY modelMapping: - "gpt-4o": "generalv3.5" - "gpt-4": "generalv3" - "*": "general" + '*': YOUR_DOUBAO_ENDPOINT + timeout: 1200000 ``` -### 使用 OpenAI 协议代理 gemini 服务 +### 使用 original 协议代理 Coze 应用 **配置信息** ```yaml provider: - type: gemini + type: coze apiTokens: - - "YOUR_GEMINI_API_TOKEN" - modelMapping: - "*": "gemini-pro" - geminiSafetySetting: - "HARM_CATEGORY_SEXUALLY_EXPLICIT" :"BLOCK_NONE" - "HARM_CATEGORY_HATE_SPEECH" :"BLOCK_NONE" - "HARM_CATEGORY_HARASSMENT" :"BLOCK_NONE" - "HARM_CATEGORY_DANGEROUS_CONTENT" :"BLOCK_NONE" + - YOUR_COZE_API_KEY + protocol: original ``` -### 使用 OpenAI 协议代理 DeepL 文本翻译服务 +### 使用月之暗面配合其原生的文件上下文 + +提前上传文件至月之暗面,以文件内容作为上下文使用其 AI 服务。 **配置信息** ```yaml provider: - type: deepl + type: moonshot apiTokens: - - "YOUR_DEEPL_API_TOKEN" - targetLang: "ZH" + - "YOUR_MOONSHOT_API_TOKEN" + moonshotFileId: "YOUR_MOONSHOT_FILE_ID", + modelMapping: + '*': "moonshot-v1-32k" ``` **请求示例** -此处 `model` 表示 DeepL 的服务类型,只能填 `Free` 或 `Pro`。`content` 中设置需要翻译的文本;在 `role: system` 的 `content` 中可以包含可能影响翻译但本身不会被翻译的上下文,例如翻译产品名称时,可以将产品描述作为上下文传递,这种额外的上下文可能会提高翻译的质量。 ```json { - "model": "Free", + "model": "gpt-4-turbo", "messages": [ { - "role": "system", - "content": "money" - }, - { + "role": "user", + "content": "请概述文案内容" + } + ], + "temperature": 0.3 +} +``` + +**响应示例** + +```json +{ + "id": "cmpl-e5ca873642ca4f5d8b178c1742f9a8e8", + "object": "chat.completion", + "created": 1872961, + "model": "moonshot-v1-128k", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "文案内容是关于一个名为“xxxx”的支付平台..." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 11, + "completion_tokens": 498, + "total_tokens": 509 + } +} +``` + +### 使用 OpenAI 协议代理 Groq 服务 + +**配置信息** + +```yaml +provider: + type: groq + apiTokens: + - "YOUR_GROQ_API_TOKEN" +``` + +**请求示例** + +```json +{ + "model": "llama3-8b-8192", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ] +} +``` + +**响应示例** + +```json +{ + "id": "chatcmpl-26733989-6c52-4056-b7a9-5da791bd7102", + "object": "chat.completion", + "created": 1715917967, + "model": "llama3-8b-8192", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "😊 Ni Hao! (That's \"hello\" in Chinese!)\n\nI am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I'm not a human, but a computer program designed to simulate conversations and answer questions to the best of my ability. I'm happy to chat with you in Chinese or help with any questions or topics you'd like to discuss! 😊" + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 16, + "prompt_time": 0.005, + "completion_tokens": 89, + "completion_time": 0.104, + "total_tokens": 105, + "total_time": 0.109 + }, + "system_fingerprint": "fp_dadc9d6142", + "x_groq": { + "id": "req_01hy2awmcxfpwbq56qh6svm7qz" + } +} +``` + +### 使用 OpenAI 协议代理 Claude 服务 + +**配置信息** + +```yaml +provider: + type: claude + apiTokens: + - "YOUR_CLAUDE_API_TOKEN" + version: "2023-06-01" +``` + +**请求示例** + +```json +{ + "model": "claude-3-opus-20240229", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ] +} +``` + +**响应示例** + +```json +{ + "id": "msg_01Jt3GzyjuzymnxmZERJguLK", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "您好,我是一个由人工智能公司Anthropic开发的聊天助手。我的名字叫Claude,是一个聪明友善、知识渊博的对话系统。很高兴认识您!我可以就各种话题与您聊天,回答问题,提供建议和帮助。我会尽最大努力给您有帮助的回复。希望我们能有个愉快的交流!" 
+ }, + "finish_reason": "stop" + } + ], + "created": 1717385918, + "model": "claude-3-opus-20240229", + "object": "chat.completion", + "usage": { + "prompt_tokens": 16, + "completion_tokens": 126, + "total_tokens": 142 + } +} +``` + +### 使用 OpenAI 协议代理混元服务 + +**配置信息** + +```yaml +provider: + type: "hunyuan" + hunyuanAuthKey: "" + apiTokens: + - "" + hunyuanAuthId: "" + timeout: 1200000 + modelMapping: + "*": "hunyuan-lite" +``` + +**请求示例** + +请求脚本: + +```shell +curl --location 'http:///v1/chat/completions' \ +--header 'Content-Type: application/json' \ +--data '{ + "model": "gpt-3", + "messages": [ + { + "role": "system", + "content": "你是一个名专业的开发人员!" + }, + { + "role": "user", + "content": "你好,你是谁?" + } + ], + "temperature": 0.3, + "stream": false +}' +``` + +**响应示例** + +```json +{ + "id": "fd140c3e-0b69-4b19-849b-d354d32a6162", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "你好!我是一名专业的开发人员。" + }, + "finish_reason": "stop" + } + ], + "created": 1717493117, + "model": "hunyuan-lite", + "object": "chat.completion", + "usage": { + "prompt_tokens": 15, + "completion_tokens": 9, + "total_tokens": 24 + } +} +``` + +### 使用 OpenAI 协议代理百度文心一言服务 + +**配置信息** + +```yaml +provider: + type: baidu + apiTokens: + - "YOUR_BAIDU_API_TOKEN" + modelMapping: + 'gpt-3': "ERNIE-4.0" + '*': "ERNIE-4.0" +``` + +**请求示例** + +```json +{ + "model": "gpt-4-turbo", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" 
+ } + ], + "stream": false +} +``` + +**响应示例** + +```json +{ + "id": "as-e90yfg1pk1", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "你好,我是文心一言,英文名是ERNIE Bot。我能够与人对话互动,回答问题,协助创作,高效便捷地帮助人们获取信息、知识和灵感。" + }, + "finish_reason": "stop" + } + ], + "created": 1717251488, + "model": "ERNIE-4.0", + "object": "chat.completion", + "usage": { + "prompt_tokens": 4, + "completion_tokens": 33, + "total_tokens": 37 + } +} +``` + +### 使用 OpenAI 协议代理MiniMax服务 + +**配置信息** + +```yaml +provider: + type: minimax + apiTokens: + - "YOUR_MINIMAX_API_TOKEN" + modelMapping: + "gpt-3": "abab6.5s-chat" + "gpt-4": "abab6.5g-chat" + "*": "abab6.5t-chat" +``` + +**请求示例** + +```json +{ + "model": "gpt-3", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ], + "stream": false +} +``` + +**响应示例** + +```json +{ + "id": "03ac4fcfe1c6cc9c6a60f9d12046e2b4", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "你好,我是一个由MiniMax公司研发的大型语言模型,名为MM智能助理。我可以帮助回答问题、提供信息、进行对话和执行多种语言处理任务。如果你有任何问题或需要帮助,请随时告诉我!", + "role": "assistant", + "name": "MM智能助理", + "audio_content": "" + } + } + ], + "created": 1734155471, + "model": "abab6.5s-chat", + "object": "chat.completion", + "usage": { + "total_tokens": 116, + "total_characters": 0, + "prompt_tokens": 70, + "completion_tokens": 46 + }, + "input_sensitive": false, + "output_sensitive": false, + "input_sensitive_type": 0, + "output_sensitive_type": 0, + "output_sensitive_int": 0, + "base_resp": { + "status_code": 0, + "status_msg": "" + } +} +``` + +### 使用 OpenAI 协议代理 GitHub 模型服务 + +**配置信息** + +```yaml +provider: + type: github + apiTokens: + - "YOUR_GITHUB_ACCESS_TOKEN" + modelMapping: + "gpt-4o": "gpt-4o" + "gpt-4": "Phi-3.5-MoE-instruct" + "gpt-3.5": "cohere-command-r-08-2024" + "text-embedding-3-large": "text-embedding-3-large" +``` + +**请求示例** + +```json +{ + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." 
+ }, + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "temperature": 1.0, + "top_p": 1.0, + "max_tokens": 1000, + "model": "gpt-4o" +} +``` + +**响应示例** +```json +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "The capital of France is Paris.", + "role": "assistant" + } + } + ], + "created": 1728131051, + "id": "chatcmpl-AEy7PU2JImdsD1W6Jw8GigZSEnM2u", + "model": "gpt-4o-2024-08-06", + "object": "chat.completion", + "system_fingerprint": "fp_67802d9a6d", + "usage": { + "completion_tokens": 7, + "prompt_tokens": 24, + "total_tokens": 31 + } +} +``` + +**文本向量请求示例** + +```json +{ + "input": ["first phrase", "second phrase", "third phrase"], + "model": "text-embedding-3-large" +} +``` + +响应示例: + +```json +{ + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [ + -0.0012583479, + 0.0020349282, + ... + 0.012051377, + -0.0053306012, + 0.0060688322 + ] + } + ], + "model": "text-embedding-3-large", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } +} +``` + +### 使用 OpenAI 协议代理360智脑服务 + +**配置信息** + +```yaml +provider: + type: ai360 + apiTokens: + - "YOUR_360_API_TOKEN" + modelMapping: + "gpt-4o": "360gpt-turbo-responsibility-8k" + "gpt-4": "360gpt2-pro" + "gpt-3.5": "360gpt-turbo" + "text-embedding-3-small": "embedding_s1_v1.2" + "*": "360gpt-pro" +``` + +**请求示例** + +```json +{ + "model": "gpt-4o", + "messages": [ + { + "role": "system", + "content": "你是一个专业的开发人员!" + }, + { + "role": "user", + "content": "你好,你是谁?" + } + ] +} +``` + +**响应示例** + +```json +{ + "choices": [ + { + "message": { + "role": "assistant", + "content": "你好,我是360智脑,一个大型语言模型。我可以帮助回答各种问题、提供信息、进行对话等。有什么可以帮助你的吗?" 
+ }, + "finish_reason": "", + "index": 0 + } + ], + "created": 1724257207, + "id": "5e5c94a2-d989-40b5-9965-5b971db941fe", + "model": "360gpt-turbo", + "object": "", + "usage": { + "completion_tokens": 33, + "prompt_tokens": 24, + "total_tokens": 57 + }, + "messages": [ + { + "role": "system", + "content": "你是一个专业的开发人员!" + }, + { + "role": "user", + "content": "你好,你是谁?" + } + ], + "context": null +} +``` + +**文本向量请求示例** + +URL: http://your-domain/v1/embeddings + +请求示例: + +```json +{ + "input":["你好"], + "model":"text-embedding-3-small" +} +``` + +响应示例: + +```json +{ + "data": [ + { + "embedding": [ + -0.011237, + -0.015433, + ..., + -0.028946, + -0.052778, + 0.003768, + -0.007917, + -0.042201 + ], + "index": 0, + "object": "" + } + ], + "model": "embedding_s1_v1.2", + "object": "", + "usage": { + "prompt_tokens": 2, + "total_tokens": 2 + } +} +``` + +### 使用 OpenAI 协议代理 Cloudflare Workers AI 服务 + +**配置信息** + +```yaml +provider: + type: cloudflare + apiTokens: + - "YOUR_WORKERS_AI_API_TOKEN" + cloudflareAccountId: "YOUR_CLOUDFLARE_ACCOUNT_ID" + modelMapping: + "*": "@cf/meta/llama-3-8b-instruct" +``` + +**请求示例** + +```json +{ + "model": "gpt-3.5", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} +``` + +**响应示例** + +```json +{ + "id": "id-1720367803430", + "object": "chat.completion", + "created": 1720367803, + "model": "@cf/meta/llama-3-8b-instruct", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I'm not a human, but a computer program designed to simulate conversation and answer questions to the best of my knowledge. 
I can be used to generate text on a wide range of topics, from science and history to entertainment and culture.\n\nI'm a large language model, which means I've been trained on a massive dataset of text from the internet and can generate human-like responses. I can understand natural language and respond accordingly, making me suitable for tasks such as:\n\n* Answering questions on various topics\n* Generating text based on a given prompt\n* Translating text from one language to another\n* Summarizing long pieces of text\n* Creating chatbot dialogues\n\nI'm constantly learning and improving, so the more conversations I have with users like you, the better I'll become." + }, + "logprobs": null, + "finish_reason": "stop" + } + ] +} +``` + +### 使用 OpenAI 协议代理Spark服务 + +**配置信息** + +```yaml +provider: + type: spark + apiTokens: + - "APIKey:APISecret" + modelMapping: + "gpt-4o": "generalv3.5" + "gpt-4": "generalv3" + "*": "general" +``` + +**请求示例** + +```json +{ + "model": "gpt-4o", + "messages": [ + { + "role": "system", + "content": "你是一名专业的开发人员!" + }, + { + "role": "user", + "content": "你好,你是谁?" + } + ], + "stream": false +} +``` + +**响应示例** + +```json +{ + "id": "cha000c23c6@dx190ef0b4b96b8f2532", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "你好!我是一名专业的开发人员,擅长编程和解决技术问题。有什么我可以帮助你的吗?" 
+ } + } + ], + "created": 1721997415, + "model": "generalv3.5", + "object": "chat.completion", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 19, + "total_tokens": 29 + } +} +``` + +### 使用 OpenAI 协议代理 gemini 服务 + +**配置信息** + +```yaml +provider: + type: gemini + apiTokens: + - "YOUR_GEMINI_API_TOKEN" + modelMapping: + "*": "gemini-pro" + geminiSafetySetting: + "HARM_CATEGORY_SEXUALLY_EXPLICIT" :"BLOCK_NONE" + "HARM_CATEGORY_HATE_SPEECH" :"BLOCK_NONE" + "HARM_CATEGORY_HARASSMENT" :"BLOCK_NONE" + "HARM_CATEGORY_DANGEROUS_CONTENT" :"BLOCK_NONE" +``` + +**请求示例** + +```json +{ + "model": "gpt-3.5", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ], + "stream": false +} +``` + +**响应示例** + +```json +{ + "id": "chatcmpl-b010867c-0d3f-40ba-95fd-4e8030551aeb", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "I am a large multi-modal model, trained by Google. I am designed to provide information and answer questions to the best of my abilities." 
+ }, + "finish_reason": "stop" + } + ], + "created": 1722756984, + "model": "gemini-pro", + "object": "chat.completion", + "usage": { + "prompt_tokens": 5, + "completion_tokens": 29, + "total_tokens": 34 + } +} +``` + +### 使用 OpenAI 协议代理 DeepL 文本翻译服务 + +**配置信息** + +```yaml +provider: + type: deepl + apiTokens: + - "YOUR_DEEPL_API_TOKEN" + targetLang: "ZH" +``` + +**请求示例** +此处 `model` 表示 DeepL 的服务类型,只能填 `Free` 或 `Pro`。`content` 中设置需要翻译的文本;在 `role: system` 的 `content` 中可以包含可能影响翻译但本身不会被翻译的上下文,例如翻译产品名称时,可以将产品描述作为上下文传递,这种额外的上下文可能会提高翻译的质量。 + +```json +{ + "model": "Free", + "messages": [ + { + "role": "system", + "content": "money" + }, + { "content": "sit by the bank" }, { @@ -483,3 +1508,260 @@ provider: } ``` +### 使用 OpenAI 协议代理 Together-AI 服务 + +**配置信息** +```yaml +provider: + type: together-ai + apiTokens: + - "YOUR_TOGETHER_AI_API_TOKEN" + modelMapping: + "*": "Qwen/Qwen2.5-72B-Instruct-Turbo" +``` + +**请求示例** +```json +{ + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "messages": [ + { + "role": "user", + "content": "Who are you?" + } + ] +} +``` + +**响应示例** +```json +{ + "id": "8f5809d54b73efac", + "object": "chat.completion", + "created": 1734785851, + "model": "Qwen/Qwen2.5-72B-Instruct-Turbo", + "prompt": [], + "choices": [ + { + "finish_reason": "eos", + "seed": 12830868308626506000, + "logprobs": null, + "index": 0, + "message": { + "role": "assistant", + "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. 
How can I assist you today?", + "tool_calls": [] + } + } + ], + "usage": { + "prompt_tokens": 33, + "completion_tokens": 61, + "total_tokens": 94 + } +} +``` + + +## 完整配置示例 + +### Kubernetes 示例 + +以下以使用 OpenAI 协议代理 Groq 服务为例,展示完整的插件配置示例。 + +```yaml +apiVersion: extensions.higress.io/v1alpha1 +kind: WasmPlugin +metadata: + name: ai-proxy-groq + namespace: higress-system +spec: + matchRules: + - config: + provider: + type: groq + apiTokens: + - "YOUR_API_TOKEN" + ingress: + - groq + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + higress.io/backend-protocol: HTTPS + higress.io/destination: groq.dns + higress.io/proxy-ssl-name: api.groq.com + higress.io/proxy-ssl-server-name: "on" + labels: + higress.io/resource-definer: higress + name: groq + namespace: higress-system +spec: + ingressClassName: higress + rules: + - host: + http: + paths: + - backend: + resource: + apiGroup: networking.higress.io + kind: McpBridge + name: default + path: / + pathType: Prefix +--- +apiVersion: networking.higress.io/v1 +kind: McpBridge +metadata: + name: default + namespace: higress-system +spec: + registries: + - domain: api.groq.com + name: groq + port: 443 + type: dns +``` + +访问示例: + +```bash +curl "http:///v1/chat/completions" -H "Content-Type: application/json" -d '{ + "model": "llama3-8b-8192", + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" 
+ } + ] +}' +``` + +### Docker-Compose 示例 + +`docker-compose.yml` 配置文件: + +```yaml +version: '3.7' +services: + envoy: + image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/envoy:1.20 + entrypoint: /usr/local/bin/envoy + # 开启了 debug 级别日志方便调试 + command: -c /etc/envoy/envoy.yaml --component-log-level wasm:debug + networks: + - higress-net + ports: + - "10000:10000" + volumes: + - ./envoy.yaml:/etc/envoy/envoy.yaml + - ./plugin.wasm:/etc/envoy/plugin.wasm +networks: + higress-net: {} +``` + +`envoy.yaml` 配置文件: + +```yaml +admin: + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: 9901 +static_resources: + listeners: + - name: listener_0 + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: 10000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + scheme_header_transformation: + scheme_to_overwrite: https + stat_prefix: ingress_http + # Output envoy logs to stdout + access_log: + - name: envoy.access_loggers.stdout + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog + # Modify as required + route_config: + name: local_route + virtual_hosts: + - name: local_service + domains: [ "*" ] + routes: + - match: + prefix: "/" + route: + cluster: claude + timeout: 300s + http_filters: + - name: claude + typed_config: + "@type": type.googleapis.com/udpa.type.v1.TypedStruct + type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm + value: + config: + name: claude + vm_config: + runtime: envoy.wasm.runtime.v8 + code: + local: + filename: /etc/envoy/plugin.wasm + configuration: + "@type": "type.googleapis.com/google.protobuf.StringValue" + value: | # 插件配置 + { + "provider": { + "type": "claude", + "apiTokens": [ + "YOUR_API_TOKEN" + ] + } + } + - name: envoy.filters.http.router + clusters: + - 
name: claude + connect_timeout: 30s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: claude + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.anthropic.com # API 服务地址 + port_value: 443 + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + "sni": "api.anthropic.com" +``` + +访问示例: + +```bash +curl "http://localhost:10000/v1/chat/completions" -H "Content-Type: application/json" -d '{ + "model": "claude-3-opus-20240229", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ] +}' +```