diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index b33e144f4ac..f035d546f28 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -10,6 +10,7 @@ const { Callback, createMetadataAggregator } = require('@librechat/agents'); const { Constants, + VisionModes, openAISchema, EModelEndpoint, anthropicSchema, @@ -196,6 +197,7 @@ class AgentClient extends BaseClient { this.options.req, attachments, this.options.agent.provider, + VisionModes.agents, ); message.image_urls = image_urls.length ? image_urls : undefined; return files; diff --git a/api/server/services/Endpoints/openAI/initialize.js b/api/server/services/Endpoints/openAI/initialize.js index b72b3d32c44..215b9437309 100644 --- a/api/server/services/Endpoints/openAI/initialize.js +++ b/api/server/services/Endpoints/openAI/initialize.js @@ -130,7 +130,7 @@ const initializeClient = async ({ if (optionsOnly) { const requestOptions = Object.assign( { - modelOptions: endpointOption.modelOptions, + modelOptions: endpointOption.model_parameters, }, clientOptions, ); diff --git a/api/server/services/Endpoints/openAI/llm.js b/api/server/services/Endpoints/openAI/llm.js index 3817224a4ba..bd51679e1b6 100644 --- a/api/server/services/Endpoints/openAI/llm.js +++ b/api/server/services/Endpoints/openAI/llm.js @@ -38,7 +38,6 @@ function getLLMConfig(apiKey, options = {}) { } = options; let llmConfig = { - model: 'gpt-4o-mini', streaming, }; diff --git a/api/server/services/Files/images/encode.js b/api/server/services/Files/images/encode.js index 05c9fc1d33f..f4579270199 100644 --- a/api/server/services/Files/images/encode.js +++ b/api/server/services/Files/images/encode.js @@ -1,6 +1,12 @@ const axios = require('axios'); -const { EModelEndpoint, FileSources, VisionModes } = require('librechat-data-provider'); -const { getStrategyFunctions } = require('../strategies'); +const { + FileSources, + VisionModes, + ImageDetail, + ContentTypes, + EModelEndpoint, +} = require('librechat-data-provider'); +const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { logger } = require('~/config'); /** @@ -79,7 +85,7 @@ async function encodeAndFormat(req, files, endpoint, mode) { promises.push(preparePayload(req, file)); } - const detail = req.body.imageDetail ?? 'auto'; + const detail = req.body.imageDetail ?? ImageDetail.auto; /** @type {Array<[MongoFile, string]>} */ const formattedImages = await Promise.all(promises); @@ -104,7 +110,7 @@ async function encodeAndFormat(req, files, endpoint, mode) { } const imagePart = { - type: 'image_url', + type: ContentTypes.IMAGE_URL, image_url: { url: imageContent.startsWith('http') ? imageContent @@ -113,6 +119,12 @@ async function encodeAndFormat(req, files, endpoint, mode) { }, }; + if (mode === VisionModes.agents) { + result.image_urls.push(imagePart); + result.files.push(fileMetadata); + continue; + } + if (endpoint && endpoint === EModelEndpoint.google && mode === VisionModes.generative) { delete imagePart.image_url; imagePart.inlineData = { diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index b95e0f138e1..6ff6a469263 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -752,6 +752,7 @@ export const visionModels = [ ]; export enum VisionModes { generative = 'generative', + agents = 'agents', } export function validateVisionModel({