From 5242e5d2aca7e6ce6aab958bcccc9abf205dacef Mon Sep 17 00:00:00 2001 From: archer <545436317@qq.com> Date: Wed, 20 Mar 2024 22:53:59 +0800 Subject: [PATCH] fix: vector queue --- .../content/docs/development/upgrading/47.md | 12 ++++++ .../service/core/dataset/training/schema.ts | 7 +--- .../service/core/dataset/training/utils.ts | 2 +- .../app/src/service/events/generateVector.ts | 41 ++++++++----------- projects/app/src/service/mongo.ts | 8 ++-- 5 files changed, 36 insertions(+), 34 deletions(-) diff --git a/docSite/content/docs/development/upgrading/47.md b/docSite/content/docs/development/upgrading/47.md index 034b3514ca8..25ec9ceeae3 100644 --- a/docSite/content/docs/development/upgrading/47.md +++ b/docSite/content/docs/development/upgrading/47.md @@ -11,6 +11,18 @@ weight: 826 增加一些 Boolean 值,用于决定不同功能块可以使用哪些模型,同时增加了模型的 logo:[点击查看最新的配置文件](/docs/development/configuration/) +## 初始化脚本 + +从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成自己域名 + +```bash +curl --location --request POST 'https://{{host}}/api/admin/initv47' \ +--header 'rootkey: {{rootkey}}' \ +--header 'Content-Type: application/json' +``` + +脚本功能: +1. 初始化插件的 parentId ## V4.7 更新说明 diff --git a/packages/service/core/dataset/training/schema.ts b/packages/service/core/dataset/training/schema.ts index 66662da883e..96610bb7ae4 100644 --- a/packages/service/core/dataset/training/schema.ts +++ b/packages/service/core/dataset/training/schema.ts @@ -13,11 +13,6 @@ import { export const DatasetTrainingCollectionName = 'dataset.trainings'; const TrainingDataSchema = new Schema({ - userId: { - // abandon - type: Schema.Types.ObjectId, - ref: 'user' - }, teamId: { type: Schema.Types.ObjectId, ref: TeamCollectionName, @@ -100,7 +95,7 @@ try { // lock training data; delete training data TrainingDataSchema.index({ teamId: 1, collectionId: 1 }); // get training data and sort - TrainingDataSchema.index({ lockTime: 1, mode: 1, weight: -1 }); + TrainingDataSchema.index({ mode: 1, lockTime: 1, weight: -1 }); TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 * 60 }); // 7 days } catch (error) { console.log(error); diff --git a/projects/app/src/service/core/dataset/training/utils.ts b/projects/app/src/service/core/dataset/training/utils.ts index df3f033b3f8..d94cd352159 100644 --- a/projects/app/src/service/core/dataset/training/utils.ts +++ b/projects/app/src/service/core/dataset/training/utils.ts @@ -24,7 +24,7 @@ export const createDatasetTrainingMongoWatch = () => { export const startTrainingQueue = (fast?: boolean) => { const max = global.systemEnv?.qaMaxProcess || 10; - for (let i = 0; i < max; i++) { + for (let i = 0; i < (fast ? max : 1); i++) { generateQA(); generateVector(); } diff --git a/projects/app/src/service/events/generateVector.ts b/projects/app/src/service/events/generateVector.ts index a9a37cfd17a..6845b918039 100644 --- a/projects/app/src/service/events/generateVector.ts +++ b/projects/app/src/service/events/generateVector.ts @@ -30,31 +30,26 @@ export async function generateVector(): Promise { try { const data = await MongoDatasetTraining.findOneAndUpdate( { - lockTime: { $lte: addMinutes(new Date(), -1) }, - mode: TrainingModeEnum.chunk + mode: TrainingModeEnum.chunk, + lockTime: { $lte: addMinutes(new Date(), -1) } }, { lockTime: new Date() } - ) - .sort({ - weight: -1 - }) - .select({ - _id: 1, - userId: 1, - teamId: 1, - tmbId: 1, - datasetId: 1, - collectionId: 1, - q: 1, - a: 1, - chunkIndex: 1, - indexes: 1, - model: 1, - billId: 1 - }) - .lean(); + ).select({ + _id: 1, + userId: 1, + teamId: 1, + tmbId: 1, + datasetId: 1, + collectionId: 1, + q: 1, + a: 1, + chunkIndex: 1, + indexes: 1, + model: 1, + billId: 1 + }); // task preemption if (!data) { @@ -102,7 +97,7 @@ export async function generateVector(): Promise { try { // invalid data if (!data.q.trim()) { - await MongoDatasetTraining.findByIdAndDelete(data._id); + await data.deleteOne(); reduceQueue(); generateVector(); return; @@ -131,7 +126,7 @@ export async function generateVector(): Promise { }); // delete data from training - await MongoDatasetTraining.findByIdAndDelete(data._id); + await data.deleteOne(); reduceQueue(); generateVector(); diff --git a/projects/app/src/service/mongo.ts b/projects/app/src/service/mongo.ts index 954757d9917..13b5e91d6de 100644 --- a/projects/app/src/service/mongo.ts +++ b/projects/app/src/service/mongo.ts @@ -21,15 +21,15 @@ export function connectToDatabase(): Promise { initGlobal(); }, afterHook: async () => { - startMongoWatch(); - // cron - startCron(); // init system config getInitConfig(); - // init vector database, init root user await Promise.all([initVectorStore(), initRootUser()]); + startMongoWatch(); + // cron + startCron(); + // start queue startTrainingQueue(true); }