From b916183848287ca479374877578a3e3bda2ef6ee Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Wed, 29 Nov 2023 20:45:36 +0800 Subject: [PATCH] 4.6.3-alpha1 (#529) --- packages/global/common/string/time.ts | 3 + packages/global/core/app/utils.ts | 2 +- packages/global/core/chat/type.d.ts | 2 + packages/global/core/dataset/constant.ts | 57 +++-- packages/global/core/dataset/controller.d.ts | 1 + packages/global/core/dataset/type.d.ts | 15 +- .../service/core/dataset/collection/schema.ts | 39 +-- packages/service/core/dataset/data/schema.ts | 9 + .../service/core/dataset/training/schema.ts | 4 + projects/app/package.json | 2 +- projects/app/public/locales/en/common.json | 35 ++- projects/app/public/locales/zh/common.json | 35 ++- .../components/ChatBox/WholeResponseModal.tsx | 8 + .../src/components/common/MyRadio/index.tsx | 9 +- .../core/module/Flow/ModuleTemplateList.tsx | 11 +- projects/app/src/constants/dataset.ts | 7 +- .../app/src/global/core/api/datasetReq.d.ts | 13 +- .../app/src/global/core/dataset/type.d.ts | 3 +- projects/app/src/pages/api/admin/initv463.ts | 55 +++++ .../api/core/dataset/collection/create.ts | 23 +- .../api/core/dataset/collection/delById.ts | 4 +- .../api/core/dataset/collection/detail.ts | 10 +- .../pages/api/core/dataset/collection/list.ts | 5 +- .../api/core/dataset/collection/update.ts | 7 +- .../pages/api/core/dataset/data/insertData.ts | 1 + .../src/pages/api/core/dataset/data/list.ts | 4 +- .../pages/api/core/dataset/data/pushData.ts | 3 +- .../app/src/pages/api/core/dataset/delete.ts | 1 - .../src/pages/api/core/plugin/templates.ts | 2 +- .../app/src/pages/api/v1/chat/completions.ts | 2 +- .../detail/components/SimpleEdit/index.tsx | 1 + .../detail/components/CollectionCard.tsx | 8 +- .../dataset/detail/components/DataCard.tsx | 232 ++++++++++++++---- .../detail/components/Import/FileSelect.tsx | 22 +- .../detail/components/Import/ImportModal.tsx | 16 +- .../detail/components/Import/Provider.tsx | 12 +- .../detail/components/InputDataModal.tsx | 6 +- .../service/core/dataset/data/controller.ts | 2 + .../app/src/service/core/dataset/data/pg.ts | 8 +- .../app/src/service/events/generateVector.ts | 2 + .../service/moduleDispatch/dataset/search.ts | 3 +- .../support/permission/auth/dataset.ts | 2 +- projects/app/src/web/core/dataset/api.ts | 5 +- 43 files changed, 511 insertions(+), 180 deletions(-) create mode 100644 packages/global/common/string/time.ts create mode 100644 projects/app/src/pages/api/admin/initv463.ts diff --git a/packages/global/common/string/time.ts b/packages/global/common/string/time.ts new file mode 100644 index 00000000000..fc80c93e549 --- /dev/null +++ b/packages/global/common/string/time.ts @@ -0,0 +1,3 @@ +import dayjs from 'dayjs'; + +export const formatTime2YMDHM = (time: Date) => dayjs(time).format('YYYY-MM-DD HH:mm'); diff --git a/packages/global/core/app/utils.ts b/packages/global/core/app/utils.ts index e663ae20b6f..dbe46aab8f6 100644 --- a/packages/global/core/app/utils.ts +++ b/packages/global/core/app/utils.ts @@ -77,7 +77,7 @@ export const appModules2Form = ({ ); defaultAppForm.aiSettings.quotePrompt = findInputValueByKey( module.inputs, - ModuleInputKeyEnum.aiChatQuoteTemplate + ModuleInputKeyEnum.aiChatQuotePrompt ); } else if (module.flowType === FlowNodeTypeEnum.datasetSearchNode) { defaultAppForm.dataset.datasets = findInputValueByKey( diff --git a/packages/global/core/chat/type.d.ts b/packages/global/core/chat/type.d.ts index bcad0f51e92..61daac17690 100644 --- a/packages/global/core/chat/type.d.ts +++ b/packages/global/core/chat/type.d.ts @@ -4,6 +4,7 @@ import { ChatRoleEnum, ChatSourceEnum } from './constants'; import { FlowNodeTypeEnum } from '../module/node/constant'; import { ModuleOutputKeyEnum } from '../module/constants'; import { AppSchema } from '../app/type'; +import { DatasetSearchModeEnum } from '../dataset/constant'; export type ChatSchema = { _id: string; @@ -94,6 +95,7 @@ export type moduleDispatchResType = { // dataset search similarity?: number; limit?: number; + searchMode?: `${DatasetSearchModeEnum}`; // cq cqList?: ClassifyQuestionAgentItemType[]; diff --git a/packages/global/core/dataset/constant.ts b/packages/global/core/dataset/constant.ts index 2f12a97288a..560c23b973a 100644 --- a/packages/global/core/dataset/constant.ts +++ b/packages/global/core/dataset/constant.ts @@ -1,5 +1,6 @@ export const PgDatasetTableName = 'modeldata'; +/* ------------ dataset -------------- */ export enum DatasetTypeEnum { folder = 'folder', dataset = 'dataset' @@ -14,28 +15,45 @@ export const DatasetTypeMap = { } }; +/* ------------ collection -------------- */ export enum DatasetCollectionTypeEnum { - file = 'file', folder = 'folder', + file = 'file', link = 'link', virtual = 'virtual' } - export const DatasetCollectionTypeMap = { - [DatasetCollectionTypeEnum.file]: { - name: 'dataset.file' - }, [DatasetCollectionTypeEnum.folder]: { - name: 'dataset.folder' + name: 'core.dataset.folder' + }, + [DatasetCollectionTypeEnum.file]: { + name: 'core.dataset.file' }, [DatasetCollectionTypeEnum.link]: { - name: 'dataset.link' + name: 'core.dataset.link' }, [DatasetCollectionTypeEnum.virtual]: { - name: 'dataset.Virtual File' + name: 'core.dataset.Virtual File' + } +}; +export enum DatasetCollectionTrainingModeEnum { + manual = 'manual', + chunk = 'chunk', + qa = 'qa' +} +export const DatasetCollectionTrainingTypeMap = { + [DatasetCollectionTrainingModeEnum.manual]: { + label: 'core.dataset.collection.training.type manual' + }, + [DatasetCollectionTrainingModeEnum.chunk]: { + label: 'core.dataset.collection.training.type chunk' + }, + [DatasetCollectionTrainingModeEnum.qa]: { + label: 'core.dataset.collection.training.type qa' } }; +/* ------------ data -------------- */ export enum DatasetDataIndexTypeEnum { chunk = 'chunk', qa = 'qa', @@ -61,31 +79,22 @@ export const DatasetDataIndexTypeMap = { } }; +/* ------------ training -------------- */ export enum TrainingModeEnum { - 'chunk' = 'chunk', - 'qa' = 'qa' - // 'hypothetical' = 'hypothetical', - // 'summary' = 'summary', - // 'multipleIndex' = 'multipleIndex' + chunk = 'chunk', + qa = 'qa' } + export const TrainingTypeMap = { [TrainingModeEnum.chunk]: { - name: 'chunk' + label: 'core.dataset.training.type chunk' }, [TrainingModeEnum.qa]: { - name: 'qa' + label: 'core.dataset.training.type qa' } - // [TrainingModeEnum.hypothetical]: { - // name: 'hypothetical' - // }, - // [TrainingModeEnum.summary]: { - // name: 'summary' - // }, - // [TrainingModeEnum.multipleIndex]: { - // name: 'multipleIndex' - // } }; +/* ------------ search -------------- */ export enum DatasetSearchModeEnum { embedding = 'embedding', embeddingReRank = 'embeddingReRank', diff --git a/packages/global/core/dataset/controller.d.ts b/packages/global/core/dataset/controller.d.ts index a83607de916..99867174477 100644 --- a/packages/global/core/dataset/controller.d.ts +++ b/packages/global/core/dataset/controller.d.ts @@ -5,6 +5,7 @@ export type CreateDatasetDataProps = { tmbId: string; datasetId: string; collectionId: string; + chunkIndex?: number; q: string; a?: string; indexes?: Omit[]; diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts index 391be8cc553..be6b04d9e2e 100644 --- a/packages/global/core/dataset/type.d.ts +++ b/packages/global/core/dataset/type.d.ts @@ -27,19 +27,18 @@ export type DatasetSchemaType = { export type DatasetCollectionSchemaType = { _id: string; - userId: string; teamId: string; tmbId: string; datasetId: string; parentId?: string; name: string; type: `${DatasetCollectionTypeEnum}`; + createTime: Date; updateTime: Date; - metadata: { - fileId?: string; - rawLink?: string; - pgCollectionId?: string; - }; + trainingType: `${TrainingModeEnum}`; + chunkSize: number; + fileId?: string; + rawLink?: string; }; export type DatasetDataIndexItemType = { @@ -57,6 +56,8 @@ export type DatasetDataSchemaType = { collectionId: string; datasetId: string; collectionId: string; + chunkIndex: number; + updateTime: Date; q: string; // large chunks or question a: string; // answer or custom content fullTextToken: string; @@ -78,6 +79,7 @@ export type DatasetTrainingSchemaType = { prompt: string; q: string; a: string; + chunkIndex: number; indexes: Omit[]; }; @@ -101,6 +103,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & { canWrite: boolean; sourceName: string; sourceId?: string; + file?: DatasetFileSchema; }; /* ================= data ===================== */ diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts index 4aae6dfc1ba..0eb48aa16d9 100644 --- a/packages/service/core/dataset/collection/schema.ts +++ b/packages/service/core/dataset/collection/schema.ts @@ -1,7 +1,10 @@ import { connectionMongo, type Model } from '../../../common/mongo'; const { Schema, model, models } = connectionMongo; import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d'; -import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant'; +import { + DatasetCollectionTrainingTypeMap, + DatasetCollectionTypeMap +} from '@fastgpt/global/core/dataset/constant'; import { DatasetCollectionName } from '../schema'; import { TeamCollectionName, @@ -45,24 +48,32 @@ const DatasetCollectionSchema = new Schema({ enum: Object.keys(DatasetCollectionTypeMap), required: true }, + createTime: { + type: Date, + default: () => new Date() + }, updateTime: { type: Date, default: () => new Date() }, + trainingType: { + type: String, + enum: Object.keys(DatasetCollectionTrainingTypeMap), + required: true + }, + chunkSize: { + type: Number, + required: true + }, + fileId: { + type: Schema.Types.ObjectId, + ref: 'dataset.files' + }, + rawLink: { + type: String + }, metadata: { - type: { - fileId: { - type: Schema.Types.ObjectId, - ref: 'dataset.files' - }, - rawLink: { - type: String - }, - // 451 初始化 - pgCollectionId: { - type: String - } - }, + type: Object, default: {} } }); diff --git a/packages/service/core/dataset/data/schema.ts b/packages/service/core/dataset/data/schema.ts index d79dfbbff64..e11d1634cd1 100644 --- a/packages/service/core/dataset/data/schema.ts +++ b/packages/service/core/dataset/data/schema.ts @@ -70,6 +70,15 @@ const DatasetDataSchema = new Schema({ } ], default: [] + }, + // metadata + updateTime: { + type: Date, + default: () => new Date() + }, + chunkIndex: { + type: Number, + default: 0 } }); diff --git a/packages/service/core/dataset/training/schema.ts b/packages/service/core/dataset/training/schema.ts index d68f570dacc..7f971824108 100644 --- a/packages/service/core/dataset/training/schema.ts +++ b/packages/service/core/dataset/training/schema.ts @@ -75,6 +75,10 @@ const TrainingDataSchema = new Schema({ type: String, default: '' }, + chunkIndex: { + type: Number, + default: 0 + }, indexes: { type: [ { diff --git a/projects/app/package.json b/projects/app/package.json index 01de1ee192a..97b0e417668 100644 --- a/projects/app/package.json +++ b/projects/app/package.json @@ -1,6 +1,6 @@ { "name": "app", - "version": "4.6.2", + "version": "4.6.3", "private": false, "scripts": { "dev": "next dev", diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index 2452292555c..880f52a2908 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -266,15 +266,39 @@ "Search Top K": "Top K", "Set Empty Result Tip": ",Response empty text", "Similarity": "Similarity", + "Sync Time": "Update Time", + "Virtual File": "Virtual File", + "collection": { + "metadata": { + "Chunk Size": "Chunk Size", + "Createtime": "Create Time", + "Read Metadata": "Read Metadata", + "Training Type": "Training Type", + "Updatetime": "Update Time", + "metadata": "Metadata", + "read source": "Read Source", + "source": "Source", + "source name": "Source Name", + "source size": "Source Size" + }, + "training": { + "type chunk": "Chunk", + "type manual": "Manual", + "type qa": "QA" + } + }, "data": { "Edit": "Edit Data", "data is deleted": "Data is deleted", "id": "Data ID" }, + "file": "File", + "folder": "Folder", "import": { "Ideal chunk length": "Ideal chunk length", "Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200" }, + "link": "Link", "search": { "Empty result response": "Empty Response", "Empty result response Tips": "If you fill in the content, if no suitable content is found, you will directly reply to the content.", @@ -289,7 +313,8 @@ "embedding desc": "Direct vector topk correlation query ", "embeddingReRank": "Enhanced semantic retrieval ", "embeddingReRank desc": "Sort using Rerank after overperforming vector topk queries " - } + }, + "search mode": "Search Mode" }, "test": { "Test": "Start", @@ -300,6 +325,10 @@ "test history": "Test History", "test result placeholder": "The test results will be presented here", "test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data." + }, + "training": { + "type chunk": "Chunk", + "type qa": "QA" } }, "module": { @@ -693,9 +722,9 @@ "wallet": { "bill": { "Audio Speech": "Audio Speech", + "ReRank": "ReRank", "Whisper": "Whisper", - "bill username": "User", - "ReRank": "ReRank" + "bill username": "User" } } } diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 2f8af1f69f7..2a8692851bd 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -266,15 +266,39 @@ "Search Top K": "单次搜索数量", "Set Empty Result Tip": ",未搜索到内容时回复指定内容", "Similarity": "相似度", + "Sync Time": "最后更新时间", + "Virtual File": "虚拟文件", + "collection": { + "metadata": { + "Chunk Size": "分割大小", + "Createtime": "创建时间", + "Read Metadata": "查看元数据", + "Training Type": "训练模式", + "Updatetime": "更新时间", + "metadata": "元数据", + "read source": "查看原始内容", + "source": "数据来源", + "source name": "来源名", + "source size": "来源大小" + }, + "training": { + "type manual": "手动", + "type chunk": "直接分段", + "type qa": "问答拆分" + } + }, "data": { "Edit": "编辑数据", "data is deleted": "该数据已被删除", "id": "数据ID" }, + "file": "文件", + "folder": "目录", "import": { "Ideal chunk length": "理想分块长度", "Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200" }, + "link": "链接", "search": { "Empty result response": "空搜索回复", "Empty result response Tips": "若填写该内容,没有搜索到合适内容时,将直接回复填写的内容。", @@ -289,7 +313,8 @@ "embedding desc": "直接进行向量 topk 相关性查询", "embeddingReRank": "增强语义检索", "embeddingReRank desc": "超额进行向量 topk 查询后再使用 Rerank 进行排序" - } + }, + "search mode": "检索模式" }, "test": { "Test": "测试", @@ -300,6 +325,10 @@ "test history": "测试历史", "test result placeholder": "测试结果将在这里展示", "test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。" + }, + "training": { + "type chunk": "直接分段", + "type qa": "问答拆分" } }, "module": { @@ -693,9 +722,9 @@ "wallet": { "bill": { "Audio Speech": "语音播报", + "ReRank": "结果重排", "Whisper": "语音输入", - "bill username": "用户", - "ReRank": "结果重排" + "bill username": "用户" } } } diff --git a/projects/app/src/components/ChatBox/WholeResponseModal.tsx b/projects/app/src/components/ChatBox/WholeResponseModal.tsx index 9ec503293d3..a2d173c6caa 100644 --- a/projects/app/src/components/ChatBox/WholeResponseModal.tsx +++ b/projects/app/src/components/ChatBox/WholeResponseModal.tsx @@ -10,6 +10,7 @@ import MyTooltip from '../MyTooltip'; import { QuestionOutlineIcon } from '@chakra-ui/icons'; import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools'; import Markdown from '../Markdown'; +import { DatasetSearchModeMap } from '@fastgpt/global/core/dataset/constant'; function Row({ label, value }: { label: string; value?: string | number }) { const theme = useTheme(); @@ -127,6 +128,13 @@ const WholeResponseModal = ({ )} {/* dataset search */} + {activeModule?.searchMode && ( + + )} diff --git a/projects/app/src/components/common/MyRadio/index.tsx b/projects/app/src/components/common/MyRadio/index.tsx index db0bca123ff..ff83f341355 100644 --- a/projects/app/src/components/common/MyRadio/index.tsx +++ b/projects/app/src/components/common/MyRadio/index.tsx @@ -36,16 +36,17 @@ const MyRadio = ({ border={theme.borders.sm} borderWidth={'1.5px'} borderRadius={'md'} - bg={'myWhite.300'} position={'relative'} {...(value === item.value ? { - borderColor: 'myBlue.700' + borderColor: 'myBlue.500', + bg: 'myBlue.100' } : { + bg: 'myWhite.300', _hover: { - bg: 'myBlue.100', - borderColor: 'myBlue.600' + bg: '#f5f8ff', + borderColor: '#b2ccff' } })} _after={{ diff --git a/projects/app/src/components/core/module/Flow/ModuleTemplateList.tsx b/projects/app/src/components/core/module/Flow/ModuleTemplateList.tsx index 45a4914ee0e..b1769acd472 100644 --- a/projects/app/src/components/core/module/Flow/ModuleTemplateList.tsx +++ b/projects/app/src/components/core/module/Flow/ModuleTemplateList.tsx @@ -1,14 +1,5 @@ import React, { useCallback, useMemo } from 'react'; -import { - Box, - Flex, - Accordion, - AccordionItem, - AccordionButton, - AccordionPanel, - AccordionIcon, - useTheme -} from '@chakra-ui/react'; +import { Box, Flex } from '@chakra-ui/react'; import type { FlowModuleTemplateType, moduleTemplateListType diff --git a/projects/app/src/constants/dataset.ts b/projects/app/src/constants/dataset.ts index 1303e4c425a..3f05385b81a 100644 --- a/projects/app/src/constants/dataset.ts +++ b/projects/app/src/constants/dataset.ts @@ -24,7 +24,6 @@ export const defaultDatasetDetail: DatasetItemType = { export const defaultCollectionDetail: DatasetCollectionItemType = { _id: '', - userId: '', teamId: '', tmbId: '', datasetId: { @@ -46,8 +45,10 @@ export const defaultCollectionDetail: DatasetCollectionItemType = { name: '', type: 'file', updateTime: new Date(), - metadata: {}, canWrite: false, sourceName: '', - sourceId: '' + sourceId: '', + createTime: new Date(), + trainingType: 'chunk', + chunkSize: 0 }; diff --git a/projects/app/src/global/core/api/datasetReq.d.ts b/projects/app/src/global/core/api/datasetReq.d.ts index 1872e1cc31d..f8e156d0bd8 100644 --- a/projects/app/src/global/core/api/datasetReq.d.ts +++ b/projects/app/src/global/core/api/datasetReq.d.ts @@ -1,4 +1,8 @@ -import { DatasetCollectionTypeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; +import { + DatasetCollectionTrainingModeEnum, + DatasetCollectionTypeEnum, + DatasetTypeEnum +} from '@fastgpt/global/core/dataset/constant'; import type { RequestPaging } from '@/types'; import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; import type { SearchTestItemType } from '@/types/core/dataset'; @@ -31,14 +35,15 @@ export type CreateDatasetCollectionParams = { parentId?: string; name: string; type: `${DatasetCollectionTypeEnum}`; - metadata?: DatasetCollectionSchemaType['metadata']; - updateTime?: string; + trainingType?: `${DatasetCollectionTrainingModeEnum}`; + chunkSize?: number; + fileId?: string; + rawLink?: string; }; export type UpdateDatasetCollectionParams = { id: string; parentId?: string; name?: string; - metadata?: DatasetCollectionSchemaType['metadata']; }; /* ==== data ===== */ diff --git a/projects/app/src/global/core/dataset/type.d.ts b/projects/app/src/global/core/dataset/type.d.ts index bb8a030737a..e576acc96e4 100644 --- a/projects/app/src/global/core/dataset/type.d.ts +++ b/projects/app/src/global/core/dataset/type.d.ts @@ -16,7 +16,8 @@ export type DatasetCollectionsListItemType = { updateTime: Date; dataAmount: number; trainingAmount: number; - metadata: DatasetCollectionSchemaType['metadata']; + fileId?: string; + rawLink?: string; canWrite: boolean; }; diff --git a/projects/app/src/pages/api/admin/initv463.ts b/projects/app/src/pages/api/admin/initv463.ts new file mode 100644 index 00000000000..b64423c4967 --- /dev/null +++ b/projects/app/src/pages/api/admin/initv463.ts @@ -0,0 +1,55 @@ +import type { NextApiRequest, NextApiResponse } from 'next'; +import { jsonRes } from '@fastgpt/service/common/response'; +import { connectToDatabase } from '@/service/mongo'; +import { authCert } from '@fastgpt/service/support/permission/auth/common'; +import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; +import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; +import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; + +let success = 0; +/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */ +export default async function handler(req: NextApiRequest, res: NextApiResponse) { + try { + const { limit = 50 } = req.body as { limit: number }; + await authCert({ req, authRoot: true }); + await connectToDatabase(); + success = 0; + + await MongoDatasetCollection.updateMany({}, [ + { + $set: { + createTime: '$updateTime', + trainingType: { + $cond: { + if: { $ifNull: ['$a', false] }, + then: TrainingModeEnum.qa, + else: TrainingModeEnum.chunk + } + }, + chunkSize: 0, + fileId: '$metadata.fileId', + rawLink: '$metadata.rawLink' + } + } + ]); + + await MongoDatasetData.updateMany( + {}, + { + chunkIndex: 0, + updateTime: new Date() + } + ); + + jsonRes(res, { + message: 'success' + }); + } catch (error) { + console.log(error); + + jsonRes(res, { + code: 500, + error + }); + } +} diff --git a/projects/app/src/pages/api/core/dataset/collection/create.ts b/projects/app/src/pages/api/core/dataset/collection/create.ts index 378f0534fd9..e0933413d29 100644 --- a/projects/app/src/pages/api/core/dataset/collection/create.ts +++ b/projects/app/src/pages/api/core/dataset/collection/create.ts @@ -6,8 +6,11 @@ import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; -import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant'; -import { getCollectionUpdateTime } from '@fastgpt/service/core/dataset/collection/utils'; +import { + TrainingModeEnum, + DatasetCollectionTypeEnum, + DatasetCollectionTrainingModeEnum +} from '@fastgpt/global/core/dataset/constant'; import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; @@ -45,7 +48,10 @@ export async function createOneCollection({ parentId, datasetId, type, - metadata = {}, + trainingType = DatasetCollectionTrainingModeEnum.manual, + chunkSize = 0, + fileId, + rawLink, teamId, tmbId }: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) { @@ -56,8 +62,10 @@ export async function createOneCollection({ datasetId, parentId: parentId || null, type, - metadata, - updateTime: getCollectionUpdateTime({ name }) + trainingType, + chunkSize, + fileId, + rawLink }); // create default collection @@ -94,7 +102,8 @@ export function createDefaultCollection({ datasetId, parentId, type: DatasetCollectionTypeEnum.virtual, - updateTime: new Date('2099'), - metadata: {} + trainingType: DatasetCollectionTrainingModeEnum.manual, + chunkSize: 0, + updateTime: new Date('2099') }); } diff --git a/projects/app/src/pages/api/core/dataset/collection/delById.ts b/projects/app/src/pages/api/core/dataset/collection/delById.ts index a3be7461d5e..660c1302084 100644 --- a/projects/app/src/pages/api/core/dataset/collection/delById.ts +++ b/projects/app/src/pages/api/core/dataset/collection/delById.ts @@ -42,10 +42,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< // delete file await Promise.all( collections.map((collection) => { - if (!collection.metadata?.fileId) return; + if (!collection?.fileId) return; return delFileById({ bucketName: BucketNameEnum.dataset, - fileId: collection.metadata.fileId + fileId: collection.fileId }); }) ); diff --git a/projects/app/src/pages/api/core/dataset/collection/detail.ts b/projects/app/src/pages/api/core/dataset/collection/detail.ts index 73ba162d4bf..737eb6b2413 100644 --- a/projects/app/src/pages/api/core/dataset/collection/detail.ts +++ b/projects/app/src/pages/api/core/dataset/collection/detail.ts @@ -6,6 +6,8 @@ import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset'; import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type'; +import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; +import { getFileById } from '@fastgpt/service/common/file/gridfs/controller'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -24,12 +26,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< per: 'r' }); + // get file + const file = collection?.fileId + ? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId }) + : undefined; + jsonRes(res, { data: { ...collection, canWrite, sourceName: collection?.name, - sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink + sourceId: collection?.fileId || collection?.rawLink, + file } }); } catch (err) { diff --git a/projects/app/src/pages/api/core/dataset/collection/list.ts b/projects/app/src/pages/api/core/dataset/collection/list.ts index 9b06ea4c8f7..91cb0878d5a 100644 --- a/projects/app/src/pages/api/core/dataset/collection/list.ts +++ b/projects/app/src/pages/api/core/dataset/collection/list.ts @@ -115,9 +115,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< name: 1, type: 1, updateTime: 1, - trainingAmount: { $size: '$trainings' }, dataAmount: { $size: '$datas' }, - metadata: 1 + trainingAmount: { $size: '$trainings' }, + fileId: 1, + rawLink: 1 } }, { diff --git a/projects/app/src/pages/api/core/dataset/collection/update.ts b/projects/app/src/pages/api/core/dataset/collection/update.ts index 4b5c0641631..ac29bd7e35e 100644 --- a/projects/app/src/pages/api/core/dataset/collection/update.ts +++ b/projects/app/src/pages/api/core/dataset/collection/update.ts @@ -9,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/ export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { await connectToDatabase(); - const { id, parentId, name, metadata = {} } = req.body as UpdateDatasetCollectionParams; + const { id, parentId, name } = req.body as UpdateDatasetCollectionParams; if (!id) { throw new Error('缺少参数'); @@ -23,11 +23,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< ...(name && { name, updateTime: getCollectionUpdateTime({ name }) }) }; - // 将metadata的每个字段添加到updateFields中 - for (const [key, value] of Object.entries(metadata)) { - updateFields[`metadata.${key}`] = value; - } - await MongoDatasetCollection.findByIdAndUpdate(id, { $set: updateFields }); diff --git a/projects/app/src/pages/api/core/dataset/data/insertData.ts b/projects/app/src/pages/api/core/dataset/data/insertData.ts index f7de1e63e55..904e0d36487 100644 --- a/projects/app/src/pages/api/core/dataset/data/insertData.ts +++ b/projects/app/src/pages/api/core/dataset/data/insertData.ts @@ -76,6 +76,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex collectionId, q: formatQ, a: formatA, + chunkIndex: 0, model: vectorModelData.model, indexes: formatIndexes }); diff --git a/projects/app/src/pages/api/core/dataset/data/list.ts b/projects/app/src/pages/api/core/dataset/data/list.ts index d4707b0a4bf..088c20839b8 100644 --- a/projects/app/src/pages/api/core/dataset/data/list.ts +++ b/projects/app/src/pages/api/core/dataset/data/list.ts @@ -32,8 +32,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< }; const [data, total] = await Promise.all([ - MongoDatasetData.find(match, '_id datasetId collectionId q a indexes') - .sort({ _id: -1 }) + MongoDatasetData.find(match, '_id datasetId collectionId q a indexes') + .sort({ chunkIndex: 1, updateTime: -1 }) .skip((pageNum - 1) * pageSize) .limit(pageSize) .lean(), diff --git a/projects/app/src/pages/api/core/dataset/data/pushData.ts b/projects/app/src/pages/api/core/dataset/data/pushData.ts index 3329bcd6ed9..f484dddfb71 100644 --- a/projects/app/src/pages/api/core/dataset/data/pushData.ts +++ b/projects/app/src/pages/api/core/dataset/data/pushData.ts @@ -125,7 +125,7 @@ export async function pushDataToDatasetCollection({ // 插入记录 const insertRes = await MongoDatasetTraining.insertMany( - filterResult.success.map((item) => ({ + filterResult.success.map((item, i) => ({ teamId, tmbId, datasetId, @@ -136,6 +136,7 @@ export async function pushDataToDatasetCollection({ model, q: item.q, a: item.a, + chunkIndex: i, indexes: item.indexes })) ); diff --git a/projects/app/src/pages/api/core/dataset/delete.ts b/projects/app/src/pages/api/core/dataset/delete.ts index 741bf2c9e9d..ad30359421b 100644 --- a/projects/app/src/pages/api/core/dataset/delete.ts +++ b/projects/app/src/pages/api/core/dataset/delete.ts @@ -4,7 +4,6 @@ import { connectToDatabase } from '@/service/mongo'; import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller'; -import { Types } from '@fastgpt/service/common/mongo'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; import { delDataByDatasetId } from '@/service/core/dataset/data/controller'; diff --git a/projects/app/src/pages/api/core/plugin/templates.ts b/projects/app/src/pages/api/core/plugin/templates.ts index 5e17ea19a76..e32b8cd4aa1 100644 --- a/projects/app/src/pages/api/core/plugin/templates.ts +++ b/projects/app/src/pages/api/core/plugin/templates.ts @@ -16,7 +16,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const [userPlugins, plusPlugins] = await Promise.all([ MongoPlugin.find({ teamId }).lean(), - GET('/core/plugin/getTemplates') + global.systemEnv.pluginBaseUrl ? GET('/core/plugin/getTemplates') : [] ]); const data: FlowModuleTemplateType[] = [ diff --git a/projects/app/src/pages/api/v1/chat/completions.ts b/projects/app/src/pages/api/v1/chat/completions.ts index d1acf10e9dc..2704ece78c6 100644 --- a/projects/app/src/pages/api/v1/chat/completions.ts +++ b/projects/app/src/pages/api/v1/chat/completions.ts @@ -142,7 +142,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex responseDetail: detail, apikey, authType, - canWrite: false + canWrite: true }; } diff --git a/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx b/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx index ac8691e41d9..09442bffc34 100644 --- a/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx +++ b/projects/app/src/pages/app/detail/components/SimpleEdit/index.tsx @@ -144,6 +144,7 @@ function ConfigForm({ templateId: appDetail.simpleTemplateId, modules: appDetail.modules }); + reset(formVal); setTimeout(() => { setRefresh((state) => !state); diff --git a/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx b/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx index 77be8415d88..aa22f2288b3 100644 --- a/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx +++ b/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx @@ -36,7 +36,11 @@ import MyMenu from '@/components/MyMenu'; import { useEditTitle } from '@/web/common/hooks/useEditTitle'; import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d'; import EmptyTip from '@/components/EmptyTip'; -import { FolderAvatarSrc, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant'; +import { + FolderAvatarSrc, + DatasetCollectionTypeEnum, + TrainingModeEnum +} from '@fastgpt/global/core/dataset/constant'; import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils'; import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal'; import { TabEnum } from '..'; @@ -347,7 +351,7 @@ const CollectionCard = () => { # {t('common.Name')} {t('dataset.collections.Data Amount')} - {t('common.Time')} + {t('core.dataset.Sync Time')} {t('common.Status')} diff --git a/projects/app/src/pages/dataset/detail/components/DataCard.tsx b/projects/app/src/pages/dataset/detail/components/DataCard.tsx index f97fda3dbd2..7c5ac396811 100644 --- a/projects/app/src/pages/dataset/detail/components/DataCard.tsx +++ b/projects/app/src/pages/dataset/detail/components/DataCard.tsx @@ -1,5 +1,20 @@ import React, { useCallback, useState, useRef, useMemo } from 'react'; -import { Box, Card, IconButton, Flex, Grid, Button } from '@chakra-ui/react'; +import { + Box, + Card, + IconButton, + Flex, + Grid, + Button, + useTheme, + Drawer, + DrawerBody, + DrawerFooter, + DrawerHeader, + DrawerOverlay, + DrawerContent, + useDisclosure +} from '@chakra-ui/react'; import { usePagination } from '@/web/common/hooks/usePagination'; import { getDatasetDataList, @@ -23,12 +38,23 @@ import { TabEnum } from '..'; import { useUserStore } from '@/web/support/user/useUserStore'; import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant'; import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils'; +import { useSystemStore } from '@/web/common/system/useSystemStore'; +import { + DatasetCollectionTypeMap, + DatasetCollectionTrainingTypeMap +} from '@fastgpt/global/core/dataset/constant'; +import { formatTime2YMDHM } from '@fastgpt/global/common/string/time'; +import { formatFileSize } from '@fastgpt/global/common/file/tools'; +import { getFileAndOpen } from '@/web/core/dataset/utils'; +import MyTooltip from '@/components/MyTooltip'; const DataCard = () => { const BoxRef = useRef(null); + const theme = useTheme(); const lastSearch = useRef(''); const router = useRouter(); const { userInfo } = useUserStore(); + const { isPc } = useSystemStore(); const { collectionId = '' } = router.query as { collectionId: string }; const { Loading, setIsLoading } = useLoading({ defaultLoading: true }); const { t } = useTranslation(); @@ -37,6 +63,7 @@ const DataCard = () => { const { openConfirm, ConfirmModal } = useConfirm({ content: t('dataset.Confirm to delete the data') }); + const { isOpen, onOpen, onClose } = useDisclosure(); const { data: datasetDataList, @@ -81,6 +108,43 @@ const DataCard = () => { [collection?.canWrite, userInfo?.team?.role] ); + const metadataList = useMemo( + () => + collection + ? [ + { + label: t('core.dataset.collection.metadata.source'), + value: t(DatasetCollectionTypeMap[collection.type]?.name) + }, + { + label: t('core.dataset.collection.metadata.source name'), + value: collection.file?.filename || collection?.rawLink || collection?.name + }, + { + label: t('core.dataset.collection.metadata.source size'), + value: collection.file ? formatFileSize(collection.file.length) : '-' + }, + { + label: t('core.dataset.collection.metadata.Createtime'), + value: formatTime2YMDHM(collection.createTime) + }, + { + label: t('core.dataset.collection.metadata.Updatetime'), + value: formatTime2YMDHM(collection.updateTime) + }, + { + label: t('core.dataset.collection.metadata.Training Type'), + value: t(DatasetCollectionTrainingTypeMap[collection.trainingType]?.label) + }, + { + label: t('core.dataset.collection.metadata.Chunk Size'), + value: collection.chunkSize || '-' + } + ] + : [], + [collection, t] + ); + return ( @@ -106,7 +170,7 @@ const DataCard = () => { { {canWrite && ( )} + {isPc && ( + + } + aria-label={''} + onClick={onOpen} + /> + + )} @@ -178,16 +253,23 @@ const DataCard = () => { gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']} gridGap={4} > - {datasetDataList.map((item) => ( + {datasetDataList.map((item, index) => ( { if (!collection) return; setEditInputData({ @@ -198,57 +280,113 @@ const DataCard = () => { }); }} > + + + # {index + 1} + + + ID:{item._id} + + - + {item.q} - {item.a} + {item.a} + + + + + {item.q.length + (item.a?.length || 0)} + + + {canWrite && ( + } + variant={'base'} + colorScheme={'gray'} + aria-label={'delete'} + size={'xs'} + borderRadius={'md'} + _hover={{ color: 'red.600' }} + onClick={(e) => { + e.stopPropagation(); + openConfirm(async () => { + try { + setIsLoading(true); + await delOneDatasetDataById(item._id); + getData(pageNum); + } catch (error) { + toast({ + title: getErrText(error), + status: 'error' + }); + } + setIsLoading(false); + })(); + }} + /> + )} + - - - ID:{item._id} - - {canWrite && ( - } - variant={'base'} - colorScheme={'gray'} - aria-label={'delete'} - size={'xs'} - borderRadius={'md'} - _hover={{ color: 'red.600' }} - onClick={(e) => { - e.stopPropagation(); - openConfirm(async () => { - try { - setIsLoading(true); - await delOneDatasetDataById(item._id); - getData(pageNum); - } catch (error) { - toast({ - title: getErrText(error), - status: 'error' - }); - } - setIsLoading(false); - })(); - }} - /> - )} - ))} + {/* metadata drawer */} + + + + {t('core.dataset.collection.metadata.metadata')} + + + {metadataList.map((item) => ( + + + {item.label} + + {item.value} + + ))} + {collection?.sourceId && ( + + )} + + + + + + + + {total > pageSize && ( diff --git a/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx b/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx index d45001efef6..6ec9985a886 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/FileSelect.tsx @@ -19,7 +19,6 @@ import { customAlphabet } from 'nanoid'; import dynamic from 'next/dynamic'; import MyTooltip from '@/components/MyTooltip'; import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d'; -import type { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type'; import { getErrText } from '@fastgpt/global/common/error/utils'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { getFileIcon } from '@fastgpt/global/common/file/icon'; @@ -40,7 +39,8 @@ export type FileItemType = { icon: string; tokens: number; // total tokens type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link; - metadata: DatasetCollectionSchemaType['metadata']; + fileId?: string; + rawLink?: string; }; export interface Props extends BoxProps { @@ -157,9 +157,7 @@ const FileSelect = ({ .join('\n')}`, chunks: filterData, type: DatasetCollectionTypeEnum.file, - metadata: { - fileId - } + fileId }; onPushFiles([fileItem]); @@ -195,9 +193,7 @@ const FileSelect = ({ text, tokens: splitRes.tokens, type: DatasetCollectionTypeEnum.file, - metadata: { - fileId - }, + fileId, chunks: splitRes.chunks.map((chunk) => ({ q: chunk, a: '' @@ -220,7 +216,7 @@ const FileSelect = ({ // link fetch const onUrlFetch = useCallback( (e: FetchResultItem[]) => { - const result: FileItemType[] = e.map(({ url, content }) => { + const result: FileItemType[] = e.map(({ url, content }) => { const splitRes = splitText2Chunks({ text: content, chunkLen, @@ -233,9 +229,7 @@ const FileSelect = ({ text: content, tokens: splitRes.tokens, type: DatasetCollectionTypeEnum.link, - metadata: { - rawLink: url - }, + rawLink: url, chunks: splitRes.chunks.map((chunk) => ({ q: chunk, a: '' @@ -277,9 +271,7 @@ const FileSelect = ({ text: content, tokens: splitRes.tokens, type: DatasetCollectionTypeEnum.file, - metadata: { - fileId: fileIds[0] - }, + fileId: fileIds[0], chunks: splitRes.chunks.map((chunk) => ({ q: chunk, a: '' diff --git a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx index c71c95267b1..f50df67dcfb 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx @@ -11,7 +11,10 @@ import MyModal from '@/components/MyModal'; import Provider from './Provider'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; import { qaModelList } from '@/web/common/system/staticData'; -import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; +import { + DatasetCollectionTrainingModeEnum, + TrainingModeEnum +} from '@fastgpt/global/core/dataset/constant'; export enum ImportTypeEnum { chunk = 'chunk', @@ -43,19 +46,22 @@ const ImportData = ({ defaultChunkLen: vectorModel?.defaultToken || 500, chunkOverlapRatio: 0.2, unitPrice: vectorModel?.price || 0.2, - mode: TrainingModeEnum.chunk + mode: TrainingModeEnum.chunk, + collectionTrainingType: DatasetCollectionTrainingModeEnum.chunk }, [ImportTypeEnum.qa]: { defaultChunkLen: agentModel?.maxContext * 0.6 || 8000, chunkOverlapRatio: 0, unitPrice: agentModel?.price || 3, - mode: TrainingModeEnum.qa + mode: TrainingModeEnum.qa, + collectionTrainingType: DatasetCollectionTrainingModeEnum.qa }, [ImportTypeEnum.csv]: { - defaultChunkLen: vectorModel?.defaultToken || 500, + defaultChunkLen: 0, chunkOverlapRatio: 0, unitPrice: vectorModel?.price || 0.2, - mode: TrainingModeEnum.chunk + mode: TrainingModeEnum.chunk, + collectionTrainingType: DatasetCollectionTrainingModeEnum.manual } }; return map[importType]; diff --git a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx index 5ed496a9146..8eac380cd67 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx @@ -15,7 +15,10 @@ import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools'; import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter'; import { useToast } from '@/web/common/hooks/useToast'; import { getErrText } from '@fastgpt/global/common/error/utils'; -import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; +import { + DatasetCollectionTrainingModeEnum, + TrainingModeEnum +} from '@fastgpt/global/core/dataset/constant'; import { Box, Flex, Image, useTheme } from '@chakra-ui/react'; import { CloseIcon } from '@chakra-ui/icons'; import DeleteIcon, { hoverDeleteStyles } from '@/components/Icon/delete'; @@ -92,6 +95,7 @@ const Provider = ({ parentId, unitPrice, mode, + collectionTrainingType, vectorModel, agentModel, defaultChunkLen = 500, @@ -104,6 +108,7 @@ const Provider = ({ parentId: string; unitPrice: number; mode: `${TrainingModeEnum}`; + collectionTrainingType: `${DatasetCollectionTrainingModeEnum}`; vectorModel: string; agentModel: string; defaultChunkLen: number; @@ -150,7 +155,10 @@ const Provider = ({ parentId, name: file.filename, type: file.type, - metadata: file.metadata + fileId: file.fileId, + rawLink: file.rawLink, + chunkSize: chunkLen, + trainingType: collectionTrainingType }); // upload data diff --git a/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx b/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx index e2bb8f439ad..1f201bccc7e 100644 --- a/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx @@ -427,7 +427,11 @@ export function RawSourceText({ {...props} > - + {sourceName || t('common.UnKnow Source')} diff --git a/projects/app/src/service/core/dataset/data/controller.ts b/projects/app/src/service/core/dataset/data/controller.ts index a51142530f8..e9e0030977f 100644 --- a/projects/app/src/service/core/dataset/data/controller.ts +++ b/projects/app/src/service/core/dataset/data/controller.ts @@ -22,6 +22,7 @@ export async function insertData2Dataset({ collectionId, q, a = '', + chunkIndex = 0, indexes, model }: CreateDatasetDataProps & { @@ -73,6 +74,7 @@ export async function insertData2Dataset({ q, a, fullTextToken: jiebaSplit({ text: qaStr }), + chunkIndex, indexes: indexes.map((item, i) => ({ ...item, dataId: result[i].insertId diff --git a/projects/app/src/service/core/dataset/data/pg.ts b/projects/app/src/service/core/dataset/data/pg.ts index cb28800bd48..3ce3b16b78b 100644 --- a/projects/app/src/service/core/dataset/data/pg.ts +++ b/projects/app/src/service/core/dataset/data/pg.ts @@ -270,7 +270,7 @@ export async function embeddingRecall({ { _id: { $in: filterRows.map((item) => item.collection_id) } }, - 'name metadata' + 'name fileId rawLink' ).lean(), MongoDatasetData.find( { @@ -297,7 +297,7 @@ export async function embeddingRecall({ datasetId: String(data.datasetId), collectionId: String(data.collectionId), sourceName: collection.name || '', - sourceId: collection.metadata?.fileId || collection.metadata?.rawLink, + sourceId: collection?.fileId || collection?.rawLink, score: item.score }; }) @@ -352,7 +352,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro { _id: { $in: searchResults.map((item) => item.collectionId) } }, - '_id name metadata' + '_id name fileId rawLink' ); return { @@ -363,7 +363,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro datasetId: String(item.datasetId), collectionId: String(item.collectionId), sourceName: collection?.name || '', - sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink, + sourceId: collection?.fileId || collection?.rawLink, q: item.q, a: item.a, indexes: item.indexes, diff --git a/projects/app/src/service/events/generateVector.ts b/projects/app/src/service/events/generateVector.ts index 9374e3a6d49..bb32e2ade10 100644 --- a/projects/app/src/service/events/generateVector.ts +++ b/projects/app/src/service/events/generateVector.ts @@ -50,6 +50,7 @@ export async function generateVector(): Promise { collectionId: 1, q: 1, a: 1, + chunkIndex: 1, indexes: 1, model: 1, billId: 1 @@ -134,6 +135,7 @@ export async function generateVector(): Promise { collectionId: data.collectionId, q: dataItem.q, a: dataItem.a, + chunkIndex: data.chunkIndex, indexes: dataItem.indexes, model: data.model }); diff --git a/projects/app/src/service/moduleDispatch/dataset/search.ts b/projects/app/src/service/moduleDispatch/dataset/search.ts index 646c9a31cd1..bd5a6021b45 100644 --- a/projects/app/src/service/moduleDispatch/dataset/search.ts +++ b/projects/app/src/service/moduleDispatch/dataset/search.ts @@ -65,7 +65,8 @@ export async function dispatchDatasetSearch( model: vectorModel.name, tokens: tokenLen, similarity, - limit + limit, + searchMode } }; } diff --git a/projects/app/src/service/support/permission/auth/dataset.ts b/projects/app/src/service/support/permission/auth/dataset.ts index 5c5f680e347..497a7bff74e 100644 --- a/projects/app/src/service/support/permission/auth/dataset.ts +++ b/projects/app/src/service/support/permission/auth/dataset.ts @@ -31,7 +31,7 @@ export async function authDatasetData({ datasetId: String(datasetData.datasetId), collectionId: String(datasetData.collectionId), sourceName: result.collection.name || '', - sourceId: result.collection.metadata?.fileId || result.collection.metadata?.rawLink, + sourceId: result.collection?.fileId || result.collection?.rawLink, isOwner: String(datasetData.tmbId) === result.tmbId, canWrite: result.canWrite }; diff --git a/projects/app/src/web/core/dataset/api.ts b/projects/app/src/web/core/dataset/api.ts index 08d47f8944b..d0eb37d387e 100644 --- a/projects/app/src/web/core/dataset/api.ts +++ b/projects/app/src/web/core/dataset/api.ts @@ -16,10 +16,7 @@ import type { InsertOneDatasetDataProps } from '@/global/core/dataset/api.d'; import type { PushDataResponse } from '@/global/core/api/datasetRes.d'; -import type { - DatasetCollectionItemType, - SearchDataResponseItemType -} from '@fastgpt/global/core/dataset/type'; +import type { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type'; import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type'; import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';