Skip to content

Commit

Permalink
perf: Mongo virtual field (#3464)
Browse files Browse the repository at this point in the history
* remove invalid code

* perf: team member virtual code

* perf: virtual search; perf: search test data

* fix: ts

* fix: image response headers

* perf: template code
  • Loading branch information
c121914yu committed Dec 26, 2024
1 parent 92eb927 commit 7e060fe
Show file tree
Hide file tree
Showing 57 changed files with 322 additions and 306 deletions.
9 changes: 8 additions & 1 deletion docSite/content/zh-cn/docs/development/openapi/dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,11 @@ curl --location --request POST 'https://api.fastgpt.in/api/core/dataset/searchTe
"limit": 5000,
"similarity": 0,
"searchMode": "embedding",
"usingReRank": false
"usingReRank": false,
"datasetSearchUsingExtensionQuery": true,
"datasetSearchExtensionModel": "gpt-4o-mini",
"datasetSearchExtensionBg": ""
}'
```

Expand All @@ -1441,6 +1445,9 @@ curl --location --request POST 'https://api.fastgpt.in/api/core/dataset/searchTe
- similarity - 最低相关度(0~1,可选)
- searchMode - 搜索模式:embedding | fullTextRecall | mixedRecall
- usingReRank - 使用重排
- datasetSearchUsingExtensionQuery - 使用问题优化
- datasetSearchExtensionModel - 问题优化模型
- datasetSearchExtensionBg - 问题优化背景描述
{{% /alert %}}

{{< /markdownify >}}
Expand Down
7 changes: 5 additions & 2 deletions docSite/content/zh-cn/docs/development/upgrading/4817.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,8 @@ weight: 807
1.
2. 新增 - LLM 模型参数支持关闭 max_tokens 和 temperature。
3. 新增 - 商业版支持后台配置自定义工作流变量,用于与业务系统鉴权打通。
4. 优化 - 知识库搜索参数,滑动条支持输入模式,可以更精准的控制。
5. 优化 - 可用模型展示
4. 新增 - 搜索测试接口支持问题优化。
5. 优化 - 知识库搜索参数,滑动条支持输入模式,可以更精准的控制。
6. 优化 - 可用模型展示
7. 优化 - Mongo 查询语句,增加 virtual 字段。
8. 修复 - 文件返回接口缺少 Content-Length 头。
1 change: 0 additions & 1 deletion packages/global/common/system/types/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ export type ExternalProviderWorkflowVarType = {
intro: string;
isOpen: boolean;
url?: string;
// value?: string;
};

/* fastgpt main */
Expand Down
4 changes: 2 additions & 2 deletions packages/global/core/app/type.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { FlowNodeTemplateType, StoreNodeItemType } from '../workflow/type/node';
import { AppTypeEnum } from './constants';
import { AppTemplateTypeEnum, AppTypeEnum } from './constants';
import { PermissionTypeEnum } from '../../support/permission/constant';
import {
NodeInputKeyEnum,
Expand Down Expand Up @@ -206,6 +206,6 @@ export type AppTemplateSchemaType = {

export type TemplateTypeSchemaType = {
typeName: string;
typeId: string;
typeId: AppTemplateTypeEnum;
typeOrder: number;
};
6 changes: 2 additions & 4 deletions packages/global/core/dataset/collection/utils.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import { DatasetCollectionTypeEnum, TrainingModeEnum, TrainingTypeMap } from '../constants';
import { CollectionWithDatasetType, DatasetCollectionSchemaType } from '../type';
import { DatasetCollectionSchemaType } from '../type';

export const getCollectionSourceData = (
collection?: CollectionWithDatasetType | DatasetCollectionSchemaType
) => {
export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType) => {
return {
sourceId:
collection?.fileId ||
Expand Down
7 changes: 2 additions & 5 deletions packages/global/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,8 @@ export type DatasetTrainingSchemaType = {
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datasetId'> & {
datasetId: DatasetSchemaType;
};
export type DatasetDataWithCollectionType = Omit<DatasetDataSchemaType, 'collectionId'> & {
collectionId: DatasetCollectionSchemaType;
export type CollectionWithDatasetType = DatasetCollectionSchemaType & {
dataset: DatasetSchemaType;
};

/* ================= dataset ===================== */
Expand Down
5 changes: 0 additions & 5 deletions packages/global/support/outLink/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,6 @@ export type OutLinkSchema<T extends OutlinkAppType = undefined> = {
app: T;
};

// to handle MongoDB querying
export type OutLinkWithAppType = Omit<OutLinkSchema, 'appId'> & {
appId: AppSchema;
};

// Edit the Outlink
export type OutLinkEditType<T = undefined> = {
_id?: string;
Expand Down
9 changes: 3 additions & 6 deletions packages/global/support/permission/type.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { UserModelSchema } from '../user/type';
import { RequireOnlyOne } from '../../common/type/utils';
import { TeamMemberWithUserSchema } from '../user/team/type';
import { TeamMemberSchema } from '../user/team/type';
import { AuthUserTypeEnum, PermissionKeyEnum, PerResourceTypeEnum } from './constant';
import { MemberGroupSchemaType } from './memberGroup/type';

Expand Down Expand Up @@ -31,11 +32,7 @@ export type ResourcePermissionType = {
}>;

export type ResourcePerWithTmbWithUser = Omit<ResourcePermissionType, 'tmbId'> & {
tmbId: TeamMemberWithUserSchema;
};

export type ResourcePerWithGroup = Omit<ResourcePermissionType, 'groupId'> & {
groupId: MemberGroupSchemaType;
tmbId: TeamMemberSchema & { user: UserModelSchema };
};

export type PermissionSchemaType = {
Expand Down
7 changes: 2 additions & 5 deletions packages/global/support/user/team/controller.d.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { PermissionValueType } from '../../permission/type';
import { TeamMemberRoleEnum } from './constant';
import { LafAccountType, TeamMemberSchema } from './type';
import { LafAccountType, TeamMemberSchema, ThirdPartyAccountType } from './type';

export type AuthTeamRoleProps = {
teamId: string;
Expand All @@ -11,15 +11,12 @@ export type CreateTeamProps = {
name: string;
avatar?: string;
defaultTeam?: boolean;
lafAccount?: LafAccountType;
memberName?: string;
};
export type UpdateTeamProps = {
export type UpdateTeamProps = Omit<ThirdPartyAccountType, 'externalWorkflowVariable'> & {
name?: string;
avatar?: string;
teamDomain?: string;
lafAccount?: LafAccountType;
openaiAccount?: OpenaiAccountType;
externalWorkflowVariable?: { key: string; value: string };
};

Expand Down
13 changes: 3 additions & 10 deletions packages/global/support/user/team/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,9 @@ export type TeamMemberSchema = {
defaultTeam: boolean;
};

export type TeamMemberWithUserSchema = Omit<TeamMemberSchema, 'userId'> & {
userId: UserModelSchema;
};

export type TeamMemberWithTeamSchema = Omit<TeamMemberSchema, 'teamId'> & {
teamId: TeamSchema;
};

export type TeamMemberWithTeamAndUserSchema = Omit<TeamMemberWithTeamSchema, 'userId'> & {
userId: UserModelSchema;
export type TeamMemberWithTeamAndUserSchema = TeamMemberSchema & {
team: TeamSchema;
user: UserModelSchema;
};

export type TeamTmbItemType = {
Expand Down
34 changes: 6 additions & 28 deletions packages/service/core/dataset/collection/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ import {
} from '@fastgpt/global/core/dataset/constants';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { MongoDatasetCollection } from './schema';
import {
CollectionWithDatasetType,
DatasetCollectionSchemaType,
DatasetSchemaType
} from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSchemaType, DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetTraining } from '../training/schema';
import { MongoDatasetData } from '../data/schema';
import { delImgByRelatedId } from '../../../common/file/image/controller';
Expand Down Expand Up @@ -230,7 +226,7 @@ export const delCollectionRelatedSource = async ({
collections,
session
}: {
collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
collections: DatasetCollectionSchemaType[];
session: ClientSession;
}) => {
if (collections.length === 0) return;
Expand Down Expand Up @@ -264,7 +260,7 @@ export async function delCollection({
session,
delRelatedSource
}: {
collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
collections: DatasetCollectionSchemaType[];
session: ClientSession;
delRelatedSource: boolean;
}) {
Expand All @@ -274,16 +270,7 @@ export async function delCollection({

if (!teamId) return Promise.reject('teamId is not exist');

const datasetIds = Array.from(
new Set(
collections.map((item) => {
if (typeof item.datasetId === 'string') {
return String(item.datasetId);
}
return String(item.datasetId._id);
})
)
);
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
const collectionIds = collections.map((item) => String(item._id));

// delete training data
Expand Down Expand Up @@ -324,7 +311,7 @@ export async function delOnlyCollection({
collections,
session
}: {
collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
collections: DatasetCollectionSchemaType[];
session: ClientSession;
}) {
if (collections.length === 0) return;
Expand All @@ -333,16 +320,7 @@ export async function delOnlyCollection({

if (!teamId) return Promise.reject('teamId is not exist');

const datasetIds = Array.from(
new Set(
collections.map((item) => {
if (typeof item.datasetId === 'string') {
return String(item.datasetId);
}
return String(item.datasetId._id);
})
)
);
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
const collectionIds = collections.map((item) => String(item._id));

// delete training data
Expand Down
7 changes: 7 additions & 0 deletions packages/service/core/dataset/collection/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ const DatasetCollectionSchema = new Schema({
}
});

// Virtual `dataset` field: when populated, it is resolved by looking up the
// document in the model registered under `DatasetCollectionName` whose `_id`
// equals this document's `datasetId`. `justOne: true` yields a single document
// instead of an array. The stored `datasetId` field itself is left untouched.
DatasetCollectionSchema.virtual('dataset', {
ref: DatasetCollectionName,
localField: 'datasetId',
foreignField: '_id',
justOne: true
});

try {
// auth file
DatasetCollectionSchema.index({ teamId: 1, fileId: 1 });
Expand Down
4 changes: 2 additions & 2 deletions packages/service/core/dataset/collection/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ export const collectionTagsToTagLabel = async ({
};

export const syncCollection = async (collection: CollectionWithDatasetType) => {
const dataset = collection.datasetId;
const dataset = collection.dataset;

if (
collection.type !== DatasetCollectionTypeEnum.link &&
Expand Down Expand Up @@ -183,7 +183,7 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
teamId: collection.teamId,
tmbId: collection.tmbId,
name: collection.name,
datasetId: collection.datasetId._id,
datasetId: collection.datasetId,
parentId: collection.parentId,
type: collection.type,

Expand Down
8 changes: 4 additions & 4 deletions packages/service/core/dataset/controller.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { CollectionWithDatasetType, DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';
import { delCollectionRelatedSource } from './collection/controller';
Expand Down Expand Up @@ -49,9 +49,9 @@ export async function findDatasetAndAllChildren({
}

export async function getCollectionWithDataset(collectionId: string) {
const data = (await MongoDatasetCollection.findById(collectionId)
.populate('datasetId')
.lean()) as CollectionWithDatasetType;
const data = await MongoDatasetCollection.findById(collectionId)
.populate<{ dataset: DatasetSchemaType }>('dataset')
.lean();
if (!data) {
return Promise.reject('Collection is not exist');
}
Expand Down
39 changes: 25 additions & 14 deletions packages/service/core/dataset/data/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,32 @@ const DatasetDataSchema = new Schema({
rebuilding: Boolean
});

// list collection and count data; list data; delete collection(relate data)
DatasetDataSchema.index({
teamId: 1,
datasetId: 1,
collectionId: 1,
chunkIndex: 1,
updateTime: -1
// Virtual `collection` field: when populated, it is resolved by looking up the
// document in the model registered under `DatasetColCollectionName` whose `_id`
// equals this document's `collectionId`. `justOne: true` yields a single
// document instead of an array. The stored `collectionId` field is left untouched.
DatasetDataSchema.virtual('collection', {
ref: DatasetColCollectionName,
localField: 'collectionId',
foreignField: '_id',
justOne: true
});
// full text index
DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
// Recall vectors after data matching
DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
DatasetDataSchema.index({ updateTime: 1 });
// rebuild data
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });

// Register the indexes for the dataset data schema. The declarations are wrapped
// in try/catch so that an error thrown while declaring them is logged to the
// console rather than propagated out of this module.
try {
// list collection and count data; list data; delete collection(relate data)
DatasetDataSchema.index({
teamId: 1,
datasetId: 1,
collectionId: 1,
chunkIndex: 1,
updateTime: -1
});
// full text index
DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
// Recall vectors after data matching
DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
DatasetDataSchema.index({ updateTime: 1 });
// rebuild data
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
} catch (error) {
console.log(error);
}

export const MongoDatasetData = getMongoModel<DatasetDataSchemaType>(
DatasetDataCollectionName,
Expand Down
15 changes: 9 additions & 6 deletions packages/service/core/dataset/search/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import { getVectorsByText } from '../../ai/embedding';
import { getVectorModel } from '../../ai/model';
import { MongoDatasetData } from '../data/schema';
import {
DatasetCollectionSchemaType,
DatasetDataSchemaType,
DatasetDataWithCollectionType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from '../collection/schema';
Expand Down Expand Up @@ -267,7 +267,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
});

// get q and a
const dataList = (await MongoDatasetData.find(
const dataList = await MongoDatasetData.find(
{
teamId,
datasetId: { $in: datasetIds },
Expand All @@ -276,8 +276,11 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
},
'datasetId collectionId updateTime q a chunkIndex indexes'
)
.populate('collectionId', 'name fileId rawLink externalFileId externalFileUrl')
.lean()) as DatasetDataWithCollectionType[];
.populate<{ collection: DatasetCollectionSchemaType }>(
'collection',
'name fileId rawLink externalFileId externalFileUrl'
)
.lean();

// add score to data(It's already sorted. The first one is the one with the most points)
const concatResults = dataList.map((data) => {
Expand Down Expand Up @@ -307,8 +310,8 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
a: data.a,
chunkIndex: data.chunkIndex,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId?._id),
...getCollectionSourceData(data.collectionId),
collectionId: String(data.collectionId),
...getCollectionSourceData(data.collection),
score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
};

Expand Down
2 changes: 1 addition & 1 deletion packages/service/core/dataset/training/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export const pushDataListToTrainingQueueByCollectionId = async ({
session?: ClientSession;
} & PushDatasetDataProps) => {
const {
datasetId: { _id: datasetId, agentModel, vectorModel }
dataset: { _id: datasetId, agentModel, vectorModel }
} = await getCollectionWithDataset(collectionId);
return pushDataListToTrainingQueue({
...props,
Expand Down
Loading

0 comments on commit 7e060fe

Please sign in to comment.