Skip to content

Commit

Permalink
4.6.3-alpha1 (#529)
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu authored Nov 29, 2023
1 parent 007fce2 commit b916183
Show file tree
Hide file tree
Showing 43 changed files with 511 additions and 180 deletions.
3 changes: 3 additions & 0 deletions packages/global/common/string/time.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import dayjs from 'dayjs';

/**
 * Render a date as a `YYYY-MM-DD HH:mm` string using dayjs.
 *
 * @param time - The date to format.
 * @returns The string produced by dayjs for the pattern `YYYY-MM-DD HH:mm`.
 */
export const formatTime2YMDHM = (time: Date) => {
  const pattern = 'YYYY-MM-DD HH:mm';
  return dayjs(time).format(pattern);
};
2 changes: 1 addition & 1 deletion packages/global/core/app/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ export const appModules2Form = ({
);
defaultAppForm.aiSettings.quotePrompt = findInputValueByKey(
module.inputs,
ModuleInputKeyEnum.aiChatQuoteTemplate
ModuleInputKeyEnum.aiChatQuotePrompt
);
} else if (module.flowType === FlowNodeTypeEnum.datasetSearchNode) {
defaultAppForm.dataset.datasets = findInputValueByKey(
Expand Down
2 changes: 2 additions & 0 deletions packages/global/core/chat/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { ChatRoleEnum, ChatSourceEnum } from './constants';
import { FlowNodeTypeEnum } from '../module/node/constant';
import { ModuleOutputKeyEnum } from '../module/constants';
import { AppSchema } from '../app/type';
import { DatasetSearchModeEnum } from '../dataset/constant';

export type ChatSchema = {
_id: string;
Expand Down Expand Up @@ -94,6 +95,7 @@ export type moduleDispatchResType = {
// dataset search
similarity?: number;
limit?: number;
searchMode?: `${DatasetSearchModeEnum}`;

// cq
cqList?: ClassifyQuestionAgentItemType[];
Expand Down
57 changes: 33 additions & 24 deletions packages/global/core/dataset/constant.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export const PgDatasetTableName = 'modeldata';

/* ------------ dataset -------------- */
export enum DatasetTypeEnum {
folder = 'folder',
dataset = 'dataset'
Expand All @@ -14,28 +15,45 @@ export const DatasetTypeMap = {
}
};

/* ------------ collection -------------- */
export enum DatasetCollectionTypeEnum {
file = 'file',
folder = 'folder',
file = 'file',
link = 'link',
virtual = 'virtual'
}

export const DatasetCollectionTypeMap = {
[DatasetCollectionTypeEnum.file]: {
name: 'dataset.file'
},
[DatasetCollectionTypeEnum.folder]: {
name: 'dataset.folder'
name: 'core.dataset.folder'
},
[DatasetCollectionTypeEnum.file]: {
name: 'core.dataset.file'
},
[DatasetCollectionTypeEnum.link]: {
name: 'dataset.link'
name: 'core.dataset.link'
},
[DatasetCollectionTypeEnum.virtual]: {
name: 'dataset.Virtual File'
name: 'core.dataset.Virtual File'
}
};
/**
 * Training modes that can be recorded on a dataset collection.
 *
 * Each member's string value is identical to its name. Member order is
 * significant for any consumer that enumerates the members.
 */
export enum DatasetCollectionTrainingModeEnum {
manual = 'manual',
chunk = 'chunk',
qa = 'qa'
}
/**
 * Lookup table from each `DatasetCollectionTrainingModeEnum` member to an
 * object carrying a `label` string.
 *
 * The keys of this map are also used (via `Object.keys`) as the allowed values
 * of the `trainingType` field in the dataset collection Mongoose schema, so
 * adding or removing an entry changes what that schema accepts.
 *
 * NOTE(review): the `label` values look like i18n translation keys (matching
 * entries exist under `collection.training` in the English locale file) —
 * confirm against the rendering code before changing them.
 */
export const DatasetCollectionTrainingTypeMap = {
[DatasetCollectionTrainingModeEnum.manual]: {
label: 'core.dataset.collection.training.type manual'
},
[DatasetCollectionTrainingModeEnum.chunk]: {
label: 'core.dataset.collection.training.type chunk'
},
[DatasetCollectionTrainingModeEnum.qa]: {
label: 'core.dataset.collection.training.type qa'
}
};

/* ------------ data -------------- */
export enum DatasetDataIndexTypeEnum {
chunk = 'chunk',
qa = 'qa',
Expand All @@ -61,31 +79,22 @@ export const DatasetDataIndexTypeMap = {
}
};

/* ------------ training -------------- */
export enum TrainingModeEnum {
'chunk' = 'chunk',
'qa' = 'qa'
// 'hypothetical' = 'hypothetical',
// 'summary' = 'summary',
// 'multipleIndex' = 'multipleIndex'
chunk = 'chunk',
qa = 'qa'
}

export const TrainingTypeMap = {
[TrainingModeEnum.chunk]: {
name: 'chunk'
label: 'core.dataset.training.type chunk'
},
[TrainingModeEnum.qa]: {
name: 'qa'
label: 'core.dataset.training.type qa'
}
// [TrainingModeEnum.hypothetical]: {
// name: 'hypothetical'
// },
// [TrainingModeEnum.summary]: {
// name: 'summary'
// },
// [TrainingModeEnum.multipleIndex]: {
// name: 'multipleIndex'
// }
};

/* ------------ search -------------- */
export enum DatasetSearchModeEnum {
embedding = 'embedding',
embeddingReRank = 'embeddingReRank',
Expand Down
1 change: 1 addition & 0 deletions packages/global/core/dataset/controller.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export type CreateDatasetDataProps = {
tmbId: string;
datasetId: string;
collectionId: string;
chunkIndex?: number;
q: string;
a?: string;
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
Expand Down
15 changes: 9 additions & 6 deletions packages/global/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,18 @@ export type DatasetSchemaType = {

export type DatasetCollectionSchemaType = {
_id: string;
userId: string;
teamId: string;
tmbId: string;
datasetId: string;
parentId?: string;
name: string;
type: `${DatasetCollectionTypeEnum}`;
createTime: Date;
updateTime: Date;
metadata: {
fileId?: string;
rawLink?: string;
pgCollectionId?: string;
};
trainingType: `${TrainingModeEnum}`;
chunkSize: number;
fileId?: string;
rawLink?: string;
};

export type DatasetDataIndexItemType = {
Expand All @@ -57,6 +56,8 @@ export type DatasetDataSchemaType = {
collectionId: string;
datasetId: string;
collectionId: string;
chunkIndex: number;
updateTime: Date;
q: string; // large chunks or question
a: string; // answer or custom content
fullTextToken: string;
Expand All @@ -78,6 +79,7 @@ export type DatasetTrainingSchemaType = {
prompt: string;
q: string;
a: string;
chunkIndex: number;
indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

Expand All @@ -101,6 +103,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
canWrite: boolean;
sourceName: string;
sourceId?: string;
file?: DatasetFileSchema;
};

/* ================= data ===================== */
Expand Down
39 changes: 25 additions & 14 deletions packages/service/core/dataset/collection/schema.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant';
import {
DatasetCollectionTrainingTypeMap,
DatasetCollectionTypeMap
} from '@fastgpt/global/core/dataset/constant';
import { DatasetCollectionName } from '../schema';
import {
TeamCollectionName,
Expand Down Expand Up @@ -45,24 +48,32 @@ const DatasetCollectionSchema = new Schema({
enum: Object.keys(DatasetCollectionTypeMap),
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
updateTime: {
type: Date,
default: () => new Date()
},
trainingType: {
type: String,
enum: Object.keys(DatasetCollectionTrainingTypeMap),
required: true
},
chunkSize: {
type: Number,
required: true
},
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String
},
metadata: {
type: {
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String
},
// 451 初始化
pgCollectionId: {
type: String
}
},
type: Object,
default: {}
}
});
Expand Down
9 changes: 9 additions & 0 deletions packages/service/core/dataset/data/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ const DatasetDataSchema = new Schema({
}
],
default: []
},
// metadata
updateTime: {
type: Date,
default: () => new Date()
},
chunkIndex: {
type: Number,
default: 0
}
});

Expand Down
4 changes: 4 additions & 0 deletions packages/service/core/dataset/training/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ const TrainingDataSchema = new Schema({
type: String,
default: ''
},
chunkIndex: {
type: Number,
default: 0
},
indexes: {
type: [
{
Expand Down
2 changes: 1 addition & 1 deletion projects/app/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "app",
"version": "4.6.2",
"version": "4.6.3",
"private": false,
"scripts": {
"dev": "next dev",
Expand Down
35 changes: 32 additions & 3 deletions projects/app/public/locales/en/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,39 @@
"Search Top K": "Top K",
"Set Empty Result Tip": ",Response empty text",
"Similarity": "Similarity",
"Sync Time": "Update Time",
"Virtual File": "Virtual File",
"collection": {
"metadata": {
"Chunk Size": "Chunk Size",
"Createtime": "Create Time",
"Read Metadata": "Read Metadata",
"Training Type": "Training Type",
"Updatetime": "Update Time",
"metadata": "Metadata",
"read source": "Read Source",
"source": "Source",
"source name": "Source Name",
"source size": "Source Size"
},
"training": {
"type chunk": "Chunk",
"type manual": "Manual",
"type qa": "QA"
}
},
"data": {
"Edit": "Edit Data",
"data is deleted": "Data is deleted",
"id": "Data ID"
},
"file": "File",
"folder": "Folder",
"import": {
"Ideal chunk length": "Ideal chunk length",
"Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200"
},
"link": "Link",
"search": {
"Empty result response": "Empty Response",
"Empty result response Tips": "If you fill in the content, if no suitable content is found, you will directly reply to the content.",
Expand All @@ -289,7 +313,8 @@
"embedding desc": "Direct vector topk correlation query ",
"embeddingReRank": "Enhanced semantic retrieval ",
"embeddingReRank desc": "Sort using Rerank after overperforming vector topk queries "
}
},
"search mode": "Search Mode"
},
"test": {
"Test": "Start",
Expand All @@ -300,6 +325,10 @@
"test history": "Test History",
"test result placeholder": "The test results will be presented here",
"test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data."
},
"training": {
"type chunk": "Chunk",
"type qa": "QA"
}
},
"module": {
Expand Down Expand Up @@ -693,9 +722,9 @@
"wallet": {
"bill": {
"Audio Speech": "Audio Speech",
"ReRank": "ReRank",
"Whisper": "Whisper",
"bill username": "User",
"ReRank": "ReRank"
"bill username": "User"
}
}
}
Loading

0 comments on commit b916183

Please sign in to comment.