Skip to content

Commit

Permalink
fix: file extension check (#2876)
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu committed Oct 12, 2024
1 parent e22a96c commit 67dd58c
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docSite/content/zh-cn/docs/development/upgrading/4811.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ weight: 813

### 3. 修改镜像 tag 并重启

- 更新 FastGPT 镜像 tag: v4.8.11
- 更新 FastGPT 镜像 tag: v4.8.11-fix
- 更新 FastGPT 商业版镜像 tag: v4.8.11
- 更新 FastGPT Sandbox 镜像 tag: v4.8.11

Expand Down
3 changes: 2 additions & 1 deletion docSite/content/zh-cn/docs/development/upgrading/4812.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ weight: 812

## 更新说明

1. 新增 - 全局变量支持更多数据类型
1. 新增 - 全局变量支持更多数据类型
2. 修复 - 文件后缀判断,去除 query 影响。
18 changes: 18 additions & 0 deletions packages/global/common/string/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,21 @@ export const sliceStrStartEnd = (str: string, start: number, end: number) => {

return `${startContent}${overSize ? `\n\n...[hide ${str.length - start - end} chars]...\n\n` : ''}${endContent}`;
};

/**
 * Parse the lower-cased file extension from a URL or a bare file name.
 *
 * Steps: drop the query string (everything from the first `?`), take the last
 * `/`-separated segment as the file name, then take the text after the last
 * `.` in that name. A trailing `#fragment` is removed from the extension.
 * A file name that contains no `.` has no extension and yields `''`.
 *
 * @example
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf?token=123'); // 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf');           // 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/file.pdf#page=2');    // 'pdf'
 * parseFileExtensionFromUrl('https://xxx.com/file');               // ''
 *
 * @param url - URL or file name to inspect; defaults to an empty string.
 * @returns The extension in lower case without the leading dot, or `''` when
 *          none can be found.
 */
export const parseFileExtensionFromUrl = (url = ''): string => {
  // Remove query params so dots inside them (e.g. `?name=a.txt`) are ignored
  const urlWithoutQuery = url.split('?')[0] ?? '';
  // Get file name (last path segment)
  const fileName = urlWithoutQuery.split('/').pop() ?? '';

  // No dot means no extension; do not return the whole file name
  const dotIndex = fileName.lastIndexOf('.');
  if (dotIndex === -1) return '';

  // Strip the fragment from the extension only, so a `#` that appears earlier
  // in the file name (e.g. `report#1.docx`) does not hide the real extension
  const extension = fileName.slice(dotIndex + 1).split('#')[0] ?? '';
  return extension.toLowerCase();
};
3 changes: 2 additions & 1 deletion packages/service/common/file/gridfs/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
MongoDatasetFileSchema;
Expand Down Expand Up @@ -163,7 +164,7 @@ export const readFileContentFromMongo = async ({
return Promise.reject(CommonErrEnum.fileNotFound);
}

const extension = file?.filename?.split('.')?.pop()?.toLowerCase() || '';
const extension = parseFileExtensionFromUrl(file?.filename);

const start = Date.now();
const fileBuffers = await gridFsStream2Buffer(fileStream);
Expand Down
3 changes: 2 additions & 1 deletion packages/service/core/dataset/read.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { parseCsvTable2Chunks } from './training/utils';
import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';

export const readFileRawTextByUrl = async ({
teamId,
Expand All @@ -21,7 +22,7 @@ export const readFileRawTextByUrl = async ({
url: url,
responseType: 'arraybuffer'
});
const extension = url.split('.')?.pop()?.toLowerCase() || '';
const extension = parseFileExtensionFromUrl(url);

const buffer = Buffer.from(response.data, 'binary');

Expand Down
4 changes: 3 additions & 1 deletion packages/service/core/workflow/dispatch/tools/readFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { readRawContentByFileBuffer } from '../../../../common/file/read/utils';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';

type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.fileUrlList]: string[];
Expand Down Expand Up @@ -144,7 +145,8 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
return url;
})();
// Extension
const extension = filename.split('.').pop()?.toLowerCase() || '';
const extension = parseFileExtensionFromUrl(filename);

// Get encoding
const encoding = (() => {
const contentType = response.headers['content-type'];
Expand Down

0 comments on commit 67dd58c

Please sign in to comment.