From 00870b0f3d9cea2e39f3572a482b2317256c7aab Mon Sep 17 00:00:00 2001 From: MingWei Liu Date: Thu, 19 Dec 2024 08:30:03 +0000 Subject: [PATCH 1/5] fix:The link of upload files is broken in the chat view (#466) --- backend/app/api/main.py | 2 ++ backend/app/api/routes/download.py | 24 ++++++++++++++++++++++++ docker-compose.yml | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 backend/app/api/routes/download.py diff --git a/backend/app/api/main.py b/backend/app/api/main.py index bbf1f5810..ab7ccfcc2 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -7,6 +7,7 @@ user, api_key, feedback, + download, ) from app.api.admin_routes.knowledge_base.routes import router as admin_knowledge_base_router from app.api.admin_routes.knowledge_base.graph.routes import router as admin_knowledge_base_graph_router @@ -34,6 +35,7 @@ api_router.include_router(feedback.router, tags=["chat"]) api_router.include_router(user.router, tags=["user"]) api_router.include_router(api_key.router, tags=["auth"]) +api_router.include_router(download.router) api_router.include_router(admin_rag_index.router, tags=["admin/rag_index"]) api_router.include_router(admin_chat_engine.router, tags=["admin/chat_engine"]) diff --git a/backend/app/api/routes/download.py b/backend/app/api/routes/download.py new file mode 100644 index 000000000..303bc4f23 --- /dev/null +++ b/backend/app/api/routes/download.py @@ -0,0 +1,24 @@ +from fastapi import FastAPI, HTTPException +from fastapi.responses import FileResponse + +from app.models import Upload +from app.rag.datasource import FileDataSource +from app.file_storage import default_file_storage + +router = APIRouter() + +@router.get("/documents/{document_id}/download") +def download_file(document_id: int): + isfound = False + for f_config in FileDataSource.config: + if f_config["file_id"] == document_id: + isfound = True + # 找到了就返回文件 + if isfound == True: + upload = FileDataSource.session.get(Upload, document_id) + return FileResponse(path=upload.path, filename=upload.name, media_type='application/octet-stream') + # 没找到应该 302 到对应的 url,但先404 + else: + raise HTTPException(status_code=404, detail="lmw : File not found") + + diff --git a/docker-compose.yml b/docker-compose.yml index b5d89cbc5..99ffc52c4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: depends_on: - redis ports: - - "8000:80" + - "8002:80" env_file: - .env volumes: From 60ea247ac6baad7e945227c3802ec905b8c208dd Mon Sep 17 00:00:00 2001 From: MingWei Liu Date: Mon, 23 Dec 2024 10:35:29 +0000 Subject: [PATCH 2/5] The link of upload files is broken in the chat view #466 --- backend/.env.example | 28 +++++++++++++++++++++++ backend/app/api/main.py | 2 +- backend/app/api/routes/download.py | 36 +++++++++++++++++------------- 3 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 backend/.env.example diff --git a/backend/.env.example b/backend/.env.example new file mode 100644 index 000000000..62dacae85 --- /dev/null +++ b/backend/.env.example @@ -0,0 +1,28 @@ +ENVIRONMENT=production + +# You can generate a new secret key by running the following command +# $ python3 -c "import secrets; print(secrets.token_urlsafe(32))" +# SECRET_KEY is very important, please do not share it with others, +# SECRET_KEY must greater or equal to 32 characters. +SECRET_KEY= + +# Replace with your own sentry dsn, leave it commented if you don't want to use sentry +# SENTRY_DSN=https://xxxx@xxx.sentry.io/xxxxxx + +# Replace with your own TiDB cluster connection information, +# TiDB Serverless is recommended. You can quickly create one from https://tidbcloud.com/ +TIDB_HOST=xxxxx.prod.aws.tidbcloud.com +TIDB_USER= +TIDB_PASSWORD= +TIDB_DATABASE= +TIDB_SSL=true + +# EMBEDDING_MAX_TOKENS indicates the max size of document chunks. +# +# EMBEDDING_MAX_TOKENS should be smaller than the embedding model's max tokens due +# to the tokenizer difference. (see: https://github.com/pingcap/autoflow/issues/397) +# +# Go to https://tidb.ai/docs/embedding-model to check the max tokens of the embedding model. +# +# Notice: this variable will be deprecated in the future. +EMBEDDING_MAX_TOKENS=2048 diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 744e8bb0a..378876aa5 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -53,7 +53,7 @@ api_router.include_router(feedback.router, tags=["chat"]) api_router.include_router(user.router, tags=["user"]) api_router.include_router(api_key.router, tags=["auth"]) -api_router.include_router(download.router) +api_router.include_router(download.router, tags=["download_file"]) api_router.include_router(admin_chat_engine.router, tags=["admin/chat_engine"]) api_router.include_router(admin_document_router, tags=["admin/documents"]) api_router.include_router(admin_feedback.router, tags=["admin/feedback"]) diff --git a/backend/app/api/routes/download.py b/backend/app/api/routes/download.py index 303bc4f23..cdc6c5ed7 100644 --- a/backend/app/api/routes/download.py +++ b/backend/app/api/routes/download.py @@ -1,24 +1,28 @@ -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, APIRouter from fastapi.responses import FileResponse -from app.models import Upload -from app.rag.datasource import FileDataSource -from app.file_storage import default_file_storage +from sqlmodel import select, Session, col +from app.api.deps import SessionDep +from app.models import Document +from app.repositories import document_repo +import os router = APIRouter() -@router.get("/documents/{document_id}/download") -def download_file(document_id: int): - isfound = False - for f_config in FileDataSource.config: - if f_config["file_id"] == document_id: - isfound = True - # 找到了就返回文件 - if isfound == True: - upload = FileDataSource.session.get(Upload, document_id) - return FileResponse(path=upload.path, filename=upload.name, media_type='application/octet-stream') - # 没找到应该 302 到对应的 url,但先404 +@router.get("/documents/{doc_id}/download") +def download_file( + doc_id: int, + session: SessionDep +): + doc = session.get(Document, doc_id) + if not doc: + raise HTTPException(status_code = 404, detail = "File not found") + + DATA_PATH = "../data" + source_uri = os.path.join(DATA_PATH, doc.source_uri) + if os.path.exists(source_uri): + return FileResponse(path = source_uri, filename = doc.name, media_type = doc.mime_type) else: - raise HTTPException(status_code=404, detail="lmw : File not found") + raise HTTPException(status_code = 404, detail = "File not found") From 634f951364b667a6144419f2ec6e6c697400d35b Mon Sep 17 00:00:00 2001 From: Rutheniumlmw <1791016517@qq.com> Date: Mon, 23 Dec 2024 11:04:56 +0000 Subject: [PATCH 3/5] fix:The link of upload files is broken in the chat view #466 --- backend/.env.example | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 backend/.env.example diff --git a/backend/.env.example b/backend/.env.example deleted file mode 100644 index 62dacae85..000000000 --- a/backend/.env.example +++ /dev/null @@ -1,28 +0,0 @@ -ENVIRONMENT=production - -# You can generate a new secret key by running the following command -# $ python3 -c "import secrets; print(secrets.token_urlsafe(32))" -# SECRET_KEY is very important, please do not share it with others, -# SECRET_KEY must greater or equal to 32 characters. -SECRET_KEY= - -# Replace with your own sentry dsn, leave it commented if you don't want to use sentry -# SENTRY_DSN=https://xxxx@xxx.sentry.io/xxxxxx - -# Replace with your own TiDB cluster connection information, -# TiDB Serverless is recommended. You can quickly create one from https://tidbcloud.com/ -TIDB_HOST=xxxxx.prod.aws.tidbcloud.com -TIDB_USER= -TIDB_PASSWORD= -TIDB_DATABASE= -TIDB_SSL=true - -# EMBEDDING_MAX_TOKENS indicates the max size of document chunks. -# -# EMBEDDING_MAX_TOKENS should be smaller than the embedding model's max tokens due -# to the tokenizer difference. (see: https://github.com/pingcap/autoflow/issues/397) -# -# Go to https://tidb.ai/docs/embedding-model to check the max tokens of the embedding model. -# -# Notice: this variable will be deprecated in the future. -EMBEDDING_MAX_TOKENS=2048 From 621e4aa8337f5e536b1144e811451dcd6eb15655 Mon Sep 17 00:00:00 2001 From: Rutheniumlmw <1791016517@qq.com> Date: Tue, 24 Dec 2024 06:47:33 +0000 Subject: [PATCH 4/5] fix:The link of upload files is broken in the chat view #466 --- backend/app/api/main.py | 4 ++-- backend/app/api/routes/document.py | 35 ++++++++++++++++++++++++++++++ backend/app/api/routes/download.py | 28 ------------------------ docker-compose.yml | 2 +- 4 files changed, 38 insertions(+), 31 deletions(-) create mode 100644 backend/app/api/routes/document.py delete mode 100644 backend/app/api/routes/download.py diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 378876aa5..6f29bc390 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -7,7 +7,7 @@ user, api_key, feedback, - download, + document, ) from app.api.admin_routes.knowledge_base.routes import ( router as admin_knowledge_base_router, @@ -53,7 +53,7 @@ api_router.include_router(feedback.router, tags=["chat"]) api_router.include_router(user.router, tags=["user"]) api_router.include_router(api_key.router, tags=["auth"]) -api_router.include_router(download.router, tags=["download_file"]) +api_router.include_router(document.router, tags=["documents"]) api_router.include_router(admin_chat_engine.router, tags=["admin/chat_engine"]) api_router.include_router(admin_document_router, tags=["admin/documents"]) api_router.include_router(admin_feedback.router, tags=["admin/feedback"]) diff --git a/backend/app/api/routes/document.py b/backend/app/api/routes/document.py new file mode 100644 index 000000000..91d363721 --- /dev/null +++ b/backend/app/api/routes/document.py @@ -0,0 +1,35 @@ +from fastapi import FastAPI, HTTPException, APIRouter +from fastapi.responses import StreamingResponse +from sqlmodel import Session +from app.api.deps import SessionDep +from app.repositories import document_repo +from app.file_storage import get_file_storage + +router = APIRouter() + +@router.get("/documents/{doc_id}/download") +def download_file( + doc_id: int, + session: SessionDep +): + doc = document_repo.must_get(session, doc_id) + + name = doc.source_uri + filestorage = get_file_storage() + if filestorage.exists(name): + file_size = filestorage.size(name) + headers = {"Content-Length": str(file_size)} + def iterfile(): + with filestorage.open(name) as f: + while chunk := f.read(8192): # 每次读取 8KB + yield chunk + return StreamingResponse( + iterfile(), + media_type = doc.mime_type, + headers = headers + ) + else: + raise HTTPException(status_code = 404, detail = "File not found") + + + diff --git a/backend/app/api/routes/download.py b/backend/app/api/routes/download.py deleted file mode 100644 index cdc6c5ed7..000000000 --- a/backend/app/api/routes/download.py +++ /dev/null @@ -1,28 +0,0 @@ -from fastapi import FastAPI, HTTPException, APIRouter -from fastapi.responses import FileResponse - -from sqlmodel import select, Session, col -from app.api.deps import SessionDep -from app.models import Document -from app.repositories import document_repo -import os - -router = APIRouter() - -@router.get("/documents/{doc_id}/download") -def download_file( - doc_id: int, - session: SessionDep -): - doc = session.get(Document, doc_id) - if not doc: - raise HTTPException(status_code = 404, detail = "File not found") - - DATA_PATH = "../data" - source_uri = os.path.join(DATA_PATH, doc.source_uri) - if os.path.exists(source_uri): - return FileResponse(path = source_uri, filename = doc.name, media_type = doc.mime_type) - else: - raise HTTPException(status_code = 404, detail = "File not found") - - diff --git a/docker-compose.yml b/docker-compose.yml index 1ec0e9eb8..49e44ac57 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: depends_on: - redis ports: - - "8002:80" + - "8000:80" env_file: - .env volumes: From 5c553c9fed05eadb2ad138a8f46ecf6e755301b8 Mon Sep 17 00:00:00 2001 From: Rutheniumlmw <1791016517@qq.com> Date: Tue, 24 Dec 2024 08:48:01 +0000 Subject: [PATCH 5/5] fix:The link of upload files is broken in the chat view #466 --- backend/app/api/routes/document.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/app/api/routes/document.py b/backend/app/api/routes/document.py index 91d363721..eb88871a2 100644 --- a/backend/app/api/routes/document.py +++ b/backend/app/api/routes/document.py @@ -21,8 +21,7 @@ def download_file( headers = {"Content-Length": str(file_size)} def iterfile(): with filestorage.open(name) as f: - while chunk := f.read(8192): # 每次读取 8KB - yield chunk + yield from f return StreamingResponse( iterfile(), media_type = doc.mime_type,