From c103dd27463aca4f35de95b5b65d42f2c538c721 Mon Sep 17 00:00:00 2001 From: JobSmithManipulation <143315462+JobSmithManipulation@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:13:07 +0800 Subject: [PATCH] change chunk.status to chunk.available (#2646) ### What problem does this PR solve? #1102 Renames the SDK `Chunk.status` attribute to `Chunk.available` (default `1`), sends it as `available` from `Chunk.save()`, and makes the chunk update API read the `available` request field instead of `available_int`, so the SDK and the API use the same field name. ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- api/apps/sdk/doc.py | 4 ++-- sdk/python/ragflow/modules/chunk.py | 4 ++-- sdk/python/test/t_document.py | 26 +++++++++++++++++--------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 21b983466c7..31aef8bb96d 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -609,8 +609,8 @@ def set(tenant_id): d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) d["important_kwd"] = req["important_keywords"] d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"])) - if "available_int" in req: - d["available_int"] = req["available_int"] + if "available" in req: + d["available_int"] = req["available"] try: tenant_id = DocumentService.get_tenant_id(req["document_id"]) diff --git a/sdk/python/ragflow/modules/chunk.py b/sdk/python/ragflow/modules/chunk.py index 98cf7939161..e1cdd50eb1e 100644 --- a/sdk/python/ragflow/modules/chunk.py +++ b/sdk/python/ragflow/modules/chunk.py @@ -11,7 +11,7 @@ def __init__(self, rag, res_dict): self.knowledgebase_id = None self.document_name = "" self.document_id = "" - self.status = "1" + self.available = 1 for k in list(res_dict.keys()): if k not in self.__dict__: res_dict.pop(k) @@ -39,7 +39,7 @@ def save(self) -> bool: "content": self.content, "important_keywords": self.important_keywords, "document_id": self.document_id, - "status": self.status, + "available": self.available, }) res = res.json() if res.get("retmsg") == "success": diff --git a/sdk/python/test/t_document.py b/sdk/python/test/t_document.py index eed572f341d..c70a4d8f539 100644 --- 
a/sdk/python/test/t_document.py +++ b/sdk/python/test/t_document.py @@ -151,14 +151,12 @@ def test_parse_and_cancel_document(self): name3 = 'westworld.pdf' path = 'test_data/westworld.pdf' - # Create a document in the dataset using the file path rag.create_document(ds, name=name3, blob=open(path, "rb").read()) # Retrieve the document by name doc = rag.get_document(name="westworld.pdf") - # Initiate asynchronous parsing doc.async_parse() @@ -231,7 +229,7 @@ def test_bulk_parse_and_cancel_documents(self): def test_parse_document_and_chunk_list(self): rag = RAGFlow(API_KEY, HOST_ADDRESS) ds = rag.create_dataset(name="God7") - name='story.txt' + name = 'story.txt' path = 'test_data/story.txt' # name = "Test Document rag.txt" # blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps." 
@@ -266,11 +264,11 @@ def test_delete_chunk_of_chunk_list(self): assert chunk is not None, "Chunk is None" assert isinstance(chunk, Chunk), "Chunk was not added to chunk list" doc = rag.get_document(name='story.txt') - chunk_count_before=doc.chunk_count + chunk_count_before = doc.chunk_count chunk.delete() doc = rag.get_document(name='story.txt') - assert doc.chunk_count == chunk_count_before-1, "Chunk was not deleted" - + assert doc.chunk_count == chunk_count_before - 1, "Chunk was not deleted" + def test_update_chunk_content(self): rag = RAGFlow(API_KEY, HOST_ADDRESS) doc = rag.get_document(name='story.txt') @@ -278,9 +276,19 @@ def test_update_chunk_content(self): assert chunk is not None, "Chunk is None" assert isinstance(chunk, Chunk), "Chunk was not added to chunk list" chunk.content = "ragflow123" - res=chunk.save() - assert res is True, f"Failed to update chunk, error: {res}" - + res = chunk.save() + assert res is True, f"Failed to update chunk content, error: {res}" + + def test_update_chunk_available(self): + rag = RAGFlow(API_KEY, HOST_ADDRESS) + doc = rag.get_document(name='story.txt') + chunk = doc.add_chunk(content="ragflow") + assert chunk is not None, "Chunk is None" + assert isinstance(chunk, Chunk), "Chunk was not added to chunk list" + chunk.available = 0 + res = chunk.save() + assert res is True, f"Failed to update chunk status, error: {res}" + def test_retrieval_chunks(self): rag = RAGFlow(API_KEY, HOST_ADDRESS) ds = rag.create_dataset(name="God8")