From b2b497ec30ebe510f98e2d9c8099b8a1c481f35c Mon Sep 17 00:00:00 2001 From: glorenzo972 Date: Tue, 9 Jul 2024 17:36:57 +0200 Subject: [PATCH] fix: user-agent for scrape --- CHANGELOG.md | 5 ++++ pyproject.toml | 2 +- tilellm/__main__.py | 24 +++++++++++++++- tilellm/controller/controller.py | 16 ++++++++++- tilellm/models/item_model.py | 1 + .../pinecone/pinecone_repository_base.py | 28 +++++++++++++++++++ 6 files changed, 73 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b62ce1a..5012133 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ *Andrea Sponziello* ### **Copyrigth**: *Tiledesk SRL* +## [2024-07-09] +### 0.2.6 +- add: DELETE /api/chunk//namespace/ +- add: search_type parameter similarity|mmr + ## [2024-07-01] ### 0.2.5 - fix: user-agent for scrape diff --git a/pyproject.toml b/pyproject.toml index 06a57b8..ef3cde6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "tilellm" -version = "0.2.5" +version = "0.2.6" description = "tiledesk for RAG" authors = ["Gianluca Lorenzo "] repository = "https://github.com/Tiledesk/tiledesk-llm" diff --git a/tilellm/__main__.py b/tilellm/__main__.py index b21cf16..f1a1260 100644 --- a/tilellm/__main__.py +++ b/tilellm/__main__.py @@ -30,11 +30,13 @@ add_pc_item, delete_namespace, delete_id_from_namespace, + delete_chunk_id_from_namespace, get_ids_namespace, get_listitems_namespace, get_desc_namespace, get_list_namespace, - get_sources_namespace, ask_to_llm) + get_sources_namespace, + ask_to_llm) import logging @@ -475,6 +477,26 @@ async def delete_namespace_main(namespace: str): raise HTTPException(status_code=400, detail=repr(ex)) +@app.delete("/api/chunk/{chunk_id}/namespace/{namespace}") +async def delete_item_chunk_id_namespace_main(chunk_id: str, namespace: str): + """ + Delete items from namespace identified by id and namespace + :param chunk_id: + :param namespace: + :return: + """ + try: + + logger.info(f"delete id {chunk_id} dal namespace {namespace}") + result = await delete_chunk_id_from_namespace(chunk_id, namespace) + + return JSONResponse(content={"message": f"ids {chunk_id} in Namespace {namespace} deleted"}) + except Exception as ex: + print(repr(ex)) + logger.error(ex) + raise HTTPException(status_code=400, detail=repr(ex)) + + @app.post("/api/delete/namespace") async def delete_namespace_main(namespace_to_delete: PineconeNamespaceToDelete): """ diff --git a/tilellm/controller/controller.py b/tilellm/controller/controller.py index d51637f..bee90b5 100644 --- a/tilellm/controller/controller.py +++ b/tilellm/controller/controller.py @@ -240,7 +240,7 @@ async def ask_with_memory(question_answer, repo=None) -> RetrievalResult: vector_store = await repo.create_pc_index(oai_embeddings, emb_dimension) - retriever = vector_store.as_retriever(search_type='similarity', + retriever = vector_store.as_retriever(search_type=question_answer.search_type, search_kwargs={'k': question_answer.top_k, 'namespace': question_answer.namespace} ) @@ -596,6 +596,20 @@ async def delete_id_from_namespace(metadata_id: str, namespace: str, repo=None): logger.error(ex) raise ex +@inject_repo +async def delete_chunk_id_from_namespace(chunk_id:str, namespace: str, repo=None): + """ + Delete chunk by id from namespace + :param chunk_id: + :param namespace: + :param repo: + :return: + """ + try: + return await repo.delete_pc_chunk_id_namespace(chunk_id=chunk_id, namespace=namespace) + except Exception as ex: + logger.error(ex) + raise ex @inject_repo async def get_list_namespace(repo=None) -> PineconeNamespaceResult: diff --git a/tilellm/models/item_model.py b/tilellm/models/item_model.py index 10f659f..90e4e9a 100644 --- a/tilellm/models/item_model.py +++ b/tilellm/models/item_model.py @@ -59,6 +59,7 @@ class QuestionAnswer(BaseModel): embedding: str = Field(default_factory=lambda: "text-embedding-ada-002") debug: bool = Field(default_factory=lambda: False) system_context: Optional[str] = None + search_type: str = Field(default_factory=lambda: "similarity") chat_history_dict: Optional[Dict[str, ChatEntry]] = None @field_validator("temperature") diff --git a/tilellm/store/pinecone/pinecone_repository_base.py b/tilellm/store/pinecone/pinecone_repository_base.py index 44c6fc7..b39d055 100644 --- a/tilellm/store/pinecone/pinecone_repository_base.py +++ b/tilellm/store/pinecone/pinecone_repository_base.py @@ -50,6 +50,34 @@ async def delete_pc_namespace(namespace: str): async def delete_pc_ids_namespace(self, metadata_id: str, namespace: str): pass + @staticmethod + async def delete_pc_chunk_id_namespace(chunk_id: str, namespace: str): + """ + delete chunk from pinecone + :param chunk_id: + :param namespace: + :return: + """ + """ + Delete namespace from Pinecone index + :param namespace: + :return: + """ + import pinecone + try: + pc = pinecone.Pinecone( + api_key=const.PINECONE_API_KEY + ) + host = pc.describe_index(const.PINECONE_INDEX).host + index = pc.Index(name=const.PINECONE_INDEX, host=host) + # vector_store = Pinecone.from_existing_index(const.PINECONE_INDEX,) + delete_response = index.delete(ids=[chunk_id], namespace=namespace) + except Exception as ex: + + logger.error(ex) + + raise ex + @staticmethod async def get_pc_ids_namespace( metadata_id: str, namespace: str) -> PineconeItems: """