Skip to content

Commit

Permalink
fix: /api/id/<id>/namespace/<namespace>/<token>
Browse files Browse the repository at this point in the history
  • Loading branch information
glorenzo972 committed Oct 10, 2024
1 parent b5469a7 commit 1e8892c
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 40 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
*Andrea Sponziello*
### **Copyright**: *Tiledesk SRL*

## [2024-09-21]
### 0.3.1

## [2024-10-10]
### 0.3.2-rc2
- fix: /api/id/{id}/namespace/{namespace}/{token}
- add sentence embedding with bge-m3
- add: hybrid search with bge-m3
- modify: deleted env variable for vector store
Expand Down
77 changes: 52 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,8 @@ pip install -e .


```commandline
export REDIS_URL="redis://localhost:6379/0"
export PINECONE_TYPE="serverless|pod"
export PINECONE_API_KEY="pinecone api key"
export PINECONE_TEXT_KEY="pinecone field for text - default text in pod content"
export PINECONE_INDEX="pinecone index name"
export TILELLM_ROLE="role in pod. Train enable all the APIs, qa do not consume redis queue only Q&A"
export JWT_SECRET_KEY="yourkey-256-bit"
export TOKENIZERS_PARALLELISM=false
export WORKERS=INT number of workers 2*CPU+1
export TIMEOUT=INT seconds of timeout default=180
export MAXREQUESTS=INT The maximum number of requests a worker will process before restarting. default=1200
Expand All @@ -34,11 +30,9 @@ sudo docker build -t tilellm .


```
sudo docker run -d -p 8000:8000 --env environment="dev|prod" \
--env PINECONE_API_KEY="yourapikey" \
--env PINECONE_TEXT_KEY="text|content" \
--env PINECONE_INDEX="index_name" \
--env TILELLM_ROLE="train|qa" \
sudo docker run -d -p 8000:8000 \
--env JWT_SECRET_KEY="yourkey-256-bit" \
--env TOKENIZERS_PARALLELISM=false \
--env WORKERS=3 \
--env TIMEOUT=180 \
--env MAXREQUESTS=1200 \
Expand Down Expand Up @@ -145,19 +139,52 @@ In this method, the gradient of distance is used to split chunks along with the

```json
{
...
"embedding":"huggingface",
"hybrid":true,
"sparse_encoder":"splade|bge-m3",
...
"engine":
{
"name": "",
"type": "",
"apikey" : "",
"vector_size": 1024,
"index_name": ""
}
"id": "content id",
"source": "name or url of document",
"type": "text|txt|url|pdf|docx",
"content": "content of document",
"hybrid": true,
"sparse_encoder": "splade|bge-m3",
"gptkey": "llm key; openai|anthropic|groq|cohere|gemini|ollama",
"scrape_type": 0,
"embedding": "name of embedding; huggingface|ollama|openai...|bge-m3",
"model": {
"name": "optional, used only with ollama",
"url": "ollama base url",
"dimension": 3072
},
"namespace": "vector store namespace",
"webhook": "string",
"semantic_chunk": false,
"breakpoint_threshold_type": "percentile",
"chunk_size": 1000,
"chunk_overlap": 100,
"parameters_scrape_type_4": {
"unwanted_tags": [
"string"
],
"tags_to_extract": [
"string"
],
"unwanted_classnames": [
"string"
],
"desired_classnames": [
"string"
],
"remove_lines": true,
"remove_comments": true,
"time_sleep": 2
},
"engine": {
"name": "pinecone",
"type": "serverless",
"apikey": "string",
"vector_size": 1536,
"index_name": "index name",
"text_key": "text for serverless; content for pod",
"metric": "cosine|dotproduct for hybrid"
}
}
```

Expand All @@ -174,7 +201,7 @@ In this method, the gradient of distance is used to split chunks along with the
"model": "es. claude-3-5-sonnet-20240620 | llama-3.1-70b-versatile",
"temperature": 0.9,
"max_tokens":2048,
"embedding":"huggingfacce",
"embedding":"huggingface",
"sparse_encoder":"splade|bge-m3",
"search_type":"hybrid",
"alpha": 0.2,
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tilellm"
version = "0.3.0"
version = "0.3.2-rc2"
description = "tiledesk for RAG"
authors = ["Gianluca Lorenzo <[email protected]>"]
repository = "https://github.com/Tiledesk/tiledesk-llm"
Expand Down Expand Up @@ -34,6 +34,7 @@ langchain-community = "0.3.1" #"0.2.10"
langchain-experimental = "0.3.1" #no previous
langchain-pinecone = "0.2.0"
langchain-huggingface="0.1.0"
langchain-ollama="0.2.0"
peft = "0.13.0"

tiktoken = "0.7.0"
Expand Down
6 changes: 5 additions & 1 deletion tilellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,13 @@ async def scrape_status_main(scrape_status_req: ScrapeStatusReq,
return JSONResponse(content=scrape_status_response.model_dump())
else:
try:
retrieved_pinecone_data = await get_ids_namespace(metadata_id=scrape_status_req.id,
repository_engine = RepositoryEngine(engine=scrape_status_req.engine)
print(repository_engine.engine)
retrieved_pinecone_data = await get_ids_namespace(repository_engine,
metadata_id=scrape_status_req.id,
namespace=scrape_status_req.namespace)


if retrieved_pinecone_data.matches:
logger.debug(retrieved_pinecone_data.matches[0].date)
date_from_metadata = retrieved_pinecone_data.matches[0].date
Expand Down
9 changes: 6 additions & 3 deletions tilellm/controller/controller_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,12 @@ async def generate_answer_with_history(llm, question_answer, rag_chain, retrieve
| qa_prompt
| llm.with_structured_output(QuotedAnswer)
)
chain_w_citations = RunnablePassthrough.assign(context=retrieve_docs).assign(
answer=rag_chain_from_docs
).assign(only_answer=lambda text: text["answer"].answer)

chain_w_citations = (RunnablePassthrough.assign(context=retrieve_docs)
.assign(answer=rag_chain_from_docs)
.assign(only_answer=lambda text: text["answer"].answer)
)

conversational_rag_chain = RunnableWithMessageHistory(
chain_w_citations,
get_session_history,
Expand Down
15 changes: 11 additions & 4 deletions tilellm/models/item_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
from typing import Dict, Optional, List, Union, Any
import datetime

class OllamaModel(BaseModel):
    # Settings describing a model served by an Ollama instance.

    # Model name; forwarded as `model=` to the Ollama clients.
    name: str
    # Base URL of the Ollama server; forwarded as `base_url=`.
    url: str
    # Embedding vector size reported for this model; 1024 when omitted.
    dimension: Optional[int] = 1024


class Engine(BaseModel):
name: str = Field(default="pinecone")
Expand Down Expand Up @@ -50,6 +55,7 @@ class ItemSingle(BaseModel):
gptkey: str | None = None
scrape_type: int = Field(default_factory=lambda: 0)
embedding: str = Field(default_factory=lambda: "text-embedding-ada-002")
model: Optional[OllamaModel] | None = None
namespace: str | None = None
webhook: str = Field(default_factory=lambda: "")
semantic_chunk: Optional[bool] = Field(default=False)
Expand Down Expand Up @@ -108,15 +114,15 @@ class QuestionAnswer(BaseModel):
namespace: str
llm: Optional[str] = Field(default="openai")
gptkey: str
model: str = Field(default="gpt-3.5-turbo")
model: Union[str, OllamaModel] = Field(default="gpt-3.5-turbo")
sparse_encoder: Optional[str] = Field(default="splade") #bge-m3
temperature: float = Field(default=0.0)
top_k: int = Field(default=5)
max_tokens: int = Field(default=128)
max_tokens: int = Field(default=1024)
embedding: str = Field(default_factory=lambda: "text-embedding-ada-002")
similarity_threshold: float = Field(default_factory=lambda: 1.0)
debug: bool = Field(default_factory=lambda: False)
citations: bool = Field(default_factory=lambda: True)
citations: bool = Field(default_factory=lambda: False)
alpha: Optional[float] = Field(default=0.5)
system_context: Optional[str] = None
search_type: str = Field(default_factory=lambda: "similarity")
Expand Down Expand Up @@ -155,7 +161,7 @@ class QuestionToLLM(BaseModel):
question: str
llm_key: Union[str, AWSAuthentication]
llm: str
model: str = Field(default="gpt-3.5-turbo")
model: Union[str, OllamaModel] = Field(default="gpt-3.5-turbo")
temperature: float = Field(default=0.0)
max_tokens: int = Field(default=128)
debug: bool = Field(default_factory=lambda: False)
Expand Down Expand Up @@ -285,6 +291,7 @@ class ScrapeStatusReq(BaseModel):
id: str
namespace: str
namespace_list: Optional[List[str]] | None = None
engine: Engine


class ScrapeStatusResponse(BaseModel):
Expand Down
39 changes: 37 additions & 2 deletions tilellm/shared/utility.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from functools import wraps

import logging
Expand All @@ -7,10 +6,12 @@
import langchain_aws
from langchain_community.callbacks.openai_info import OpenAICallbackHandler
from langchain_community.embeddings import CohereEmbeddings #, GooglePalmEmbeddings
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from langchain_voyageai import VoyageAIEmbeddings
from langchain_openai import OpenAIEmbeddings

from openai import base_url

from tilellm.shared import const

Expand Down Expand Up @@ -92,6 +93,14 @@ async def wrapper(self, item, *args, **kwargs):

)
dimension = 1024
elif item.embedding == "ollama":
from langchain_ollama.embeddings import OllamaEmbeddings
embedding_obj = OllamaEmbeddings(model=item.model.name,
base_url=item.model.url
)
dimension = item.model.dimension
# dimension for llama3.2 3072

else:
embedding_obj = OpenAIEmbeddings(api_key=item.gptkey, model=item.embedding)
dimension = 1536
Expand Down Expand Up @@ -138,6 +147,11 @@ async def wrapper(question, *args, **kwargs):
max_tokens=question.max_tokens,
convert_system_message_to_human=True)

elif question.llm == "ollama":
chat_model = ChatOllama(model = question.model.name,
temperature=question.temperature,
um_predict = question.max_tokens,
base_url=question.model.url)
elif question.llm == "groq":
chat_model = ChatGroq(api_key=question.llm_key,
model=question.model,
Expand Down Expand Up @@ -263,8 +277,29 @@ async def wrapper(question, *args, **kwargs):
model=question.model,
temperature=question.temperature,
max_tokens=question.max_tokens


)

elif question.llm == "ollama":
callback_handler = TiledeskAICallbackHandler()

from langchain_ollama.embeddings import OllamaEmbeddings
llm_embeddings = OllamaEmbeddings(model=question.model.name,
base_url=question.model.url
)
dimension = question.model.dimension

llm = ChatOllama(model=question.model.name,
temperature=question.temperature,
num_predict=question.max_tokens,
base_url=question.model.url,
format="json",
callback_handler=[callback_handler]
)



elif question.llm == "aws":
import os

Expand Down
4 changes: 2 additions & 2 deletions tilellm/store/pinecone/pinecone_repository_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ async def get_pc_ids_namespace(engine: Engine, metadata_id: str, namespace: str)
metadata_source=obj.get('metadata').get('source'),
metadata_type=obj.get('metadata').get('type'),
date=obj.get('metadata').get('date', 'Date not defined'),
text=obj.get('metadata').get(const.PINECONE_TEXT_KEY)
text=obj.get('metadata').get(engine.text_key)
# su pod content, su Serverless text
)
)
Expand Down Expand Up @@ -373,7 +373,7 @@ async def get_pc_sources_namespace(engine: Engine, source: str, namespace: str)
metadata_source=obj.get('metadata').get('source'),
metadata_type=obj.get('metadata').get('type'),
date=obj.get('metadata').get('date', 'Date not defined'),
text=obj.get('metadata').get(const.PINECONE_TEXT_KEY)
text=obj.get('metadata').get(engine.text_key)
# su pod content, su Serverless text
)
)
Expand Down

0 comments on commit 1e8892c

Please sign in to comment.