diff --git a/assets/config_custom_chromadb.json b/assets/config_custom_chromadb.json
index 75fa7412..ea53620c 100644
--- a/assets/config_custom_chromadb.json
+++ b/assets/config_custom_chromadb.json
@@ -7,7 +7,7 @@
     },
     "vectordb": {
         "name": "chromadb",
-        "class_name": "llm_stack",
+        "class_name": "genai_stack",
         "embedding": {
             "name": "HuggingFaceEmbeddings",
             "fields": {
diff --git a/assets/etl.json b/assets/etl.json
index 75a1a6f9..4bb3d20e 100644
--- a/assets/etl.json
+++ b/assets/etl.json
@@ -8,6 +8,6 @@
     },
     "vectordb": {
         "name": "chromadb",
-        "class_name": "llm_stack"
+        "class_name": "genai_stack"
     }
 }
diff --git a/genai_stack/__init__.py b/genai_stack/__init__.py
index d08b6917..2cb310a7 100644
--- a/genai_stack/__init__.py
+++ b/genai_stack/__init__.py
@@ -1,8 +1,8 @@
 """Top-level package for genai_stack."""
 
-__author__ = """AIM by DPhi"""
+__author__ = """AI Planet Tech Team"""
 __email__ = "support@aiplanet.com"
-__version__ = "0.2.2"
+__version__ = "0.2.3"
 
 import os
diff --git a/genai_stack/constant.py b/genai_stack/constant.py
index 4a63e152..0e7b405b 100644
--- a/genai_stack/constant.py
+++ b/genai_stack/constant.py
@@ -7,3 +7,4 @@
 VECTORDB = "/vectordb"
 ETL = "/etl"
 PROMPT_ENGINE = "/prompt-engine"
+MODEL = "/model"
diff --git a/genai_stack/etl/run.py b/genai_stack/etl/run.py
index c7febd65..c5b1c323 100644
--- a/genai_stack/etl/run.py
+++ b/genai_stack/etl/run.py
@@ -1,17 +1,17 @@
-from genai_stack.constants.etl.etl import PREBUILT_ETL_LOADERS, ETL_MODULE
+from genai_stack.constants.etl.etl import AVAILABLE_ETL_LOADERS, ETL_MODULE
 from genai_stack.utils.importing import import_class
 from genai_stack.core import ConfigLoader
 
 
 def list_etl_loaders():
-    return PREBUILT_ETL_LOADERS.keys()
+    return AVAILABLE_ETL_LOADERS.keys()
 
 
 def run_etl_loader(config_file: str, vectordb):
     config_cls = ConfigLoader(name="EtlLoader", config=config_file)
     etl_cls = import_class(
-        f"{ETL_MODULE}.{PREBUILT_ETL_LOADERS.get(config_cls.config.get('etl'))}".replace(
+        f"{ETL_MODULE}.{AVAILABLE_ETL_LOADERS.get(config_cls.config.get('etl'))}".replace(
             "/",
             ".",
         )
diff --git a/genai_stack/genai_server/models/model_models.py b/genai_stack/genai_server/models/model_models.py
new file mode 100644
index 00000000..a532c69c
--- /dev/null
+++ b/genai_stack/genai_server/models/model_models.py
@@ -0,0 +1,13 @@
+from pydantic import BaseModel
+
+
+class ModelBaseModel(BaseModel):
+    pass
+
+
+class ModelRequestModel(ModelBaseModel):
+    prompt: str
+
+
+class ModelResponseModel(ModelBaseModel):
+    output: str
diff --git a/genai_stack/genai_server/routers/model_routes.py b/genai_stack/genai_server/routers/model_routes.py
new file mode 100644
index 00000000..d3a89f61
--- /dev/null
+++ b/genai_stack/genai_server/routers/model_routes.py
@@ -0,0 +1,15 @@
+from fastapi import APIRouter
+
+from genai_stack.constant import API, MODEL
+from genai_stack.genai_server.settings.settings import settings
+from genai_stack.genai_server.services.model_service import ModelService
+from genai_stack.genai_server.models.model_models import ModelResponseModel, ModelRequestModel
+
+service = ModelService(store=settings.STORE)
+
+router = APIRouter(prefix=API + MODEL, tags=["model"])
+
+
+@router.post("/predict")
+def predict(data: ModelRequestModel) -> ModelResponseModel:
+    return service.predict(data=data)
diff --git a/genai_stack/genai_server/server.py b/genai_stack/genai_server/server.py
index 7bdee2e0..ebb695d1 100644
--- a/genai_stack/genai_server/server.py
+++ b/genai_stack/genai_server/server.py
@@ -1,6 +1,12 @@
 from fastapi import FastAPI
-
-from genai_stack.genai_server.routers import session_routes, retriever_routes, vectordb_routes, etl_routes, prompt_engine_routes +from genai_stack.genai_server.routers import ( + session_routes, + retriever_routes, + vectordb_routes, + etl_routes, + prompt_engine_routes, + model_routes, +) def get_genai_server_app(): @@ -21,5 +27,6 @@ def get_genai_server_app(): app.include_router(vectordb_routes.router) app.include_router(etl_routes.router) app.include_router(prompt_engine_routes.router) + app.include_router(model_routes.router) return app diff --git a/genai_stack/genai_server/services/model_service.py b/genai_stack/genai_server/services/model_service.py new file mode 100644 index 00000000..04d33eff --- /dev/null +++ b/genai_stack/genai_server/services/model_service.py @@ -0,0 +1,13 @@ +from genai_stack.genai_platform.services.base_service import BaseService +from genai_stack.genai_server.models.model_models import ModelRequestModel, ModelResponseModel +from genai_stack.genai_server.utils import get_current_stack +from genai_stack.genai_server.settings.config import stack_config + + +class ModelService(BaseService): + def predict(self, data: ModelRequestModel) -> ModelResponseModel: + stack = get_current_stack(config=stack_config) + response = stack.model.predict(data.prompt) + return ModelResponseModel( + output=response["output"], + ) diff --git a/genai_stack/llm_stack.py b/genai_stack/genai_stack.py similarity index 100% rename from genai_stack/llm_stack.py rename to genai_stack/genai_stack.py diff --git a/genai_stack/model/__init__.py b/genai_stack/model/__init__.py index 42f40b4f..fdd3eb4a 100644 --- a/genai_stack/model/__init__.py +++ b/genai_stack/model/__init__.py @@ -1,4 +1,3 @@ -from .server import HttpServer from .base import BaseModel from .gpt3_5 import OpenAIGpt35Model from .run import list_supported_models, get_model_class, AVAILABLE_MODEL_MAPS, run_custom_model diff --git a/genai_stack/model/gpt3_5.py b/genai_stack/model/gpt3_5.py index f68327b3..0a5efe47 100644 --- a/genai_stack/model/gpt3_5.py +++ b/genai_stack/model/gpt3_5.py @@ -7,30 +7,58 @@ class OpenAIGpt35Parameters(BaseModelConfigModel): model_name: str = Field(default="gpt-3.5-turbo-16k", alias="model") - """Model name to use.""" + """ + Model name to use. + + """ temperature: float = 0 - """What sampling temperature to use.""" + """ + What sampling temperature to use. + + """ model_kwargs: Dict[str, Any] = Field(default_factory=dict) - """Holds any model parameters valid for `create` call not explicitly specified.""" + """ + Holds any model parameters valid for `create` call not explicitly specified. + + """ openai_api_key: str - """Base URL path for API requests, - leave blank if not using a proxy or service emulator.""" + """ + Base URL path for API requests, + leave blank if not using a proxy or service emulator. + + """ openai_api_base: Optional[str] = None openai_organization: Optional[str] = None # to support explicit proxy for OpenAI openai_proxy: Optional[str] = None request_timeout: Optional[Union[float, Tuple[float, float]]] = None - """Timeout for requests to OpenAI completion API. Default is 600 seconds.""" + """ + Timeout for requests to OpenAI completion API. Default is 600 seconds. + + """ max_retries: int = 6 - """Maximum number of retries to make when generating.""" + """ + Maximum number of retries to make when generating. + + """ streaming: bool = False - """Whether to stream the results or not.""" + """ + Whether to stream the results or not. 
+ + """ n: int = 1 - """Number of chat completions to generate for each prompt.""" + """ + Number of chat completions to generate for each prompt. + + """ max_tokens: Optional[int] = None - """Maximum number of tokens to generate.""" + """ + Maximum number of tokens to generate. + + """ tiktoken_model_name: Optional[str] = None - """The model name to pass to tiktoken when using this class. + """ + The model name to pass to tiktoken when using this class. Tiktoken is used to count the number of tokens in documents to constrain them to be under a certain limit. By default, when set to None, this will be the same as the embedding model name. However, there are some cases @@ -38,7 +66,9 @@ class OpenAIGpt35Parameters(BaseModelConfigModel): supported by tiktoken. This can include when using Azure embeddings or when using one of the many model providers that expose an OpenAI-like API but with different models. In those cases, in order to avoid erroring - when tiktoken is called, you can specify a model name to use here.""" + when tiktoken is called, you can specify a model name to use here. + + """ class OpenAIGpt35ModelConfigModel(BaseModelConfigModel): diff --git a/genai_stack/model/hf.py b/genai_stack/model/hf.py index 2fc99ee4..95253494 100644 --- a/genai_stack/model/hf.py +++ b/genai_stack/model/hf.py @@ -1,6 +1,4 @@ from typing import Optional, Dict - -import torch from langchain.llms import HuggingFacePipeline from genai_stack.model.base import BaseModel, BaseModelConfig, BaseModelConfigModel @@ -31,18 +29,14 @@ class HuggingFaceModel(BaseModel): def _post_init(self, *args, **kwargs): self.model = self.load() - def get_device(self): - return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - def load(self): model = HuggingFacePipeline.from_model_id( - model_id=self.config.model, - task=self.config.task, - model_kwargs=self.config.model_kwargs, - device=self.get_device(), + model_id=self.config.model, task=self.config.task, model_kwargs=self.config.model_kwargs ) return model def predict(self, prompt: str): response = self.model(prompt) - return {"output": response[0]["generated_text"]} + # Note: Huggingface model response format is different for different model + # so user should extract the info which is required. 
+ return {"output": response} diff --git a/genai_stack/model/server.py b/genai_stack/model/server.py deleted file mode 100644 index c5d4c78e..00000000 --- a/genai_stack/model/server.py +++ /dev/null @@ -1,86 +0,0 @@ -import uvicorn -from fastapi import FastAPI, Request -from fastapi.responses import Response, JSONResponse - - -class HttpServer: - name: str = None - - def predict(self, data=None): - print(data) - raise NotImplementedError - - def chat_history(self): - raise NotImplementedError - - async def predict_api(self, request: Request): - # Accessing request data - request_body = await request.body() - - response_data = self.predict(request_body) - ResponseClass = self.response_class or JSONResponse - return ResponseClass(content=response_data) - - async def chat_history_api(self, request: Request): - response_data = self.chat_history() - ResponseClass = self.response_class or JSONResponse - return ResponseClass(content=response_data) - - def run_http_server( - self, - host: str = "127.0.0.1", - port: int = 8082, - response_class: Response = Response, - ): - self.response_class = response_class - self.app = FastAPI( - title="genai_stack Model Server", - description=f"genai_stack {self.name} HTTP Model Server", - ) - app: FastAPI = self.app - - app.post("/predict", response_class=response_class)(self.predict_api) - app.get("/chat_history", response_class=response_class)(self.chat_history_api) - - uvicorn.run(app, host=host, port=port) - - -# import asyncio - -# import uvicorn -# from fastapi import FastAPI, Request -# from fastapi.responses import JSONResponse - -# class HttpServer: -# def __init__(self): -# self.version = "0.0.1" - -# self.app = FastAPI( -# title="genai_stack Model Server", -# description="genai_stack HTTP Model Server using FastAPI", -# ) -# self.serving_task: Optional[asyncio.Task] = None - -# async def serve(self): -# app: FastAPI = self.app - -# @app.post("/version") -# async def predict(request: Request) -> JSONResponse: -# # Accessing request data -# request_body = await request.json() - -# return JSONResponse( -# { -# "Request body": request_body, -# } -# ) - -# # serve -# config = uvicorn.Config(app, host="127.0.0.1", port=8082) -# server = uvicorn.Server(config) -# await server.serve() - - -# if __name__ == "__main__": -# instance = HttpServer() -# asyncio.run(instance.serve()) diff --git a/genai_stack/retriever/base.py b/genai_stack/retriever/base.py index d6d0a52e..3a8b544a 100644 --- a/genai_stack/retriever/base.py +++ b/genai_stack/retriever/base.py @@ -6,6 +6,7 @@ class BaseRetrieverConfigModel(BaseModel): """ Data Model for the configs """ + pass @@ -16,55 +17,26 @@ class BaseRetrieverConfig(StackComponentConfig): class BaseRetriever(StackComponent): config_class = BaseRetrieverConfig - def get_prompt(self, query:str): + def get_prompt(self, query: str): """ This method returns the prompt template from the prompt engine component """ return self.mediator.get_prompt_template(query) - def retrieve(self, query:str) -> dict: + def retrieve(self, query: str) -> dict: """ This method returns the model response for the prompt template. 
""" raise NotImplementedError() - - def get_context(self, query:str): + + def get_context(self, query: str): """ This method returns the relevant documents returned by the similarity search from a vectordb based on the query """ raise NotImplementedError() - + def get_chat_history(self) -> str: """ This method returns the chat conversation history """ return self.mediator.get_chat_history() - - -# from typing import Any - -# from genai_stack.core import BaseComponent -# from genai_stack.constants.retriever import RETRIEVER_CONFIG_KEY -# from genai_stack.vectordb.base import BaseVectordb - -# class BaseRetriever(BaseComponent): -# module_name = "BaseRetriever" -# config_key = RETRIEVER_CONFIG_KEY - -# def __init__(self, config: str, vectordb: BaseVectordb = None): -# super().__init__(self.module_name, config) -# self.parse_config(self.config_key, self.required_fields) -# self.vectordb = vectordb - -# def retrieve(self, query: Any): -# raise NotImplementedError() - -# def get_langchain_retriever(self): -# return self.vectordb.get_langchain_client().as_retriever() - -# def get_langchain_memory_retriever(self): -# return self.vectordb.get_langchain_memory_client().as_retriever() - -# @classmethod -# def from_config(cls, config): -# raise NotImplementedError \ No newline at end of file diff --git a/genai_stack/retriever/langchain.py b/genai_stack/retriever/langchain.py index 0e2f3842..6e322a15 100644 --- a/genai_stack/retriever/langchain.py +++ b/genai_stack/retriever/langchain.py @@ -10,6 +10,7 @@ class LangChainRetrieverConfigModel(BaseRetrieverConfigModel): """ Data Model for the configs """ + pass @@ -23,55 +24,23 @@ class LangChainRetriever(BaseRetriever): def retrieve(self, query: str, context: List[Document] = None): prompt_template = self.get_prompt(query=query) - prompt_dict = { - "query": query - } + prompt_dict = {"query": query} metadata = None if "context" in prompt_template.input_variables: if not context: context = self.mediator.search_vectordb(query=query) metadata = context[0].metadata if context else None - prompt_dict['context'] = parse_search_results(context) + prompt_dict["context"] = parse_search_results(context) if "history" in prompt_template.input_variables: - prompt_dict['history'] = self.get_chat_history() + prompt_dict["history"] = self.get_chat_history() else: # Cache and memory cannot co-exist. Memory is given priority. 
-            cache = self.mediator.get_cache(
-                query=query,
-                metadata=metadata
-            )
+            cache = self.mediator.get_cache(query=query, metadata=metadata)
             if cache:
-                return {'output': cache}
-        final_prompt_template = prompt_template.template.format(
-            **{k: v for k, v in prompt_dict.items()}
-        )
+                return {"output": cache}
+        final_prompt_template = prompt_template.template.format(**{k: v for k, v in prompt_dict.items()})
         response = self.mediator.get_model_response(prompt=final_prompt_template)
-        self.mediator.add_text(user_text=query, model_text=response['output'])
+        self.mediator.add_text(user_text=query, model_text=response["output"])
         if "history" not in prompt_template.input_variables:
-            self.mediator.set_cache(
-                response=response['output'],
-                query=query,
-                metadata=metadata
-            )
+            self.mediator.set_cache(response=response["output"], query=query, metadata=metadata)
         return response
-
-# from typing import List
-# from langchain.docstore.document import Document
-
-# from .base import BaseRetriever
-
-
-# class LangChainRetriever(BaseRetriever):
-#     required_fields = []
-
-#     def retrieve(self, query):
-#         vectordb = self.vectordb
-#         return self.parse_search_results(vectordb.search(query))
-
-#     def parse_search_results(self, search_results: List[Document]):
-#         result = ""
-
-#         for idx, search_result in enumerate(search_results):
-#             result += f"{idx + 1}. {search_result.page_content} \n"
-
-#         return result
diff --git a/setup.py b/setup.py
index 812e5dfd..660648ac 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@
 test_requirements = []
 
 setup(
-    author="AIM by DPhi",
+    author="AI Planet Tech Team",
     author_email="support@aiplanet.com",
     python_requires=">=3.8",
     classifiers=[
@@ -88,6 +88,6 @@
     test_suite="tests",
     tests_require=test_requirements,
     url="https://github.com/aiplanethub/genai-stack",
-    version="0.2.1",
+    version="0.2.3",
     zip_safe=False,
 )
diff --git a/ui/requirements.txt b/ui/requirements.txt
index c95f5fd3..12790d66 100644
--- a/ui/requirements.txt
+++ b/ui/requirements.txt
@@ -45,7 +45,7 @@ tornado==6.3.2
 typing_extensions==4.7.1
 tzdata==2023.3
 tzlocal==4.3.1
-urllib3==2.0.4
+urllib3==2.0.6
 validators==0.20.0
 watchdog==3.0.0
 zipp==3.16.2