
Merge branch 'main' into chunking
annbbbb authored May 21, 2024
2 parents a891098 + af9a03e commit 015664e
Showing 3 changed files with 53 additions and 31 deletions.
48 changes: 27 additions & 21 deletions README.md
@@ -57,36 +57,42 @@

### Starting the app in production

#### Embedding api

Download models (need git-lfs):

```sh
cd models
git clone git@hf.co:intfloat/e5-large-v2
```

#### Starting app

```sh
docker compose --profile prod up
```

Upon app startup, the OpenAI-compatible embedding API will be available at:
<http://172.16.3.101:5001/v1>

Check the docs here: <http://172.16.3.101:5001/docs>

### Starting llm and embedding

1. Download the model (required for the llm-embedding service to work; file size: 3.6 GB):
```sh
curl -o ./llm/models/llama-2-7b.Q3_K_L.gguf -L https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q3_K_L.gguf
```

or

```sh
wget -O ./llm/models/llama-2-7b.Q3_K_L.gguf https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q3_K_L.gguf
```
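An interrupted `curl` or `wget` can leave a truncated file that the service will fail to load. A quick size check before starting the containers can catch this; the sketch below is illustrative and not part of the repo — `check_model` is a hypothetical helper, and the ~3.6 GB threshold comes from the file size quoted above:

```sh
#!/bin/sh
# Hypothetical helper (not in the repo): confirm a model file exists and
# meets a minimum byte size before starting the containers.
check_model() {
    path="$1"
    min_bytes="$2"
    if [ ! -f "$path" ]; then
        echo "missing: $path" >&2
        return 1
    fi
    # wc -c counts bytes; tr strips padding some platforms add
    size=$(wc -c < "$path" | tr -d ' ')
    if [ "$size" -lt "$min_bytes" ]; then
        echo "truncated: $path is only $size bytes" >&2
        return 1
    fi
    echo "ok: $path ($size bytes)"
}

# For the real model (~3.6 GB), something like:
# check_model ./llm/models/llama-2-7b.Q3_K_L.gguf 3500000000
```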
2. Launching llm and embedding

#### Starting app
2.1. Running on CPU

```sh
docker compose --profile prod up
```
```sh
docker compose --profile cpu up
```

2.2. Running on GPU

```sh
docker compose --profile gpu up
```

#### LLM and embedding API Swagger

Swagger UI with endpoints for completions (llm + embedding) and embeddings only is available [here](http://0.0.0.0:9000/docs).
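Once either profile is up, clients talk to the service over plain HTTP. The sketch below is an assumption-laden example, not taken from the repo: it presumes the service follows the OpenAI embeddings schema (`POST /v1/embeddings` with `model` and `input` fields) on the port mapped in the compose file, and `llama-2-7b` is a placeholder model name.

```sh
#!/bin/sh
# Hedged sketch: call the embeddings endpoint of the llm-embedding service.
# Assumes an OpenAI-compatible schema; "llama-2-7b" is a placeholder model name.
BASE_URL="${BASE_URL:-http://localhost:9000/v1}"

payload() {
    # Build the JSON request body; $1 is the text to embed.
    printf '{"model": "llama-2-7b", "input": ["%s"]}' "$1"
}

embed() {
    # Requires the service to be running (docker compose --profile cpu up).
    curl -s "$BASE_URL/embeddings" \
        -H "Content-Type: application/json" \
        -d "$(payload "$1")"
}

# With the stack up:
# embed "hello world"
```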
22 changes: 19 additions & 3 deletions docker-compose.yml
@@ -5,7 +5,7 @@ services:
env_file: .env

db:
profiles: ["dev","prod"]
profiles: [ "dev", "prod" ]
build:
context: ./postgres
dockerfile: postgres.Dockerfile
@@ -28,11 +28,27 @@
depends_on:
- db

llm:
profiles: [ "dev", "prod" ]
llm-embedding-cpu:
profiles: [ "cpu" ]
build:
context: ./llm/
volumes:
- ./llm/models:/models
ports:
- "9000:9000"

llm-embedding-gpu:
profiles: [ "gpu" ]
build:
context: ./llm/
volumes:
- ./llm/models:/models
ports:
- "9000:9000"
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [ gpu ]
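A note on the GPU reservation above: `count: 1` takes whichever device Docker selects. On a multi-GPU host, the Compose device reservation also supports pinning a specific device with `device_ids` instead of `count`; a hedged sketch of that variant (not part of this commit):

```yaml
deploy:
  resources:
    reservations:
      devices:
        - driver: nvidia
          device_ids: [ "0" ]   # pin to GPU 0; mutually exclusive with count
          capabilities: [ gpu ]
```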
14 changes: 7 additions & 7 deletions llm/poetry.lock

