feat: add postgres driver

superlinear-ai · Sep 14, 2024 · a541cfd · a541cfd
1 parent eba82ce
commit a541cfd
Show file tree

Hide file tree

Showing 16 changed files with 697 additions and 379 deletions.
diff --git a/README.md b/README.md
@@ -2,21 +2,22 @@
 
 # 🧵 RAGLite
 
-RAGLite is a Python package for Retrieval-Augmented Generation (RAG) with SQLite.
+RAGLite is a Python package for Retrieval-Augmented Generation (RAG) with PostgreSQL or SQLite.
 
 ## Features
 
 1. ❤️ Only lightweight and permissive open source dependencies (e.g., no [PyTorch](https://github.com/pytorch/pytorch), [LangChain](https://github.com/langchain-ai/langchain), or [PyMuPDF](https://github.com/pymupdf/PyMuPDF))
-2. 🔒 Fully local RAG with [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) as an LLM provider and [SQLite](https://github.com/sqlite/sqlite) as a local database
-3. 🚀 Acceleration with Metal on macOS and with CUDA on Linux and Windows
-4. 📖 PDF to Markdown conversion on top of [pdftext](https://github.com/VikParuchuri/pdftext) and [pypdfium2](https://github.com/pypdfium2-team/pypdfium2)
-5. ✂️ Optimal [level 4 semantic chunking](https://medium.com/@anuragmishra_27746/five-levels-of-chunking-strategies-in-rag-notes-from-gregs-video-7b735895694d) by solving a [binary integer programming problem](https://en.wikipedia.org/wiki/Integer_programming)
-6. 📌 Markdown-based [contextual chunk headings](https://d-star.ai/solving-the-out-of-context-chunk-problem-for-rag)
-7. 🌈 Combined sentence-level and chunk-level matching with [multi-vector chunk retrieval](https://python.langchain.com/v0.2/docs/how_to/multi_vector/)
-8. 🌀 Optimal [closed-form linear query adapter](src/raglite/_query_adapter.py) by solving an [orthogonal Procrustes problem](https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem)
-9. 🔍 [Hybrid search](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) that combines [SQLite's BM25 full-text search](https://sqlite.org/fts5.html) with [PyNNDescent's ANN vector search](https://github.com/lmcinnes/pynndescent)
-10. ✍️ Optional support for conversion of any input document to Markdown with [Pandoc](https://github.com/jgm/pandoc)
-11. ✅ Optional support for evaluation of retrieval and generation with [Ragas](https://github.com/explodinggradients/ragas)
+2. 🧠 Your choice of local LLM with [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
+3. 💾 Your choice of [PostgreSQL](https://github.com/postgres/postgres) or [SQLite](https://github.com/sqlite/sqlite) as a full-text & vector search database
+4. 🚀 Acceleration with Metal on macOS and with CUDA on Linux and Windows
+5. 📖 PDF to Markdown conversion on top of [pdftext](https://github.com/VikParuchuri/pdftext) and [pypdfium2](https://github.com/pypdfium2-team/pypdfium2)
+6. ✂️ Optimal [level 4 semantic chunking](https://medium.com/@anuragmishra_27746/five-levels-of-chunking-strategies-in-rag-notes-from-gregs-video-7b735895694d) by solving a [binary integer programming problem](https://en.wikipedia.org/wiki/Integer_programming)
+7. 📌 Markdown-based [contextual chunk headings](https://d-star.ai/solving-the-out-of-context-chunk-problem-for-rag)
+8. 🌈 Combined sentence-level and chunk-level matching with [multi-vector chunk retrieval](https://python.langchain.com/v0.2/docs/how_to/multi_vector/)
+9. 🌀 Optimal [closed-form linear query adapter](src/raglite/_query_adapter.py) by solving an [orthogonal Procrustes problem](https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem)
+10. 🔍 [Hybrid search](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) that combines the database's built-in full-text search ([tsvector](https://www.postgresql.org/docs/current/datatype-textsearch.html) in PostgreSQL, [FTS5](https://www.sqlite.org/fts5.html) in SQLite) with its native vector search extension ([pgvector](https://github.com/pgvector/pgvector) in PostgreSQL, [sqlite-vec](https://github.com/asg017/sqlite-vec) in SQLite)
+11. ✍️ Optional support for conversion of any input document to Markdown with [Pandoc](https://github.com/jgm/pandoc)
+12. ✅ Optional support for evaluation of retrieval and generation performance with [Ragas](https://github.com/explodinggradients/ragas)
 
 ## Installing
 

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -12,6 +12,10 @@ services:
         GID: ${GID:-1000}
     environment:
       - POETRY_PYPI_TOKEN_PYPI
+    depends_on:
+      - postgres
+    networks:
+      - raglite-network
     volumes:
       - ..:/workspaces
       - command-history-volume:/home/user/.history/
@@ -21,21 +25,35 @@ services:
     stdin_open: true
     tty: true
     entrypoint: []
-    command:
-      [
-        "sh",
-        "-c",
-        "sudo chown user $$SSH_AUTH_SOCK && cp --update /opt/build/poetry/poetry.lock /workspaces/raglite/ && mkdir -p /workspaces/raglite/.git/hooks/ && cp --update /opt/build/git/* /workspaces/raglite/.git/hooks/ && zsh"
-      ]
+    command: [ "sh", "-c", "sudo chown user $$SSH_AUTH_SOCK && cp --update /opt/build/poetry/poetry.lock /workspaces/raglite/ && mkdir -p /workspaces/raglite/.git/hooks/ && cp --update /opt/build/git/* /workspaces/raglite/.git/hooks/ && zsh" ]
     environment:
       - POETRY_PYPI_TOKEN_PYPI
       - SSH_AUTH_SOCK=/run/host-services/ssh-auth.sock
+    depends_on:
+      - postgres
+    networks:
+      - raglite-network
     volumes:
       - ~/.gitconfig:/etc/gitconfig
       - ~/.ssh/known_hosts:/home/user/.ssh/known_hosts
       - ${SSH_AGENT_AUTH_SOCK:-/run/host-services/ssh-auth.sock}:/run/host-services/ssh-auth.sock
     profiles:
       - dev
 
+  postgres:
+    image: postgres:16
+    environment:
+      POSTGRES_DB: raglite_db
+      POSTGRES_USER: raglite_user
+      POSTGRES_PASSWORD: raglite_password
+    networks:
+      - raglite-network
+    tmpfs:
+      - /var/lib/postgresql/data
+
+networks:
+  raglite-network:
+    driver: bridge
+
 volumes:
   command-history-volume:
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,7 @@ pydantic = ">=2.7.0"
 # Approximate Nearest Neighbors:
 pynndescent = ">=0.5.12"
 # Storage:
+pg8000 = ">=1.31.2"
 sqlmodel-slim = ">=0.0.18"
 # Progress:
 tqdm = ">=4.66.0"
@@ -125,7 +126,55 @@ src = ["src", "tests"]
 target-version = "py310"
 
 [tool.ruff.lint]
-select = ["A", "ASYNC", "B", "BLE", "C4", "C90", "D", "DTZ", "E", "EM", "ERA", "F", "FBT", "FLY", "FURB", "G", "I", "ICN", "INP", "INT", "ISC", "LOG", "N", "NPY", "PERF", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "Q", "RET", "RSE", "RUF", "S", "SIM", "SLF", "SLOT", "T10", "T20", "TCH", "TID", "TRY", "UP", "W", "YTT"]
+select = [
+  "A",
+  "ASYNC",
+  "B",
+  "BLE",
+  "C4",
+  "C90",
+  "D",
+  "DTZ",
+  "E",
+  "EM",
+  "ERA",
+  "F",
+  "FBT",
+  "FLY",
+  "FURB",
+  "G",
+  "I",
+  "ICN",
+  "INP",
+  "INT",
+  "ISC",
+  "LOG",
+  "N",
+  "NPY",
+  "PERF",
+  "PGH",
+  "PIE",
+  "PL",
+  "PT",
+  "PTH",
+  "PYI",
+  "Q",
+  "RET",
+  "RSE",
+  "RUF",
+  "S",
+  "SIM",
+  "SLF",
+  "SLOT",
+  "T10",
+  "T20",
+  "TCH",
+  "TID",
+  "TRY",
+  "UP",
+  "W",
+  "YTT",
+]
 ignore = ["D203", "D213", "E501", "RET504", "RUF002", "S101", "S307"]
 unfixable = ["ERA001", "F401", "F841", "T201", "T203"]
 

diff --git a/src/raglite/__init__.py b/src/raglite/__init__.py
@@ -2,7 +2,7 @@
 
 from raglite._config import RAGLiteConfig
 from raglite._eval import answer_evals, evaluate, insert_evals
-from raglite._index import insert_document, update_vector_index
+from raglite._index import insert_document
 from raglite._query_adapter import update_query_adapter
 from raglite._rag import rag
 from raglite._search import (
@@ -18,7 +18,6 @@
     "RAGLiteConfig",
     # Index
     "insert_document",
-    "update_vector_index",
     # Search
     "fusion_search",
     "hybrid_search",

diff --git a/src/raglite/_config.py b/src/raglite/_config.py
@@ -3,23 +3,21 @@
 from dataclasses import dataclass, field
 from functools import lru_cache
 
-import numpy as np
-import numpy.typing as npt
 from llama_cpp import Llama, LlamaRAMCache, llama_supports_gpu_offload  # type: ignore[attr-defined]
 from sqlalchemy.engine import URL
 
 
 @lru_cache(maxsize=1)
 def default_llm() -> Llama:
     """Get default LLM."""
-    # Select the best available LLM for the given accelerator.
+    # Select the best available LLM for the given accelerator:
+    # - Llama-3.1-8B-instruct on GPU.
+    # - Phi-3.5-mini-instruct on CPU.
     if llama_supports_gpu_offload():
-        # Llama-3.1-8B-instruct on GPU.
         repo_id = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"  # https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
         filename = "*Q4_K_M.gguf"
         n_ctx = 8192
     else:
-        # Phi-3.1-mini-128k-instruct on CPU.
         repo_id = "bartowski/Phi-3.5-mini-instruct-GGUF"  # https://huggingface.co/microsoft/Phi-3.5-mini-instruct
         filename = "*Q4_K_M.gguf"
         n_ctx = 4096
@@ -61,7 +59,6 @@ class RAGLiteConfig:
     # Embedder config used for indexing.
     embedder: Llama = field(default_factory=default_embedder)
     embedder_batch_size: int = 128
-    embedder_dtype: npt.DTypeLike = np.float16
     embedder_normalize: bool = True
     sentence_embedding_weight: float = 0.5  # Between 0 (chunk level) and 1 (sentence level).
     # Chunker config used to partition documents into chunks.
@@ -70,7 +67,5 @@ class RAGLiteConfig:
     # Database config.
     db_url: str | URL = "sqlite:///raglite.sqlite"
     # Vector search config.
-    vector_search_index_id: str = "default"
     vector_search_index_metric: str = "cosine"  # The query adapter supports "dot" and "cosine".
-    # Query adapter config.
-    enable_query_adapter: bool = True
+    vector_search_query_adapter: bool = True