Skip to content

Commit

Permalink
test: improve test speed
Browse files Browse the repository at this point in the history
  • Loading branch information
lsorber committed Aug 15, 2024
1 parent f16a581 commit 49f831e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 60 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

# RAGLite

A Retrieval-Augmented Generation (RAG) library for SQLite.

⚠️ This project is a work in progress!
RAGLite is a Python package for Retrieval-Augmented Generation (RAG) with SQLite.

## Features

Expand Down
52 changes: 2 additions & 50 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] # https://python-poetry.org/docs/pyproject/
name = "raglite"
version = "0.0.0"
description = "A RAG extension for SQLite."
description = "A Python package for Retrieval-Augmented Generation (RAG) with SQLite."
authors = ["Laurent Sorber <[email protected]>"]
readme = "README.md"
repository = "https://github.com/radix-ai/raglite"
Expand Down Expand Up @@ -123,55 +123,7 @@ src = ["src", "tests"]
target-version = "py310"

[tool.ruff.lint]
select = [
"A",
"ASYNC",
"B",
"BLE",
"C4",
"C90",
"D",
"DTZ",
"E",
"EM",
"ERA",
"F",
"FBT",
"FLY",
"FURB",
"G",
"I",
"ICN",
"INP",
"INT",
"ISC",
"LOG",
"N",
"NPY",
"PERF",
"PGH",
"PIE",
"PL",
"PT",
"PTH",
"PYI",
"Q",
"RET",
"RSE",
"RUF",
"S",
"SIM",
"SLF",
"SLOT",
"T10",
"T20",
"TCH",
"TID",
"TRY",
"UP",
"W",
"YTT",
]
select = ["A", "ASYNC", "B", "BLE", "C4", "C90", "D", "DTZ", "E", "EM", "ERA", "F", "FBT", "FLY", "FURB", "G", "I", "ICN", "INP", "INT", "ISC", "LOG", "N", "NPY", "PERF", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "Q", "RET", "RSE", "RUF", "S", "SIM", "SLF", "SLOT", "T10", "T20", "TCH", "TID", "TRY", "UP", "W", "YTT"]
ignore = ["D203", "D213", "E501", "RET504", "RUF002", "S101", "S307"]
unfixable = ["ERA001", "F401", "F841", "T201", "T203"]

Expand Down
34 changes: 34 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Fixtures for the tests."""

import pytest
from llama_cpp import Llama, LlamaRAMCache # type: ignore[attr-defined]

from raglite import RAGLiteConfig


@pytest.fixture()
def simple_config() -> RAGLiteConfig:
    """Build a small RAGLite config for tests, backed by an in-memory SQLite database.

    Both models are obtained with ``Llama.from_pretrained``; the LLM additionally gets a
    ``LlamaRAMCache`` attached before the config is assembled.
    """
    # Lightweight LLM.
    chat_model = Llama.from_pretrained(
        repo_id="bartowski/Phi-3.1-mini-4k-instruct-GGUF",  # https://huggingface.co/microsoft/Phi-3-mini-4k-instruct
        filename="*Q4_K_M.gguf",
        n_ctx=4096,  # Explicit context size (0 would use the model's own; default is 512).
        n_gpu_layers=-1,  # -1 = Offload all layers to the GPU (default is 0).
        verbose=False,
    )
    chat_model.set_cache(LlamaRAMCache())
    # Lightweight embedder.
    embedding_model = Llama.from_pretrained(
        repo_id="ChristianAzinn/snowflake-arctic-embed-xs-gguf",  # https://github.com/Snowflake-Labs/arctic-embed
        filename="*f16.GGUF",
        n_ctx=0,  # 0 = Use the model's context size (default is 512).
        n_gpu_layers=-1,  # -1 = Offload all layers to the GPU (default is 0).
        verbose=False,
        embedding=True,
    )
    # Assemble the config around an in-memory SQLite database.
    return RAGLiteConfig(
        llm=chat_model,
        embedder=embedding_model,
        db_url="sqlite:///:memory:",
    )
12 changes: 5 additions & 7 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,20 @@
)


def test_insert_index_search() -> None:
def test_insert_index_search(simple_config: RAGLiteConfig) -> None:
"""Test inserting a document, updating the vector index, and searching for a query."""
# Run this test with an in-memory SQLite database.
in_memory_db = RAGLiteConfig(db_url="sqlite:///:memory:")
# Insert a document.
doc_path = Path(__file__).parent / "specrel.pdf" # Einstein's special relativity paper.
insert_document(doc_path, config=in_memory_db)
insert_document(doc_path, config=simple_config)
# Update the vector index with the new document.
update_vector_index(config=in_memory_db)
update_vector_index(config=simple_config)
# Search for a query.
query = "What does it mean for two events to be simultaneous?"
chunk_rowids, scores = hybrid_search(query, config=in_memory_db)
chunk_rowids, scores = hybrid_search(query, config=simple_config)
assert len(chunk_rowids) == len(scores)
assert all(isinstance(rowid, int) for rowid in chunk_rowids)
assert all(isinstance(score, float) for score in scores)
# Group the chunks into segments and retrieve them.
segments = retrieve_segments(chunk_rowids, neighbors=None, config=in_memory_db)
segments = retrieve_segments(chunk_rowids, neighbors=None, config=simple_config)
assert all(isinstance(segment, str) for segment in segments)
assert "Definition of Simultaneity" in segments[0]

0 comments on commit 49f831e

Please sign in to comment.