Skip to content

Commit

Permalink
fix: support pgvector v0.7.0+ (#63)
Browse files Browse the repository at this point in the history
  • Loading branch information
undo76 authored Dec 15, 2024
1 parent 574e407 commit 7ec9582
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 7 deletions.
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ ragas = { version = ">=0.1.12", optional = true }
typer = ">=0.12.5"
# Frontend:
chainlit = { version = ">=1.2.0", optional = true }
# Utilities:
packaging = ">=23.0"

[tool.poetry.extras] # https://python-poetry.org/docs/pyproject/#extras
chainlit = ["chainlit"]
Expand Down
28 changes: 22 additions & 6 deletions src/raglite/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
from functools import lru_cache
from hashlib import sha256
from pathlib import Path
from typing import Any
from typing import Any, cast
from xml.sax.saxutils import escape

import numpy as np
from markdown_it import MarkdownIt
from packaging import version
from packaging.version import Version
from pydantic import ConfigDict
from sqlalchemy.engine import Engine, make_url
from sqlmodel import JSON, Column, Field, Relationship, Session, SQLModel, create_engine, text
Expand Down Expand Up @@ -310,6 +312,18 @@ def from_chunks(
)


def _pgvector_version(session: Session) -> Version:
    """Return the version of the pgvector extension registered in the database.

    Reads ``extversion`` for the ``vector`` extension from ``pg_extension`` and
    parses it with ``packaging.version``.

    Raises:
        ValueError: If the lookup or the parsing fails for any reason; the
            underlying exception is chained as the cause.
    """
    query = text("SELECT extversion FROM pg_extension WHERE extname = 'vector'")
    try:
        # scalar_one() is expected to yield exactly one value: the extension's version string.
        raw_version = cast(str, session.execute(query).scalar_one())
        return version.parse(raw_version)
    except Exception as exc:
        msg = "Unable to parse pgvector version, is pgvector installed?"
        raise ValueError(msg) from exc


@lru_cache(maxsize=1)
def create_database_engine(config: RAGLiteConfig | None = None) -> Engine:
"""Create a database engine and initialize it."""
Expand Down Expand Up @@ -358,17 +372,19 @@ def create_database_engine(config: RAGLiteConfig | None = None) -> Engine:
CREATE INDEX IF NOT EXISTS keyword_search_chunk_index ON chunk USING GIN (to_tsvector('simple', body));
""")
)
session.execute(
text(f"""
create_vector_index_sql = f"""
CREATE INDEX IF NOT EXISTS vector_search_chunk_index ON chunk_embedding
USING hnsw (
(embedding::halfvec({embedding_dim}))
halfvec_{metrics[config.vector_search_index_metric]}_ops
);
SET hnsw.ef_search = {20 * 4 * 8};
SET hnsw.iterative_scan = {'relaxed_order' if config.reranker else 'strict_order'};
""")
)
"""
# Enable iterative scan for pgvector v0.8.0 and up.
pgvector_version = _pgvector_version(session)
if pgvector_version and pgvector_version >= version.parse("0.8.0"):
create_vector_index_sql += f"\nSET hnsw.iterative_scan = {'relaxed_order' if config.reranker else 'strict_order'};"
session.execute(text(create_vector_index_sql))
session.commit()
elif db_backend == "sqlite":
# Create a virtual table for keyword search on the chunk table.
Expand Down

0 comments on commit 7ec9582

Please sign in to comment.