From 92fce765837d9eb445fc1d943346455e2e5ff82d Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 11:55:24 -0700 Subject: [PATCH 01/14] checkpoint --- backend/danswer/background/update.py | 14 ++ backend/danswer/configs/constants.py | 12 ++ .../danswer/db/connector_credential_pair.py | 1 - backend/danswer/db/feedback.py | 130 ++++++++++++++++++ backend/danswer/db/models.py | 74 ++++++++-- backend/danswer/direct_qa/answer_question.py | 18 ++- backend/danswer/listeners/slack_listener.py | 28 ++-- backend/danswer/server/manage.py | 4 - backend/danswer/server/models.py | 15 ++ backend/danswer/server/search_backend.py | 101 ++++++++++++-- 10 files changed, 356 insertions(+), 41 deletions(-) create mode 100644 backend/danswer/db/feedback.py diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index e5a2a1515e4..7b3b6d6f9f5 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -23,6 +23,7 @@ from danswer.db.credentials import backend_update_credential_json from danswer.db.engine import get_db_current_time from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.feedback import create_document_metadata from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_index_attempt from danswer.db.index_attempt import get_inprogress_index_attempts @@ -246,6 +247,19 @@ def _index( logger.debug( f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}" ) + + # Save in Postgres before indexing + for doc in doc_batch: + first_link = next( + (section.link for section in doc.sections if section.link), "" + ) + create_document_metadata( + doc_id=doc.id, + semantic_id=doc.semantic_identifier, + link=first_link, + db_session=db_session, + ) + index_user_id = ( None if db_credential.public_doc else db_credential.user_id ) diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 467e7e7a178..0be93f7a686 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -66,3 +66,15 @@ class ModelHostType(str, Enum): # https://medium.com/@yuhongsun96/host-a-llama-2-api-on-gpu-for-free-a5311463c183 COLAB_DEMO = "colab-demo" # TODO support for Azure, AWS, GCP GenAI model hosting + + +class QAFeedbackType(str, Enum): + LIKE = "like" # User likes the answer, used for metrics + DISLIKE = "dislike" # User dislikes the answer, used for metrics + + +class SearchFeedbackType(str, Enum): + ENDORSE = "endorse" # boost this document for all future queries + REJECT = "reject" # down-boost this document for all future queries + HIDE = "hide" # mark this document as untrusted, hide from LLM + UNHIDE = "unhide" diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index bc7a4cc7f54..f1c89e24c68 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -8,7 +8,6 @@ from danswer.db.connector import fetch_connector_by_id from danswer.db.credentials import fetch_credential_by_id from danswer.db.models import ConnectorCredentialPair -from danswer.db.models import IndexAttempt from danswer.db.models import IndexingStatus from danswer.db.models import User from danswer.server.models import StatusResponse diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py new file mode 100644 index 00000000000..ff1163b6088 --- /dev/null +++ b/backend/danswer/db/feedback.py @@ -0,0 +1,130 @@ +from uuid import UUID + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.configs.constants import QAFeedbackType +from danswer.configs.constants import SearchFeedbackType +from danswer.db.models import DocumentMetadata +from danswer.db.models import DocumentRetrievalFeedback +from danswer.db.models import QueryEvent +from danswer.search.models import SearchType + + +def fetch_query_event_by_id(query_id: int, db_session: Session) -> QueryEvent: + stmt = select(QueryEvent).where(QueryEvent.id == query_id) + result = db_session.execute(stmt) + query_event = result.scalar_one_or_none() + + if not query_event: + raise ValueError("Invalid Query Event provided for updating") + + return query_event + + +def fetch_doc_m_by_id(doc_id: str, db_session: Session) -> DocumentMetadata: + stmt = select(DocumentMetadata).where(DocumentMetadata.id == doc_id) + result = db_session.execute(stmt) + doc_m = result.scalar_one_or_none() + + if not doc_m: + raise ValueError("Invalid Document provided for updating") + + return doc_m + + +def create_document_metadata( + doc_id: str, + semantic_id: str, + link: str | None, + db_session: Session, +) -> None: + try: + fetch_doc_m_by_id(doc_id, db_session) + return + except ValueError: + # Document already exists, don't reset its data + pass + + DocumentMetadata( + id=doc_id, + semantic_id=semantic_id, + link=link, + ) + + +def create_query_event( + query: str, + selected_flow: SearchType | None, + llm_answer: str | None, + user_id: UUID | None, + db_session: Session, +) -> int: + query_event = QueryEvent( + query=query, + selected_search_flow=selected_flow, + llm_answer=llm_answer, + user_id=user_id, + ) + db_session.add(query_event) + db_session.commit() + + return query_event.id + + +def update_query_event_feedback( + feedback: QAFeedbackType, + query_id: int, + user_id: UUID | None, + db_session: Session, +) -> None: + query_event = fetch_query_event_by_id(query_id, db_session) + + if user_id != query_event.user_id: + raise ValueError("User trying to give feedback on a query run by another user.") + + query_event.feedback = feedback + + db_session.commit() + + +def create_doc_retrieval_feedback( + qa_event_id: int, + document_id: str, + document_rank: int, + db_session: Session, + clicked: bool = False, + feedback: SearchFeedbackType | None = None, +) -> None: + if not clicked and feedback is None: + raise ValueError("No action taken, not valid feedback") + + # Ensure this query event is valid so we hit exception here + # instead of a more confusing foreign key issue + fetch_query_event_by_id(qa_event_id, db_session) + + doc_m = fetch_doc_m_by_id(document_id, db_session) + + DocumentRetrievalFeedback( + qa_event_id=qa_event_id, + document_id=document_id, + document_rank=document_rank, + clicked=clicked, + feedback=feedback, + ) + + if feedback is not None: + if feedback == SearchFeedbackType.ENDORSE: + doc_m.boost += 1 + elif feedback == SearchFeedbackType.REJECT: + doc_m.boost -= 1 + elif feedback == SearchFeedbackType.HIDE: + doc_m.hidden = True + elif feedback == SearchFeedbackType.UNHIDE: + doc_m.hidden = False + else: + raise ValueError("Unhandled document feedback type") + + # TODO UPDATE INDEX BOOST + + db_session.commit() diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index e8b42be56f7..a21bb36b1cd 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -24,7 +24,10 @@ from danswer.auth.schemas import UserRole from danswer.configs.constants import DocumentSource +from danswer.configs.constants import QAFeedbackType +from danswer.configs.constants import SearchFeedbackType from danswer.connectors.models import InputType +from danswer.search.models import SearchType class IndexingStatus(str, PyEnum): @@ -162,7 +165,7 @@ class Credential(Base): deletion_attempt: Mapped[Optional["DeletionAttempt"]] = relationship( "DeletionAttempt", back_populates="credential" ) - user: Mapped[User] = relationship("User", back_populates="credentials") + user: Mapped[User] | None = relationship("User", back_populates="credentials") class IndexAttempt(Base): @@ -258,17 +261,6 @@ class DeletionAttempt(Base): ) -class Document(Base): - """Represents a single documents from a source. This is used to store - document level metadata, but currently nothing is stored""" - - __tablename__ = "document" - - # this should correspond to the ID of the document (as is passed around - # in Danswer) - id: Mapped[str] = mapped_column(String, primary_key=True) - - class DocumentByConnectorCredentialPair(Base): """Represents an indexing of a document by a specific connector / credential pair""" @@ -289,3 +281,61 @@ class DocumentByConnectorCredentialPair(Base): credential: Mapped[Credential] = relationship( "Credential", back_populates="documents_by_credential" ) + + +class QueryEvent(Base): + __tablename__ = "query_event" + + id: Mapped[int] = mapped_column(primary_key=True) + query: Mapped[str] = mapped_column(String()) + # search_flow refers to user selection, None if user used auto + selected_search_flow: Mapped[SearchType | None] = mapped_column( + Enum(SearchType), nullable=True + ) + llm_answer: Mapped[str | None] = mapped_column(String(), default=None) + feedback: Mapped[QAFeedbackType | None] = mapped_column( + Enum(QAFeedbackType), nullable=True + ) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + time_created: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), + server_default=func.now(), + ) + + user: Mapped[User] | None = relationship("User", back_populates="query_event") + + +class DocumentRetrievalFeedback(Base): + __tablename__ = "document_retrieval_feedback" + + id: Mapped[int] = mapped_column(primary_key=True) + qa_event_id: Mapped[int] = mapped_column( + ForeignKey("query_event.id"), + ) + document_id: Mapped[str] = mapped_column( + ForeignKey("document.id"), + ) + # How high up this document is in the results, 1 for first + document_rank: Mapped[int] = mapped_column(Integer) + clicked: Mapped[bool] = mapped_column(Boolean, default=False) + feedback: Mapped[SearchFeedbackType | None] = mapped_column( + Enum(SearchFeedbackType), nullable=True + ) + + qa_event: Mapped[QueryEvent] = relationship( + "QueryEvent", back_populates="document_retrieval_feedback" + ) + + +class DocumentMetadata(Base): + __tablename__ = "document" + + # this should correspond to the ID of the document (as is passed around + # in Danswer) + id: Mapped[str] = mapped_column(String, primary_key=True) + # 0 for neutral, positive for mostly endorse, negative for mostly reject + boost: Mapped[int] = mapped_column(Integer, default=0) + hidden: Mapped[bool] = mapped_column(Boolean, default=False) + semantic_id: Mapped[str] = mapped_column(String) + # First Section's link + link: Mapped[str | None] = mapped_column(String, nullable=True) diff --git a/backend/danswer/direct_qa/answer_question.py b/backend/danswer/direct_qa/answer_question.py index 7af0b668828..5141919f38f 100644 --- a/backend/danswer/direct_qa/answer_question.py +++ b/backend/danswer/direct_qa/answer_question.py @@ -1,8 +1,11 @@ +from sqlalchemy.orm import Session + from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.app_configs import NUM_GENERATIVE_AI_INPUT_DOCS from danswer.configs.app_configs import QA_TIMEOUT from danswer.datastores.document_index import get_default_document_index +from danswer.db.feedback import create_query_event from danswer.db.models import User from danswer.direct_qa.exceptions import OpenAIKeyMissing from danswer.direct_qa.exceptions import UnknownModelError @@ -22,9 +25,10 @@ @log_function_time() -def answer_question( +def answer_qa_query( question: QuestionRequest, user: User | None, + db_session: Session, disable_generative_answer: bool = DISABLE_GENERATIVE_AI, answer_generation_timeout: int = QA_TIMEOUT, ) -> QAResponse: @@ -35,6 +39,14 @@ def answer_question( offset_count = question.offset if question.offset is not None else 0 logger.info(f"Received QA query: {query}") + query_event_id = create_query_event( + query=query, + selected_flow=SearchType.KEYWORD, + llm_answer=None, + user_id=user.id if user is not None else None, + db_session=db_session, + ) + predicted_search, predicted_flow = query_intent(query) if use_keyword is None: use_keyword = predicted_search == SearchType.KEYWORD @@ -57,6 +69,7 @@ def answer_question( lower_ranked_docs=None, predicted_flow=predicted_flow, predicted_search=predicted_search, + query_event_id=query_event_id, ) if disable_generative_answer: @@ -70,6 +83,7 @@ def answer_question( # to run QA over more documents predicted_flow=QueryFlow.SEARCH, predicted_search=predicted_search, + query_event_id=query_event_id, ) try: @@ -83,6 +97,7 @@ def answer_question( predicted_flow=predicted_flow, predicted_search=predicted_search, error_msg=str(e), + query_event_id=query_event_id, ) chunk_offset = offset_count * NUM_GENERATIVE_AI_INPUT_DOCS @@ -108,4 +123,5 @@ def answer_question( predicted_flow=predicted_flow, predicted_search=predicted_search, error_msg=error_msg, + query_event_id=query_event_id, ) diff --git a/backend/danswer/listeners/slack_listener.py b/backend/danswer/listeners/slack_listener.py index db2bd78d03a..f9ed5e21e9c 100644 --- a/backend/danswer/listeners/slack_listener.py +++ b/backend/danswer/listeners/slack_listener.py @@ -1,8 +1,6 @@ import logging import os -from collections.abc import Callable from collections.abc import MutableMapping -from functools import wraps from typing import Any from typing import cast @@ -11,6 +9,7 @@ from slack_sdk.socket_mode import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest from slack_sdk.socket_mode.response import SocketModeResponse +from sqlalchemy.orm import Session from danswer.configs.app_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT from danswer.configs.app_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS @@ -20,7 +19,8 @@ from danswer.configs.constants import DocumentSource from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.connectors.slack.utils import UserIdReplacer -from danswer.direct_qa.answer_question import answer_question +from danswer.db.engine import get_sqlalchemy_engine +from danswer.direct_qa.answer_question import answer_qa_query from danswer.direct_qa.interfaces import DanswerQuote from danswer.server.models import QAResponse from danswer.server.models import QuestionRequest @@ -230,17 +230,19 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non logger=cast(logging.Logger, logger), ) def _get_answer(question: QuestionRequest) -> QAResponse: - answer = answer_question( - question=question, - user=None, - answer_generation_timeout=DANSWER_BOT_ANSWER_GENERATION_TIMEOUT, - ) - if not answer.error_msg: - return answer - else: - raise RuntimeError(answer.error_msg) + engine = get_sqlalchemy_engine() + with Session(engine, expire_on_commit=False) as db_session: + answer = answer_qa_query( + question=question, + user=None, + db_session=db_session, + answer_generation_timeout=DANSWER_BOT_ANSWER_GENERATION_TIMEOUT, + ) + if not answer.error_msg: + return answer + else: + raise RuntimeError(answer.error_msg) - answer = None try: answer = _get_answer( QuestionRequest( diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py index a8054549e2d..799e5bdaad8 100644 --- a/backend/danswer/server/manage.py +++ b/backend/danswer/server/manage.py @@ -43,11 +43,8 @@ from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.connector_credential_pair import remove_credential_from_connector from danswer.db.credentials import create_credential -from danswer.db.credentials import delete_credential from danswer.db.credentials import delete_google_drive_service_account_credentials from danswer.db.credentials import fetch_credential_by_id -from danswer.db.credentials import fetch_credentials -from danswer.db.credentials import update_credential from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.deletion_attempt import create_deletion_attempt from danswer.db.deletion_attempt import get_deletion_attempts @@ -68,7 +65,6 @@ from danswer.server.models import ConnectorCredentialPairIdentifier from danswer.server.models import ConnectorIndexingStatus from danswer.server.models import ConnectorSnapshot -from danswer.server.models import CredentialBase from danswer.server.models import CredentialSnapshot from danswer.server.models import DeletionAttemptSnapshot from danswer.server.models import FileUploadResponse diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index f79ce90728c..72802cbdbcd 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -11,6 +11,8 @@ from danswer.configs.app_configs import MASK_CREDENTIAL_PREFIX from danswer.configs.constants import DocumentSource +from danswer.configs.constants import QAFeedbackType +from danswer.configs.constants import SearchFeedbackType from danswer.connectors.models import InputType from danswer.datastores.interfaces import IndexFilter from danswer.db.models import Connector @@ -121,10 +123,23 @@ class QuestionRequest(BaseModel): offset: int | None +class QAFeedbackRequest(BaseModel): + query_id: int + feedback: QAFeedbackType + + +class SearchFeedbackRequest(BaseModel): + document_id: str + document_rank: int + click: bool + search_feedback: SearchFeedbackType + + class SearchResponse(BaseModel): # For semantic search, top docs are reranked, the remaining are as ordered from retrieval top_ranked_docs: list[SearchDoc] | None lower_ranked_docs: list[SearchDoc] | None + query_event_id: int class QAResponse(SearchResponse): diff --git a/backend/danswer/server/search_backend.py b/backend/danswer/server/search_backend.py index 1e4cc47f3eb..90257942683 100644 --- a/backend/danswer/server/search_backend.py +++ b/backend/danswer/server/search_backend.py @@ -5,17 +5,22 @@ from fastapi import APIRouter from fastapi import Depends from fastapi.responses import StreamingResponse +from sqlalchemy.orm import Session from danswer.auth.users import current_user from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.app_configs import NUM_GENERATIVE_AI_INPUT_DOCS from danswer.datastores.document_index import get_default_document_index +from danswer.db.engine import get_session +from danswer.db.feedback import create_query_event +from danswer.db.feedback import update_query_event_feedback from danswer.db.models import User -from danswer.direct_qa.answer_question import answer_question +from danswer.direct_qa.answer_question import answer_qa_query from danswer.direct_qa.exceptions import OpenAIKeyMissing from danswer.direct_qa.exceptions import UnknownModelError from danswer.direct_qa.llm_utils import get_default_qa_model +from danswer.direct_qa.interfaces import DanswerAnswerPiece from danswer.search.danswer_helper import query_intent from danswer.search.danswer_helper import recommend_search_flow from danswer.search.keyword_search import retrieve_keyword_documents @@ -24,8 +29,10 @@ from danswer.search.semantic_search import chunks_to_search_docs from danswer.search.semantic_search import retrieve_ranked_documents from danswer.server.models import HelperResponse +from danswer.server.models import QAFeedbackRequest from danswer.server.models import QAResponse from danswer.server.models import QuestionRequest +from danswer.server.models import SearchFeedbackRequest from danswer.server.models import SearchResponse from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time @@ -50,62 +57,97 @@ def get_search_type( @router.post("/semantic-search") def semantic_search( - question: QuestionRequest, user: User = Depends(current_user) + question: QuestionRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), ) -> SearchResponse: query = question.query collection = question.collection filters = question.filters logger.info(f"Received semantic search query: {query}") + query_event_id = create_query_event( + query=query, + selected_flow=SearchType.KEYWORD, + llm_answer=None, + user_id=user.id, + db_session=db_session, + ) + user_id = None if user is None else user.id ranked_chunks, unranked_chunks = retrieve_ranked_documents( query, user_id, filters, get_default_document_index(collection=collection) ) if not ranked_chunks: - return SearchResponse(top_ranked_docs=None, lower_ranked_docs=None) + return SearchResponse( + top_ranked_docs=None, lower_ranked_docs=None, query_event_id=query_event_id + ) top_docs = chunks_to_search_docs(ranked_chunks) other_top_docs = chunks_to_search_docs(unranked_chunks) - return SearchResponse(top_ranked_docs=top_docs, lower_ranked_docs=other_top_docs) + return SearchResponse( + top_ranked_docs=top_docs, + lower_ranked_docs=other_top_docs, + query_event_id=query_event_id, + ) @router.post("/keyword-search") def keyword_search( - question: QuestionRequest, user: User = Depends(current_user) + question: QuestionRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), ) -> SearchResponse: query = question.query collection = question.collection filters = question.filters logger.info(f"Received keyword search query: {query}") + query_event_id = create_query_event( + query=query, + selected_flow=SearchType.KEYWORD, + llm_answer=None, + user_id=user.id, + db_session=db_session, + ) + user_id = None if user is None else user.id ranked_chunks = retrieve_keyword_documents( query, user_id, filters, get_default_document_index(collection=collection) ) if not ranked_chunks: - return SearchResponse(top_ranked_docs=None, lower_ranked_docs=None) + return SearchResponse( + top_ranked_docs=None, lower_ranked_docs=None, query_event_id=query_event_id + ) top_docs = chunks_to_search_docs(ranked_chunks) - return SearchResponse(top_ranked_docs=top_docs, lower_ranked_docs=None) + return SearchResponse( + top_ranked_docs=top_docs, lower_ranked_docs=None, query_event_id=query_event_id + ) @router.post("/direct-qa") def direct_qa( - question: QuestionRequest, user: User = Depends(current_user) + question: QuestionRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), ) -> QAResponse: - return answer_question(question=question, user=user) + return answer_qa_query(question=question, user=user, db_session=db_session) @router.post("/stream-direct-qa") def stream_direct_qa( - question: QuestionRequest, user: User = Depends(current_user) + question: QuestionRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), ) -> StreamingResponse: send_packet_debug_msg = "Sending Packet: {}" top_documents_key = "top_documents" unranked_top_docs_key = "unranked_top_documents" predicted_flow_key = "predicted_flow" predicted_search_key = "predicted_search" + query_event_id_key = "query_event_id" logger.debug(f"Received QA query: {question.query}") logger.debug(f"Query filters: {question.filters}") @@ -116,6 +158,7 @@ def stream_direct_qa( def stream_qa_portions( disable_generative_answer: bool = DISABLE_GENERATIVE_AI, ) -> Generator[str, None, None]: + answer_so_far: str = "" query = question.query collection = question.collection filters = question.filters @@ -194,6 +237,11 @@ def stream_qa_portions( ): if response_packet is None: continue + if ( + isinstance(response_packet, DanswerAnswerPiece) + and response_packet.answer_piece + ): + answer_so_far = answer_so_far + response_packet.answer_piece logger.debug(f"Sending packet: {response_packet}") yield get_json_line(asdict(response_packet)) except Exception as e: @@ -201,6 +249,39 @@ def stream_qa_portions( yield get_json_line({"error": str(e)}) logger.exception("Failed to run QA") + query_event_id = create_query_event( + query=query, + selected_flow=SearchType.KEYWORD if use_keyword else SearchType.SEMANTIC, + llm_answer=answer_so_far, + user_id=user_id, + db_session=db_session, + ) + + yield get_json_line({query_event_id_key: query_event_id}) + return return StreamingResponse(stream_qa_portions(), media_type="application/json") + + +@router.post("/query-feedback") +def process_query_feedback( + feedback: QAFeedbackRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + update_query_event_feedback( + feedback=feedback.feedback, + query_id=feedback.query_id, + user_id=user.id if user is not None else None, + db_session=db_session, + ) + + +@router.post("/doc-retrieval-feedback") +def process_doc_retrieval_feedback( + feedback: SearchFeedbackRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + pass From 309b85a135a9273a8eb087e7bc5bbc9d8bba7677 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 12:50:13 -0700 Subject: [PATCH 02/14] no migrations yet --- backend/danswer/datastores/datastore_utils.py | 9 ++++++++ backend/danswer/datastores/interfaces.py | 2 +- backend/danswer/datastores/vespa/store.py | 2 +- backend/danswer/db/feedback.py | 20 +++++++++++++---- backend/danswer/direct_qa/answer_question.py | 5 ++--- backend/danswer/server/models.py | 1 + backend/danswer/server/search_backend.py | 22 ++++++++++++------- 7 files changed, 44 insertions(+), 17 deletions(-) diff --git a/backend/danswer/datastores/datastore_utils.py b/backend/danswer/datastores/datastore_utils.py index 9b8a4aab252..b1723f5c78c 100644 --- a/backend/danswer/datastores/datastore_utils.py +++ b/backend/danswer/datastores/datastore_utils.py @@ -12,6 +12,15 @@ DEFAULT_BATCH_SIZE = 30 +BOOST_MULTIPLIER = 1.2 + + +def translate_boost_count_to_multiplier(boost: int) -> float: + if boost > 0: + return BOOST_MULTIPLIER**boost + elif boost < 0: + return 1 / (BOOST_MULTIPLIER**boost) + return 1 def get_uuid_from_chunk( diff --git a/backend/danswer/datastores/interfaces.py b/backend/danswer/datastores/interfaces.py index 0ccddc81a84..f8a5f935c78 100644 --- a/backend/danswer/datastores/interfaces.py +++ b/backend/danswer/datastores/interfaces.py @@ -32,7 +32,7 @@ class UpdateRequest: document_ids: list[str] # all other fields will be left alone allowed_users: list[str] | None = None - boost: int | None = None + boost: float | None = None class Verifiable(abc.ABC): diff --git a/backend/danswer/datastores/vespa/store.py b/backend/danswer/datastores/vespa/store.py index dba887951f5..464578233af 100644 --- a/backend/danswer/datastores/vespa/store.py +++ b/backend/danswer/datastores/vespa/store.py @@ -342,7 +342,7 @@ def update(self, update_requests: list[UpdateRequest]) -> None: logger.error("Update request received but nothing to update") continue - update_dict: dict[str, dict[str, list[str] | int]] = {"fields": {}} + update_dict: dict[str, dict[str, list[str] | int | float]] = {"fields": {}} if update_request.boost: update_dict["fields"][BOOST] = update_request.boost if update_request.allowed_users: diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index ff1163b6088..30c66900ebf 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -5,6 +5,9 @@ from danswer.configs.constants import QAFeedbackType from danswer.configs.constants import SearchFeedbackType +from danswer.datastores.datastore_utils import translate_boost_count_to_multiplier +from danswer.datastores.document_index import get_default_document_index +from danswer.datastores.interfaces import UpdateRequest from danswer.db.models import DocumentMetadata from danswer.db.models import DocumentRetrievalFeedback from danswer.db.models import QueryEvent @@ -92,6 +95,7 @@ def create_doc_retrieval_feedback( qa_event_id: int, document_id: str, document_rank: int, + user_id: UUID | None, db_session: Session, clicked: bool = False, feedback: SearchFeedbackType | None = None, @@ -99,9 +103,10 @@ def create_doc_retrieval_feedback( if not clicked and feedback is None: raise ValueError("No action taken, not valid feedback") - # Ensure this query event is valid so we hit exception here - # instead of a more confusing foreign key issue - fetch_query_event_by_id(qa_event_id, db_session) + query_event = fetch_query_event_by_id(qa_event_id, db_session) + + if user_id != query_event.user_id: + raise ValueError("User trying to give feedback on a query run by another user.") doc_m = fetch_doc_m_by_id(document_id, db_session) @@ -125,6 +130,13 @@ def create_doc_retrieval_feedback( else: raise ValueError("Unhandled document feedback type") - # TODO UPDATE INDEX BOOST + if feedback in [SearchFeedbackType.ENDORSE, SearchFeedbackType.REJECT]: + document_index = get_default_document_index() + update = UpdateRequest( + document_ids=[document_id], + boost=translate_boost_count_to_multiplier(doc_m.boost), + ) + # Updates are generally batched for efficiency, this case only 1 doc/value is updated + document_index.update([update]) db_session.commit() diff --git a/backend/danswer/direct_qa/answer_question.py b/backend/danswer/direct_qa/answer_question.py index 5141919f38f..d810f323abf 100644 --- a/backend/danswer/direct_qa/answer_question.py +++ b/backend/danswer/direct_qa/answer_question.py @@ -33,7 +33,6 @@ def answer_qa_query( answer_generation_timeout: int = QA_TIMEOUT, ) -> QAResponse: query = question.query - collection = question.collection filters = question.filters use_keyword = question.use_keyword offset_count = question.offset if question.offset is not None else 0 @@ -54,12 +53,12 @@ def answer_qa_query( user_id = None if user is None else user.id if use_keyword: ranked_chunks: list[InferenceChunk] | None = retrieve_keyword_documents( - query, user_id, filters, get_default_document_index(collection=collection) + query, user_id, filters, get_default_document_index() ) unranked_chunks: list[InferenceChunk] | None = [] else: ranked_chunks, unranked_chunks = retrieve_ranked_documents( - query, user_id, filters, get_default_document_index(collection=collection) + query, user_id, filters, get_default_document_index() ) if not ranked_chunks: return QAResponse( diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index 72802cbdbcd..b189265ea93 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -129,6 +129,7 @@ class QAFeedbackRequest(BaseModel): class SearchFeedbackRequest(BaseModel): + query_id: int document_id: str document_rank: int click: bool diff --git a/backend/danswer/server/search_backend.py b/backend/danswer/server/search_backend.py index 90257942683..37940991b4e 100644 --- a/backend/danswer/server/search_backend.py +++ b/backend/danswer/server/search_backend.py @@ -13,6 +13,7 @@ from danswer.configs.app_configs import NUM_GENERATIVE_AI_INPUT_DOCS from danswer.datastores.document_index import get_default_document_index from danswer.db.engine import get_session +from danswer.db.feedback import create_doc_retrieval_feedback from danswer.db.feedback import create_query_event from danswer.db.feedback import update_query_event_feedback from danswer.db.models import User @@ -62,7 +63,6 @@ def semantic_search( db_session: Session = Depends(get_session), ) -> SearchResponse: query = question.query - collection = question.collection filters = question.filters logger.info(f"Received semantic search query: {query}") @@ -76,7 +76,7 @@ def semantic_search( user_id = None if user is None else user.id ranked_chunks, unranked_chunks = retrieve_ranked_documents( - query, user_id, filters, get_default_document_index(collection=collection) + query, user_id, filters, get_default_document_index() ) if not ranked_chunks: return SearchResponse( @@ -100,7 +100,6 @@ def keyword_search( db_session: Session = Depends(get_session), ) -> SearchResponse: query = question.query - collection = question.collection filters = question.filters logger.info(f"Received keyword search query: {query}") @@ -114,7 +113,7 @@ def keyword_search( user_id = None if user is None else user.id ranked_chunks = retrieve_keyword_documents( - query, user_id, filters, get_default_document_index(collection=collection) + query, user_id, filters, get_default_document_index() ) if not ranked_chunks: return SearchResponse( @@ -160,7 +159,6 @@ def stream_qa_portions( ) -> Generator[str, None, None]: answer_so_far: str = "" query = question.query - collection = question.collection filters = question.filters use_keyword = question.use_keyword offset_count = question.offset if question.offset is not None else 0 @@ -175,7 +173,7 @@ def stream_qa_portions( query, user_id, filters, - get_default_document_index(collection=collection), + get_default_document_index(), ) unranked_chunks: list[InferenceChunk] | None = [] else: @@ -183,7 +181,7 @@ def stream_qa_portions( query, user_id, filters, - get_default_document_index(collection=collection), + get_default_document_index(), ) if not ranked_chunks: logger.debug("No Documents Found") @@ -284,4 +282,12 @@ def process_doc_retrieval_feedback( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> None: - pass + create_doc_retrieval_feedback( + qa_event_id=feedback.query_id, + document_id=feedback.document_id, + document_rank=feedback.document_rank, + clicked=feedback.click, + feedback=feedback.search_feedback, + user_id=user.id if user is not None else None, + db_session=db_session, + ) From 458d4db2b89c3bd245d11bd34e954f73af508258 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 12:59:37 -0700 Subject: [PATCH 03/14] important comment --- backend/danswer/db/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index a21bb36b1cd..a01fb27d102 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -339,3 +339,4 @@ class DocumentMetadata(Base): semantic_id: Mapped[str] = mapped_column(String) # First Section's link link: Mapped[str | None] = mapped_column(String, nullable=True) + # TODO if more sensitive data is added here for display, make sure to add user/group permission From 36671bef0c5043b1be0131f67041526cfe2682d5 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 13:44:57 -0700 Subject: [PATCH 04/14] add migrations --- .../versions/d929f0c1c6af_feedback_feature.py | 93 +++++++++++++++++++ backend/danswer/db/models.py | 6 +- 2 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 backend/alembic/versions/d929f0c1c6af_feedback_feature.py diff --git a/backend/alembic/versions/d929f0c1c6af_feedback_feature.py b/backend/alembic/versions/d929f0c1c6af_feedback_feature.py new file mode 100644 index 00000000000..985880e4025 --- /dev/null +++ b/backend/alembic/versions/d929f0c1c6af_feedback_feature.py @@ -0,0 +1,93 @@ +"""Feedback Feature + +Revision ID: d929f0c1c6af +Revises: 8aabb57f3b49 +Create Date: 2023-08-27 13:03:54.274987 + +""" +import fastapi_users_db_sqlalchemy +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "d929f0c1c6af" +down_revision = "8aabb57f3b49" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "query_event", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("query", sa.String(), nullable=False), + sa.Column( + "selected_search_flow", + sa.Enum("KEYWORD", "SEMANTIC", name="searchtype"), + nullable=True, + ), + sa.Column("llm_answer", sa.String(), nullable=True), + sa.Column( + "feedback", + sa.Enum("LIKE", "DISLIKE", name="qafeedbacktype"), + nullable=True, + ), + sa.Column( + "user_id", + fastapi_users_db_sqlalchemy.generics.GUID(), + nullable=True, + ), + sa.Column( + "time_created", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "document_retrieval_feedback", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("qa_event_id", sa.Integer(), nullable=False), + sa.Column("document_id", sa.String(), nullable=False), + sa.Column("document_rank", sa.Integer(), nullable=False), + sa.Column("clicked", sa.Boolean(), nullable=False), + sa.Column( + "feedback", + sa.Enum( + "ENDORSE", + "REJECT", + "HIDE", + "UNHIDE", + name="searchfeedbacktype", + ), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["document_id"], + ["document.id"], + ), + sa.ForeignKeyConstraint( + ["qa_event_id"], + ["query_event.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.add_column("document", sa.Column("boost", sa.Integer(), nullable=False)) + op.add_column("document", sa.Column("hidden", sa.Boolean(), nullable=False)) + op.add_column("document", sa.Column("semantic_id", sa.String(), nullable=False)) + op.add_column("document", sa.Column("link", sa.String(), nullable=True)) + + +def downgrade() -> None: + op.drop_column("document", "link") + op.drop_column("document", "semantic_id") + op.drop_column("document", "hidden") + op.drop_column("document", "boost") + op.drop_table("document_retrieval_feedback") + op.drop_table("query_event") diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index a01fb27d102..16ba9404a28 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -165,7 +165,7 @@ class Credential(Base): deletion_attempt: Mapped[Optional["DeletionAttempt"]] = relationship( "DeletionAttempt", back_populates="credential" ) - user: Mapped[User] | None = relationship("User", back_populates="credentials") + user: Mapped[User | None] = relationship("User", back_populates="credentials") class IndexAttempt(Base): @@ -302,7 +302,7 @@ class QueryEvent(Base): server_default=func.now(), ) - user: Mapped[User] | None = relationship("User", back_populates="query_event") + user: Mapped[User | None] = relationship("User", back_populates="query_event") class DocumentRetrievalFeedback(Base): @@ -328,6 +328,8 @@ class DocumentRetrievalFeedback(Base): class DocumentMetadata(Base): + """Maps semantically to a "Document", named this way to disambiguate in code""" + __tablename__ = "document" # this should correspond to the ID of the document (as is passed around From 3f9a3106b8ddc7549bdc90cfac67785ccd4b6fbc Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 15:26:21 -0700 Subject: [PATCH 05/14] test time --- backend/danswer/db/feedback.py | 17 +++++++++++++++++ backend/danswer/server/manage.py | 14 +++++++++++++- backend/danswer/server/models.py | 5 +++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index 30c66900ebf..12fc13ccc4f 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -1,5 +1,7 @@ from uuid import UUID +from sqlalchemy import asc +from sqlalchemy import desc from sqlalchemy import select from sqlalchemy.orm import Session @@ -36,6 +38,21 @@ def fetch_doc_m_by_id(doc_id: str, db_session: Session) -> DocumentMetadata: return doc_m +def fetch_docs_ranked_by_boost( + db_session: Session, ascending: bool = False, limit: int = 100 +) -> list[DocumentMetadata]: + order_func = asc if ascending else desc + stmt = ( + select(DocumentMetadata) + .order_by(order_func(DocumentMetadata.boost)) + .limit(limit) + ) + result = db_session.execute(stmt) + doc_m_list = result.scalars().all() + + return list(doc_m_list) + + def create_document_metadata( doc_id: str, semantic_id: str, diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py index 799e5bdaad8..c84b0840dd3 100644 --- a/backend/danswer/server/manage.py +++ b/backend/danswer/server/manage.py @@ -49,6 +49,7 @@ from danswer.db.deletion_attempt import create_deletion_attempt from danswer.db.deletion_attempt import get_deletion_attempts from danswer.db.engine import get_session +from danswer.db.feedback import fetch_docs_ranked_by_boost from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_latest_index_attempts from danswer.db.models import DeletionAttempt @@ -69,6 +70,7 @@ from danswer.server.models import DeletionAttemptSnapshot from danswer.server.models import FileUploadResponse from danswer.server.models import GDriveCallback +from danswer.server.models import GetBoostedDocsRequest from danswer.server.models import GoogleAppCredentials from danswer.server.models import GoogleServiceAccountCredentialRequest from danswer.server.models import GoogleServiceAccountKey @@ -79,7 +81,6 @@ from danswer.server.models import UserRoleResponse from danswer.utils.logger import setup_logger - router = APIRouter(prefix="/manage") logger = setup_logger() @@ -89,6 +90,17 @@ """Admin only API endpoints""" +@router.get("/doc-boosts") +def get_most_boosted_docs( + doc_options: GetBoostedDocsRequest, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[DocumentMetadata]: + return fetch_docs_ranked_by_boost( + ascending=doc_options.ascending, limit=doc_options.limit, db_session=db_session + ) + + @router.get("/admin/connector/google-drive/app-credential") def check_google_app_credentials_exist( _: User = Depends(current_admin_user), diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index b189265ea93..0edee1a38bc 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -123,6 +123,11 @@ class QuestionRequest(BaseModel): offset: int | None +class GetBoostedDocsRequest(BaseModel): + ascending: bool + limit: int + + class QAFeedbackRequest(BaseModel): query_id: int feedback: QAFeedbackType From 934d196badef3efc31c94c114bdcb84ab1417160 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 16:11:16 -0700 Subject: [PATCH 06/14] test more --- backend/danswer/db/document.py | 28 ++++++++++++++-------------- backend/danswer/db/feedback.py | 16 ++++++---------- backend/danswer/db/models.py | 4 +--- backend/danswer/server/manage.py | 15 +++++++++++++-- backend/danswer/server/models.py | 8 ++++++++ 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index c35db0c8682..bf4d237d515 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -8,7 +8,7 @@ from sqlalchemy.orm import Session from danswer.datastores.interfaces import DocumentMetadata -from danswer.db.models import Document +from danswer.db.models import Document as DbDocument from danswer.db.models import DocumentByConnectorCredentialPair from danswer.db.utils import model_to_dict from danswer.utils.logger import setup_logger @@ -20,7 +20,7 @@ def get_documents_with_single_connector_credential_pair( db_session: Session, connector_id: int, credential_id: int, -) -> Sequence[Document]: +) -> Sequence[DbDocument]: initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where( and_( DocumentByConnectorCredentialPair.connector_id == connector_id, @@ -31,17 +31,17 @@ def get_documents_with_single_connector_credential_pair( # Filter it down to the documents with only a single connector/credential pair # Meaning if this connector/credential pair is removed, this doc should be gone trimmed_doc_ids_stmt = ( - select(Document.id) + select(DbDocument.id) .join( DocumentByConnectorCredentialPair, - DocumentByConnectorCredentialPair.id == Document.id, + DocumentByConnectorCredentialPair.id == DbDocument.id, ) - .where(Document.id.in_(initial_doc_ids_stmt)) - .group_by(Document.id) + .where(DbDocument.id.in_(initial_doc_ids_stmt)) + .group_by(DbDocument.id) .having(func.count(DocumentByConnectorCredentialPair.id) == 1) ) - stmt = select(Document).where(Document.id.in_(trimmed_doc_ids_stmt)) + stmt = select(DbDocument).where(DbDocument.id.in_(trimmed_doc_ids_stmt)) return db_session.scalars(stmt).all() @@ -60,13 +60,13 @@ def get_document_by_connector_credential_pairs_indexed_by_multiple( # Filter it down to the documents with more than 1 connector/credential pair # Meaning if this connector/credential pair is removed, this doc is still accessible trimmed_doc_ids_stmt = ( - select(Document.id) + select(DbDocument.id) .join( DocumentByConnectorCredentialPair, - DocumentByConnectorCredentialPair.id == Document.id, + DocumentByConnectorCredentialPair.id == DbDocument.id, ) - .where(Document.id.in_(initial_doc_ids_stmt)) - .group_by(Document.id) + .where(DbDocument.id.in_(initial_doc_ids_stmt)) + .group_by(DbDocument.id) .having(func.count(DocumentByConnectorCredentialPair.id) > 1) ) @@ -86,8 +86,8 @@ def upsert_documents( if document_metadata.document_id not in seen_document_ids: seen_document_ids.add(document_metadata.document_id) - insert_stmt = insert(Document).values( - [model_to_dict(Document(id=doc_id)) for doc_id in seen_document_ids] + insert_stmt = insert(DbDocument).values( + [model_to_dict(DbDocument(id=doc_id)) for doc_id in seen_document_ids] ) # for now, there are no columns to update. If more metadata is added, then this # needs to change to an `on_conflict_do_update` @@ -140,7 +140,7 @@ def delete_document_by_connector_credential_pair( def delete_documents(db_session: Session, document_ids: list[str]) -> None: - db_session.execute(delete(Document).where(Document.id.in_(document_ids))) + db_session.execute(delete(DbDocument).where(DbDocument.id.in_(document_ids))) def delete_documents_complete(db_session: Session, document_ids: list[str]) -> None: diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index 12fc13ccc4f..92c1534cba5 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -10,7 +10,7 @@ from danswer.datastores.datastore_utils import translate_boost_count_to_multiplier from danswer.datastores.document_index import get_default_document_index from danswer.datastores.interfaces import UpdateRequest -from danswer.db.models import DocumentMetadata +from danswer.db.models import Document as DbDocument from danswer.db.models import DocumentRetrievalFeedback from danswer.db.models import QueryEvent from danswer.search.models import SearchType @@ -27,8 +27,8 @@ def fetch_query_event_by_id(query_id: int, db_session: Session) -> QueryEvent: return query_event -def fetch_doc_m_by_id(doc_id: str, db_session: Session) -> DocumentMetadata: - stmt = select(DocumentMetadata).where(DocumentMetadata.id == doc_id) +def fetch_doc_m_by_id(doc_id: str, db_session: Session) -> DbDocument: + stmt = select(DbDocument).where(DbDocument.id == doc_id) result = db_session.execute(stmt) doc_m = result.scalar_one_or_none() @@ -40,13 +40,9 @@ def fetch_doc_m_by_id(doc_id: str, db_session: Session) -> DocumentMetadata: def fetch_docs_ranked_by_boost( db_session: Session, ascending: bool = False, limit: int = 100 -) -> list[DocumentMetadata]: +) -> list[DbDocument]: order_func = asc if ascending else desc - stmt = ( - select(DocumentMetadata) - .order_by(order_func(DocumentMetadata.boost)) - .limit(limit) - ) + stmt = select(DbDocument).order_by(order_func(DbDocument.boost)).limit(limit) result = db_session.execute(stmt) doc_m_list = result.scalars().all() @@ -66,7 +62,7 @@ def create_document_metadata( # Document already exists, don't reset its data pass - DocumentMetadata( + DbDocument( id=doc_id, semantic_id=semantic_id, link=link, diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 16ba9404a28..9a9f5dffcda 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -327,9 +327,7 @@ class DocumentRetrievalFeedback(Base): ) -class DocumentMetadata(Base): - """Maps semantically to a "Document", named this way to disambiguate in code""" - +class Document(Base): __tablename__ = "document" # this should correspond to the ID of the document (as is passed around diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py index c84b0840dd3..30dae9db635 100644 --- a/backend/danswer/server/manage.py +++ b/backend/danswer/server/manage.py @@ -62,6 +62,7 @@ from danswer.server.models import ApiKey from danswer.server.models import AuthStatus from danswer.server.models import AuthUrl +from danswer.server.models import BoostDoc from danswer.server.models import ConnectorBase from danswer.server.models import ConnectorCredentialPairIdentifier from danswer.server.models import ConnectorIndexingStatus @@ -95,10 +96,20 @@ def get_most_boosted_docs( doc_options: GetBoostedDocsRequest, _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), -) -> list[DocumentMetadata]: - return fetch_docs_ranked_by_boost( +) -> list[BoostDoc]: + boost_docs = fetch_docs_ranked_by_boost( ascending=doc_options.ascending, limit=doc_options.limit, db_session=db_session ) + return [ + BoostDoc( + document_id=doc.id, + semantic_id=doc.semantic_id, + link=doc.link or "", + boost=doc.boost, + hidden=doc.hidden, + ) + for doc in boost_docs + ] @router.get("/admin/connector/google-drive/app-credential") diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index 0edee1a38bc..1a80f1689e5 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -107,6 +107,14 @@ class UserRoleResponse(BaseModel): role: str +class BoostDoc(BaseModel): + document_id: str + semantic_id: str + link: str + boost: int + hidden: bool + + class SearchDoc(BaseModel): document_id: str semantic_identifier: str From e0000e51d1485c7b19c74a969212a309f04a9d40 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 16:47:14 -0700 Subject: [PATCH 07/14] fix db models --- backend/danswer/db/models.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 9a9f5dffcda..2efdba540c6 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -64,6 +64,9 @@ class User(SQLAlchemyBaseUserTableUUID, Base): credentials: Mapped[List["Credential"]] = relationship( "Credential", back_populates="user", lazy="joined" ) + query_events: Mapped[List["QueryEvent"]] = relationship( + "QueryEvent", back_populates="user" + ) class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base): @@ -302,7 +305,10 @@ class QueryEvent(Base): server_default=func.now(), ) - user: Mapped[User | None] = relationship("User", back_populates="query_event") + user: Mapped[User | None] = relationship("User", back_populates="query_events") + document_feedbacks: Mapped[List["DocumentRetrievalFeedback"]] = relationship( + "DocumentRetrievalFeedback", back_populates="qa_event" + ) class DocumentRetrievalFeedback(Base): @@ -323,7 +329,10 @@ class DocumentRetrievalFeedback(Base): ) qa_event: Mapped[QueryEvent] = relationship( - "QueryEvent", back_populates="document_retrieval_feedback" + "QueryEvent", back_populates="document_feedbacks" + ) + document: Mapped["Document"] = relationship( + "Document", back_populates="retrieval_feedbacks" ) @@ -340,3 +349,7 @@ class Document(Base): # First Section's link link: Mapped[str | None] = mapped_column(String, nullable=True) # TODO if more sensitive data is added here for display, make sure to add user/group permission + + retrieval_feedbacks: Mapped[List[DocumentRetrievalFeedback]] = relationship( + "DocumentRetrievalFeedback", back_populates="document" + ) From c5fecffd9c5b28e7ba84c5de0f5ebd40d58544d5 Mon Sep 17 00:00:00 2001 From: Weves Date: Sun, 27 Aug 2023 18:11:30 -0700 Subject: [PATCH 08/14] Initial FE + making the endpoint use query params --- backend/danswer/server/manage.py | 8 +-- backend/danswer/server/models.py | 5 -- web/src/app/admin/documents/feedback/page.tsx | 71 +++++++++++++++++++ web/src/app/admin/layout.tsx | 15 ++++ web/src/components/icons/icons.tsx | 8 +++ web/src/lib/hooks.ts | 22 +++++- web/src/lib/types.ts | 8 +++ 7 files changed, 126 insertions(+), 11 deletions(-) create mode 100644 web/src/app/admin/documents/feedback/page.tsx diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py index 30dae9db635..a4d89708367 100644 --- a/backend/danswer/server/manage.py +++ b/backend/danswer/server/manage.py @@ -71,7 +71,6 @@ from danswer.server.models import DeletionAttemptSnapshot from danswer.server.models import FileUploadResponse from danswer.server.models import GDriveCallback -from danswer.server.models import GetBoostedDocsRequest from danswer.server.models import GoogleAppCredentials from danswer.server.models import GoogleServiceAccountCredentialRequest from danswer.server.models import GoogleServiceAccountKey @@ -91,14 +90,15 @@ """Admin only API endpoints""" -@router.get("/doc-boosts") +@router.get("/admin/doc-boosts") def get_most_boosted_docs( - doc_options: GetBoostedDocsRequest, + ascending: bool, + limit: int, _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[BoostDoc]: boost_docs = fetch_docs_ranked_by_boost( - ascending=doc_options.ascending, limit=doc_options.limit, db_session=db_session + ascending=ascending, limit=limit, db_session=db_session ) return [ BoostDoc( diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index 1a80f1689e5..1052b7e2305 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -131,11 +131,6 @@ class QuestionRequest(BaseModel): offset: int | None -class GetBoostedDocsRequest(BaseModel): - ascending: bool - limit: int - - class QAFeedbackRequest(BaseModel): query_id: int feedback: QAFeedbackType diff --git a/web/src/app/admin/documents/feedback/page.tsx b/web/src/app/admin/documents/feedback/page.tsx new file mode 100644 index 00000000000..e17503c245f --- /dev/null +++ b/web/src/app/admin/documents/feedback/page.tsx @@ -0,0 +1,71 @@ +"use client"; + +import { Button } from "@/components/Button"; +import { LoadingAnimation } from "@/components/Loading"; +import { BasicTable } from "@/components/admin/connectors/BasicTable"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { ThumbsUpIcon, UsersIcon } from "@/components/icons/icons"; +import { fetcher } from "@/lib/fetcher"; +import { useMostReactedToDocuments } from "@/lib/hooks"; +import { User } from "@/lib/types"; +import useSWR, { mutate } from "swr"; + +const columns = [ + { + header: "Document Name", + key: "name", + }, + { + header: "Boost", + key: "boost", + }, + { + header: "Promote", + key: "promote", + }, +]; + +const DocumentFeedbackTable = () => { + const { popup, setPopup } = usePopup(); + + const { data, isLoading, error, refreshDocs } = useMostReactedToDocuments(); + + if (isLoading) { + return ; + } + + if (error || !data) { + return
Error loading users
; + } + + return ( +
+ {popup} + { + return { + name: documentBoostStatus.semantic_id, + boost: documentBoostStatus.boost, + promote: "hi" + }; + })} + /> +
+ ); +}; + +const Page = () => { + return ( +
+
+ +

Document Feedback

+
+ + +
+ ); +}; + +export default Page; diff --git a/web/src/app/admin/layout.tsx b/web/src/app/admin/layout.tsx index 761b3dd04f4..7c9edba2d30 100644 --- a/web/src/app/admin/layout.tsx +++ b/web/src/app/admin/layout.tsx @@ -18,6 +18,7 @@ import { ProductboardIcon, LinearIcon, UsersIcon, + ThumbsUpIcon, } from "@/components/icons/icons"; import { DISABLE_AUTH } from "@/lib/constants"; import { getCurrentUserSS } from "@/lib/userSS"; @@ -219,6 +220,20 @@ export default async function AdminLayout({ }, ], }, + { + name: "Document Management", + items: [ + { + name: ( +
+ +
Feedback
+
+ ), + link: "/admin/documents/feedback", + }, + ], + }, ]} />
diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 60ff4eeb9ff..c068d0d2a11 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -14,6 +14,7 @@ import { X, Question, Users, + ThumbsUp, } from "@phosphor-icons/react"; import { SiBookstack } from "react-icons/si"; import { FaFile, FaGlobe } from "react-icons/fa"; @@ -136,6 +137,13 @@ export const XIcon = ({ return ; }; +export const ThumbsUpIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ; +}; + // // COMPANY LOGOS // diff --git a/web/src/lib/hooks.ts b/web/src/lib/hooks.ts index 0dd0237d6ca..41c41f35a85 100644 --- a/web/src/lib/hooks.ts +++ b/web/src/lib/hooks.ts @@ -1,5 +1,5 @@ -import { Credential } from "@/lib/types"; -import useSWR, { useSWRConfig } from "swr"; +import { Credential, DocumentBoostStatus } from "@/lib/types"; +import useSWR, { mutate, useSWRConfig } from "swr"; import { fetcher } from "./fetcher"; const CREDENTIAL_URL = "/api/manage/admin/credential"; @@ -13,3 +13,21 @@ export const usePublicCredentials = () => { refreshCredentials: () => mutate(CREDENTIAL_URL), }; }; + +const MOST_REACTED_DOCS_URL = "/api/manage/doc-boosts"; + +const buildReactedDocsUrl = (ascending: boolean, limit: number) => { + return `/api/manage/admin/doc-boosts?ascending=${ascending}&limit=${limit}`; +}; + +export const useMostReactedToDocuments = () => { + const swrResponse = useSWR( + buildReactedDocsUrl(true, 100), + fetcher + ); + + return { + ...swrResponse, + refreshDocs: () => mutate(MOST_REACTED_DOCS_URL), + }; +}; diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 86dd7c79224..b357e8ff60b 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -29,6 +29,14 @@ export type ValidStatuses = | "in_progress" | "not_started"; +export interface DocumentBoostStatus { + document_id: string; + semantic_id: string; + link: string; + boost: number; + hidden: boolean; +} + // CONNECTORS export interface ConnectorBase { name: string; From 64d2fe7e62aca43d4549708f7881fa61e0505821 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 19:16:02 -0700 Subject: [PATCH 09/14] test again --- backend/danswer/background/update.py | 12 ---------- backend/danswer/configs/constants.py | 1 + .../danswer/datastores/indexing_pipeline.py | 18 ++++++++++++-- backend/danswer/datastores/interfaces.py | 2 ++ backend/danswer/db/document.py | 24 +++++++++++++++---- backend/danswer/db/models.py | 3 ++- 6 files changed, 40 insertions(+), 20 deletions(-) diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 7b3b6d6f9f5..ba310232a2a 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -248,18 +248,6 @@ def _index( f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}" ) - # Save in Postgres before indexing - for doc in doc_batch: - first_link = next( - (section.link for section in doc.sections if section.link), "" - ) - create_document_metadata( - doc_id=doc.id, - semantic_id=doc.semantic_identifier, - link=first_link, - db_session=db_session, - ) - index_user_id = ( None if db_credential.public_doc else db_credential.user_id ) diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 0be93f7a686..7ce83b49d49 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -18,6 +18,7 @@ PUBLIC_DOC_PAT = "PUBLIC" QUOTE = "quote" BOOST = "boost" +DEFAULT_BOOST = 0 class DocumentSource(str, Enum): diff --git a/backend/danswer/datastores/indexing_pipeline.py b/backend/danswer/datastores/indexing_pipeline.py index 5bcd9d3f15d..7fe7a55d723 100644 --- a/backend/danswer/datastores/indexing_pipeline.py +++ b/backend/danswer/datastores/indexing_pipeline.py @@ -32,6 +32,7 @@ def __call__( def _upsert_insertion_records( insertion_records: set[DocumentInsertionRecord], index_attempt_metadata: IndexAttemptMetadata, + doc_m_data_lookup: dict[str, tuple[str, str]], ) -> None: with Session(get_sqlalchemy_engine()) as session: upsert_documents_complete( @@ -40,9 +41,11 @@ def _upsert_insertion_records( DocumentMetadata( connector_id=index_attempt_metadata.connector_id, credential_id=index_attempt_metadata.credential_id, - document_id=insertion_record.document_id, + document_id=i_r.document_id, + semantic_identifier=doc_m_data_lookup[i_r.document_id][0], + first_link=doc_m_data_lookup[i_r.document_id][1], ) - for insertion_record in insertion_records + for i_r in insertion_records ], ) @@ -62,6 +65,11 @@ def _get_net_new_documents( return net_new_documents +def _extract_minimal_document_metadata(doc: Document) -> tuple[str, str]: + first_link = next((section.link for section in doc.sections if section.link), "") + return doc.semantic_identifier, first_link + + def _indexing_pipeline( *, chunker: Chunker, @@ -73,6 +81,11 @@ def _indexing_pipeline( """Takes different pieces of the indexing pipeline and applies it to a batch of documents Note that the documents should already be batched at this point so that it does not inflate the memory requirements""" + + document_metadata_lookup = { + doc.id: _extract_minimal_document_metadata(doc) for doc in documents + } + chunks: list[DocAwareChunk] = list( chain(*[chunker.chunk(document=document) for document in documents]) ) @@ -92,6 +105,7 @@ def _indexing_pipeline( _upsert_insertion_records( insertion_records=insertion_records, index_attempt_metadata=index_attempt_metadata, + doc_m_data_lookup=document_metadata_lookup, ) except Exception as e: logger.error( diff --git a/backend/danswer/datastores/interfaces.py b/backend/danswer/datastores/interfaces.py index f8a5f935c78..8e36f5355ea 100644 --- a/backend/danswer/datastores/interfaces.py +++ b/backend/danswer/datastores/interfaces.py @@ -22,6 +22,8 @@ class DocumentMetadata: connector_id: int credential_id: int document_id: str + semantic_identifier: str + first_link: str @dataclass diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index bf4d237d515..eab0db81459 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -7,6 +7,7 @@ from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session +from danswer.configs.constants import DEFAULT_BOOST from danswer.datastores.interfaces import DocumentMetadata from danswer.db.models import Document as DbDocument from danswer.db.models import DocumentByConnectorCredentialPair @@ -81,13 +82,25 @@ def upsert_documents( db_session: Session, document_metadata_batch: list[DocumentMetadata] ) -> None: """NOTE: this function is Postgres specific. Not all DBs support the ON CONFLICT clause.""" - seen_document_ids: set[str] = set() + seen_documents: dict[str, DocumentMetadata] = {} for document_metadata in document_metadata_batch: - if document_metadata.document_id not in seen_document_ids: - seen_document_ids.add(document_metadata.document_id) + doc_id = document_metadata.document_id + if doc_id not in seen_documents: + seen_documents[doc_id] = document_metadata insert_stmt = insert(DbDocument).values( - [model_to_dict(DbDocument(id=doc_id)) for doc_id in seen_document_ids] + [ + model_to_dict( + DbDocument( + id=doc.document_id, + boost=DEFAULT_BOOST, + hidden=False, + semantic_id=doc.semantic_identifier, + link=doc.first_link, + ) + ) + for doc in seen_documents.values() + ] ) # for now, there are no columns to update. If more metadata is added, then this # needs to change to an `on_conflict_do_update` @@ -120,7 +133,8 @@ def upsert_document_by_connector_credential_pair( def upsert_documents_complete( - db_session: Session, document_metadata_batch: list[DocumentMetadata] + db_session: Session, + document_metadata_batch: list[DocumentMetadata], ) -> None: upsert_documents(db_session, document_metadata_batch) upsert_document_by_connector_credential_pair(db_session, document_metadata_batch) diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 2efdba540c6..7d845b5d786 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -23,6 +23,7 @@ from sqlalchemy.orm import relationship from danswer.auth.schemas import UserRole +from danswer.configs.constants import DEFAULT_BOOST from danswer.configs.constants import DocumentSource from danswer.configs.constants import QAFeedbackType from danswer.configs.constants import SearchFeedbackType @@ -343,7 +344,7 @@ class Document(Base): # in Danswer) id: Mapped[str] = mapped_column(String, primary_key=True) # 0 for neutral, positive for mostly endorse, negative for mostly reject - boost: Mapped[int] = mapped_column(Integer, default=0) + boost: Mapped[int] = mapped_column(Integer, default=DEFAULT_BOOST) hidden: Mapped[bool] = mapped_column(Boolean, default=False) semantic_id: Mapped[str] = mapped_column(String) # First Section's link From ff281da770bafd0a9d0181a84f24870407daa959 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 20:10:47 -0700 Subject: [PATCH 10/14] fix updates vespa --- backend/danswer/datastores/vespa/store.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/danswer/datastores/vespa/store.py b/backend/danswer/datastores/vespa/store.py index 464578233af..8ddc9d839eb 100644 --- a/backend/danswer/datastores/vespa/store.py +++ b/backend/danswer/datastores/vespa/store.py @@ -342,16 +342,20 @@ def update(self, update_requests: list[UpdateRequest]) -> None: logger.error("Update request received but nothing to update") continue - update_dict: dict[str, dict[str, list[str] | int | float]] = {"fields": {}} + update_dict: dict[str, dict] = {"fields": {}} if update_request.boost: - update_dict["fields"][BOOST] = update_request.boost + update_dict["fields"][BOOST] = {"assign": update_request.boost} if update_request.allowed_users: - update_dict["fields"][ALLOWED_USERS] = update_request.allowed_users + update_dict["fields"][ALLOWED_USERS] = { + "assign": update_request.allowed_users + } for document_id in update_request.document_ids: for doc_chunk_id in _get_vespa_chunk_ids_by_document_id(document_id): url = f"{DOCUMENT_ID_ENDPOINT}/{doc_chunk_id}" - res = requests.put(url, headers=json_header, json=update_dict) + res = requests.put( + url, headers=json_header, data=json.dumps(update_dict) + ) try: res.raise_for_status() From c9d593a9c89124b90fca7f70c6c54a55f71ebd1b Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 20:20:48 -0700 Subject: [PATCH 11/14] fixed issues --- backend/danswer/db/feedback.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index 92c1534cba5..adc00546ecf 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -123,7 +123,7 @@ def create_doc_retrieval_feedback( doc_m = fetch_doc_m_by_id(document_id, db_session) - DocumentRetrievalFeedback( + retrieval_feedback = DocumentRetrievalFeedback( qa_event_id=qa_event_id, document_id=document_id, document_rank=document_rank, @@ -152,4 +152,5 @@ def create_doc_retrieval_feedback( # Updates are generally batched for efficiency, this case only 1 doc/value is updated document_index.update([update]) + db_session.add(retrieval_feedback) db_session.commit() From 7b7d9a0f13f9d2f18fdc12b56912a0734f873363 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 27 Aug 2023 20:27:14 -0700 Subject: [PATCH 12/14] fix search type logging --- backend/danswer/server/search_backend.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/danswer/server/search_backend.py b/backend/danswer/server/search_backend.py index 37940991b4e..49709598028 100644 --- a/backend/danswer/server/search_backend.py +++ b/backend/danswer/server/search_backend.py @@ -20,8 +20,8 @@ from danswer.direct_qa.answer_question import answer_qa_query from danswer.direct_qa.exceptions import OpenAIKeyMissing from danswer.direct_qa.exceptions import UnknownModelError -from danswer.direct_qa.llm_utils import get_default_qa_model from danswer.direct_qa.interfaces import DanswerAnswerPiece +from danswer.direct_qa.llm_utils import get_default_qa_model from danswer.search.danswer_helper import query_intent from danswer.search.danswer_helper import recommend_search_flow from danswer.search.keyword_search import retrieve_keyword_documents @@ -68,7 +68,7 @@ def semantic_search( query_event_id = create_query_event( query=query, - selected_flow=SearchType.KEYWORD, + selected_flow=SearchType.SEMANTIC, llm_answer=None, user_id=user.id, db_session=db_session, @@ -249,7 +249,9 @@ def stream_qa_portions( query_event_id = create_query_event( query=query, - selected_flow=SearchType.KEYWORD if use_keyword else SearchType.SEMANTIC, + selected_flow=SearchType.KEYWORD + if question.use_keyword + else SearchType.SEMANTIC, llm_answer=answer_so_far, user_id=user_id, db_session=db_session, From 6f0068b5472922814a370e6d8ab73c1433e128d6 Mon Sep 17 00:00:00 2001 From: Weves Date: Mon, 28 Aug 2023 13:00:13 -0700 Subject: [PATCH 13/14] Remove FE changes --- web/src/app/admin/documents/feedback/page.tsx | 71 ------------------- web/src/app/admin/layout.tsx | 15 ---- web/src/components/icons/icons.tsx | 8 --- web/src/lib/hooks.ts | 22 +----- web/src/lib/types.ts | 8 --- 5 files changed, 2 insertions(+), 122 deletions(-) delete mode 100644 web/src/app/admin/documents/feedback/page.tsx diff --git a/web/src/app/admin/documents/feedback/page.tsx b/web/src/app/admin/documents/feedback/page.tsx deleted file mode 100644 index e17503c245f..00000000000 --- a/web/src/app/admin/documents/feedback/page.tsx +++ /dev/null @@ -1,71 +0,0 @@ -"use client"; - -import { Button } from "@/components/Button"; -import { LoadingAnimation } from "@/components/Loading"; -import { BasicTable } from "@/components/admin/connectors/BasicTable"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ThumbsUpIcon, UsersIcon } from "@/components/icons/icons"; -import { fetcher } from "@/lib/fetcher"; -import { useMostReactedToDocuments } from "@/lib/hooks"; -import { User } from "@/lib/types"; -import useSWR, { mutate } from "swr"; - -const columns = [ - { - header: "Document Name", - key: "name", - }, - { - header: "Boost", - key: "boost", - }, - { - header: "Promote", - key: "promote", - }, -]; - -const DocumentFeedbackTable = () => { - const { popup, setPopup } = usePopup(); - - const { data, isLoading, error, refreshDocs } = useMostReactedToDocuments(); - - if (isLoading) { - return ; - } - - if (error || !data) { - return
Error loading users
; - } - - return ( -
- {popup} - { - return { - name: documentBoostStatus.semantic_id, - boost: documentBoostStatus.boost, - promote: "hi" - }; - })} - /> -
- ); -}; - -const Page = () => { - return ( -
-
- -

Document Feedback

-
- - -
- ); -}; - -export default Page; diff --git a/web/src/app/admin/layout.tsx b/web/src/app/admin/layout.tsx index 7c9edba2d30..761b3dd04f4 100644 --- a/web/src/app/admin/layout.tsx +++ b/web/src/app/admin/layout.tsx @@ -18,7 +18,6 @@ import { ProductboardIcon, LinearIcon, UsersIcon, - ThumbsUpIcon, } from "@/components/icons/icons"; import { DISABLE_AUTH } from "@/lib/constants"; import { getCurrentUserSS } from "@/lib/userSS"; @@ -220,20 +219,6 @@ export default async function AdminLayout({ }, ], }, - { - name: "Document Management", - items: [ - { - name: ( -
- -
Feedback
-
- ), - link: "/admin/documents/feedback", - }, - ], - }, ]} />
diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index c068d0d2a11..60ff4eeb9ff 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -14,7 +14,6 @@ import { X, Question, Users, - ThumbsUp, } from "@phosphor-icons/react"; import { SiBookstack } from "react-icons/si"; import { FaFile, FaGlobe } from "react-icons/fa"; @@ -137,13 +136,6 @@ export const XIcon = ({ return ; }; -export const ThumbsUpIcon = ({ - size = 16, - className = defaultTailwindCSS, -}: IconProps) => { - return ; -}; - // // COMPANY LOGOS // diff --git a/web/src/lib/hooks.ts b/web/src/lib/hooks.ts index 41c41f35a85..0dd0237d6ca 100644 --- a/web/src/lib/hooks.ts +++ b/web/src/lib/hooks.ts @@ -1,5 +1,5 @@ -import { Credential, DocumentBoostStatus } from "@/lib/types"; -import useSWR, { mutate, useSWRConfig } from "swr"; +import { Credential } from "@/lib/types"; +import useSWR, { useSWRConfig } from "swr"; import { fetcher } from "./fetcher"; const CREDENTIAL_URL = "/api/manage/admin/credential"; @@ -13,21 +13,3 @@ export const usePublicCredentials = () => { refreshCredentials: () => mutate(CREDENTIAL_URL), }; }; - -const MOST_REACTED_DOCS_URL = "/api/manage/doc-boosts"; - -const buildReactedDocsUrl = (ascending: boolean, limit: number) => { - return `/api/manage/admin/doc-boosts?ascending=${ascending}&limit=${limit}`; -}; - -export const useMostReactedToDocuments = () => { - const swrResponse = useSWR( - buildReactedDocsUrl(true, 100), - fetcher - ); - - return { - ...swrResponse, - refreshDocs: () => mutate(MOST_REACTED_DOCS_URL), - }; -}; diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index b357e8ff60b..86dd7c79224 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -29,14 +29,6 @@ export type ValidStatuses = | "in_progress" | "not_started"; -export interface DocumentBoostStatus { - document_id: string; - semantic_id: string; - link: string; - boost: number; - hidden: boolean; -} - // CONNECTORS export interface ConnectorBase { name: string; From d37e8ce60b5dade9c1ea6b0b2b5e5c4c9b7b17a2 Mon Sep 17 00:00:00 2001 From: Weves Date: Mon, 28 Aug 2023 13:21:31 -0700 Subject: [PATCH 14/14] Fix startup exception + mypy --- backend/danswer/background/connector_deletion.py | 2 +- backend/danswer/llm/azure.py | 6 ++++++ backend/danswer/llm/openai.py | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/backend/danswer/background/connector_deletion.py b/backend/danswer/background/connector_deletion.py index f3b27c2c191..e5d0c6d2eea 100644 --- a/backend/danswer/background/connector_deletion.py +++ b/backend/danswer/background/connector_deletion.py @@ -90,7 +90,7 @@ def _update_multi_indexed_docs() -> None: def _get_user( credential: Credential, ) -> str: - if credential.public_doc: + if credential.public_doc or not credential.user: return PUBLIC_DOC_PAT return str(credential.user.id) diff --git a/backend/danswer/llm/azure.py b/backend/danswer/llm/azure.py index 49a91afacf6..cce164466eb 100644 --- a/backend/danswer/llm/azure.py +++ b/backend/danswer/llm/azure.py @@ -1,3 +1,4 @@ +import os from typing import Any from langchain.chat_models.azure_openai import AzureChatOpenAI @@ -22,6 +23,11 @@ def __init__( *args: list[Any], **kwargs: dict[str, Any] ): + # set a dummy API key if not specified so that LangChain doesn't throw an + # exception when trying to initialize the LLM which would prevent the API + # server from starting up + if not api_key: + api_key = os.environ.get("OPENAI_API_KEY") or "dummy_api_key" self._llm = AzureChatOpenAI( model=model_version, openai_api_type="azure", diff --git a/backend/danswer/llm/openai.py b/backend/danswer/llm/openai.py index 4aa9274a0bc..891e5258650 100644 --- a/backend/danswer/llm/openai.py +++ b/backend/danswer/llm/openai.py @@ -1,3 +1,4 @@ +import os from typing import Any from langchain.chat_models.openai import ChatOpenAI @@ -16,6 +17,11 @@ def __init__( *args: list[Any], **kwargs: dict[str, Any] ): + # set a dummy API key if not specified so that LangChain doesn't throw an + # exception when trying to initialize the LLM which would prevent the API + # server from starting up + if not api_key: + api_key = os.environ.get("OPENAI_API_KEY") or "dummy_api_key" self._llm = ChatOpenAI( model=model_version, openai_api_key=api_key,