[DEPRIORITIZED][AAQ-765] Retry LLM generation when AlignScore fails #399

Open
wants to merge 15 commits into main
1 change: 1 addition & 0 deletions core_backend/app/config.py
@@ -70,6 +70,7 @@
ALIGN_SCORE_METHOD = os.environ.get("ALIGN_SCORE_METHOD", "LLM")
# if AlignScore, set ALIGN_SCORE_API. If LLM, set LITELLM_MODEL_ALIGNSCORE above.
ALIGN_SCORE_API = os.environ.get("ALIGN_SCORE_API", "")
ALIGN_SCORE_N_RETRIES = os.environ.get("ALIGN_SCORE_N_RETRIES", 1)

# Backend paths
BACKEND_ROOT_PATH = os.environ.get("BACKEND_ROOT_PATH", "")
6 changes: 6 additions & 0 deletions core_backend/app/llm_call/llm_prompts.py
@@ -177,6 +177,11 @@ def get_prompt(cls) -> str:
You are a helpful question-answering AI. You understand user question and answer their \
question using the REFERENCE TEXT below.
"""
RETRY_PROMPT_SUFFIX = """
If the response above is not aligned with the question, please rectify this by \
considering the following reason(s) for misalignment: "{failure_reason}".
Make necessary adjustments to ensure the answer is aligned with the question.
"""

Contributor Author:
Added a suffix to the prompt to incorporate the failure reason.
Comment on lines +180 to +184
Collaborator:
Right now, we are only passing failure_reason, which is response.debug_info["factual_consistency"]["reason"], but we should also include the LLM response in this prompt.

Collaborator:
Also, shouldn't the prompt define what we mean by alignment?

Contributor Author:
That makes sense. To be honest, I was just having a go at updating the prompt to take the output into consideration. I am not exactly an expert in prompt engineering. Should we discuss that in a tech session?

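A rough sketch of how the suffix could look if it also carried the previous answer and spelled out what alignment means (the previous_response placeholder and the wording below are illustrative assumptions, not part of this PR):

    RETRY_PROMPT_SUFFIX = """
    Your previous answer was: "{previous_response}".
    It was flagged as not aligned for the following reason(s): "{failure_reason}".
    An answer is aligned when it directly addresses the question and every claim in it \
    is supported by the REFERENCE TEXT. Revise your answer accordingly.
    """
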
RAG_RESPONSE_PROMPT = (
_RAG_PROFILE_PROMPT
+ """
@@ -224,6 +229,7 @@ class RAG(BaseModel):
answer: str

prompt: ClassVar[str] = RAG_RESPONSE_PROMPT
retry_prompt: ClassVar[str] = RAG_RESPONSE_PROMPT + RETRY_PROMPT_SUFFIX


class AlignmentScore(BaseModel):
9 changes: 8 additions & 1 deletion core_backend/app/llm_call/llm_rag.py
@@ -37,7 +37,14 @@ async def get_llm_rag_answer(
"""

    metadata = metadata or {}
    prompt = RAG.prompt.format(context=context, original_language=original_language)
    if "failure_reason" in metadata and metadata["failure_reason"]:
Collaborator:
How about we create a new arg, "retry=False"?

Collaborator:
The downside is:

  1. We would have to create it for all the parent functions, and
  2. We need both is_retry and metadata["failure_reason"] to actually do the retry.

But I think it would be easier to understand the code, and we won't be hiding any unexpected actions! What do you think?

Something like

    if is_retry:
        if "failure_reason" not in metadata:
            raise ValueError("failure_reason is required for retry requests")
        
        prompt = RAG.retry_prompt.format(
            context=context,
            original_language=original_language,
            failure_reason=metadata["failure_reason"],
        )

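If that route were taken, the flag would presumably also be threaded through the signature, along these lines (a hypothetical sketch; the type annotations are simplified assumptions and is_retry is not part of this PR):

    async def get_llm_rag_answer(
        question: str,
        context: str,
        original_language: str,
        metadata: dict | None = None,
        is_retry: bool = False,  # hypothetical explicit flag instead of checking metadata
    ) -> RAG:
        ...

Each caller on the retry path (retry_search, get_generation_response, and presumably generate_llm_query_response) would then pass is_retry=True explicitly, which is the fan-out mentioned in point 1 above.
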
Contributor Author:
My initial understanding was that we are using this to try out the functionality. What if we keep it like this while testing, and if it turns out to be something we want to keep, then we will explicitly set it as a feature by adding the is_retry parameter. What do you think?

        prompt = RAG.retry_prompt.format(
            context=context,
            original_language=original_language,
            failure_reason=metadata["failure_reason"],
        )
    else:
        prompt = RAG.prompt.format(context=context, original_language=original_language)

    result = await _ask_llm_async(
        user_message=question,
6 changes: 5 additions & 1 deletion core_backend/app/llm_call/process_output.py
@@ -56,7 +56,11 @@ async def generate_llm_query_response(
    Only runs if the generate_llm_response flag is set to True.
    Requires "search_results" and "original_language" in the response.
    """
    if isinstance(response, QueryResponseError):
    if (
        isinstance(response, QueryResponseError)
        and metadata
        and not metadata["failure_reason"]
    ):
        return response

    if response.search_results is None:
46 changes: 43 additions & 3 deletions core_backend/app/question_answer/routers.py
@@ -6,13 +6,14 @@
from io import BytesIO
from typing import Tuple

import backoff
from fastapi import APIRouter, Depends, File, UploadFile, status
from fastapi.responses import JSONResponse
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession

from ..auth.dependencies import authenticate_key, rate_limiter
from ..config import CUSTOM_SPEECH_ENDPOINT, GCS_SPEECH_BUCKET
from ..config import ALIGN_SCORE_N_RETRIES, CUSTOM_SPEECH_ENDPOINT, GCS_SPEECH_BUCKET
from ..contents.models import (
get_similar_content_async,
increment_query_count,
@@ -50,6 +51,7 @@
)
from .schemas import (
ContentFeedback,
ErrorType,
QueryAudioResponse,
QueryBase,
QueryRefined,
@@ -123,6 +125,12 @@ async def search(
query_refined=user_query_refined_template,
response=response,
)
if is_unable_to_generate_response(response):
failure_reason = response.debug_info["factual_consistency"]
response = await retry_search(
query_refined=user_query_refined_template, response=response
)
response.debug_info["past_failure"] = failure_reason

await save_query_response_to_db(user_query_db, response, asession)
await increment_query_count(
@@ -228,7 +236,6 @@ async def voice_search(
asession=asession,
exclude_archived=True,
)

if user_query.generate_llm_response:
response = await get_generation_response(
query_refined=user_query_refined_template,
@@ -322,6 +329,36 @@ async def get_search_response(
return response


def is_unable_to_generate_response(response: QueryResponse) -> bool:
    """
    Check if the response is of type QueryResponseError and caused
    by low alignment score.
    """
    return (
        isinstance(response, QueryResponseError)
        and response.error_type == ErrorType.ALIGNMENT_TOO_LOW
    )

Contributor Author:
Added this function so that we retry only if that condition is met.


@backoff.on_predicate(
backoff.expo,
Contributor Author:
What backoff.expo does is basically wait a little longer every time the function is rerun, with the wait growing exponentially, just to handle the load better.

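For reference, a standalone toy example of that mechanism (not code from this PR): backoff.on_predicate re-invokes the wrapped function while the predicate is true for its return value, and backoff.expo makes the wait between attempts grow exponentially (with jitter applied by default).

    import itertools

    import backoff

    _calls = itertools.count(1)

    @backoff.on_predicate(backoff.expo, lambda result: result is None, max_tries=3)
    def flaky_lookup() -> str | None:
        """Return None on the first two calls, then succeed."""
        return None if next(_calls) < 3 else "ok"

    print(flaky_lookup())  # retried with short, exponentially growing waits; prints "ok"
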
Collaborator:
Should we just have logic that retries once, instead of adding a config (number of retries) we don't know if we'll use 🤔?

Contributor Author:
I guess it depends on how useful the approach is, since we haven't done any analysis to see how well it works.
But personally I think that since it doesn't add a new dependency (backoff is already used by litellm), and since the only code we would change if we retried just once is the decorator and the config variable, the cost is pretty low, so we can just keep it.

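For comparison, the retry-once variant being discussed would mostly just change the decorator (a hypothetical sketch, not what this PR implements):

    # Hypothetical retry-once variant: hard-code max_tries and drop the
    # ALIGN_SCORE_N_RETRIES config entirely.
    @backoff.on_predicate(
        backoff.expo,
        max_tries=2,  # the original attempt plus exactly one retry
        predicate=is_unable_to_generate_response,
    )
    async def retry_search_once(
        query_refined: QueryRefined,
        response: QueryResponse | QueryResponseError,
    ) -> QueryResponse | QueryResponseError:
        ...  # body identical to retry_search below
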
max_tries=int(ALIGN_SCORE_N_RETRIES),
predicate=is_unable_to_generate_response,
)
async def retry_search(
    query_refined: QueryRefined,
    response: QueryResponse | QueryResponseError,
) -> QueryResponse | QueryResponseError:
    """
    Retry wrapper for get_generation_response.
    """

    metadata = query_refined.query_metadata
    metadata["failure_reason"] = response.debug_info["factual_consistency"]["reason"]
    query_refined.query_metadata = metadata
    return await get_generation_response(query_refined, response)


@generate_tts__after
@check_align_score__after
async def get_generation_response(
@@ -341,10 +378,13 @@ async def get_generation_response(
query_id=response.query_id, user_id=query_refined.user_id
)

metadata["failure_reason"] = query_refined.query_metadata.get(
"failure_reason", None
)

response = await generate_llm_query_response(
query_refined=query_refined, response=response, metadata=metadata
)

return response


1 change: 1 addition & 0 deletions core_backend/requirements.txt
@@ -18,6 +18,7 @@ pandas-stubs==2.2.2.240603
types-openpyxl==3.1.4.20240621
redis==5.0.8
python-dateutil==2.8.2
backoff==2.2.1
google-cloud-storage==2.18.2
google-cloud-texttospeech==2.16.5
google-cloud-speech==2.27.0