From bada2d6d68f3e61eb4ab9a67ed0998e05c55a0b5 Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Danswer)" Date: Wed, 9 Oct 2024 13:11:07 -0700 Subject: [PATCH 1/3] check last_pruned instead of is_pruning --- backend/danswer/server/documents/cc_pair.py | 12 ++++----- .../common_utils/managers/cc_pair.py | 26 ++++++++++++++----- .../connector_job_tests/slack/test_prune.py | 3 ++- .../integration/tests/pruning/test_pruning.py | 3 ++- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/backend/danswer/server/documents/cc_pair.py b/backend/danswer/server/documents/cc_pair.py index ce3131d050f..fadd97d02c0 100644 --- a/backend/danswer/server/documents/cc_pair.py +++ b/backend/danswer/server/documents/cc_pair.py @@ -1,4 +1,5 @@ import math +from datetime import datetime from http import HTTPStatus from fastapi import APIRouter @@ -201,12 +202,12 @@ def update_cc_pair_name( raise HTTPException(status_code=400, detail="Name must be unique") -@router.get("/admin/cc-pair/{cc_pair_id}/prune") -def get_cc_pair_latest_prune( +@router.get("/admin/cc-pair/{cc_pair_id}/last_pruned") +def get_cc_pair_last_pruned( cc_pair_id: int, user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), -) -> bool: +) -> datetime | None: cc_pair = get_connector_credential_pair_from_id( cc_pair_id=cc_pair_id, db_session=db_session, @@ -216,11 +217,10 @@ def get_cc_pair_latest_prune( if not cc_pair: raise HTTPException( status_code=400, - detail="Connection not found for current user's permissions", + detail="cc_pair not found for current user's permissions", ) - rcp = RedisConnectorPruning(cc_pair.id) - return rcp.is_pruning(db_session, get_redis_client()) + return cc_pair.last_pruned @router.post("/admin/cc-pair/{cc_pair_id}/prune") diff --git a/backend/tests/integration/common_utils/managers/cc_pair.py b/backend/tests/integration/common_utils/managers/cc_pair.py index 48ae805cbb9..46780acd52d 100644 --- a/backend/tests/integration/common_utils/managers/cc_pair.py +++ b/backend/tests/integration/common_utils/managers/cc_pair.py @@ -274,31 +274,43 @@ def prune( result.raise_for_status() @staticmethod - def is_pruning( + def last_pruned( cc_pair: DATestCCPair, user_performing_action: DATestUser | None = None, - ) -> bool: + ) -> datetime | None: response = requests.get( - url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/prune", + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/last_pruned", headers=user_performing_action.headers if user_performing_action else GENERAL_HEADERS, ) response.raise_for_status() - response_bool = response.json() - return response_bool + response_str = response.json() + + # If the response itself is a datetime string, parse it + if not isinstance(response_str, str): + return None + + try: + return datetime.fromisoformat(response_str) + except ValueError: + # Handle invalid datetime format + pass + + return None @staticmethod def wait_for_prune( cc_pair: DATestCCPair, + after: datetime, timeout: float = MAX_DELAY, user_performing_action: DATestUser | None = None, ) -> None: """after: The task register time must be after this time.""" start = time.monotonic() while True: - result = CCPairManager.is_pruning(cc_pair, user_performing_action) - if not result: + last_pruned = CCPairManager.last_pruned(cc_pair, user_performing_action) + if last_pruned and last_pruned > after: break elapsed = time.monotonic() - start diff --git a/backend/tests/integration/connector_job_tests/slack/test_prune.py b/backend/tests/integration/connector_job_tests/slack/test_prune.py index 96638417c4d..12cf7da7a97 100644 --- a/backend/tests/integration/connector_job_tests/slack/test_prune.py +++ b/backend/tests/integration/connector_job_tests/slack/test_prune.py @@ -195,8 +195,9 @@ def test_slack_prune( ) # Prune the cc_pair + now = datetime.now(timezone.utc) CCPairManager.prune(cc_pair, user_performing_action=admin_user) - CCPairManager.wait_for_prune(cc_pair, user_performing_action=admin_user) + CCPairManager.wait_for_prune(cc_pair, now, user_performing_action=admin_user) # ----------------------------VERIFY THE CHANGES--------------------------- # Ensure admin user can't see deleted messages diff --git a/backend/tests/integration/tests/pruning/test_pruning.py b/backend/tests/integration/tests/pruning/test_pruning.py index 4c4e82cdfb0..f74b1764a4b 100644 --- a/backend/tests/integration/tests/pruning/test_pruning.py +++ b/backend/tests/integration/tests/pruning/test_pruning.py @@ -105,9 +105,10 @@ def test_web_pruning(reset: None, vespa_client: vespa_fixture) -> None: logger.info("Removing courses.html.") os.remove(os.path.join(website_tgt, "courses.html")) + now = datetime.now(timezone.utc) CCPairManager.prune(cc_pair_1, user_performing_action=admin_user) CCPairManager.wait_for_prune( - cc_pair_1, timeout=60, user_performing_action=admin_user + cc_pair_1, now, timeout=60, user_performing_action=admin_user ) selected_cc_pair = CCPairManager.get_one( From d358dd1718522c90f5646072946713181a9bc55d Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Danswer)" Date: Wed, 9 Oct 2024 13:45:46 -0700 Subject: [PATCH 2/3] try using the ThreadingHTTPServer class for stability and avoiding blocking single-threaded behavior --- backend/tests/integration/tests/pruning/test_pruning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/tests/integration/tests/pruning/test_pruning.py b/backend/tests/integration/tests/pruning/test_pruning.py index f74b1764a4b..5f985199bf1 100644 --- a/backend/tests/integration/tests/pruning/test_pruning.py +++ b/backend/tests/integration/tests/pruning/test_pruning.py @@ -24,7 +24,7 @@ @contextmanager def http_server_context( directory: str, port: int = 8000 -) -> Generator[http.server.HTTPServer, None, None]: +) -> Generator[http.server.ThreadingHTTPServer, None, None]: # Create a handler that serves files from the specified directory def handler_class( *args: Any, **kwargs: Any @@ -34,7 +34,7 @@ def handler_class( ) # Create an HTTPServer instance - httpd = http.server.HTTPServer(("0.0.0.0", port), handler_class) + httpd = http.server.ThreadingHTTPServer(("0.0.0.0", port), handler_class) # Define a thread that runs the server in the background server_thread = threading.Thread(target=httpd.serve_forever) From 9fae8f5774f673406bd703257066636bad7f5903 Mon Sep 17 00:00:00 2001 From: "Richard Kuo (Danswer)" Date: Thu, 10 Oct 2024 12:43:29 -0700 Subject: [PATCH 3/3] add startup delay to web server in test --- backend/tests/integration/tests/pruning/test_pruning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/tests/integration/tests/pruning/test_pruning.py b/backend/tests/integration/tests/pruning/test_pruning.py index 5f985199bf1..d7d6b8ae692 100644 --- a/backend/tests/integration/tests/pruning/test_pruning.py +++ b/backend/tests/integration/tests/pruning/test_pruning.py @@ -45,6 +45,7 @@ def handler_class( try: # Start the server in the background server_thread.start() + sleep(5) # give it a few seconds to start yield httpd finally: # Shutdown the server and wait for the thread to finish