Audit confident edge predictions #156

Status: Open. Wants to merge 5 commits into base branch `main`.
4 changes: 4 additions & 0 deletions CONFIGURING-DETECTORS.md
@@ -26,6 +26,10 @@ The global config contains parameters that affect the overall behavior of the edge endpoint
 
 `refresh_rate` is a float that defines how often the edge endpoint will attempt to fetch updated ML models (in seconds). If you expect a detector to frequently have a better model available, you can reduce this to ensure that the improved models will quickly be fetched and deployed. For example, you may want to label many image queries on a new detector. A higher refresh rate will ensure that the latest model improvements from these labels are promptly deployed to the edge. In practice, you likely won't want this to be lower than ~30 seconds due to the time it takes to train and fetch new models. If not specified, the default is 60 seconds.
+
+#### `confident_audit_rate`
+
+`confident_audit_rate` is a float that defines the probability that any given confident prediction will be escalated to the cloud for auditing. This enables the accuracy of the edge model to be evaluated in the cloud even when it answers queries confidently. If a detector is configured to have cloud escalation disabled, this parameter will be ignored. If not specified, the default value is 0.001 (meaning there is a 0.1% chance that a confident prediction will be audited).
 
 ### `edge_inference_configs`
 
 Edge inference configs are 'templates' that define the behavior of a detector on the edge. Each detector you configure will be assigned one of these templates. There are some predefined configs that represent the main ways you might want to configure a detector. However, you can edit these and also create your own as you wish.
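As a rough back-of-the-envelope illustration (not part of the PR; the helper function and example rates below are our own), audit volume scales linearly with query throughput:

```python
# Illustrative sketch only. Assumes every query is answered confidently,
# which is the worst case for audit volume.
def expected_audits_per_hour(queries_per_second: float, confident_audit_rate: float = 0.001) -> float:
    return queries_per_second * 3600 * confident_audit_rate

print(expected_audits_per_hour(0.1))  # 0.36 audits/hour at one query every 10 seconds
print(expected_audits_per_hour(5))    # 18 audits/hour at 5 FPS, i.e. one audit every ~3.3 minutes
```

The 5 FPS figure is worth keeping in mind for the review discussion on audit volume further down.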
35 changes: 30 additions & 5 deletions app/api/routes/image_queries.py
@@ -1,4 +1,5 @@
 import logging
+import random
 from typing import Literal, Optional
 
 from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request, status
@@ -145,7 +146,7 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912
         ml_confidence = results["confidence"]
 
         is_confident_enough = ml_confidence >= confidence_threshold
-        if return_edge_prediction or is_confident_enough: # return the edge prediction
+        if return_edge_prediction or is_confident_enough: # Return the edge prediction
             if return_edge_prediction:
                 logger.debug(f"Returning edge prediction without cloud escalation. {detector_id=}")
             else:
@@ -164,9 +165,34 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912
                 text=results["text"],
             )
 
-        # Escalate after returning edge prediction if escalation is enabled and we have low confidence
-        if not disable_cloud_escalation and not is_confident_enough:
-            # Only escalate if we haven't escalated on this detector too recently
+        # Skip cloud operations if escalation is disabled
+        if disable_cloud_escalation:
+            return image_query
+
+        if is_confident_enough: # Audit confident edge predictions at the specified rate
+            if random.random() < app_state.edge_config.global_config.confident_audit_rate:
+                logger.debug(
+                    f"Auditing confident edge prediction with confidence {ml_confidence} for detector {detector_id=}."
+                )
+                background_tasks.add_task(
+                    safe_call_sdk,
+                    gl.submit_image_query,
+                    detector=detector_id,
+                    image=image_bytes,
+                    wait=0,
+                    patience_time=patience_time,
+                    confidence_threshold=confidence_threshold,
+                    human_review="ALWAYS", # Require human review for audited queries so we can evaluate accuracy
+                    want_async=True,
+                    metadata={"is_edge_audit": True}, # Provide metadata to identify edge audits in the cloud
+                )
+
+            # Don't want to escalate to cloud again if we're already auditing the query
+            return image_query
+
+        # Escalate after returning edge prediction if escalation is enabled and we have low confidence.
+        if not is_confident_enough:
+            # Only escalate if we haven't escalated on this detector too recently.
             if app_state.edge_inference_manager.escalation_cooldown_complete(detector_id=detector_id):
                 logger.debug(
                     f"Escalating to cloud due to low confidence: {ml_confidence} < thresh={confidence_threshold}"
@@ -189,7 +215,6 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912
                 )
 
         return image_query
-
     # -- Edge-inference is not available --
     else:
         # Create an edge-inference deployment record, which may be used to spin up an edge-inference server.

Review thread on the `metadata={"is_edge_audit": True}` line:

Reviewer: On the cloud, audits get a lower review priority than escalations. How, if at all, is that handled here? When combined with the audit rate and the sheer number of audited images, this is a potential area of concern.

Member: Yeah, we do want to treat edge audits in the same way as cloud audits, which will likely take some backend work.

Contributor Author: With the current code, the 'audit' escalations to the cloud will be treated as normal escalations, so they won't have the reduced priority that cloud audits do. I tried to find a way for these escalations to be counted as audits, but I'm pretty sure there's no way to do it without making backend changes (and potentially SDK changes as well). Given that, I thought it could be worth introducing edge audits via this simple approach first, but we could go with the more complicated approach instead if that seems worthwhile.
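As a note on the gating logic above: the audit decision is one Bernoulli trial per confident prediction. A minimal sketch (ours, not code from the PR) that extracts the gate into a pure function, which would also make the sampling behavior easy to unit-test:

```python
import random

def should_audit(confident_audit_rate: float) -> bool:
    """Return True with probability `confident_audit_rate`."""
    return random.random() < confident_audit_rate

# Sanity check: the long-run audited fraction approaches the configured rate.
random.seed(0)
n = 1_000_000
print(sum(should_audit(0.001) for _ in range(n)) / n)  # ~0.001
```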
5 changes: 5 additions & 0 deletions app/core/configs.py
@@ -11,6 +11,10 @@ class GlobalConfig(BaseModel):
         default=60.0,
         description="The interval (in seconds) at which the inference server checks for a new model binary update.",
     )
+    confident_audit_rate: float = Field(
+        default=0.001,
+        description="The probability that any given confident prediction will be sent to the cloud for auditing.",
+    )
 
 
 class EdgeInferenceConfig(BaseModel):
@@ -84,6 +88,7 @@ def validate_inference_configs(self):
     {
         'global_config': {
             'refresh_rate': 60.0,
+            'confident_audit_rate': 0.001,
         },
         'edge_inference_configs': {
             'default': EdgeInferenceConfig(
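For reference, a standalone sketch (mirroring the model above, not the actual module) showing how the pydantic default behaves:

```python
from pydantic import BaseModel, Field

class GlobalConfig(BaseModel):
    refresh_rate: float = Field(default=60.0)
    confident_audit_rate: float = Field(default=0.001)

print(GlobalConfig())  # refresh_rate=60.0 confident_audit_rate=0.001 when the config omits them
print(GlobalConfig(confident_audit_rate=0.01).confident_audit_rate)  # 0.01 when overridden
```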
3 changes: 2 additions & 1 deletion configs/edge-config.yaml
@@ -1,7 +1,8 @@
 # For configuring detectors on the edge endpoint. See CONFIGURING-DETECTORS.md for more information.
 
 global_config: # These settings affect the overall behavior of the edge endpoint.
-  refresh_rate: 60 # How often to attempt to fetch updated ML models (in seconds). If not set, defaults to 60.
+  refresh_rate: 60 # How often to attempt to fetch updated ML models (in seconds). Defaults to 60.
+  confident_audit_rate: 0.001 # Probability that a confident prediction will be sent to cloud for auditing. Defaults to 0.001.
 
 edge_inference_configs: # These configs define detector-specific behavior and can be applied to detectors below.
   default: # Return the edge model's prediction if sufficiently confident; otherwise, escalate to the cloud.

Review thread on the `confident_audit_rate` line:

Reviewer: So, what's the standard FPS rate on an edge detector (binary)? If it's 5, then that's a human-required escalation for audit every 3.5 minutes or so, I think. Seems like too much.

Contributor Author: Definitely a good point! It's hard to say what the standard FPS rate is, since there's such a wide range of use cases, which makes it hard to pick a rate that works for all of them. Something approximating video processing (or a setup with multiple cameras going to a single detector) could definitely be 5 FPS or more. For high-FPS use cases the rate could be configured to be lower (though I don't think we could expect a customer to do that without direct guidance from us). A concern with lowering it too much is that it becomes much less useful for lower-FPS cases.

Member: We could instead audit on a timer, so our audit frequency is independent of FPS, but that's no longer random sampling. What do you think, Paulina?

Reviewer: I think that's fine.

Contributor Author (@CoreyEWood, Jan 6, 2025): I think the timer would add some additional complexity with our multiple-worker setup, because each worker would have its own timer unless we made the timer external to the processes somehow. That's possible, just maybe more complicated than we'd want.
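To make the timer idea from the thread concrete, here is a hedged sketch (ours, not proposed code; `MIN_AUDIT_INTERVAL_S` is a hypothetical knob) that bounds audit frequency independently of FPS while keeping random sampling. Note the per-process state, which is exactly the multiple-worker caveat raised above: with N worker processes, the effective cap becomes N audits per interval.

```python
import random
import time

MIN_AUDIT_INTERVAL_S = 60.0  # hypothetical knob, not an existing config option
_last_audit_time = 0.0  # per-process state; each worker process would keep its own copy

def should_audit(confident_audit_rate: float) -> bool:
    """Random sampling, capped at one audit per MIN_AUDIT_INTERVAL_S in this process."""
    global _last_audit_time
    now = time.monotonic()
    if now - _last_audit_time < MIN_AUDIT_INTERVAL_S:
        return False
    if random.random() < confident_audit_rate:
        _last_audit_time = now
        return True
    return False
```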