From 5d0904e08e42a245ecfcac90ecfe68cc9ad00b6c Mon Sep 17 00:00:00 2001 From: CoreyEWood Date: Fri, 27 Dec 2024 14:23:24 -0800 Subject: [PATCH 1/5] initial auditing --- app/api/routes/image_queries.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/app/api/routes/image_queries.py b/app/api/routes/image_queries.py index bcb007f1..234819d7 100644 --- a/app/api/routes/image_queries.py +++ b/app/api/routes/image_queries.py @@ -1,4 +1,5 @@ import logging +import random from typing import Literal, Optional from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request, status @@ -16,6 +17,8 @@ ) from app.core.utils import create_iq, safe_call_sdk +CONFIDENT_AUDIT_RATE = 0.1 # TODO move to edge-config.yaml + logger = logging.getLogger(__name__) router = APIRouter() @@ -164,8 +167,27 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912 text=results["text"], ) - # Escalate after returning edge prediction if escalation is enabled and we have low confidence - if not disable_cloud_escalation and not is_confident_enough: + if is_confident_enough: # Audit confident edge predictions at a low rate + if random.random() < CONFIDENT_AUDIT_RATE: + logger.info(f"Auditing confident edge prediction: {image_query=}") # TODO change to debug + background_tasks.add_task( + safe_call_sdk, + gl.submit_image_query, # This has to be submit_image_query in order to specify image_query_id + detector=detector_id, + image=image_bytes, + wait=0, + patience_time=patience_time, + confidence_threshold=confidence_threshold, + human_review="ALWAYS", # TODO do we want humans to always review it? + want_async=True, + image_query_id=image_query.id, # TODO do we want the IDs to match? + metadata=None, # TODO maybe we want metadata that it was an audited query? + ) + else: + logger.info("Not auditing confident edge prediction") + elif ( + not disable_cloud_escalation + ): # Escalate after returning edge prediction if escalation is enabled and we have low confidence # Only escalate if we haven't escalated on this detector too recently if app_state.edge_inference_manager.escalation_cooldown_complete(detector_id=detector_id): logger.debug( From a16fcb36e1f2989b67f6c8d85441d504e7fe8bc8 Mon Sep 17 00:00:00 2001 From: CoreyEWood Date: Fri, 3 Jan 2025 14:06:45 -0800 Subject: [PATCH 2/5] some reorganizing --- app/api/routes/image_queries.py | 35 ++++++++++++++++++--------------- app/core/configs.py | 5 +++++ configs/edge-config.yaml | 1 + 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/app/api/routes/image_queries.py b/app/api/routes/image_queries.py index 234819d7..50661084 100644 --- a/app/api/routes/image_queries.py +++ b/app/api/routes/image_queries.py @@ -17,8 +17,6 @@ ) from app.core.utils import create_iq, safe_call_sdk -CONFIDENT_AUDIT_RATE = 0.1 # TODO move to edge-config.yaml - logger = logging.getLogger(__name__) router = APIRouter() @@ -148,7 +146,7 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912 ml_confidence = results["confidence"] is_confident_enough = ml_confidence >= confidence_threshold - if return_edge_prediction or is_confident_enough: # return the edge prediction + if return_edge_prediction or is_confident_enough: # Return the edge prediction if return_edge_prediction: logger.debug(f"Returning edge prediction without cloud escalation. {detector_id=}") else: @@ -167,28 +165,34 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912 text=results["text"], ) - if is_confident_enough: # Audit confident edge predictions at a low rate - if random.random() < CONFIDENT_AUDIT_RATE: - logger.info(f"Auditing confident edge prediction: {image_query=}") # TODO change to debug + # Skip cloud operations if escalation is disabled + if disable_cloud_escalation: + return image_query + + if is_confident_enough: # Audit confident edge predictions at the specified rate + if random.random() < app_state.edge_config.global_config.confident_audit_rate: + logger.debug( + f"Auditing confident edge prediction with confidence {ml_confidence} for detector {detector_id=}." + ) background_tasks.add_task( safe_call_sdk, - gl.submit_image_query, # This has to be submit_image_query in order to specify image_query_id + gl.submit_image_query, detector=detector_id, image=image_bytes, wait=0, patience_time=patience_time, confidence_threshold=confidence_threshold, - human_review="ALWAYS", # TODO do we want humans to always review it? + human_review=human_review, # TODO do we want humans to always review it? want_async=True, - image_query_id=image_query.id, # TODO do we want the IDs to match? metadata=None, # TODO maybe we want metadata that it was an audited query? ) - else: - logger.info("Not auditing confident edge prediction") - elif ( - not disable_cloud_escalation - ): # Escalate after returning edge prediction if escalation is enabled and we have low confidence - # Only escalate if we haven't escalated on this detector too recently + + # Don't want to escalate to cloud if we've already audited + return image_query + + # Escalate after returning edge prediction if escalation is enabled and we have low confidence. + if not is_confident_enough: + # Only escalate if we haven't escalated on this detector too recently. if app_state.edge_inference_manager.escalation_cooldown_complete(detector_id=detector_id): logger.debug( f"Escalating to cloud due to low confidence: {ml_confidence} < thresh={confidence_threshold}" @@ -211,7 +215,6 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912 ) return image_query - # -- Edge-inference is not available -- else: # Create an edge-inference deployment record, which may be used to spin up an edge-inference server. diff --git a/app/core/configs.py b/app/core/configs.py index cbef038b..02875c7c 100644 --- a/app/core/configs.py +++ b/app/core/configs.py @@ -11,6 +11,10 @@ class GlobalConfig(BaseModel): default=60.0, description="The interval (in seconds) at which the inference server checks for a new model binary update.", ) + confident_audit_rate: float = Field( + default=0.01, + description="The rate at which confident predictions are audited.", + ) class EdgeInferenceConfig(BaseModel): @@ -84,6 +88,7 @@ def validate_inference_configs(self): { 'global_config': { 'refresh_rate': 60.0, + 'confident_audit_rate': 0.01, }, 'edge_inference_configs': { 'default': EdgeInferenceConfig( diff --git a/configs/edge-config.yaml b/configs/edge-config.yaml index b9d7bb36..4be73ba6 100644 --- a/configs/edge-config.yaml +++ b/configs/edge-config.yaml @@ -2,6 +2,7 @@ global_config: # These settings affect the overall behavior of the edge endpoint. refresh_rate: 60 # How often to attempt to fetch updated ML models (in seconds). If not set, defaults to 60. + confident_audit_rate: 0.01 # Rate at which to audit confident predictions. If not set, defaults to 0.01. edge_inference_configs: # These configs define detector-specific behavior and can be applied to detectors below. default: # Return the edge model's prediction if sufficiently confident; otherwise, escalate to the cloud. From 879303bd77f3ec7085863a4808125e398c340e95 Mon Sep 17 00:00:00 2001 From: CoreyEWood Date: Fri, 3 Jan 2025 14:38:50 -0800 Subject: [PATCH 3/5] add info about audit rate param --- CONFIGURING-DETECTORS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONFIGURING-DETECTORS.md b/CONFIGURING-DETECTORS.md index 830dafae..536da639 100644 --- a/CONFIGURING-DETECTORS.md +++ b/CONFIGURING-DETECTORS.md @@ -26,6 +26,10 @@ The global config contains parameters that affect the overall behavior of the ed `refresh_rate` is a float that defines how often the edge endpoint will attempt to fetch updated ML models (in seconds). If you expect a detector to frequently have a better model available, you can reduce this to ensure that the improved models will quickly be fetched and deployed. For example, you may want to label many image queries on a new detector. A higher refresh rate will ensure that the latest model improvements from these labels are promptly deployed to the edge. In practice, you likely won't want this to be lower than ~30 seconds due to the time it takes to train and fetch new models. If not specified, the default is 60 seconds. +#### `confident_audit_rate` + +`confident_audit_rate` is a float that defines the rate at which confident predictions are escalated to the cloud for auditing. This enables the accuracy of the edge model to be evaluated in the cloud even when it answers queries confidently. If a detector is configured to have cloud escalation disabled, this parameter will be ignored. If not specified, the default value is 0.01 (meaning there is a 1% chance that a confident prediction will be audited). + ### `edge_inference_configs` Edge inference configs are 'templates' that define the behavior of a detector on the edge. Each detector you configure will be assigned one of these templates. There are some predefined configs that represent the main ways you might want to configure a detector. However, you can edit these and also create your own as you wish. From 28c69868f11ada1949b82baedd82697dd2e8e5e1 Mon Sep 17 00:00:00 2001 From: CoreyEWood Date: Mon, 6 Jan 2025 13:15:21 -0800 Subject: [PATCH 4/5] audit metadata and some wording --- CONFIGURING-DETECTORS.md | 2 +- app/api/routes/image_queries.py | 6 +++--- app/core/configs.py | 2 +- configs/edge-config.yaml | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CONFIGURING-DETECTORS.md b/CONFIGURING-DETECTORS.md index 536da639..3c167754 100644 --- a/CONFIGURING-DETECTORS.md +++ b/CONFIGURING-DETECTORS.md @@ -28,7 +28,7 @@ The global config contains parameters that affect the overall behavior of the ed #### `confident_audit_rate` -`confident_audit_rate` is a float that defines the rate at which confident predictions are escalated to the cloud for auditing. This enables the accuracy of the edge model to be evaluated in the cloud even when it answers queries confidently. If a detector is configured to have cloud escalation disabled, this parameter will be ignored. If not specified, the default value is 0.01 (meaning there is a 1% chance that a confident prediction will be audited). +`confident_audit_rate` is a float that defines the probability that any given confident prediction will be escalated to the cloud for auditing. This enables the accuracy of the edge model to be evaluated in the cloud even when it answers queries confidently. If a detector is configured to have cloud escalation disabled, this parameter will be ignored. If not specified, the default value is 0.01 (meaning there is a 1% chance that a confident prediction will be audited). ### `edge_inference_configs` diff --git a/app/api/routes/image_queries.py b/app/api/routes/image_queries.py index 50661084..2be87f76 100644 --- a/app/api/routes/image_queries.py +++ b/app/api/routes/image_queries.py @@ -182,12 +182,12 @@ async def post_image_query( # noqa: PLR0913, PLR0915, PLR0912 wait=0, patience_time=patience_time, confidence_threshold=confidence_threshold, - human_review=human_review, # TODO do we want humans to always review it? + human_review="ALWAYS", # Require human review for audited queries so we can evaluate accuracy want_async=True, - metadata=None, # TODO maybe we want metadata that it was an audited query? + metadata={"is_edge_audit": True}, # Provide metadata to identify edge audits in the cloud ) - # Don't want to escalate to cloud if we've already audited + # Don't want to escalate to cloud again if we're already auditing the query return image_query # Escalate after returning edge prediction if escalation is enabled and we have low confidence. diff --git a/app/core/configs.py b/app/core/configs.py index 02875c7c..83eed151 100644 --- a/app/core/configs.py +++ b/app/core/configs.py @@ -13,7 +13,7 @@ class GlobalConfig(BaseModel): ) confident_audit_rate: float = Field( default=0.01, - description="The rate at which confident predictions are audited.", + description="The probability that any given confident prediction will be sent to the cloud for auditing.", ) diff --git a/configs/edge-config.yaml b/configs/edge-config.yaml index 4be73ba6..229065e4 100644 --- a/configs/edge-config.yaml +++ b/configs/edge-config.yaml @@ -1,8 +1,8 @@ # For configuring detectors on the edge endpoint. See CONFIGURING-DETECTORS.md for more information. global_config: # These settings affect the overall behavior of the edge endpoint. - refresh_rate: 60 # How often to attempt to fetch updated ML models (in seconds). If not set, defaults to 60. - confident_audit_rate: 0.01 # Rate at which to audit confident predictions. If not set, defaults to 0.01. + refresh_rate: 60 # How often to attempt to fetch updated ML models (in seconds). Defaults to 60. + confident_audit_rate: 0.01 # Probability that a confident prediction will be sent to cloud for auditing. Defaults to 0.01. edge_inference_configs: # These configs define detector-specific behavior and can be applied to detectors below. default: # Return the edge model's prediction if sufficiently confident; otherwise, escalate to the cloud. From 0f4b3fa1134179b66d9b59379b9b17c7e00f6c67 Mon Sep 17 00:00:00 2001 From: CoreyEWood Date: Mon, 6 Jan 2025 13:25:33 -0800 Subject: [PATCH 5/5] change default rate to 0.001 --- CONFIGURING-DETECTORS.md | 2 +- app/core/configs.py | 4 ++-- configs/edge-config.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CONFIGURING-DETECTORS.md b/CONFIGURING-DETECTORS.md index 3c167754..e7b6104c 100644 --- a/CONFIGURING-DETECTORS.md +++ b/CONFIGURING-DETECTORS.md @@ -28,7 +28,7 @@ The global config contains parameters that affect the overall behavior of the ed #### `confident_audit_rate` -`confident_audit_rate` is a float that defines the probability that any given confident prediction will be escalated to the cloud for auditing. This enables the accuracy of the edge model to be evaluated in the cloud even when it answers queries confidently. If a detector is configured to have cloud escalation disabled, this parameter will be ignored. If not specified, the default value is 0.01 (meaning there is a 1% chance that a confident prediction will be audited). +`confident_audit_rate` is a float that defines the probability that any given confident prediction will be escalated to the cloud for auditing. This enables the accuracy of the edge model to be evaluated in the cloud even when it answers queries confidently. If a detector is configured to have cloud escalation disabled, this parameter will be ignored. If not specified, the default value is 0.001 (meaning there is a 0.1% chance that a confident prediction will be audited). ### `edge_inference_configs` diff --git a/app/core/configs.py b/app/core/configs.py index 83eed151..2e402319 100644 --- a/app/core/configs.py +++ b/app/core/configs.py @@ -12,7 +12,7 @@ class GlobalConfig(BaseModel): description="The interval (in seconds) at which the inference server checks for a new model binary update.", ) confident_audit_rate: float = Field( - default=0.01, + default=0.001, description="The probability that any given confident prediction will be sent to the cloud for auditing.", ) @@ -88,7 +88,7 @@ def validate_inference_configs(self): { 'global_config': { 'refresh_rate': 60.0, - 'confident_audit_rate': 0.01, + 'confident_audit_rate': 0.001, }, 'edge_inference_configs': { 'default': EdgeInferenceConfig( diff --git a/configs/edge-config.yaml b/configs/edge-config.yaml index 229065e4..4c0b01ed 100644 --- a/configs/edge-config.yaml +++ b/configs/edge-config.yaml @@ -2,7 +2,7 @@ global_config: # These settings affect the overall behavior of the edge endpoint. refresh_rate: 60 # How often to attempt to fetch updated ML models (in seconds). Defaults to 60. - confident_audit_rate: 0.01 # Probability that a confident prediction will be sent to cloud for auditing. Defaults to 0.01. + confident_audit_rate: 0.001 # Probability that a confident prediction will be sent to cloud for auditing. Defaults to 0.001. edge_inference_configs: # These configs define detector-specific behavior and can be applied to detectors below. default: # Return the edge model's prediction if sufficiently confident; otherwise, escalate to the cloud.