Skip to content

Commit

Permalink
sample more special cases (#340)
Browse files Browse the repository at this point in the history
  • Loading branch information
kat-statsig authored Oct 10, 2024
1 parent 77d8c9f commit 692c50a
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 65 deletions.
16 changes: 12 additions & 4 deletions statsig/spec_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import threading
from concurrent.futures import wait, ThreadPoolExecutor
from enum import Enum
from typing import List, Optional, Dict, Set, Tuple

from . import globals
Expand All @@ -15,6 +16,12 @@
from .utils import djb2_hash


class EntityType(Enum):
    """Kinds of evaluable entity handled by the spec store.

    Each member's value is the key string handed to ``get_parsed_specs``
    when the corresponding group of specs is parsed, so the key literals
    live in exactly one place.
    """

    GATE = "feature_gates"
    CONFIG = "dynamic_configs"
    LAYER = "layer_configs"


class _SpecStore:
_background_download_configs: Optional[threading.Thread]
_background_download_id_lists: Optional[threading.Thread]
Expand Down Expand Up @@ -202,9 +209,9 @@ def parse_target_value_map_from_spec(spec, parsed):
rule["conditions"][i]["fast_target_value"][str(val)] = True

self.unsupported_configs.clear()
new_gates = get_parsed_specs("feature_gates")
new_configs = get_parsed_specs("dynamic_configs")
new_layers = get_parsed_specs("layer_configs")
new_gates = get_parsed_specs(EntityType.GATE.value)
new_configs = get_parsed_specs(EntityType.CONFIG.value)
new_layers = get_parsed_specs(EntityType.LAYER.value)

new_experiment_to_layer = {}
layers_dict = specs_json.get("layers", {})
Expand Down Expand Up @@ -353,7 +360,8 @@ def _get_initialize_strategy(self) -> List[DataSource]:
strategies.insert(0, DataSource.DATASTORE)
if self._options.bootstrap_values:
if data_store is not None:
globals.logger.debug("data_store gets priority over bootstrap_values. bootstrap_values will be ignored")
globals.logger.debug(
"data_store gets priority over bootstrap_values. bootstrap_values will be ignored")
else:
strategies.insert(0, DataSource.BOOTSTRAP)
if self._options.fallback_to_statsig_api:
Expand Down
14 changes: 12 additions & 2 deletions statsig/statsig_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def log_gate_exposure(
if is_manual_exposure:
event.metadata["isManualExposure"] = "true"
if sampling_rate is not None:
event.statsigMetadata = {"samplingRate": sampling_rate}
event.statsigMetadata["samplingRate"] = sampling_rate
if shadow_logged is not None:
event.statsigMetadata["shadowLogged"] = shadow_logged
if sampling_mode is not None:
Expand Down Expand Up @@ -138,7 +138,7 @@ def log_config_exposure(
if is_manual_exposure:
event.metadata["isManualExposure"] = "true"
if sampling_rate is not None:
event.statsigMetadata = {"samplingRate": sampling_rate}
event.statsigMetadata["samplingRate"] = sampling_rate
if shadow_logged is not None:
event.statsigMetadata["shadowLogged"] = shadow_logged
if sampling_mode is not None:
Expand All @@ -158,6 +158,9 @@ def log_layer_exposure(
parameter_name: str,
config_evaluation: _ConfigEvaluation,
is_manual_exposure=False,
sampling_rate=None,
shadow_logged=None,
sampling_mode=None,
):
event = StatsigEvent(user, _LAYER_EXPOSURE_EVENT)

Expand All @@ -178,8 +181,15 @@ def log_layer_exposure(
if not self._is_unique_exposure(user, _LAYER_EXPOSURE_EVENT, metadata):
return
event.metadata = metadata
event.statsigMetadata = {}
if is_manual_exposure:
event.metadata["isManualExposure"] = "true"
if sampling_rate is not None:
event.statsigMetadata["samplingRate"] = sampling_rate
if shadow_logged is not None:
event.statsigMetadata["shadowLogged"] = shadow_logged
if sampling_mode is not None:
event.statsigMetadata["samplingMode"] = sampling_mode

event._secondary_exposures = [] if exposures is None else exposures

Expand Down
54 changes: 33 additions & 21 deletions statsig/statsig_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .feature_gate import FeatureGate
from .layer import Layer
from .sdk_configs import _SDK_Configs
from .spec_store import _SpecStore
from .spec_store import _SpecStore, EntityType
from .statsig_error_boundary import _StatsigErrorBoundary
from .statsig_errors import StatsigNameError, StatsigRuntimeError, StatsigValueError
from .statsig_event import StatsigEvent
Expand All @@ -21,7 +21,7 @@
from .statsig_user import StatsigUser
from .ttl_set import TTLSet
from .utils import HashingAlgorithm, compute_dedupe_key_for_gate, is_hash_in_sampling_rate, \
compute_dedupe_key_for_config
compute_dedupe_key_for_config, compute_dedupe_key_for_layer

RULESETS_SYNC_INTERVAL = 10
IDLISTS_SYNC_INTERVAL = 60
Expand Down Expand Up @@ -278,9 +278,14 @@ def task():
result = self._evaluator.get_layer(normal_user, layer_name)

def log_func(layer: Layer, parameter_name: str):
    """Log a layer exposure for ``parameter_name``, subject to sampling.

    The sampling decision is computed on every call; the exposure is only
    emitted when both the caller asked for exposure logging and the
    sampling decision allows it.
    """
    # NOTE(review): the sampling decision is computed from `user` while the
    # exposure itself is logged with `normal_user` -- confirm this is intended.
    should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling(
        EntityType.LAYER, layer_name, result, user, parameter_name)

    if not (log_exposure and should_log):
        return

    self._logger.log_layer_exposure(
        normal_user,
        layer,
        parameter_name,
        result,
        sampling_rate=logged_sampling_rate,
        shadow_logged=shadow_logged,
        sampling_mode=_SDK_Configs.get_config_str_value("sampling_mode"),
    )

layer = Layer._create(
Expand Down Expand Up @@ -490,7 +495,8 @@ def _verify_bg_threads_running(self):
def __check_gate(self, user: StatsigUser, gate_name: str, log_exposure=True):
user = self.__normalize_user(user)
result = self._evaluator.check_gate(user, gate_name)
should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling("GATE", gate_name, result, user)
should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling(EntityType.GATE, gate_name, result,
user)

if log_exposure and should_log:
self._logger.log_gate_exposure(
Expand All @@ -511,7 +517,7 @@ def __get_config(self, user: StatsigUser, config_name: str, log_exposure=True):

result = self._evaluator.get_config(user, config_name)
result.user = user
should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling("CONFIG", config_name,
should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling(EntityType.CONFIG, config_name,
result, user)

if log_exposure and should_log:
Expand All @@ -527,47 +533,53 @@ def __get_config(self, user: StatsigUser, config_name: str, log_exposure=True):
)
return result

def __determine_sampling(self, type: EntityType, name: str, result: _ConfigEvaluation, user: StatsigUser,
                         param_name="") -> Tuple[bool, Optional[int], Optional[str]]:
    """Decide whether an exposure for ``name`` should be logged.

    Args:
        type: Which kind of entity was evaluated; selects the dedupe-key
            function used as the sampling hash input.
        name: Gate, config or layer name.
        result: Evaluation result; ``rule_id``, ``sample_rate``,
            ``forward_all_exposures``, ``boolean_value`` and
            ``allocated_experiment`` are read from it.
        user: User whose ``user_id`` / ``custom_ids`` feed the dedupe key.
        param_name: Layer parameter name; only used for ``EntityType.LAYER``.

    Returns:
        ``(should_log, logged_sampling_rate, shadow_logged)``.
        ``shadow_logged`` is ``"logged"`` or ``"dropped"`` when the result
        carries its own ``sample_rate``, ``"logged"`` on the paths that
        bypass sampling, and ``None`` otherwise. Any exception is reported
        to the error boundary and yields ``(True, None, None)``.
    """
    try:
        # Paths that bypass sampling always log and report "logged".
        not_sampled = (True, None, "logged")

        env = self._options.get_sdk_environment_tier()
        sampling_mode = _SDK_Configs.get_config_str_value("sampling_mode")
        special_case_sampling_rate = _SDK_Configs.get_config_int_value("special_case_sampling_rate")

        if sampling_mode is None or sampling_mode == "none" or env != "production":
            return not_sampled

        if result.forward_all_exposures:
            return not_sampled

        # The first exposure seen for each (name, rule) pair is always logged.
        sampling_set_key = f"{name}_{result.rule_id}"
        if not self._sampling_key_set.contains(sampling_set_key):
            self._sampling_key_set.add(sampling_set_key)
            return not_sampled

        shadow_should_log, logged_sampling_rate = True, None

        if result.sample_rate is not None:
            exposure_key = ""
            if type == EntityType.GATE:
                exposure_key = compute_dedupe_key_for_gate(
                    name, result.rule_id, result.boolean_value, user.user_id, user.custom_ids)
            elif type == EntityType.CONFIG:
                exposure_key = compute_dedupe_key_for_config(
                    name, result.rule_id, user.user_id, user.custom_ids)
            elif type == EntityType.LAYER:
                exposure_key = compute_dedupe_key_for_layer(
                    name, result.allocated_experiment, param_name, result.rule_id,
                    user.user_id, user.custom_ids)
            shadow_should_log = is_hash_in_sampling_rate(exposure_key, result.sample_rate)
            logged_sampling_rate = result.sample_rate

        # Special-case rule ids are sampled by entity name at the SDK-wide
        # rate, overriding any per-rule decision made above.
        special_case_rules = ("disabled", "default", "")
        if result.rule_id in special_case_rules and special_case_sampling_rate is not None:
            shadow_should_log = is_hash_in_sampling_rate(name, special_case_sampling_rate)
            logged_sampling_rate = special_case_sampling_rate

        # NOTE(review): shadow_logged stays None when only the special-case
        # rate applied (result.sample_rate is None) -- confirm this is intended.
        if result.sample_rate is None:
            shadow_logged = None
        elif shadow_should_log:
            shadow_logged = "logged"
        else:
            shadow_logged = "dropped"

        if sampling_mode == "on":
            return shadow_should_log, logged_sampling_rate, shadow_logged
        if sampling_mode == "shadow":
            # Shadow mode always logs but records what "on" would have done.
            return True, logged_sampling_rate, shadow_logged

        return not_sampled
    except Exception as e:
        # Sampling must never block an exposure: report and fall back to logging.
        self._errorBoundary.log_exception("__determine_sampling", e, log_mode="debug")
        return True, None, None
Expand Down
50 changes: 32 additions & 18 deletions testdata/download_config_specs_sampling.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,27 +48,24 @@
},
"rules": [
{
"name": "1kNmlB23wylPFZi1M0Divl",
"groupName": "statsig email",
"name": "33qGYzVZr1MchRe4Ncj6MO",
"passPercentage": 100,
"conditions": [
{
"type": "user_field",
"targetValue": [
"@statsig.com"
],
"operator": "str_contains_any",
"field": "email",
"additionalValues": {}
"type": "public",
"targetValue": null,
"operator": null,
"field": null,
"additionalValues": {},
"isDeviceBased": false,
"idType": "userID"
}
],
"returnValue": {
"number": 7,
"string": "statsig",
"boolean": false
},
"id": "1kNmlB23wylPFZi1M0Divl",
"salt": "f2ac6975-174d-497e-be7f-599fea626132",
"returnValue": {},
"id": "33qGYzVZr1MchRe4Ncj6MO",
"salt": "55a3430e-b239-4941-8208-951f5a9f8496",
"isDeviceBased": false,
"idType": "userID",
"samplingRate": 101
}
]
Expand Down Expand Up @@ -1307,14 +1304,31 @@
]
}
],
"layer_configs": [],
"layers": {
"not_allocated_layer": []
},
"layer_configs": [
{
"name": "not_allocated_layer",
"type": "dynamic_config",
"salt": "b39af118-3f2c-4645-a4e4-7f7c96225ecc",
"enabled": true,
"defaultValue": {
"param": "ello"
},
"rules": [],
"isDeviceBased": false,
"idType": "userID",
"entity": "layer"
}
],
"has_updates": true,
"time": 1631638014811,
"id_lists": {
"list_1": true,
"list_2": true
},
"sdk_configs": {
"default_sampling_rate": 101
"special_case_sampling_rate": 101
}
}
Loading

0 comments on commit 692c50a

Please sign in to comment.