diff --git a/service_capacity_modeling/capacity_planner.py b/service_capacity_modeling/capacity_planner.py
index 930ba9c..f7e3083 100644
--- a/service_capacity_modeling/capacity_planner.py
+++ b/service_capacity_modeling/capacity_planner.py
@@ -26,7 +26,6 @@
 from service_capacity_modeling.interface import Instance
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import interval
-from service_capacity_modeling.interface import interval_percentile
 from service_capacity_modeling.interface import Lifecycle
 from service_capacity_modeling.interface import PlanExplanation
 from service_capacity_modeling.interface import Platform
@@ -39,6 +38,7 @@
 from service_capacity_modeling.models.org import netflix
 from service_capacity_modeling.models.utils import reduce_by_family
 from service_capacity_modeling.stats import dist_for_interval
+from service_capacity_modeling.stats import interval_percentile
 
 logger = logging.getLogger(__name__)
 
@@ -124,37 +124,40 @@ def model_desires_percentiles(
     for field in sorted(query_pattern.__fields__):
         d = getattr(query_pattern, field)
         if isinstance(d, Interval):
-            query_pattern_means[field] = certain_float(d.mid)
-            if d.confidence <= 0.99:
-                samples = dist_for_interval(d).rvs(1028)
-                query_pattern_simulation[field] = interval_percentile(
-                    samples, percentiles
-                )
-                continue
-            query_pattern_simulation[field] = [d] * len(percentiles)
-            query_pattern_means[field] = d
+            query_pattern_simulation[field] = interval_percentile(d, percentiles)
+            if d.can_simulate:
+                query_pattern_means[field] = certain_float(d.mid)
+            else:
+                query_pattern_means[field] = d
+        else:
+            query_pattern_simulation[field] = [d] * len(percentiles)
+            query_pattern_means[field] = d
 
     data_shape_simulation = {}
     data_shape_means = {}
     for field in sorted(data_shape.__fields__):
         d = getattr(data_shape, field)
         if isinstance(d, Interval):
-            data_shape_means[field] = certain_float(d.mid)
-            if d.confidence <= 0.99:
-                samples = dist_for_interval(d).rvs(1028)
-                data_shape_simulation[field] = interval_percentile(samples, percentiles)
-                continue
-            data_shape_simulation[field] = [d] * len(percentiles)
-            data_shape_means[field] = d
+            data_shape_simulation[field] = interval_percentile(d, percentiles)
+            if d.can_simulate:
+                data_shape_means[field] = certain_float(d.mid)
+            else:
+                data_shape_means[field] = d
+        else:
+            data_shape_simulation[field] = [d] * len(percentiles)
+            data_shape_means[field] = d
 
     results = []
     for i in range(len(percentiles)):
-        query_pattern = QueryPattern(
-            **{
-                f: query_pattern_simulation[f][i]
-                for f in sorted(query_pattern.__fields__)
-            }
-        )
+        try:
+            query_pattern = QueryPattern(
+                **{
+                    f: query_pattern_simulation[f][i]
+                    for f in sorted(query_pattern.__fields__)
+                }
+            )
+        except Exception as exp:
+            raise exp
         data_shape = DataShape(
             **{f: data_shape_simulation[f][i] for f in sorted(data_shape.__fields__)}
         )
diff --git a/service_capacity_modeling/hardware/profiles/shapes/aws.json b/service_capacity_modeling/hardware/profiles/shapes/aws.json
index b44f23f..79b6299 100644
--- a/service_capacity_modeling/hardware/profiles/shapes/aws.json
+++ b/service_capacity_modeling/hardware/profiles/shapes/aws.json
@@ -254,7 +254,7 @@
         "ram_gib": 15.48,
         "net_mbps": 781,
         "drive": {
-            "name": "ephem", "size_gib": 436.5,
+            "name": "ephem", "size_gib": 436,
             "read_io_latency_ms": {
                 "minimum_value":0.05, "low":0.10, "mid":0.125, "high":0.17,
@@ -271,7 +271,7 @@
         "ram_gib": 30.955,
         "net_mbps": 1875,
         "drive": {
-            "name": "ephem", "size_gib": 873.0,
+            "name": "ephem", "size_gib": 873,
             "read_io_latency_ms": {
                 "minimum_value": 0.05, "low": 0.10, "mid": 0.125, "high": 0.17,
diff --git a/service_capacity_modeling/interface.py b/service_capacity_modeling/interface.py
index de6568b..ae60960 100644
--- a/service_capacity_modeling/interface.py
+++ b/service_capacity_modeling/interface.py
@@ -155,13 +155,6 @@ def interval(samples: Sequence[float], low_p: int = 5, high_p: int = 95) -> Inte
     )
 
 
-def interval_percentile(
-    samples: Sequence[float], percentiles: Sequence[int]
-) -> Sequence[Interval]:
-    p = np.percentile(samples, percentiles)
-    return [certain_float(i) for i in p]
-
-
 ###############################################################################
 #                Models (structs) for how we describe hardware                #
 ###############################################################################
@@ -555,11 +548,11 @@ class DataShape(ExcludeUnsetModel):
 
     # How much fixed memory must be provisioned per instance for the
     # application (e.g. for process heap memory)
-    reserved_instance_app_mem_gib: int = 2
+    reserved_instance_app_mem_gib: float = 2
 
     # How much fixed memory must be provisioned per instance for the
     # system (e.g. for kernel and other system processes)
-    reserved_instance_system_mem_gib: int = 1
+    reserved_instance_system_mem_gib: float = 1
 
     # How durable does this dataset need to be. We want to provision
     # sufficient replication and backups of data to achieve the target
diff --git a/service_capacity_modeling/stats.py b/service_capacity_modeling/stats.py
index ab45e8b..226e823 100644
--- a/service_capacity_modeling/stats.py
+++ b/service_capacity_modeling/stats.py
@@ -1,4 +1,5 @@
 from functools import lru_cache
+from typing import Sequence
 from typing import Tuple
 
 import numpy as np
@@ -10,6 +11,7 @@
 from scipy.stats import gamma as gamma_dist
 from scipy.stats import rv_continuous
 
+from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import IntervalModel
 
@@ -66,7 +68,7 @@ def _gamma_dist_from_interval(
     result = root(f, 2)
     shape = result.x[0]
 
-    dist = gamma_dist(shape, loc=minimum, scale=(mean / shape))
+    dist = gamma_dist(shape, loc=minimum, scale=mean / shape)
     dist.random_state = np.random.default_rng(seed=seed)
     return (shape, dist)
 
@@ -159,3 +161,14 @@ def dist_for_interval(interval: Interval, seed: int = 0xCAFE) -> rv_continuous:
     else:
         result = beta_for_interval(interval=interval, seed=seed)
     return result
+
+
+def interval_percentile(
+    interval: Interval, percentiles: Sequence[int]
+) -> Sequence[Interval]:
+    if interval.can_simulate:
+        samples = dist_for_interval(interval).rvs(1028)
+        p = np.percentile(samples, percentiles)
+        return [certain_float(i) for i in p]
+    else:
+        return [interval] * len(percentiles)