Skip to content

Commit

Permalink
Make SCM compatible with v1 and v2 pydantic
Browse files Browse the repository at this point in the history
  • Loading branch information
jolynch committed Jul 17, 2023
1 parent ec81407 commit cc6e266
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 35 deletions.
49 changes: 26 additions & 23 deletions service_capacity_modeling/capacity_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from service_capacity_modeling.interface import Instance
from service_capacity_modeling.interface import Interval
from service_capacity_modeling.interface import interval
from service_capacity_modeling.interface import interval_percentile
from service_capacity_modeling.interface import Lifecycle
from service_capacity_modeling.interface import PlanExplanation
from service_capacity_modeling.interface import Platform
Expand All @@ -39,6 +38,7 @@
from service_capacity_modeling.models.org import netflix
from service_capacity_modeling.models.utils import reduce_by_family
from service_capacity_modeling.stats import dist_for_interval
from service_capacity_modeling.stats import interval_percentile

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -124,37 +124,40 @@ def model_desires_percentiles(
for field in sorted(query_pattern.__fields__):
d = getattr(query_pattern, field)
if isinstance(d, Interval):
query_pattern_means[field] = certain_float(d.mid)
if d.confidence <= 0.99:
samples = dist_for_interval(d).rvs(1028)
query_pattern_simulation[field] = interval_percentile(
samples, percentiles
)
continue
query_pattern_simulation[field] = [d] * len(percentiles)
query_pattern_means[field] = d
query_pattern_simulation[field] = interval_percentile(d, percentiles)
if d.can_simulate:
query_pattern_means[field] = certain_float(d.mid)
else:
query_pattern_means[field] = d
else:
query_pattern_simulation[field] = [d] * len(percentiles)
query_pattern_means[field] = d

data_shape_simulation = {}
data_shape_means = {}
for field in sorted(data_shape.__fields__):
d = getattr(data_shape, field)
if isinstance(d, Interval):
data_shape_means[field] = certain_float(d.mid)
if d.confidence <= 0.99:
samples = dist_for_interval(d).rvs(1028)
data_shape_simulation[field] = interval_percentile(samples, percentiles)
continue
data_shape_simulation[field] = [d] * len(percentiles)
data_shape_means[field] = d
data_shape_simulation[field] = interval_percentile(d, percentiles)
if d.can_simulate:
data_shape_means[field] = certain_float(d.mid)
else:
data_shape_means[field] = d
else:
data_shape_simulation[field] = [d] * len(percentiles)
data_shape_means[field] = d

results = []
for i in range(len(percentiles)):
query_pattern = QueryPattern(
**{
f: query_pattern_simulation[f][i]
for f in sorted(query_pattern.__fields__)
}
)
try:
query_pattern = QueryPattern(
**{
f: query_pattern_simulation[f][i]
for f in sorted(query_pattern.__fields__)
}
)
except Exception as exp:
raise exp
data_shape = DataShape(
**{f: data_shape_simulation[f][i] for f in sorted(data_shape.__fields__)}
)
Expand Down
4 changes: 2 additions & 2 deletions service_capacity_modeling/hardware/profiles/shapes/aws.json
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@
"ram_gib": 15.48,
"net_mbps": 781,
"drive": {
"name": "ephem", "size_gib": 436.5,
"name": "ephem", "size_gib": 436,
"read_io_latency_ms": {
"minimum_value":0.05,
"low":0.10, "mid":0.125, "high":0.17,
Expand All @@ -271,7 +271,7 @@
"ram_gib": 30.955,
"net_mbps": 1875,
"drive": {
"name": "ephem", "size_gib": 873.0,
"name": "ephem", "size_gib": 873,
"read_io_latency_ms": {
"minimum_value": 0.05,
"low": 0.10, "mid": 0.125, "high": 0.17,
Expand Down
11 changes: 2 additions & 9 deletions service_capacity_modeling/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,6 @@ def interval(samples: Sequence[float], low_p: int = 5, high_p: int = 95) -> Inte
)


def interval_percentile(
    samples: Sequence[float], percentiles: Sequence[int]
) -> Sequence[Interval]:
    """Summarize *samples* as one certain Interval per requested percentile.

    Each returned Interval is a zero-width ("certain") interval located at
    the corresponding percentile of the sample distribution.
    """
    return [
        certain_float(value)
        for value in np.percentile(samples, percentiles)
    ]


###############################################################################
# Models (structs) for how we describe hardware #
###############################################################################
Expand Down Expand Up @@ -555,11 +548,11 @@ class DataShape(ExcludeUnsetModel):

# How much fixed memory must be provisioned per instance for the
# application (e.g. for process heap memory)
reserved_instance_app_mem_gib: int = 2
reserved_instance_app_mem_gib: float = 2

# How much fixed memory must be provisioned per instance for the
# system (e.g. for kernel and other system processes)
reserved_instance_system_mem_gib: int = 1
reserved_instance_system_mem_gib: float = 1

# How durable does this dataset need to be. We want to provision
# sufficient replication and backups of data to achieve the target
Expand Down
15 changes: 14 additions & 1 deletion service_capacity_modeling/stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from functools import lru_cache
from typing import Sequence
from typing import Tuple

import numpy as np
Expand All @@ -10,6 +11,7 @@
from scipy.stats import gamma as gamma_dist
from scipy.stats import rv_continuous

from service_capacity_modeling.interface import certain_float
from service_capacity_modeling.interface import Interval
from service_capacity_modeling.interface import IntervalModel

Expand Down Expand Up @@ -66,7 +68,7 @@ def _gamma_dist_from_interval(
result = root(f, 2)
shape = result.x[0]

dist = gamma_dist(shape, loc=minimum, scale=(mean / shape))
dist = gamma_dist(shape, loc=minimum, scale=mean / shape)
dist.random_state = np.random.default_rng(seed=seed)
return (shape, dist)

Expand Down Expand Up @@ -159,3 +161,14 @@ def dist_for_interval(interval: Interval, seed: int = 0xCAFE) -> rv_continuous:
else:
result = beta_for_interval(interval=interval, seed=seed)
return result


def interval_percentile(
    interval: Interval, percentiles: Sequence[int]
) -> Sequence[Interval]:
    """Summarize *interval* at each of the requested percentiles.

    When the interval carries enough information to simulate
    (``interval.can_simulate``), draw samples from its fitted distribution
    and return a zero-width ("certain") Interval at each percentile of
    those samples. Otherwise the interval itself is repeated once per
    requested percentile.
    """
    # Degenerate case: nothing to simulate, echo the interval back.
    if not interval.can_simulate:
        return [interval] * len(percentiles)

    # Same sample count (1028) used elsewhere in capacity_planner for
    # consistency of the simulated percentile estimates.
    drawn = dist_for_interval(interval).rvs(1028)
    return [
        certain_float(point)
        for point in np.percentile(drawn, percentiles)
    ]

0 comments on commit cc6e266

Please sign in to comment.