Tweaks evcache model and adds experiment
akashdeepgoel authored and jolynch committed Sep 7, 2023
1 parent 6f8622b commit da95176
Showing 4 changed files with 299 additions and 245 deletions.
18 changes: 17 additions & 1 deletion service_capacity_modeling/models/common.py
@@ -57,7 +57,23 @@ def _sqrt_staffed_cores(rps: float, latency_s: float, qos: float) -> int:


def sqrt_staffed_cores(desires: CapacityDesires) -> int:
"""Computes cores given a sqrt staffing model"""
"""Computes cores given a sqrt staffing model
Little's Law: Concurrency = Average Rate * Average Latency
For example: 0.1 average concurrency = 100 / second * 1 millisecond
However, if you provision for average, when statistically unlikely traffic
spikes happen, you will queue, creating _latency_.
Square root staffing says to avoid that latency instead of provisioning
average number of cores, you provision
Cores = (Rate * Latency) + (QoS * sqrt(Rate * Latency))
Cores = (Required cores) + (Safety margin)
Pick higher QoS to minimize the probability of queueing. In our case we do it
based on tier.
"""
qos = _QOS(desires.service_tier)
read_rps, read_lat = (
desires.query_pattern.estimated_read_per_second.mid,
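For intuition, the staffing formula in the docstring can be sketched as a tiny standalone helper (illustrative only: staffed_cores below is not part of the model, which takes its QoS from a tier lookup and its rates from CapacityDesires):

import math

def staffed_cores(rate_per_s: float, latency_s: float, qos: float) -> int:
    concurrency = rate_per_s * latency_s  # Little's Law
    # Required cores plus a QoS-scaled safety margin against spikes
    return math.ceil(concurrency + qos * math.sqrt(concurrency))

# 100,000 requests/second at 1 ms each keeps ~100 cores busy on average;
# qos=4 staffs 100 + 4 * sqrt(100) = 140 cores so spikes rarely queue.
print(staffed_cores(100_000, 0.001, qos=4))  # -> 140
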
124 changes: 79 additions & 45 deletions service_capacity_modeling/models/org/netflix/evcache.py
@@ -45,10 +45,34 @@ class Replication(str, Enum):
evicts = "evicts"


def calculate_read_cpu_time_evcache_ms(read_size_bytes: float) -> float:
    # Fitted a curve to data we crunched from a couple of read-heavy
    # clusters.
    # In memory:
    #   250 bit - 10 micros
    #   1520 bit - 41 micros
    #   8250 bit - 66 micros
    # On disk:
    #   24 KiB - 133 micros
    #   40 KiB - 158 micros (top of our curve)
    # Fit a logistic curve, requiring it to go through the first point
    read_latency_ms = 979.4009 + (-0.06853492 - 979.4009) / math.pow(
        1 + math.pow(read_size_bytes / 13061.23, 0.180864), 0.0002819491
    )
    return max(read_latency_ms, 0.005)
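
A quick sanity check of the fitted constants (assuming the in-memory calibration points above are quoted in bits, so 250 bit is fed in as 31.25 bytes; outputs are approximate):

for size_bytes in (31.25, 190.0, 1031.25, 24 * 1024, 40 * 1024):
    micros = calculate_read_cpu_time_evcache_ms(size_bytes) * 1000
    print(f"{size_bytes:8.2f} bytes -> {micros:5.1f} micros")
# ~11, 37, 67, 139, 153 micros -- close to the tabulated points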

def calculate_spread_cost(
    cluster_size: int, max_cost: float = 100000.0, min_cost: float = 0.0
) -> float:
    # Penalize small zonal clusters: fewer than 2 nodes costs the maximum,
    # more than 10 nodes costs the minimum, and in between the penalty
    # shrinks as the cluster grows.
    if cluster_size > 10:
        return min_cost
    if cluster_size < 2:
        return max_cost
    return min_cost + (max_cost - cluster_size * (max_cost - min_cost) / 30.0)
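
With the defaults, the penalty falls off like this (illustrative):

for n in (1, 2, 5, 10, 11):
    print(n, round(calculate_spread_cost(n), 1))
# 1  -> 100000.0  (fewer than 2 nodes: maximum penalty)
# 2  -> 93333.3
# 5  -> 83333.3
# 10 -> 66666.7
# 11 -> 0.0       (more than 10 nodes: no spread penalty)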


def _estimate_evcache_requirement(
instance: Instance,
desires: CapacityDesires,
- working_set: float,
+ working_set: Optional[float],
copies_per_region: int,
zones_per_region: int = 3,
) -> Tuple[CapacityRequirement, Tuple[str, ...]]:
@@ -57,20 +81,25 @@ def _estimate_evcache_requirement(
The input desires should be the **regional** desire, and this function will
return the zonal capacity requirement
"""
- # EVCache can run at full CPU utilization
+ # EVCache needs to have headroom for region failover

needed_cores = sqrt_staffed_cores(desires)

# For tier 0, we double the number of cores to account for caution
if desires.service_tier == 0:
needed_cores = needed_cores * 2

# (Arun): Keep 20% of available bandwidth free for the cache warmer,
# i.e. provision 1 / 0.8 = 1.25x the steady-state network need
needed_network_mbps = simple_network_mbps(desires) * 1.25

needed_disk = math.ceil(
- desires.data_shape.estimated_state_size_gib.mid * copies_per_region,
+ desires.data_shape.estimated_state_size_gib.mid,
)

regrets: Tuple[str, ...] = ("spend", "mem")
# (Arun): As of 2021 we are using ephemerals exclusively and do not
# use cloud drives
- if instance.drive is None:
+ if working_set is None or desires.data_shape.estimated_state_size_gib.mid < 110.0:
# We can't currently store data on cloud drives, but we can put the
# dataset into memory!
needed_memory = float(needed_disk)
@@ -81,13 +110,9 @@ def _estimate_evcache_requirement(
needed_memory = float(working_set) * float(needed_disk)
regrets = ("spend", "disk", "mem")

- # Now convert to per zone
- needed_cores = max(1, needed_cores // zones_per_region)
- if needed_disk > 0:
-     needed_disk = max(1, needed_disk // zones_per_region)
- else:
-     needed_disk = needed_disk // zones_per_region
- needed_memory = max(1, int(needed_memory // zones_per_region))
+ # For EVCache, writes go to all zones
+ # Regional reads can also go to any one zone due to app's zone affinity
+ needed_cores = max(1, needed_cores)
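# Illustration: with 30 regional cores and 3 zones, the removed logic
# staffed max(1, 30 // 3) = 10 cores per zone; the new logic staffs all
# 30 cores in every zone, since each zone absorbs the full write volume
# and may serve any regional read.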
logger.debug(
"Need (cpu, mem, disk, working) = (%s, %s, %s, %f)",
needed_cores,
@@ -146,16 +171,20 @@ def _estimate_evcache_cluster_zonal(
# working set to keep more or less data in RAM. Faster drives need
# less fronting RAM.
ws_drive = instance.drive or drive
- working_set = working_set_from_drive_and_slo(
-     drive_read_latency_dist=dist_for_interval(ws_drive.read_io_latency_ms),
-     read_slo_latency_dist=dist_for_interval(
-         desires.query_pattern.read_latency_slo_ms
-     ),
-     estimated_working_set=desires.data_shape.estimated_working_set_percent,
-     # Caches have very tight latency SLOs, so we target a high
-     # percentile of the drive latency distribution for WS calculation
-     target_percentile=0.99,
- ).mid
+ if ws_drive:
+     working_set = working_set_from_drive_and_slo(
+         drive_read_latency_dist=dist_for_interval(ws_drive.read_io_latency_ms),
+         read_slo_latency_dist=dist_for_interval(
+             desires.query_pattern.read_latency_slo_ms
+         ),
+         estimated_working_set=desires.data_shape.estimated_working_set_percent,
+         # Caches have very tight latency SLOs, so we target a high
+         # percentile of the drive latency distribution for WS calculation
+         target_percentile=0.99,
+     ).mid
+ else:
+     working_set = None
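# Roughly: working_set_from_drive_and_slo asks how often a read served
# from this drive at its 99th-percentile latency would still meet the
# read SLO; the slice of data the drive cannot serve fast enough is the
# working set that must stay resident in RAM.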

requirement, regrets = _estimate_evcache_requirement(
instance=instance,
@@ -182,7 +211,6 @@ def reserve_memory(instance_mem_gib):
return base_mem + variable_os

requirement.context["osmem"] = reserve_memory(instance.ram_gib)

# EVCache clusters aim to be at least 2 nodes per zone to start
# out with for tier 0
min_count = 0
@@ -208,7 +236,6 @@ def reserve_memory(instance_mem_gib):
reserve_memory=lambda x: base_mem,
core_reference_ghz=requirement.core_reference_ghz,
)

# Communicate to the actual provision that if we want reduced RF
params = {"evcache.copies": copies_per_region}
_upsert_params(cluster, params)
@@ -239,9 +266,11 @@ def reserve_memory(instance_mem_gib):
)

ec2_cost = zones_per_region * cluster.annual_cost
spread_cost = calculate_spread_cost(cluster.count)

# Account for the clusters and replication costs
evcache_costs = {"evcache.zonal-clusters": ec2_cost}
evcache_costs = {"evcache.zonal-clusters": ec2_cost, "evcache.spread.cost": spread_cost}

for s in services:
evcache_costs[f"{s.service_type}"] = s.annual_cost

@@ -354,6 +383,19 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
f"User asked for {key}={value}"
)

estimated_read_size: Interval = Interval(
**user_desires.query_pattern.dict(exclude_unset=True).get(
"estimated_mean_read_size_bytes",
dict(low=16, mid=1024, high=65536, confidence=0.95),
)
)
estimated_read_latency_ms: Interval = Interval(
low=calculate_read_cpu_time_evcache_ms(estimated_read_size.low),
mid=calculate_read_cpu_time_evcache_ms(estimated_read_size.mid),
high=calculate_read_cpu_time_evcache_ms(estimated_read_size.high),
confidence=estimated_read_size.confidence,
)
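# With no user override, the defaults above work out to roughly
# (f = calculate_read_cpu_time_evcache_ms, floored at 0.005 ms):
#   low  = f(16)    ~= 0.005 ms
#   mid  = f(1024)  ~= 0.066 ms
#   high = f(65536) ~= 0.166 ms
# whereas a service declaring small 100-byte payloads would get a
# ~0.027 ms mid read latency instead.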

if user_desires.query_pattern.access_pattern == AccessPattern.latency:
return CapacityDesires(
query_pattern=QueryPattern(
@@ -367,20 +409,16 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
target_consistency=AccessConsistency.never
),
),
- estimated_mean_read_size_bytes=Interval(
-     low=128, mid=1024, high=65536, confidence=0.95
- ),
+ estimated_mean_read_size_bytes=estimated_read_size,
estimated_mean_write_size_bytes=Interval(
low=64, mid=512, high=1024, confidence=0.95
),
- # memcache point queries usually take just around 100us
- # of on-CPU time for reads and writes. Memcache is very fast.
- estimated_mean_read_latency_ms=Interval(
-     low=0.01, mid=0.1, high=0.2, confidence=0.98
- ),
+ # evcache read latency is sensitive to payload size,
+ # so it is computed above from the estimated read size
+ estimated_mean_read_latency_ms=estimated_read_latency_ms,
# evcache bulk puts usually take slightly longer
estimated_mean_write_latency_ms=Interval(
- low=0.01, mid=0.1, high=0.2, confidence=0.98
+ low=0.01, mid=0.01, high=0.01, confidence=0.98
),
# Assume point queries, "1 millisecond SLO"
read_latency_slo_ms=FixedInterval(
@@ -406,7 +444,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
low=10, mid=100, high=600, confidence=0.98
),
# (Arun): The management sidecar takes 512 MiB
- reserved_instance_app_mem_gib=0.5,
+ reserved_instance_app_mem_gib=1,
# account for the memcached connection memory
# and system requirements.
# (Arun) We currently use 1 GiB for connection memory
@@ -415,7 +453,6 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
)
else:
return CapacityDesires(
- # (FIXME): Need to pair with memcache folks on the exact values
query_pattern=QueryPattern(
access_pattern=AccessPattern.throughput,
access_consistency=GlobalConsistency(
@@ -427,19 +464,16 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
target_consistency=AccessConsistency.never
),
),
- estimated_mean_read_size_bytes=Interval(
-     low=128, mid=1024, high=65536, confidence=0.95
- ),
+ estimated_mean_read_size_bytes=estimated_read_size,
estimated_mean_write_size_bytes=Interval(
low=128, mid=1024, high=65536, confidence=0.95
),
- # evcache bulk reads usually take slightly longer
- estimated_mean_read_latency_ms=Interval(
-     low=0.01, mid=0.15, high=0.3, confidence=0.98
- ),
+ # evcache read latency is sensitive to payload size,
+ # so it is computed above from the estimated read size
+ estimated_mean_read_latency_ms=estimated_read_latency_ms,
# evcache bulk puts usually take slightly longer
estimated_mean_write_latency_ms=Interval(
- low=0.01, mid=0.15, high=0.3, confidence=0.98
+ low=0.01, mid=0.01, high=0.01, confidence=0.98
),
# Assume they're multi-getting -> slow reads
read_latency_slo_ms=FixedInterval(
@@ -466,7 +500,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
low=10, mid=100, high=1000, confidence=0.98
),
# (Arun): The management sidecar takes 512 MiB
- reserved_instance_app_mem_gib=0.5,
+ reserved_instance_app_mem_gib=1,
# account for the memcached connection memory
# and system requirements.
# (Arun) We currently use 1 GiB base for connection memory