diff --git a/service_capacity_modeling/models/common.py b/service_capacity_modeling/models/common.py
index 551fa47..1fe2014 100644
--- a/service_capacity_modeling/models/common.py
+++ b/service_capacity_modeling/models/common.py
@@ -57,7 +57,23 @@ def _sqrt_staffed_cores(rps: float, latency_s: float, qos: float) -> int:
 
 
 def sqrt_staffed_cores(desires: CapacityDesires) -> int:
-    """Computes cores given a sqrt staffing model"""
+    """Computes cores given a sqrt staffing model
+
+    Little's Law: Concurrency = Average Rate * Average Latency
+    For example: 0.1 average concurrency = 100 / second * 1 millisecond
+
+    However, if you provision for the average, statistically unlikely traffic
+    spikes will cause queueing, which creates _latency_.
+
+    Square root staffing says that to avoid that latency, instead of
+    provisioning the average number of cores, you provision:
+
+    Cores = (Rate * Latency) + (QoS * sqrt(Rate * Latency))
+    Cores = (Required cores) + (Safety margin)
+
+    Pick a higher QoS to reduce the probability of queueing. In our case we
+    pick it based on service tier.
+    """
     qos = _QOS(desires.service_tier)
     read_rps, read_lat = (
         desires.query_pattern.estimated_read_per_second.mid,
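For intuition, here is a quick worked example of the staffing formula described in the new docstring. This is a minimal standalone sketch; the helper name and the sample numbers are illustrative, not part of the model.

import math

def staffed_cores(rps: float, latency_s: float, qos: float) -> int:
    # Required cores per Little's Law, plus a QoS-scaled safety margin
    concurrency = rps * latency_s
    return math.ceil(concurrency + qos * math.sqrt(concurrency))

# 10,000 reads/second at 1 ms each keeps ~10 cores busy on average;
# with qos=3 the square root term adds ~10 more cores of headroom.
print(staffed_cores(10_000, 0.001, 3))  # -> 20
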
= ("spend", "mem") # (Arun): As of 2021 we are using ephemerals exclusively and do not # use cloud drives - if instance.drive is None: + if working_set is None or desires.data_shape.estimated_state_size_gib.mid < 110.0: # We can't currently store data on cloud drives, but we can put the # dataset into memory! needed_memory = float(needed_disk) @@ -81,13 +110,9 @@ def _estimate_evcache_requirement( needed_memory = float(working_set) * float(needed_disk) regrets = ("spend", "disk", "mem") - # Now convert to per zone - needed_cores = max(1, needed_cores // zones_per_region) - if needed_disk > 0: - needed_disk = max(1, needed_disk // zones_per_region) - else: - needed_disk = needed_disk // zones_per_region - needed_memory = max(1, int(needed_memory // zones_per_region)) + # For EVCache, writes go to all zones + # Regional reads can also go to any one zone due to app's zone affinity + needed_cores = max(1, needed_cores) logger.debug( "Need (cpu, mem, disk, working) = (%s, %s, %s, %f)", needed_cores, @@ -146,16 +171,20 @@ def _estimate_evcache_cluster_zonal( # working set to keep more or less data in RAM. Faster drives need # less fronting RAM. ws_drive = instance.drive or drive - working_set = working_set_from_drive_and_slo( - drive_read_latency_dist=dist_for_interval(ws_drive.read_io_latency_ms), - read_slo_latency_dist=dist_for_interval( - desires.query_pattern.read_latency_slo_ms - ), - estimated_working_set=desires.data_shape.estimated_working_set_percent, - # Caches have very tight latency SLOs, so we target a high - # percentile of the drive latency distribution for WS calculation - target_percentile=0.99, - ).mid + + if ws_drive: + working_set = working_set_from_drive_and_slo( + drive_read_latency_dist=dist_for_interval(ws_drive.read_io_latency_ms), + read_slo_latency_dist=dist_for_interval( + desires.query_pattern.read_latency_slo_ms + ), + estimated_working_set=desires.data_shape.estimated_working_set_percent, + # Caches have very tight latency SLOs, so we target a high + # percentile of the drive latency distribution for WS calculation + target_percentile=0.99, + ).mid + else: + working_set = None requirement, regrets = _estimate_evcache_requirement( instance=instance, @@ -182,7 +211,6 @@ def reserve_memory(instance_mem_gib): return base_mem + variable_os requirement.context["osmem"] = reserve_memory(instance.ram_gib) - # EVCache clusters aim to be at least 2 nodes per zone to start # out with for tier 0 min_count = 0 @@ -208,7 +236,6 @@ def reserve_memory(instance_mem_gib): reserve_memory=lambda x: base_mem, core_reference_ghz=requirement.core_reference_ghz, ) - # Communicate to the actual provision that if we want reduced RF params = {"evcache.copies": copies_per_region} _upsert_params(cluster, params) @@ -239,9 +266,11 @@ def reserve_memory(instance_mem_gib): ) ec2_cost = zones_per_region * cluster.annual_cost + spread_cost = calculate_spread_cost(cluster.count) # Account for the clusters and replication costs - evcache_costs = {"evcache.zonal-clusters": ec2_cost} + evcache_costs = {"evcache.zonal-clusters": ec2_cost, "evcache.spread.cost": spread_cost} + for s in services: evcache_costs[f"{s.service_type}"] = s.annual_cost @@ -354,6 +383,19 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]): f"User asked for {key}={value}" ) + estimated_read_size: Interval = Interval( + **user_desires.query_pattern.dict(exclude_unset=True).get( + "estimated_mean_read_size_bytes", + dict(low=16, mid=1024, high=65536, confidence=0.95), + ) + ) + estimated_read_latency_ms: 
@@ -354,6 +383,19 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
                 f"User asked for {key}={value}"
             )
 
+    estimated_read_size: Interval = Interval(
+        **user_desires.query_pattern.dict(exclude_unset=True).get(
+            "estimated_mean_read_size_bytes",
+            dict(low=16, mid=1024, high=65536, confidence=0.95),
+        )
+    )
+    estimated_read_latency_ms: Interval = Interval(
+        low=calculate_read_cpu_time_evcache_ms(estimated_read_size.low),
+        mid=calculate_read_cpu_time_evcache_ms(estimated_read_size.mid),
+        high=calculate_read_cpu_time_evcache_ms(estimated_read_size.high),
+        confidence=estimated_read_size.confidence,
+    )
+
     if user_desires.query_pattern.access_pattern == AccessPattern.latency:
         return CapacityDesires(
             query_pattern=QueryPattern(
@@ -367,20 +409,16 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
                         target_consistency=AccessConsistency.never
                     ),
                 ),
-                estimated_mean_read_size_bytes=Interval(
-                    low=128, mid=1024, high=65536, confidence=0.95
-                ),
+                estimated_mean_read_size_bytes=estimated_read_size,
                 estimated_mean_write_size_bytes=Interval(
                     low=64, mid=512, high=1024, confidence=0.95
                 ),
-                # memcache point queries usually take just around 100us
-                # of on CPU time for reads and writes. Memcache is very
-                # fast
-                estimated_mean_read_latency_ms=Interval(
-                    low=0.01, mid=0.1, high=0.2, confidence=0.98
-                ),
+                # evcache read latency is sensitive to payload size,
+                # so it is computed above from the read size
+                estimated_mean_read_latency_ms=estimated_read_latency_ms,
+                # evcache point writes take around 10 micros of CPU time
                 estimated_mean_write_latency_ms=Interval(
-                    low=0.01, mid=0.1, high=0.2, confidence=0.98
+                    low=0.01, mid=0.01, high=0.01, confidence=0.98
                 ),
                 # Assume point queries, "1 millisecond SLO"
                 read_latency_slo_ms=FixedInterval(
@@ -406,7 +444,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
                     low=10, mid=100, high=600, confidence=0.98
                 ),
                 # (Arun): The management sidecar takes 512 MiB
-                reserved_instance_app_mem_gib=0.5,
+                reserved_instance_app_mem_gib=1,
                 # account for the memcached connection memory
                 # and system requirements.
                 # (Arun) We currently use 1 GiB for connection memory
@@ -415,7 +453,6 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
         )
     else:
         return CapacityDesires(
-            # (FIXME): Need to pair with memcache folks on the exact values
             query_pattern=QueryPattern(
                 access_pattern=AccessPattern.throughput,
                 access_consistency=GlobalConsistency(
@@ -427,19 +464,16 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
                         target_consistency=AccessConsistency.never
                     ),
                 ),
-                estimated_mean_read_size_bytes=Interval(
-                    low=128, mid=1024, high=65536, confidence=0.95
-                ),
+                estimated_mean_read_size_bytes=estimated_read_size,
                 estimated_mean_write_size_bytes=Interval(
                     low=128, mid=1024, high=65536, confidence=0.95
                 ),
-                # evcache bulk reads usually take slightly longer
-                estimated_mean_read_latency_ms=Interval(
-                    low=0.01, mid=0.15, high=0.3, confidence=0.98
-                ),
+                # evcache read latency is sensitive to payload size,
+                # so it is computed above from the read size
+                estimated_mean_read_latency_ms=estimated_read_latency_ms,
                 # evcache bulk puts usually take slightly longer
                 estimated_mean_write_latency_ms=Interval(
-                    low=0.01, mid=0.15, high=0.3, confidence=0.98
+                    low=0.01, mid=0.01, high=0.01, confidence=0.98
                 ),
                 # Assume they're multi-getting -> slow reads
                 read_latency_slo_ms=FixedInterval(
@@ -466,7 +500,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
                     low=10, mid=100, high=1000, confidence=0.98
                 ),
                 # (Arun): The management sidecar takes 512 MiB
-                reserved_instance_app_mem_gib=0.5,
+                reserved_instance_app_mem_gib=1,
                 # account for the memcached connection memory
                 # and system requirements.
                 # (Arun) We currently use 1 GiB base for connection memory
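As a sanity check on the new size-aware read latency, here is a minimal sketch evaluating the helper at the payload sizes quoted in its comments. The "fit point" values come from those comments, not from new measurements, so the printed curve values only approximate them.

from service_capacity_modeling.models.org.netflix.evcache import (
    calculate_read_cpu_time_evcache_ms,
)

fit_points_us = {32: 10, 200: 41, 1024: 66, 24 * 1024: 133, 40 * 1024: 158}
for size_bytes, fitted_us in fit_points_us.items():
    millis = calculate_read_cpu_time_evcache_ms(size_bytes)
    print(f"{size_bytes}B -> {millis * 1000:.0f}us (fit point ~{fitted_us}us)")
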
diff --git a/tests/netflix/test_evcache.py b/tests/netflix/test_evcache.py
index bf3b688..966328d 100644
--- a/tests/netflix/test_evcache.py
+++ b/tests/netflix/test_evcache.py
@@ -1,294 +1,298 @@
 from service_capacity_modeling.capacity_planner import planner
-from service_capacity_modeling.interface import AccessPattern, certain_float
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import DataShape
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
-
-
-def test_evcache_high_qps():
-    qps = 100_000
-    high_qps = CapacityDesires(
+from service_capacity_modeling.models.org.netflix.evcache import (
+    calculate_read_cpu_time_evcache_ms,
+)
+
+
+def test_evcache_read_latency():
+    # 256 bits = 32 bytes -> ~10 micros
+    small = calculate_read_cpu_time_evcache_ms(32)
+    # 1600 bits = 200 bytes -> ~41 micros
+    medium = calculate_read_cpu_time_evcache_ms(200)
+    # 8192 bits = 1024 bytes -> ~66 micros
+    large = calculate_read_cpu_time_evcache_ms(1024)
+    # 24 KiB -> ~133 micros
+    very_large = calculate_read_cpu_time_evcache_ms(24 * 1024)
+    # 40 KiB -> ~158 micros
+    extra_large = calculate_read_cpu_time_evcache_ms(40 * 1024)
+
+    assert calculate_read_cpu_time_evcache_ms(1) > 0
+    assert 0.008 < small < 0.015
+    assert 0.030 < medium < 0.050
+    assert 0.060 < large < 0.080
+    assert 0.120 < very_large < 0.140
+    assert 0.140 < extra_large < 0.160
+
+
+def test_evcache_inmemory_low_latency_reads_cpu():
+    inmemory_cluster_low_latency_reads_qps = CapacityDesires(
         service_tier=1,
         query_pattern=QueryPattern(
             estimated_read_per_second=Interval(
-                low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
+                low=18300000, mid=34200000, high=34200000 * 1.2, confidence=1.0
             ),
             estimated_write_per_second=Interval(
-                low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
+                low=228000, mid=536000, high=536000 * 1.2, confidence=1.0
             ),
-            estimated_write_size_bytes=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
+            estimated_mean_write_size_bytes=Interval(
+                low=3778, mid=3778, high=3778 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=35, mid=35, high=35 * 1.2, confidence=1.0
             ),
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            ),
+            estimated_state_size_gib=Interval(low=36, mid=36, high=36, confidence=1.0),
             estimated_state_item_count=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
+                low=416000000, mid=804000000, high=804000000 * 1.2, confidence=1.0
             ),
         ),
     )
-    plan = planner.plan(
+
+    plan = planner.plan_certain(
         model_name="org.netflix.evcache",
         region="us-east-1",
-        desires=high_qps,
+        desires=inmemory_cluster_low_latency_reads_qps,
     )
-    assert len(plan.least_regret) >= 2
+    for candidate in plan:
+        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
+            candidate.candidate_clusters.zonal[0].instance.cpu * \
+            candidate.candidate_clusters.zonal[0].instance.cpu_ghz
 
-    lr = plan.least_regret[0]
-    # EVCache should regret having too little RAM, disk and spending too much
-    assert all(k in lr.requirements.regrets for k in ("spend", "mem", "disk"))
+        assert total_cpu_power > 1100
+
+def test_evcache_inmemory_medium_latency_reads_cpu():
+    inmemory_cluster_medium_latency_reads_qps = CapacityDesires(
+        service_tier=0,
+        query_pattern=QueryPattern(
+            estimated_read_per_second=Interval(
+                low=470000, mid=1800000, high=1800000 * 1.2, confidence=1.0
+            ),
+            estimated_write_per_second=Interval(
+                low=505000, mid=861000, high=861000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=365, mid=365, high=365 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=193, mid=193, high=193 * 1.2, confidence=1.0
+            ),
+        ),
+        data_shape=DataShape(
+            estimated_state_size_gib=Interval(low=61, mid=61, high=61, confidence=1.0),
+            estimated_state_item_count=Interval(
+                low=125000000, mid=202000000, high=202000000 * 1.2, confidence=1.0
+            ),
+        ),
+    )
 
-    # EVCache should be pretty cheap for 100k QPS
-    assert lr.candidate_clusters.annual_costs["evcache.zonal-clusters"] < 10000
-    # Without replication shouldn't have network costs
-    assert len(lr.candidate_clusters.annual_costs.keys()) == 1
+    plan = planner.plan_certain(
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=inmemory_cluster_medium_latency_reads_qps,
+    )
 
-    zc = lr.candidate_clusters.zonal[0]
+    for candidate in plan:
+        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
+            candidate.candidate_clusters.zonal[0].instance.cpu * \
+            candidate.candidate_clusters.zonal[0].instance.cpu_ghz
 
-    if zc.instance.drive is not None:
-        # If we end up with disk we want at least 100 GiB of disk per zone
-        assert zc.count * zc.instance.drive.size_gib > 100
-    else:
-        # If we end up with RAM we want at least 100 GiB of ram per zone
-        assert zc.count * zc.instance.ram_gib > 100
+        assert total_cpu_power > 400
 
 
-def test_evcache_large_data():
-    qps = 10_000
-    large_data = CapacityDesires(
-        service_tier=1,
+def test_evcache_inmemory_high_latency_reads_cpu():
+    inmemory_cluster_high_latency_reads_qps = CapacityDesires(
+        service_tier=0,
         query_pattern=QueryPattern(
             estimated_read_per_second=Interval(
-                low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
+                low=113000, mid=441000, high=441000 * 1.2, confidence=1.0
             ),
             estimated_write_per_second=Interval(
-                low=qps // 10, mid=qps, high=qps * 10, confidence=0.98
+                low=19000, mid=35000, high=35000 * 1.2, confidence=1.0
             ),
-            estimated_write_size_bytes=Interval(
-                low=1000, mid=5000, high=10_000, confidence=0.98
+            estimated_mean_write_size_bytes=Interval(
+                low=7250, mid=7250, high=7250 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=5100, mid=5100, high=5100 * 1.2, confidence=1.0
             ),
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=100, mid=5000, high=10_000, confidence=0.98
-            )
+            estimated_state_size_gib=Interval(low=1662, mid=1662, high=1662, confidence=1.0),
+            estimated_state_item_count=Interval(
+                low=750000000, mid=750000000, high=750000000 * 1.2, confidence=1.0
+            ),
         ),
     )
-    plan = planner.plan(
-        model_name="org.netflix.evcache",
-        region="us-east-1",
-        desires=large_data,
-    )
-
-    assert len(plan.least_regret) >= 1
-
-    lr = plan.least_regret[0]
-    # EVCache should regret having too little RAM, disk and spending too much
-    assert all(k in lr.requirements.regrets for k in ("spend", "mem", "disk"))
-
-    # EVCache should be somewhat expensive due to the large amount of data
-    assert lr.candidate_clusters.annual_costs["evcache.zonal-clusters"] > 10_000
-    # Without replication shouldn't have network costs
-    assert len(lr.candidate_clusters.annual_costs.keys()) == 1
 
-    zc = lr.candidate_clusters.zonal[0]
+    plan = planner.plan_certain(
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=inmemory_cluster_high_latency_reads_qps,
+    )
 
-    # For the sheer volume of data, it probably doesn't make sense for the least regretful cluster to not have disk.
-    assert zc.instance.drive is not None
+    for candidate in plan:
+        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
+            candidate.candidate_clusters.zonal[0].instance.cpu * \
+            candidate.candidate_clusters.zonal[0].instance.cpu_ghz
 
-    # We want at least 1 TiB of disk per zone
-    assert zc.count * zc.instance.drive.size_gib > 1000
+        assert total_cpu_power > 100
 
 
-def test_evcache_replication():
-    high_qps = CapacityDesires(
-        service_tier=1,
+def test_evcache_ondisk_low_latency_reads_cpu():
+    ondisk_cluster_low_latency_reads_qps = CapacityDesires(
+        service_tier=0,
         query_pattern=QueryPattern(
-            access_pattern=AccessPattern.latency,
             estimated_read_per_second=Interval(
-                low=10_000, mid=100_000, high=1_000_000, confidence=0.98
+                low=284, mid=7110000, high=7110000 * 1.2, confidence=1.0
            ),
             estimated_write_per_second=Interval(
-                low=10_000, mid=100_000, high=1_000_000, confidence=0.98
+                low=0, mid=2620000, high=2620000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=12000, mid=12000, high=12000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=16000, mid=16000, high=16000 * 1.2, confidence=1.0
             ),
         ),
-        # This should work out to around 200 GiB of state
         data_shape=DataShape(
+            estimated_state_size_gib=Interval(low=2306867, mid=2306867, high=2306867, confidence=1.0),
             estimated_state_item_count=Interval(
-                low=100_000_000, mid=1_000_000_000, high=10_000_000_000, confidence=0.98
-            )
+                low=132000000000, mid=132000000000, high=132000000000 * 1.2, confidence=1.0
+            ),
         ),
     )
-    plan = planner.plan(
-        model_name="org.netflix.evcache",
-        region="us-east-1",
-        desires=high_qps,
-        num_regions=3,
-        extra_model_arguments={"cross_region_replication": "sets"},
+
+    plan = planner.plan_certain(
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=ondisk_cluster_low_latency_reads_qps,
     )
-    assert len(plan.least_regret) >= 2
 
-    lr = plan.least_regret[0]
-    # EVCache should regret having too little RAM, disk and spending too much
-    assert all(k in lr.requirements.regrets for k in ("spend", "mem", "disk"))
-    assert lr.requirements.zonal[0].disk_gib.mid > 200
+    for candidate in plan:
+        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
+            candidate.candidate_clusters.zonal[0].instance.cpu * \
+            candidate.candidate_clusters.zonal[0].instance.cpu_ghz
 
-    # EVCache compute should be pretty cheap for 100k RPS with 10k WPS
-    assert lr.candidate_clusters.annual_costs["evcache.zonal-clusters"] < 10000
+        assert total_cpu_power > 8000
 
-    set_inter_region = lr.candidate_clusters.annual_costs["evcache.net.inter.region"]
-    # With replication should have network costs
-    assert 10000 < set_inter_region < 40000
-    assert (
-        50000 < lr.candidate_clusters.annual_costs["evcache.net.intra.region"] < 120000
+
+def test_evcache_ondisk_high_latency_reads_cpu():
+    ondisk_cluster_high_latency_reads_qps = CapacityDesires(
+        service_tier=0,
+        query_pattern=QueryPattern(
+            estimated_read_per_second=Interval(
+                low=312000, mid=853000, high=853000 * 1.2, confidence=1.0
+            ),
+            estimated_write_per_second=Interval(
+                low=0, mid=310000, high=310000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=34500, mid=34500, high=34500 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=41000, mid=41000, high=41000 * 1.2, confidence=1.0
+            ),
+        ),
+        data_shape=DataShape(
+            estimated_state_size_gib=Interval(low=281000, mid=281000, high=281000, confidence=1.0),
+            estimated_state_item_count=Interval(
+                low=8518318523, mid=8518318523, high=8518318523 * 1.2, confidence=1.0
+            ),
+        ),
     )
-    delete_plan = planner.plan(
-        model_name="org.netflix.evcache",
-        region="us-east-1",
-        desires=high_qps,
-        num_regions=3,
-        extra_model_arguments={
-            "cross_region_replication": "evicts",
-            "copies_per_region": 3,
-        },
+    plan = planner.plan_certain(
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=ondisk_cluster_high_latency_reads_qps,
     )
-    lr = delete_plan.least_regret[0]
+    for candidate in plan:
+        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
+            candidate.candidate_clusters.zonal[0].instance.cpu * \
+            candidate.candidate_clusters.zonal[0].instance.cpu_ghz
 
-    # Evicts should be cheaper than sets
-    evict_inter_region = lr.candidate_clusters.annual_costs["evcache.net.inter.region"]
-    assert evict_inter_region < set_inter_region
-
-    # With replication should have network costs
-    assert 5000 < evict_inter_region < 15000
-    assert (
-        12000 < lr.candidate_clusters.annual_costs["evcache.net.intra.region"] < 40000
-    )
+        assert total_cpu_power > 800
 
 
-def test_evcache_compare_working_sets():
-    small = CapacityDesires(
-        service_tier=2,
+def test_evcache_inmemory_ram_usage():
+    inmemory_qps = CapacityDesires(
+        service_tier=1,
         query_pattern=QueryPattern(
             estimated_read_per_second=Interval(
-                low=10_000, mid=100_000, high=1_000_000, confidence=0.98
+                low=18300000, mid=34200000, high=34200000 * 1.2, confidence=1.0
             ),
             estimated_write_per_second=Interval(
-                low=10_000, mid=100_000, high=1_000_000, confidence=0.98
+                low=228000, mid=536000, high=536000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=3778, mid=3778, high=3778 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=35, mid=35, high=35 * 1.2, confidence=1.0
             ),
-            estimated_write_size_bytes=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            )
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
+            estimated_state_size_gib=Interval(low=36, mid=36, high=36, confidence=1.0),
+            estimated_state_item_count=Interval(
+                low=416000000, mid=804000000, high=804000000 * 1.2, confidence=1.0
             ),
-            estimated_working_set_percent=certain_float(0.10)
         ),
     )
-    large = small.copy(deep=True)
-    large.data_shape.estimated_working_set_percent = certain_float(0.90)
 
-    plan_small = planner.plan(
+    plan = planner.plan_certain(
         model_name="org.netflix.evcache",
         region="us-east-1",
-        desires=small,
+        desires=inmemory_qps,
     )
-    plan_large = planner.plan(
-        model_name="org.netflix.evcache",
-        region="us-east-1",
-        desires=large,
-    )
-
-    assert len(plan_small.least_regret) >= 2
-    assert len(plan_large.least_regret) >= 2
-
-    lr_small = plan_small.least_regret[0]
-    lr_large = plan_large.least_regret[0]
-
-    # Only the plan whose desires contain the smaller working set percentage should care about disk.
-    assert all(k in lr_small.requirements.regrets for k in ("spend", "mem", "disk"))
-    assert all(k in lr_large.requirements.regrets for k in ("spend", "mem"))
-
-    # Smaller working set percentage should lead to fewer costs.
-    assert lr_small.candidate_clusters.annual_costs["evcache.zonal-clusters"] < \
-        lr_large.candidate_clusters.annual_costs["evcache.zonal-clusters"]
 
-    # The large difference in working set percentage should lead to a difference in RAM.
-    assert lr_small.candidate_clusters.zonal[0].instance.ram_gib < \
-        lr_large.candidate_clusters.zonal[0].instance.ram_gib
+    for candidate in plan:
+        total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \
+            candidate.candidate_clusters.zonal[0].count
 
-    # The small working set percentage should lead to picking an instance with both memory and disk.
-    assert lr_small.candidate_clusters.zonal[0].instance.drive is not None
-    assert lr_small.candidate_clusters.zonal[0].instance.ram_gib is not None
+        assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid
 
-    # The large working set percentage should lead to only memory (no disk).
-    assert lr_large.candidate_clusters.zonal[0].instance.drive is None
-    assert lr_large.candidate_clusters.zonal[0].instance.ram_gib is not None
 
-    # Without replication shouldn't have network costs
-    assert len(lr_small.candidate_clusters.annual_costs.keys()) == 1
-    assert len(lr_large.candidate_clusters.annual_costs.keys()) == 1
-
-
-def test_evcache_compare_tiers():
-    low = CapacityDesires(
-        service_tier=0,
+def test_evcache_ondisk_disk_usage():
+    inmemory_qps = CapacityDesires(
+        service_tier=1,
         query_pattern=QueryPattern(
             estimated_read_per_second=Interval(
-                low=10_000, mid=100_000, high=1_000_000, confidence=0.98
+                low=18300000, mid=34200000, high=34200000 * 1.2, confidence=1.0
             ),
             estimated_write_per_second=Interval(
-                low=10_000, mid=100_000, high=1_000_000, confidence=0.98
+                low=228000, mid=536000, high=536000 * 1.2, confidence=1.0
+            ),
+            estimated_mean_write_size_bytes=Interval(
+                low=3778, mid=3778, high=3778 * 1.2, confidence=1.0
+            ),
+            estimated_mean_read_size_bytes=Interval(
+                low=35, mid=35, high=35 * 1.2, confidence=1.0
             ),
-            estimated_write_size_bytes=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
-            )
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(
-                low=10, mid=100, high=1000, confidence=0.98
+            estimated_state_size_gib=Interval(low=36, mid=36, high=36, confidence=1.0),
+            estimated_state_item_count=Interval(
+                low=416000000, mid=804000000, high=804000000 * 1.2, confidence=1.0
             ),
         ),
     )
-    high = low.copy(deep=True)
-    high.service_tier = 3
 
-    plan_low = planner.plan(
-        model_name="org.netflix.evcache",
-        region="us-east-1",
-        desires=low,
-    )
-    plan_high = planner.plan(
+    plan = planner.plan_certain(
         model_name="org.netflix.evcache",
         region="us-east-1",
-        desires=high,
+        desires=inmemory_qps,
     )
 
-    assert len(plan_low.least_regret) >= 2
-    assert len(plan_high.least_regret) >= 2
-
-    lr_low = plan_low.least_regret[0]
-    lr_high = plan_high.least_regret[0]
-
-    # EVCache should regret having too little RAM, disk and spending too much
-    assert all(k in lr_low.requirements.regrets for k in ("spend", "mem", "disk"))
-    assert all(k in lr_high.requirements.regrets for k in ("spend", "mem", "disk"))
-
-    # Lower tier should lead to greater costs.
-    assert lr_low.candidate_clusters.annual_costs["evcache.zonal-clusters"] > \
-        lr_high.candidate_clusters.annual_costs["evcache.zonal-clusters"]
-
-    # Large difference in tiers should lead to different instance family types.
-    assert lr_low.candidate_clusters.zonal[0].instance.family != lr_high.candidate_clusters.zonal[0].instance.family
+    for candidate in plan:
+        total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \
+            candidate.candidate_clusters.zonal[0].count
 
-    # Without replication shouldn't have network costs
-    assert len(lr_low.candidate_clusters.annual_costs.keys()) == 1
-    assert len(lr_high.candidate_clusters.annual_costs.keys()) == 1
+        assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid
\ No newline at end of file
diff --git a/tests/netflix/test_key_value.py b/tests/netflix/test_key_value.py
index 2cbdd9a..fb90169 100644
--- a/tests/netflix/test_key_value.py
+++ b/tests/netflix/test_key_value.py
@@ -283,10 +283,10 @@ def test_kv_plus_evcache_rps_exceeding_250k():
     assert zlr_ev.instance.family[0] in ("r", "m", "i")
 
     # Validate EVCache cost for 300k RPS + 300k WPS
-    assert least_regret_clusters.annual_costs["evcache.zonal-clusters"] < 10000
+    assert least_regret_clusters.annual_costs["evcache.zonal-clusters"] < 30000
 
     # Costs for KV + C* + EVCache clusters, including networking for C*
-    assert len(least_regret_clusters.annual_costs.keys()) == 6
+    assert len(least_regret_clusters.annual_costs.keys()) == 7
 
 
 def test_kv_plus_evcache_rps_exceeding_100k_and_sufficient_read_write_ratio():
@@ -386,10 +386,10 @@ def test_kv_plus_evcache_rps_exceeding_100k_and_sufficient_read_write_ratio():
     assert zlr_ev.instance.family[0] in ("r", "m", "i")
 
     # Validate EVCache cost for 300k RPS + 300k WPS
-    assert least_regret_clusters.annual_costs["evcache.zonal-clusters"] < 10000
+    assert least_regret_clusters.annual_costs["evcache.zonal-clusters"] < 30000
 
     # Costs for KV + C* + EVCache clusters, including networking for C*
-    assert len(least_regret_clusters.annual_costs.keys()) == 6
+    assert len(least_regret_clusters.annual_costs.keys()) == 7
 
 
 def test_kv_rps_exceeding_100k_but_insufficient_read_write_ratio():
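The extra "evcache.spread.cost" entry introduced in evcache.py is also why the annual-cost key counts above move from 6 to 7. A minimal sketch of the penalty itself at the default max_cost/min_cost; the printed values follow directly from the formula in the diff:

from service_capacity_modeling.models.org.netflix.evcache import calculate_spread_cost

for size in (1, 2, 5, 10, 11):
    # Tiny clusters pay the full penalty; it shrinks with size and drops to
    # zero once a zonal cluster has more than 10 nodes
    print(size, round(calculate_spread_cost(size)))
# 1 100000, 2 93333, 5 83333, 10 66667, 11 0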