From 53ba575e12272452130cadbecbdf0d9df424a00b Mon Sep 17 00:00:00 2001
From: George Campbell <gscampbell@netflix.com>
Date: Thu, 19 Oct 2023 09:29:22 -0700
Subject: [PATCH] fix the pre-commit errors

---
 .github/workflows/python-build.yml            |   2 +
 .pre-commit-config.yaml                       |  49 +++--
 service_capacity_modeling/capacity_planner.py | 192 +++++++++++-------
 .../hardware/__init__.py                      |  11 +-
 service_capacity_modeling/interface.py        |   7 +-
 .../models/org/netflix/__init__.py            |   2 +-
 .../models/org/netflix/crdb.py                |  11 +-
 .../models/org/netflix/ddb.py                 |  26 +--
 .../models/org/netflix/entity.py              |   1 -
 .../models/org/netflix/evcache.py             |  28 +--
 .../models/org/netflix/key_value.py           |  41 +++-
 .../models/org/netflix/postgres.py            |  15 +-
 .../models/org/netflix/time_series_config.py  |   2 +-
 tests/netflix/test_elasticsearch.py           |  12 +-
 tests/netflix/test_evcache.py                 | 135 +++++++-----
 tests/netflix/test_postgres.py                |  24 +--
 tests/test_common.py                          |   4 +-
 tests/test_simulation.py                      |   2 +-
 tox.ini                                       |   8 +-
 19 files changed, 340 insertions(+), 232 deletions(-)

diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml
index 456e325..14ca73e 100644
--- a/.github/workflows/python-build.yml
+++ b/.github/workflows/python-build.yml
@@ -20,3 +20,5 @@ jobs:
       run: pip install tox
     - name: Run Tox (pydantic v1)
       run: tox -e py
+    - name: Run Tox (full pre-commit check)
+      run: tox -e pre-commit
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 90fe525..9ba6e2f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,34 +1,33 @@
 repos:
- -  repo: https://github.com/python/black
+  - repo: https://github.com/python/black
     rev: 22.3.0
     hooks:
-    - id: black
- -  repo: https://github.com/pre-commit/pre-commit-hooks
+      - id: black
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v2.4.0
     hooks:
-    - id: check-yaml
-    - id: check-json
-    - id: trailing-whitespace
-    - id: flake8
-    - id: check-merge-conflict
-    - id: detect-private-key
-    - id: end-of-file-fixer
-    - id: mixed-line-ending
-      args:
-        - --fix
-        - lf
- -  repo: https://github.com/asottile/reorder_python_imports
+      - id: check-yaml
+      - id: check-json
+      - id: trailing-whitespace
+      - id: flake8
+      - id: check-merge-conflict
+      - id: detect-private-key
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args:
+          - --fix
+          - lf
+  - repo: https://github.com/asottile/reorder_python_imports
     rev: v1.9.0
     hooks:
-    - id: reorder-python-imports
- -  repo: local
+      - id: reorder-python-imports
+  - repo: local
     hooks:
-    - id: pylint
-      name: pylint
-      entry: .tox/py39/bin/pylint
-      require_serial: true
-      language: system
-      types: [python]
-      verbose: true
-
+      - id: pylint
+        name: pylint
+        entry: .tox/pre-commit/bin/pylint
+        require_serial: true
+        language: system
+        types: [python]
+        verbose: true
 fail_fast: true
diff --git a/service_capacity_modeling/capacity_planner.py b/service_capacity_modeling/capacity_planner.py
index c0b3090..ce64ed3 100644
--- a/service_capacity_modeling/capacity_planner.py
+++ b/service_capacity_modeling/capacity_planner.py
@@ -2,8 +2,9 @@
 import functools
 import logging
 from hashlib import blake2b
-from typing import Any, cast
+from typing import Any
 from typing import Callable
+from typing import cast
 from typing import Dict
 from typing import Generator
 from typing import List
@@ -348,10 +349,11 @@ def _plan_percentiles(
         lifecycles = lifecycles or self._default_lifecycles
 
         model_mean_desires: Dict[str, CapacityDesires] = {}
-        model_percentile_desires: List[Dict[str, CapacityDesires]] = []
         sorted_percentiles = sorted(percentiles)
-        for percentile in sorted_percentiles:
+        model_percentile_desires: List[Dict[str, CapacityDesires]] = []
+        for _ in sorted_percentiles:
             model_percentile_desires.append({})
+
         for sub_model, sub_desires in self._sub_models(
             model_name=model_name,
             desires=desires,
@@ -366,26 +368,42 @@ def _plan_percentiles(
                 model_percentile_desires[index][sub_model] = percentile_input
                 index = index + 1
 
-        mean_plans = []
-        for mean_sub_model, mean_sub_desire in model_mean_desires.items():
-            mean_sub_plan = self._plan_certain(
-                    model_name=mean_sub_model,
-                    region=region,
-                    desires=mean_sub_desire,
-                    num_results=num_results,
-                    num_regions=num_regions,
-                    extra_model_arguments=extra_model_arguments,
-                    lifecycles=lifecycles,
-                    instance_families=instance_families,
-                    drives=drives,
-                )
-            if mean_sub_plan:
-                mean_plans.append(mean_sub_plan)
-
-        mean_plan = cast(
-            Sequence[CapacityPlan],
-            [functools.reduce(merge_plan, composed) for composed in zip(*mean_plans)],
+        mean_plan = self._mean_plan(
+            drives,
+            extra_model_arguments,
+            instance_families,
+            lifecycles,
+            num_regions,
+            num_results,
+            region,
+            model_mean_desires,
+        )
+        percentile_plans = self._group_plans_by_percentile(
+            drives,
+            extra_model_arguments,
+            instance_families,
+            lifecycles,
+            num_regions,
+            num_results,
+            region,
+            model_percentile_desires,
+            sorted_percentiles,
         )
+
+        return mean_plan, percentile_plans
+
+    def _group_plans_by_percentile(
+        self,
+        drives,
+        extra_model_arguments,
+        instance_families,
+        lifecycles,
+        num_regions,
+        num_results,
+        region,
+        model_percentile_desires,
+        sorted_percentiles,
+    ):
         percentile_plans = {}
         for index, percentile in enumerate(sorted_percentiles):
             percentile_plan = []
@@ -393,16 +411,16 @@ def _plan_percentiles(
                 index
             ].items():
                 percentile_sub_plan = self._plan_certain(
-                        model_name=percentile_sub_model,
-                        region=region,
-                        desires=percentile_sub_desire,
-                        num_results=num_results,
-                        num_regions=num_regions,
-                        extra_model_arguments=extra_model_arguments,
-                        lifecycles=lifecycles,
-                        instance_families=instance_families,
-                        drives=drives,
-                    )
+                    model_name=percentile_sub_model,
+                    region=region,
+                    desires=percentile_sub_desire,
+                    num_results=num_results,
+                    num_regions=num_regions,
+                    extra_model_arguments=extra_model_arguments,
+                    lifecycles=lifecycles,
+                    instance_families=instance_families,
+                    drives=drives,
+                )
                 if percentile_sub_plan:
                     percentile_plan.append(percentile_sub_plan)
 
@@ -413,8 +431,39 @@ def _plan_percentiles(
                     for composed in zip(*percentile_plan)
                 ],
             )
+        return percentile_plans
 
-        return mean_plan, percentile_plans
+    def _mean_plan(
+        self,
+        drives,
+        extra_model_arguments,
+        instance_families,
+        lifecycles,
+        num_regions,
+        num_results,
+        region,
+        model_mean_desires,
+    ):
+        mean_plans = []
+        for mean_sub_model, mean_sub_desire in model_mean_desires.items():
+            mean_sub_plan = self._plan_certain(
+                model_name=mean_sub_model,
+                region=region,
+                desires=mean_sub_desire,
+                num_results=num_results,
+                num_regions=num_regions,
+                extra_model_arguments=extra_model_arguments,
+                lifecycles=lifecycles,
+                instance_families=instance_families,
+                drives=drives,
+            )
+            if mean_sub_plan:
+                mean_plans.append(mean_sub_plan)
+        mean_plan = cast(
+            Sequence[CapacityPlan],
+            [functools.reduce(merge_plan, composed) for composed in zip(*mean_plans)],
+        )
+        return mean_plan
 
     def plan_certain(
         self,
@@ -445,16 +494,16 @@ def plan_certain(
             extra_model_arguments=extra_model_arguments,
         ):
             sub_plan = self._plan_certain(
-                    model_name=sub_model,
-                    region=region,
-                    desires=sub_desires,
-                    num_results=num_results,
-                    num_regions=num_regions,
-                    extra_model_arguments=extra_model_arguments,
-                    lifecycles=lifecycles,
-                    instance_families=instance_families,
-                    drives=drives,
-                )
+                model_name=sub_model,
+                region=region,
+                desires=sub_desires,
+                num_results=num_results,
+                num_regions=num_regions,
+                extra_model_arguments=extra_model_arguments,
+                lifecycles=lifecycles,
+                instance_families=instance_families,
+                drives=drives,
+            )
             if sub_plan:
                 results.append(sub_plan)
 
@@ -473,12 +522,36 @@ def _plan_certain(
         extra_model_arguments: Optional[Dict[str, Any]] = None,
     ) -> Sequence[CapacityPlan]:
         extra_model_arguments = extra_model_arguments or {}
+        model = self._models[model_name]
+
+        plans = []
+        for instance, drive, context in self.generate_scenarios(
+            model, region, desires, num_regions, lifecycles, instance_families, drives
+        ):
+            plan = model.capacity_plan(
+                instance=instance,
+                drive=drive,
+                context=context,
+                desires=desires,
+                extra_model_arguments=extra_model_arguments,
+            )
+            if plan is not None:
+                plans.append(plan)
+
+        # lowest cost first
+        plans.sort(key=lambda p: (p.rank, p.candidate_clusters.total_annual_cost))
+
+        num_results = num_results or self._default_num_results
+        return reduce_by_family(plans)[:num_results]
+
+    def generate_scenarios(
+        self, model, region, desires, num_regions, lifecycles, instance_families, drives
+    ):
         lifecycles = lifecycles or self._default_lifecycles
         instance_families = instance_families or []
         drives = drives or []
 
         hardware = self._shapes.region(region)
-        num_results = num_results or self._default_num_results
 
         context = RegionContext(
             zones_in_region=hardware.zones_in_region,
@@ -492,7 +565,6 @@ def _plan_certain(
             desires.data_shape.reserved_instance_app_mem_gib
             + desires.data_shape.reserved_instance_system_mem_gib
         )
-        model = self._models[model_name]
         allowed_platforms: Set[Platform] = set(model.allowed_platforms())
         allowed_drives: Set[str] = set(drives or [])
         for drive_name in model.allowed_cloud_drives():
@@ -503,7 +575,6 @@ def _plan_certain(
         if len(allowed_drives) == 0:
             allowed_drives.update(hardware.drives.keys())
 
-        plans = []
         if model.run_hardware_simulation():
             for instance in hardware.instances.values():
                 if not _allow_instance(
@@ -518,32 +589,11 @@ def _plan_certain(
                     if not _allow_drive(drive, drives, lifecycles, allowed_drives):
                         continue
 
-                    plan = model.capacity_plan(
-                        instance=instance,
-                        drive=drive,
-                        context=context,
-                        desires=desires,
-                        extra_model_arguments=extra_model_arguments,
-                    )
-                    if plan is not None:
-                        plans.append(plan)
+                    yield instance, drive, context
         else:
-            plan = model.capacity_plan(
-                instance=Instance.get_managed_instance(),
-                drive=Drive.get_managed_drive(),
-                context=context,
-                desires=desires,
-                extra_model_arguments=extra_model_arguments,
-            )
-            if plan is not None:
-                plans.append(plan)
-
-        # lowest cost first
-        plans.sort(
-            key=lambda plan: (plan.rank, plan.candidate_clusters.total_annual_cost)
-        )
-
-        return reduce_by_family(plans)[:num_results]
+            instance = Instance.get_managed_instance()
+            drive = Drive.get_managed_drive()
+            yield instance, drive, context
 
     # pylint: disable-msg=too-many-locals
     def plan(
diff --git a/service_capacity_modeling/hardware/__init__.py b/service_capacity_modeling/hardware/__init__.py
index 2092e15..af4d36e 100644
--- a/service_capacity_modeling/hardware/__init__.py
+++ b/service_capacity_modeling/hardware/__init__.py
@@ -1,4 +1,5 @@
-# -*- coding: utf-8 -*-
+# pylint: disable=cyclic-import
+# in HardwareShapes.hardware it imports from hardware.profiles dynamically
 import json
 import logging
 import os
@@ -56,9 +57,7 @@ def price_hardware(hardware: Hardware, pricing: Pricing) -> GlobalHardware:
             priced_services[
                 svc
             ].annual_cost_per_write_io = svc_price.annual_cost_per_write_io
-            priced_services[
-                svc
-            ].annual_cost_per_core = svc_price.annual_cost_per_core
+            priced_services[svc].annual_cost_per_core = svc_price.annual_cost_per_core
 
         regions[region] = Hardware(
             instances=priced_instances,
@@ -75,10 +74,10 @@ def load_hardware_from_disk(
     shape_path=os.environ.get("HARDWARE_SHAPES"),
 ) -> GlobalHardware:
     if price_path is not None and shape_path is not None:
-        with open(price_path) as pfd:
+        with open(price_path, encoding="utf-8") as pfd:
             pricing = load_pricing(json.load(pfd))
 
-        with open(shape_path) as sfd:
+        with open(shape_path, encoding="utf-8") as sfd:
             hardware = load_hardware(json.load(sfd))
 
         return price_hardware(hardware=hardware, pricing=pricing)
diff --git a/service_capacity_modeling/interface.py b/service_capacity_modeling/interface.py
index 9a065f8..81c56bb 100644
--- a/service_capacity_modeling/interface.py
+++ b/service_capacity_modeling/interface.py
@@ -5,13 +5,14 @@
 from decimal import Decimal
 from enum import Enum
 from functools import lru_cache
-from typing import Any, Union
+from typing import Any
 from typing import cast
 from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Sequence
 from typing import Tuple
+from typing import Union
 
 import numpy as np
 from pydantic import BaseModel
@@ -369,10 +370,10 @@ def annual_cost_gib(self, data_gib: float = 0):
             return self.annual_cost_per_gib * data_gib
         else:
             _annual_data = data_gib
-            transfer_costs = self.annual_cost_per_gib
+            transfer_costs = list(self.annual_cost_per_gib)
             annual_cost = 0.0
             for transfer_cost in transfer_costs:
-                if not _annual_data > 0:
+                if _annual_data <= 0:
                     break
                 if transfer_cost[0] > 0:
                     annual_cost += (
diff --git a/service_capacity_modeling/models/org/netflix/__init__.py b/service_capacity_modeling/models/org/netflix/__init__.py
index 4d9b873..ce207c0 100644
--- a/service_capacity_modeling/models/org/netflix/__init__.py
+++ b/service_capacity_modeling/models/org/netflix/__init__.py
@@ -8,13 +8,13 @@
 from .elasticsearch import nflx_elasticsearch_master_capacity_model
 from .entity import nflx_entity_capacity_model
 from .evcache import nflx_evcache_capacity_model
+from .kafka import nflx_kafka_capacity_model
 from .key_value import nflx_key_value_capacity_model
 from .postgres import nflx_postgres_capacity_model
 from .rds import nflx_rds_capacity_model
 from .stateless_java import nflx_java_app_capacity_model
 from .time_series import nflx_time_series_capacity_model
 from .zookeeper import nflx_zookeeper_capacity_model
-from .kafka import nflx_kafka_capacity_model
 
 
 def models():
diff --git a/service_capacity_modeling/models/org/netflix/crdb.py b/service_capacity_modeling/models/org/netflix/crdb.py
index 4e81cc2..cf9b727 100644
--- a/service_capacity_modeling/models/org/netflix/crdb.py
+++ b/service_capacity_modeling/models/org/netflix/crdb.py
@@ -283,9 +283,14 @@ def capacity_plan(
         max_rps_to_disk: int = extra_model_arguments.get("max_rps_to_disk", 500)
         # Very large nodes are hard to recover
         max_local_disk_gib: int = extra_model_arguments.get("max_local_disk_gib", 2048)
-        # Cockroach Labs recommends a minimum of 8 vCPUs and strongly recommends no fewer than 4 vCPUs per node.
-        min_vcpu_per_instance: int = extra_model_arguments.get("min_vcpu_per_instance", 4)
-        license_fee_per_core: float = context.services["crdb_core_license"].annual_cost_per_core
+        # Cockroach Labs recommends a minimum of 8 vCPUs and strongly
+        # recommends no fewer than 4 vCPUs per node.
+        min_vcpu_per_instance: int = extra_model_arguments.get(
+            "min_vcpu_per_instance", 4
+        )
+        license_fee_per_core: float = context.services[
+            "crdb_core_license"
+        ].annual_cost_per_core
 
         return _estimate_cockroachdb_cluster_zonal(
             instance=instance,
diff --git a/service_capacity_modeling/models/org/netflix/ddb.py b/service_capacity_modeling/models/org/netflix/ddb.py
index 35b4a26..6a7ecf1 100644
--- a/service_capacity_modeling/models/org/netflix/ddb.py
+++ b/service_capacity_modeling/models/org/netflix/ddb.py
@@ -6,11 +6,12 @@
 from pydantic import BaseModel
 from pydantic import Field
 
-from service_capacity_modeling.interface import AccessConsistency, certain_float
+from service_capacity_modeling.interface import AccessConsistency
 from service_capacity_modeling.interface import AccessPattern
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import CapacityPlan
 from service_capacity_modeling.interface import CapacityRequirement
+from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import certain_int
 from service_capacity_modeling.interface import Clusters
 from service_capacity_modeling.interface import Consistency
@@ -142,9 +143,9 @@ def _get_read_consistency_percentages(
             transactional_read_percent = 1.0
             eventual_read_percent = 0.0
             strong_read_percent = 0.0
-        elif (
-            access_consistency == AccessConsistency.read_your_writes
-            or access_consistency == AccessConsistency.linearizable
+        elif access_consistency in (
+            AccessConsistency.read_your_writes,
+            AccessConsistency.linearizable,
         ):
             transactional_read_percent = 0.0
             eventual_read_percent = 0.0
@@ -156,9 +157,10 @@ def _get_read_consistency_percentages(
     total_percent = (
         eventual_read_percent + transactional_read_percent + strong_read_percent
     )
-    assert (
-        total_percent == 1
-    ), "eventual_read_percent, transactional_read_percent, strong_read_percent should sum to 1"
+    assert total_percent == 1, (
+        "eventual_read_percent, transactional_read_percent, strong_read_percent"
+        " should sum to 1"
+    )
     return {
         "transactional_read_percent": transactional_read_percent,
         "eventual_read_percent": eventual_read_percent,
@@ -433,9 +435,9 @@ def capacity_plan(
         target_max_annual_cost: float = extra_model_arguments.get(
             "target_max_annual_cost", 0
         )
-        target_utilization_percentage = 0.80
+        target_util_percentage = 0.80
         if desires.service_tier in _TIER_TARGET_UTILIZATION_MAPPING:
-            target_utilization_percentage = _TIER_TARGET_UTILIZATION_MAPPING[
+            target_util_percentage = _TIER_TARGET_UTILIZATION_MAPPING[
                 desires.service_tier
             ]
 
@@ -443,7 +445,7 @@ def capacity_plan(
             "read_capacity_units": read_plan.read_capacity_units,
             "write_capacity_units": write_plan.write_capacity_units,
             "data_transfer_gib": data_transfer_plan.total_data_transfer_gib,
-            "target_utilization_percentage": target_utilization_percentage,
+            "target_utilization_percentage": target_util_percentage,
         }
         requirement_context[
             "replicated_write_capacity_units"
@@ -506,7 +508,7 @@ def capacity_plan(
                         "auto_scale": {
                             "min": 1,
                             "max": max_read_capacity_units,
-                            "target_utilization_percentage": target_utilization_percentage,
+                            "target_utilization_percentage": target_util_percentage,
                         },
                     },
                     "write_capacity_units": {
@@ -517,7 +519,7 @@ def capacity_plan(
                         "auto_scale": {
                             "min": 1,
                             "max": max_write_capacity_units,
-                            "target_utilization_percentage": target_utilization_percentage,
+                            "target_utilization_percentage": target_util_percentage,
                         },
                     },
                 },
diff --git a/service_capacity_modeling/models/org/netflix/entity.py b/service_capacity_modeling/models/org/netflix/entity.py
index b52720f..3561167 100644
--- a/service_capacity_modeling/models/org/netflix/entity.py
+++ b/service_capacity_modeling/models/org/netflix/entity.py
@@ -2,7 +2,6 @@
 from typing import Callable
 from typing import Dict
 from typing import Optional
-from typing import Sequence
 from typing import Tuple
 
 from .stateless_java import nflx_java_app_capacity_model
diff --git a/service_capacity_modeling/models/org/netflix/evcache.py b/service_capacity_modeling/models/org/netflix/evcache.py
index a76c743..41b3bbf 100644
--- a/service_capacity_modeling/models/org/netflix/evcache.py
+++ b/service_capacity_modeling/models/org/netflix/evcache.py
@@ -57,10 +57,12 @@ def calculate_read_cpu_time_evcache_ms(read_size_bytes: float) -> float:
     # 40   KiB - 158 top of our curve
     # Fit a logistic curve, requiring it to go through first
     # point
-    read_latency_ms = \
-        979.4009 + (-0.06853492 - 979.4009)/math.pow((1 + math.pow(read_size_bytes/13061.23, 0.180864)), 0.0002819491)
+    read_latency_ms = 979.4009 + (-0.06853492 - 979.4009) / math.pow(
+        (1 + math.pow(read_size_bytes / 13061.23, 0.180864)), 0.0002819491
+    )
     return max(read_latency_ms, 0.005)
 
+
 def calculate_spread_cost(cluster_size: int, max_cost=100000, min_cost=0.0) -> float:
     if cluster_size > 10:
         return min_cost
@@ -70,11 +72,9 @@ def calculate_spread_cost(cluster_size: int, max_cost=100000, min_cost=0.0) -> f
 
 
 def _estimate_evcache_requirement(
-    instance: Instance,
     desires: CapacityDesires,
     working_set: Optional[float],
     copies_per_region: int,
-    zones_per_region: int = 3,
 ) -> Tuple[CapacityRequirement, Tuple[str, ...]]:
     """Estimate the capacity required for one zone given a regional desire
 
@@ -88,7 +88,7 @@ def _estimate_evcache_requirement(
     # For tier 0, we double the number of cores to account for caution
     if desires.service_tier == 0:
         needed_cores = needed_cores * 2
- 
+
     # (Arun): Keep 20% of available bandwidth for cache warmer
     needed_network_mbps = simple_network_mbps(desires) * 1.25
 
@@ -108,11 +108,12 @@ def _estimate_evcache_requirement(
         if desires.query_pattern.estimated_mean_read_size_bytes.mid > 200.0:
             payload_greater_than_classic = True
 
-
-
     # (Arun): As of 2021 we are using ephemerals exclusively and do not
     # use cloud drives
-    if working_set is None or (desires.data_shape.estimated_state_size_gib.mid < 110.0 and payload_greater_than_classic):
+    if working_set is None or (
+        desires.data_shape.estimated_state_size_gib.mid < 110.0
+        and payload_greater_than_classic
+    ):
         # We can't currently store data on cloud drives, but we can put the
         # dataset into memory!
         needed_memory = float(needed_disk)
@@ -159,7 +160,7 @@ def _upsert_params(cluster, params):
 
 
 # pylint: disable=too-many-locals
-def _estimate_evcache_cluster_zonal(
+def _estimate_evcache_cluster_zonal(  # noqa: C901
     instance: Instance,
     drive: Drive,
     desires: CapacityDesires,
@@ -203,10 +204,8 @@ def _estimate_evcache_cluster_zonal(
         working_set = None
 
     requirement, regrets = _estimate_evcache_requirement(
-        instance=instance,
         desires=desires,
         working_set=working_set,
-        zones_per_region=zones_per_region,
         copies_per_region=copies_per_region,
     )
 
@@ -283,7 +282,10 @@ def reserve_memory(instance_mem_gib):
     spread_cost = calculate_spread_cost(cluster.count)
 
     # Account for the clusters and replication costs
-    evcache_costs = {"evcache.zonal-clusters": ec2_cost, "evcache.spread.cost": spread_cost}
+    evcache_costs = {
+        "evcache.zonal-clusters": ec2_cost,
+        "evcache.spread.cost": spread_cost,
+    }
 
     for s in services:
         evcache_costs[f"{s.service_type}"] = s.annual_cost
@@ -402,7 +404,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]):
                 "estimated_mean_read_size_bytes",
                 user_desires.query_pattern.dict(exclude_unset=True).get(
                     "estimated_mean_write_size_bytes",
-                    dict(low=16, mid=1024, high=65536, confidence=0.95)
+                    {"low": 16, "mid": 1024, "high": 65536, "confidence": 0.95},
                 ),
             )
         )
diff --git a/service_capacity_modeling/models/org/netflix/key_value.py b/service_capacity_modeling/models/org/netflix/key_value.py
index dda1775..dfb09ac 100644
--- a/service_capacity_modeling/models/org/netflix/key_value.py
+++ b/service_capacity_modeling/models/org/netflix/key_value.py
@@ -60,7 +60,9 @@ def compose_with(
         user_desires: CapacityDesires, extra_model_arguments: Dict[str, Any]
     ) -> Tuple[Tuple[str, Callable[[CapacityDesires], CapacityDesires]], ...]:
         query_pattern = user_desires.query_pattern
-        target_consistency = query_pattern.access_consistency.same_region.target_consistency
+        target_consistency = (
+            query_pattern.access_consistency.same_region.target_consistency
+        )
         rps_interval = query_pattern.estimated_read_per_second
         rps: float = rps_interval.mid
         wps: float = query_pattern.estimated_write_per_second.mid
@@ -68,28 +70,45 @@ def compose_with(
 
         # Parameterizing this in case we want to configure it to something else later.
         # The read/write ratio should be relatively high to make EVCache effective.
-        evcache_rw_ratio_threshold: float = extra_model_arguments.get("kv_evcache_read_write_ratio_threshold", 0.9)
-        use_evcache = target_consistency in (AccessConsistency.eventual, AccessConsistency.best_effort) and \
-            (rps > 250_000 or (rps > 100_000 and read_write_ratio > evcache_rw_ratio_threshold))
+        evcache_rw_ratio_threshold: float = extra_model_arguments.get(
+            "kv_evcache_read_write_ratio_threshold", 0.9
+        )
+        use_evcache = target_consistency in (
+            AccessConsistency.eventual,
+            AccessConsistency.best_effort,
+        ) and (
+            rps > 250_000
+            or (rps > 100_000 and read_write_ratio > evcache_rw_ratio_threshold)
+        )
 
         if use_evcache:
+
             def _modify_cassandra_desires(
                 desires: CapacityDesires,
             ) -> CapacityDesires:
                 relaxed = desires.copy(deep=True)
 
-                # This is an initial best guess. Parameterizing in case we want to configure it in the future.
-                estimated_kv_cache_hit_rate: float = extra_model_arguments.get("estimated_kv_cache_hit_rate", 0.8)
-
-                # Scale down the Cassandra estimated rps since those reads will be serviced by EVCache.
-                relaxed.query_pattern.estimated_read_per_second = rps_interval.scale(1 - estimated_kv_cache_hit_rate)
+                # This is an initial best guess. Parameterizing in case we want to
+                # configure it in the future.
+                estimated_kv_cache_hit_rate: float = extra_model_arguments.get(
+                    "estimated_kv_cache_hit_rate", 0.8
+                )
+
+                # Scale down the Cassandra estimated rps since those reads will be
+                # serviced by EVCache.
+                relaxed.query_pattern.estimated_read_per_second = rps_interval.scale(
+                    1 - estimated_kv_cache_hit_rate
+                )
                 return relaxed
 
             def _modify_evcache_desires(
                 desires: CapacityDesires,
             ) -> CapacityDesires:
                 relaxed = desires.copy(deep=True)
-                relaxed.query_pattern.access_consistency.same_region.target_consistency = AccessConsistency.best_effort
+                access_consistency = relaxed.query_pattern.access_consistency
+                access_consistency.same_region.target_consistency = (
+                    AccessConsistency.best_effort
+                )
                 return relaxed
 
             return (
@@ -97,7 +116,7 @@ def _modify_evcache_desires(
                 ("org.netflix.evcache", _modify_evcache_desires),
             )
         else:
-            return ("org.netflix.cassandra", lambda x: x),
+            return (("org.netflix.cassandra", lambda x: x),)
 
     @staticmethod
     def default_desires(user_desires, extra_model_arguments):
diff --git a/service_capacity_modeling/models/org/netflix/postgres.py b/service_capacity_modeling/models/org/netflix/postgres.py
index 153ec99..6e80665 100644
--- a/service_capacity_modeling/models/org/netflix/postgres.py
+++ b/service_capacity_modeling/models/org/netflix/postgres.py
@@ -3,7 +3,9 @@
 from typing import Optional
 from typing import Tuple
 
-from service_capacity_modeling.interface import AccessConsistency, Platform
+from .aurora import nflx_aurora_capacity_model
+from .crdb import nflx_cockroachdb_capacity_model
+from service_capacity_modeling.interface import AccessConsistency
 from service_capacity_modeling.interface import AccessPattern
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import CapacityPlan
@@ -14,10 +16,10 @@
 from service_capacity_modeling.interface import GlobalConsistency
 from service_capacity_modeling.interface import Instance
 from service_capacity_modeling.interface import Interval
+from service_capacity_modeling.interface import Platform
 from service_capacity_modeling.interface import QueryPattern
 from service_capacity_modeling.interface import RegionContext
 from service_capacity_modeling.models import CapacityModel
-from . import nflx_aurora_capacity_model, nflx_cockroachdb_capacity_model
 
 
 class NflxPostgresCapacityModel(CapacityModel):
@@ -46,7 +48,9 @@ def capacity_plan(
         if plan is not None:
             return plan
 
-        if set(nflx_cockroachdb_capacity_model.allowed_platforms()).intersection(instance.platforms):
+        if set(nflx_cockroachdb_capacity_model.allowed_platforms()).intersection(
+            instance.platforms
+        ):
             plan = nflx_cockroachdb_capacity_model.capacity_plan(
                 instance=instance,
                 drive=drive,
@@ -55,7 +59,8 @@ def capacity_plan(
                 extra_model_arguments=extra_model_arguments,
             )
         if plan is not None:
-            # We want to lower the rank so this plan will only be chosen when no other workaround
+            # We want to lower the rank so this plan will only be chosen when no other
+            # workaround
             plan.rank = 1
 
         return plan
@@ -88,14 +93,12 @@ def default_desires(user_desires, extra_model_arguments):
                 estimated_mean_write_size_bytes=Interval(
                     low=64, mid=512, high=2048, confidence=0.90
                 ),
-
                 estimated_mean_read_latency_ms=Interval(
                     low=1, mid=4, high=100, confidence=0.90
                 ),
                 estimated_mean_write_latency_ms=Interval(
                     low=1, mid=6, high=200, confidence=0.90
                 ),
-
                 read_latency_slo_ms=FixedInterval(
                     minimum_value=1,
                     maximum_value=100,
diff --git a/service_capacity_modeling/models/org/netflix/time_series_config.py b/service_capacity_modeling/models/org/netflix/time_series_config.py
index 65776a2..29e2317 100644
--- a/service_capacity_modeling/models/org/netflix/time_series_config.py
+++ b/service_capacity_modeling/models/org/netflix/time_series_config.py
@@ -6,8 +6,8 @@
 from service_capacity_modeling.models.org.netflix.iso_date_math import (
     _iso_to_proto_duration,
 )
-from service_capacity_modeling.models.org.netflix.iso_date_math import iso_to_seconds
 from service_capacity_modeling.models.org.netflix.iso_date_math import _iso_to_timedelta
+from service_capacity_modeling.models.org.netflix.iso_date_math import iso_to_seconds
 
 DURATION_1H = timedelta(hours=1)
 DURATION_4H = timedelta(hours=4)
diff --git a/tests/netflix/test_elasticsearch.py b/tests/netflix/test_elasticsearch.py
index 3a35f1f..5a3634a 100644
--- a/tests/netflix/test_elasticsearch.py
+++ b/tests/netflix/test_elasticsearch.py
@@ -88,13 +88,13 @@ def test_es_simple_mean_percentiles():
     )
 
     assert len(cap_plan.mean) > 0, "mean is empty"
-    assert (
-        all(mean_plan for mean_plan in cap_plan.mean)
+    assert all(
+        mean_plan for mean_plan in cap_plan.mean
     ), "One or more mean plans are empty"
 
     assert len(cap_plan.percentiles) > 0, "percentiles are empty"
-    assert (
-        all(percentile_plan for percentile_plan in cap_plan.percentiles.values())
+    assert all(
+        percentile_plan for percentile_plan in cap_plan.percentiles.values()
     ), "One or more percentile plans are empty"
 
 
@@ -123,9 +123,7 @@ def test_es_simple_certain():
     )
 
     assert len(cap_plan) > 0, "Resulting cap_plan is empty"
-    assert (
-        all(plan for plan in cap_plan)
-    ), "One or more plans is empty"
+    assert all(plan for plan in cap_plan), "One or more plans is empty"
 
 
 def zonal_summary(zlr):
diff --git a/tests/netflix/test_evcache.py b/tests/netflix/test_evcache.py
index cf1d8b9..e8f46a8 100644
--- a/tests/netflix/test_evcache.py
+++ b/tests/netflix/test_evcache.py
@@ -60,12 +60,15 @@ def test_evcache_inmemory_low_latency_reads_cpu():
     )
 
     for candidate in plan:
-        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        total_cpu_power = (
+            candidate.candidate_clusters.zonal[0].count
+            * candidate.candidate_clusters.zonal[0].instance.cpu
+            * candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        )
 
         assert total_cpu_power > 1100
 
+
 def test_evcache_inmemory_medium_latency_reads_cpu():
     inmemory_cluster_medium_latency_reads_qps = CapacityDesires(
         service_tier=0,
@@ -92,15 +95,17 @@ def test_evcache_inmemory_medium_latency_reads_cpu():
     )
 
     plan = planner.plan_certain(
-       model_name="org.netflix.evcache",
-       region="us-east-1",
-       desires=inmemory_cluster_medium_latency_reads_qps,
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=inmemory_cluster_medium_latency_reads_qps,
     )
 
     for candidate in plan:
-        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        total_cpu_power = (
+            candidate.candidate_clusters.zonal[0].count
+            * candidate.candidate_clusters.zonal[0].instance.cpu
+            * candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        )
 
         assert total_cpu_power > 400
 
@@ -123,7 +128,9 @@ def test_evcache_inmemory_high_latency_reads_cpu():
             ),
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(low=1662, mid=1662, high=1662, confidence=1.0),
+            estimated_state_size_gib=Interval(
+                low=1662, mid=1662, high=1662, confidence=1.0
+            ),
             estimated_state_item_count=Interval(
                 low=750000000, mid=750000000, high=750000000 * 1.2, confidence=1.0
             ),
@@ -131,15 +138,17 @@ def test_evcache_inmemory_high_latency_reads_cpu():
     )
 
     plan = planner.plan_certain(
-       model_name="org.netflix.evcache",
-       region="us-east-1",
-       desires=inmemory_cluster_high_latency_reads_qps,
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=inmemory_cluster_high_latency_reads_qps,
     )
 
     for candidate in plan:
-        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        total_cpu_power = (
+            candidate.candidate_clusters.zonal[0].count
+            * candidate.candidate_clusters.zonal[0].instance.cpu
+            * candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        )
 
         assert total_cpu_power > 100
 
@@ -162,23 +171,30 @@ def test_evcache_ondisk_low_latency_reads_cpu():
             ),
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(low=2306867, mid=2306867, high=2306867, confidence=1.0),
+            estimated_state_size_gib=Interval(
+                low=2306867, mid=2306867, high=2306867, confidence=1.0
+            ),
             estimated_state_item_count=Interval(
-                low=132000000000, mid=132000000000, high=132000000000 * 1.2, confidence=1.0
+                low=132000000000,
+                mid=132000000000,
+                high=132000000000 * 1.2,
+                confidence=1.0,
             ),
         ),
     )
 
     plan = planner.plan_certain(
-       model_name="org.netflix.evcache",
-       region="us-east-1",
-       desires=ondisk_cluster_low_latency_reads_qps,
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=ondisk_cluster_low_latency_reads_qps,
     )
 
     for candidate in plan:
-        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        total_cpu_power = (
+            candidate.candidate_clusters.zonal[0].count
+            * candidate.candidate_clusters.zonal[0].instance.cpu
+            * candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        )
 
         assert total_cpu_power > 8000
 
@@ -201,7 +217,9 @@ def test_evcache_ondisk_high_latency_reads_cpu():
             ),
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(low=281000, mid=281000, high=281000, confidence=1.0),
+            estimated_state_size_gib=Interval(
+                low=281000, mid=281000, high=281000, confidence=1.0
+            ),
             estimated_state_item_count=Interval(
                 low=8518318523, mid=8518318523, high=8518318523 * 1.2, confidence=1.0
             ),
@@ -209,21 +227,23 @@ def test_evcache_ondisk_high_latency_reads_cpu():
     )
 
     plan = planner.plan_certain(
-       model_name="org.netflix.evcache",
-       region="us-east-1",
-       desires=ondisk_cluster_high_latency_reads_qps,
+        model_name="org.netflix.evcache",
+        region="us-east-1",
+        desires=ondisk_cluster_high_latency_reads_qps,
     )
 
     for candidate in plan:
-        total_cpu_power = candidate.candidate_clusters.zonal[0].count * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu * \
-                          candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        total_cpu_power = (
+            candidate.candidate_clusters.zonal[0].count
+            * candidate.candidate_clusters.zonal[0].instance.cpu
+            * candidate.candidate_clusters.zonal[0].instance.cpu_ghz
+        )
 
         assert total_cpu_power > 800
 
 
 def test_evcache_inmemory_ram_usage():
-    inmemory_qps= CapacityDesires(
+    inmemory_qps = CapacityDesires(
         service_tier=1,
         query_pattern=QueryPattern(
             estimated_read_per_second=Interval(
@@ -254,14 +274,16 @@ def test_evcache_inmemory_ram_usage():
     )
 
     for candidate in plan:
-        total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \
-                    candidate.candidate_clusters.zonal[0].count
+        total_ram = (
+            candidate.candidate_clusters.zonal[0].instance.ram_gib
+            * candidate.candidate_clusters.zonal[0].count
+        )
 
         assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid
 
 
 def test_evcache_ondisk_disk_usage():
-    inmemory_qps= CapacityDesires(
+    inmemory_qps = CapacityDesires(
         service_tier=1,
         query_pattern=QueryPattern(
             estimated_read_per_second=Interval(
@@ -292,11 +314,14 @@ def test_evcache_ondisk_disk_usage():
     )
 
     for candidate in plan:
-        total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \
-                    candidate.candidate_clusters.zonal[0].count
+        total_ram = (
+            candidate.candidate_clusters.zonal[0].instance.ram_gib
+            * candidate.candidate_clusters.zonal[0].count
+        )
 
         assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid
 
+
 def test_evcache_ondisk_high_disk_usage():
     high_disk_usage_rps = CapacityDesires(
         service_tier=0,
@@ -315,9 +340,14 @@ def test_evcache_ondisk_high_disk_usage():
             ),
         ),
         data_shape=DataShape(
-            estimated_state_size_gib=Interval(low=2306867, mid=2306867, high=2306867, confidence=1.0),
+            estimated_state_size_gib=Interval(
+                low=2306867, mid=2306867, high=2306867, confidence=1.0
+            ),
             estimated_state_item_count=Interval(
-                low=132000000000, mid=132000000000, high=132000000000 * 1.2, confidence=1.0
+                low=132000000000,
+                mid=132000000000,
+                high=132000000000 * 1.2,
+                confidence=1.0,
             ),
         ),
     )
@@ -330,10 +360,15 @@ def test_evcache_ondisk_high_disk_usage():
 
     for candidate in plan:
         if candidate.candidate_clusters.zonal[0].instance.drive is not None:
-            total_disk = candidate.candidate_clusters.zonal[0].instance.drive.size_gib * \
-                        candidate.candidate_clusters.zonal[0].count
+            total_disk = (
+                candidate.candidate_clusters.zonal[0].instance.drive.size_gib
+                * candidate.candidate_clusters.zonal[0].count
+            )
+
+            assert (
+                total_disk > high_disk_usage_rps.data_shape.estimated_state_size_gib.mid
+            )
 
-            assert total_disk > high_disk_usage_rps.data_shape.estimated_state_size_gib.mid
 
 def test_evcache_zero_item_count():
     zero_item_count_rps = CapacityDesires(
@@ -354,9 +389,7 @@ def test_evcache_zero_item_count():
         ),
         data_shape=DataShape(
             estimated_state_size_gib=Interval(low=0, mid=0, high=0, confidence=1.0),
-            estimated_state_item_count=Interval(
-                low=0, mid=0, high=0, confidence=1.0
-            ),
+            estimated_state_item_count=Interval(low=0, mid=0, high=0, confidence=1.0),
         ),
     )
 
@@ -368,7 +401,11 @@ def test_evcache_zero_item_count():
 
     for candidate in plan:
         if candidate.candidate_clusters.zonal[0].instance.drive is not None:
-            total_ram = candidate.candidate_clusters.zonal[0].instance.drive.size_gib * \
-                        candidate.candidate_clusters.zonal[0].count
-
-            assert total_ram > zero_item_count_rps.data_shape.estimated_state_size_gib.mid
+            total_ram = (
+                candidate.candidate_clusters.zonal[0].instance.drive.size_gib
+                * candidate.candidate_clusters.zonal[0].count
+            )
+
+            assert (
+                total_ram > zero_item_count_rps.data_shape.estimated_state_size_gib.mid
+            )
diff --git a/tests/netflix/test_postgres.py b/tests/netflix/test_postgres.py
index 1a85269..05bc34e 100644
--- a/tests/netflix/test_postgres.py
+++ b/tests/netflix/test_postgres.py
@@ -1,10 +1,10 @@
 from service_capacity_modeling.capacity_planner import planner
 from service_capacity_modeling.interface import CapacityDesires
+from service_capacity_modeling.interface import certain_float
+from service_capacity_modeling.interface import certain_int
 from service_capacity_modeling.interface import DataShape
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
-from service_capacity_modeling.interface import certain_float
-from service_capacity_modeling.interface import certain_int
 
 tier_0 = CapacityDesires(
     service_tier=0,
@@ -101,11 +101,7 @@ def test_small_footprint_multi_region():
     )
     assert cap_plan[0].candidate_clusters.zonal[0].instance.name == "m5d.xlarge"
 
-    assert (
-        2000
-        < cap_plan[0].candidate_clusters.total_annual_cost
-        < 4000
-    )
+    assert 2000 < cap_plan[0].candidate_clusters.total_annual_cost < 4000
 
 
 def test_small_footprint_plan_uncertain():
@@ -114,17 +110,13 @@ def test_small_footprint_plan_uncertain():
         region="us-east-1",
         desires=small_footprint,
         num_regions=1,
-        simulations=256
+        simulations=256,
     )
     plan_a = cap_plan.least_regret[0]
 
     assert plan_a.candidate_clusters.regional[0].instance.name == "db.r5.large"
 
-    assert (
-        2000
-        < plan_a.candidate_clusters.total_annual_cost
-        < 4000
-    )
+    assert 2000 < plan_a.candidate_clusters.total_annual_cost < 4000
 
 
 def test_large_footprint():
@@ -138,11 +130,7 @@ def test_large_footprint():
     assert cap_plan[0].candidate_clusters.zonal[0].instance.name == "i3.xlarge"
     assert cap_plan[0].candidate_clusters.zonal[0].count == 41
 
-    assert (
-        100_000
-        < cap_plan[0].candidate_clusters.total_annual_cost
-        < 150_000
-    )
+    assert 100_000 < cap_plan[0].candidate_clusters.total_annual_cost < 150_000
 
 
 def test_tier_3():
diff --git a/tests/test_common.py b/tests/test_common.py
index 123d605..8aecdc0 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -11,8 +11,9 @@
 from service_capacity_modeling.interface import RegionContext
 from service_capacity_modeling.interface import Requirements
 from service_capacity_modeling.interface import ZoneClusterCapacity
-from service_capacity_modeling.models.common import merge_plan, sqrt_staffed_cores
+from service_capacity_modeling.models.common import merge_plan
 from service_capacity_modeling.models.common import network_services
+from service_capacity_modeling.models.common import sqrt_staffed_cores
 
 
 def test_merge_plan():
@@ -189,4 +190,3 @@ def test_different_tier_qos():
         cores = sqrt_staffed_cores(desires)
         assert cores >= prev_cores
         prev_cores = cores
-
diff --git a/tests/test_simulation.py b/tests/test_simulation.py
index cf0635c..eafa2be 100644
--- a/tests/test_simulation.py
+++ b/tests/test_simulation.py
@@ -113,7 +113,7 @@ def test_simulate_interval_beta():
         (alpha, beta, root), d = _beta_dist_from_interval(interval)
         # Check that the delta/uniform function has roughly equal values
         # since deltas and uniforms should just spread over the interval
-        if interval.mid == 100 or interval.mid == 1000:
+        if interval.mid in (100, 1000):
             assert abs(alpha - beta) < 0.01
         assert root.success
         assert soln[0] < alpha < soln[1]
diff --git a/tox.ini b/tox.ini
index 1c5a541..8127404 100644
--- a/tox.ini
+++ b/tox.ini
@@ -7,7 +7,6 @@ deps =
     pytest
     pytest-xdist
     mypy
-    pylint
     ipdb
     isodate
 commands =
@@ -26,8 +25,13 @@ commands = {posargs}
 skip_install = true
 deps =
     pre-commit
+    pylint
+    pydantic<2.0
+    scipy
+    numpy
+    isodate
 commands =
-    pre-commit install
+    pre-commit run --all-files
 
 [testenv:mypy]
 skip_install = true