From 53ba575e12272452130cadbecbdf0d9df424a00b Mon Sep 17 00:00:00 2001 From: George Campbell Date: Thu, 19 Oct 2023 09:29:22 -0700 Subject: [PATCH] fix the pre-commit errors --- .github/workflows/python-build.yml | 2 + .pre-commit-config.yaml | 49 +++-- service_capacity_modeling/capacity_planner.py | 192 +++++++++++------- .../hardware/__init__.py | 11 +- service_capacity_modeling/interface.py | 7 +- .../models/org/netflix/__init__.py | 2 +- .../models/org/netflix/crdb.py | 11 +- .../models/org/netflix/ddb.py | 26 +-- .../models/org/netflix/entity.py | 1 - .../models/org/netflix/evcache.py | 28 +-- .../models/org/netflix/key_value.py | 41 +++- .../models/org/netflix/postgres.py | 15 +- .../models/org/netflix/time_series_config.py | 2 +- tests/netflix/test_elasticsearch.py | 12 +- tests/netflix/test_evcache.py | 135 +++++++----- tests/netflix/test_postgres.py | 24 +-- tests/test_common.py | 4 +- tests/test_simulation.py | 2 +- tox.ini | 8 +- 19 files changed, 340 insertions(+), 232 deletions(-) diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml index 456e325..14ca73e 100644 --- a/.github/workflows/python-build.yml +++ b/.github/workflows/python-build.yml @@ -20,3 +20,5 @@ jobs: run: pip install tox - name: Run Tox (pydantic v1) run: tox -e py + - name: Run Tox (full pre-commit check) + run: tox -e pre-commit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90fe525..9ba6e2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,34 +1,33 @@ repos: - - repo: https://github.com/python/black + - repo: https://github.com/python/black rev: 22.3.0 hooks: - - id: black - - repo: https://github.com/pre-commit/pre-commit-hooks + - id: black + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 hooks: - - id: check-yaml - - id: check-json - - id: trailing-whitespace - - id: flake8 - - id: check-merge-conflict - - id: detect-private-key - - id: end-of-file-fixer - - id: mixed-line-ending - args: - - --fix - - lf - - repo: https://github.com/asottile/reorder_python_imports + - id: check-yaml + - id: check-json + - id: trailing-whitespace + - id: flake8 + - id: check-merge-conflict + - id: detect-private-key + - id: end-of-file-fixer + - id: mixed-line-ending + args: + - --fix + - lf + - repo: https://github.com/asottile/reorder_python_imports rev: v1.9.0 hooks: - - id: reorder-python-imports - - repo: local + - id: reorder-python-imports + - repo: local hooks: - - id: pylint - name: pylint - entry: .tox/py39/bin/pylint - require_serial: true - language: system - types: [python] - verbose: true - + - id: pylint + name: pylint + entry: .tox/pre-commit/bin/pylint + require_serial: true + language: system + types: [python] + verbose: true fail_fast: true diff --git a/service_capacity_modeling/capacity_planner.py b/service_capacity_modeling/capacity_planner.py index c0b3090..ce64ed3 100644 --- a/service_capacity_modeling/capacity_planner.py +++ b/service_capacity_modeling/capacity_planner.py @@ -2,8 +2,9 @@ import functools import logging from hashlib import blake2b -from typing import Any, cast +from typing import Any from typing import Callable +from typing import cast from typing import Dict from typing import Generator from typing import List @@ -348,10 +349,11 @@ def _plan_percentiles( lifecycles = lifecycles or self._default_lifecycles model_mean_desires: Dict[str, CapacityDesires] = {} - model_percentile_desires: List[Dict[str, CapacityDesires]] = [] sorted_percentiles = sorted(percentiles) - for percentile in sorted_percentiles: + model_percentile_desires: List[Dict[str, CapacityDesires]] = [] + for _ in sorted_percentiles: model_percentile_desires.append({}) + for sub_model, sub_desires in self._sub_models( model_name=model_name, desires=desires, @@ -366,26 +368,42 @@ def _plan_percentiles( model_percentile_desires[index][sub_model] = percentile_input index = index + 1 - mean_plans = [] - for mean_sub_model, mean_sub_desire in model_mean_desires.items(): - mean_sub_plan = self._plan_certain( - model_name=mean_sub_model, - region=region, - desires=mean_sub_desire, - num_results=num_results, - num_regions=num_regions, - extra_model_arguments=extra_model_arguments, - lifecycles=lifecycles, - instance_families=instance_families, - drives=drives, - ) - if mean_sub_plan: - mean_plans.append(mean_sub_plan) - - mean_plan = cast( - Sequence[CapacityPlan], - [functools.reduce(merge_plan, composed) for composed in zip(*mean_plans)], + mean_plan = self._mean_plan( + drives, + extra_model_arguments, + instance_families, + lifecycles, + num_regions, + num_results, + region, + model_mean_desires, + ) + percentile_plans = self._group_plans_by_percentile( + drives, + extra_model_arguments, + instance_families, + lifecycles, + num_regions, + num_results, + region, + model_percentile_desires, + sorted_percentiles, ) + + return mean_plan, percentile_plans + + def _group_plans_by_percentile( + self, + drives, + extra_model_arguments, + instance_families, + lifecycles, + num_regions, + num_results, + region, + model_percentile_desires, + sorted_percentiles, + ): percentile_plans = {} for index, percentile in enumerate(sorted_percentiles): percentile_plan = [] @@ -393,16 +411,16 @@ def _plan_percentiles( index ].items(): percentile_sub_plan = self._plan_certain( - model_name=percentile_sub_model, - region=region, - desires=percentile_sub_desire, - num_results=num_results, - num_regions=num_regions, - extra_model_arguments=extra_model_arguments, - lifecycles=lifecycles, - instance_families=instance_families, - drives=drives, - ) + model_name=percentile_sub_model, + region=region, + desires=percentile_sub_desire, + num_results=num_results, + num_regions=num_regions, + extra_model_arguments=extra_model_arguments, + lifecycles=lifecycles, + instance_families=instance_families, + drives=drives, + ) if percentile_sub_plan: percentile_plan.append(percentile_sub_plan) @@ -413,8 +431,39 @@ def _plan_percentiles( for composed in zip(*percentile_plan) ], ) + return percentile_plans - return mean_plan, percentile_plans + def _mean_plan( + self, + drives, + extra_model_arguments, + instance_families, + lifecycles, + num_regions, + num_results, + region, + model_mean_desires, + ): + mean_plans = [] + for mean_sub_model, mean_sub_desire in model_mean_desires.items(): + mean_sub_plan = self._plan_certain( + model_name=mean_sub_model, + region=region, + desires=mean_sub_desire, + num_results=num_results, + num_regions=num_regions, + extra_model_arguments=extra_model_arguments, + lifecycles=lifecycles, + instance_families=instance_families, + drives=drives, + ) + if mean_sub_plan: + mean_plans.append(mean_sub_plan) + mean_plan = cast( + Sequence[CapacityPlan], + [functools.reduce(merge_plan, composed) for composed in zip(*mean_plans)], + ) + return mean_plan def plan_certain( self, @@ -445,16 +494,16 @@ def plan_certain( extra_model_arguments=extra_model_arguments, ): sub_plan = self._plan_certain( - model_name=sub_model, - region=region, - desires=sub_desires, - num_results=num_results, - num_regions=num_regions, - extra_model_arguments=extra_model_arguments, - lifecycles=lifecycles, - instance_families=instance_families, - drives=drives, - ) + model_name=sub_model, + region=region, + desires=sub_desires, + num_results=num_results, + num_regions=num_regions, + extra_model_arguments=extra_model_arguments, + lifecycles=lifecycles, + instance_families=instance_families, + drives=drives, + ) if sub_plan: results.append(sub_plan) @@ -473,12 +522,36 @@ def _plan_certain( extra_model_arguments: Optional[Dict[str, Any]] = None, ) -> Sequence[CapacityPlan]: extra_model_arguments = extra_model_arguments or {} + model = self._models[model_name] + + plans = [] + for instance, drive, context in self.generate_scenarios( + model, region, desires, num_regions, lifecycles, instance_families, drives + ): + plan = model.capacity_plan( + instance=instance, + drive=drive, + context=context, + desires=desires, + extra_model_arguments=extra_model_arguments, + ) + if plan is not None: + plans.append(plan) + + # lowest cost first + plans.sort(key=lambda p: (p.rank, p.candidate_clusters.total_annual_cost)) + + num_results = num_results or self._default_num_results + return reduce_by_family(plans)[:num_results] + + def generate_scenarios( + self, model, region, desires, num_regions, lifecycles, instance_families, drives + ): lifecycles = lifecycles or self._default_lifecycles instance_families = instance_families or [] drives = drives or [] hardware = self._shapes.region(region) - num_results = num_results or self._default_num_results context = RegionContext( zones_in_region=hardware.zones_in_region, @@ -492,7 +565,6 @@ def _plan_certain( desires.data_shape.reserved_instance_app_mem_gib + desires.data_shape.reserved_instance_system_mem_gib ) - model = self._models[model_name] allowed_platforms: Set[Platform] = set(model.allowed_platforms()) allowed_drives: Set[str] = set(drives or []) for drive_name in model.allowed_cloud_drives(): @@ -503,7 +575,6 @@ def _plan_certain( if len(allowed_drives) == 0: allowed_drives.update(hardware.drives.keys()) - plans = [] if model.run_hardware_simulation(): for instance in hardware.instances.values(): if not _allow_instance( @@ -518,32 +589,11 @@ def _plan_certain( if not _allow_drive(drive, drives, lifecycles, allowed_drives): continue - plan = model.capacity_plan( - instance=instance, - drive=drive, - context=context, - desires=desires, - extra_model_arguments=extra_model_arguments, - ) - if plan is not None: - plans.append(plan) + yield instance, drive, context else: - plan = model.capacity_plan( - instance=Instance.get_managed_instance(), - drive=Drive.get_managed_drive(), - context=context, - desires=desires, - extra_model_arguments=extra_model_arguments, - ) - if plan is not None: - plans.append(plan) - - # lowest cost first - plans.sort( - key=lambda plan: (plan.rank, plan.candidate_clusters.total_annual_cost) - ) - - return reduce_by_family(plans)[:num_results] + instance = Instance.get_managed_instance() + drive = Drive.get_managed_drive() + yield instance, drive, context # pylint: disable-msg=too-many-locals def plan( diff --git a/service_capacity_modeling/hardware/__init__.py b/service_capacity_modeling/hardware/__init__.py index 2092e15..af4d36e 100644 --- a/service_capacity_modeling/hardware/__init__.py +++ b/service_capacity_modeling/hardware/__init__.py @@ -1,4 +1,5 @@ -# -*- coding: utf-8 -*- +# pylint: disable=cyclic-import +# in HardwareShapes.hardware it imports from hardware.profiles dynamically import json import logging import os @@ -56,9 +57,7 @@ def price_hardware(hardware: Hardware, pricing: Pricing) -> GlobalHardware: priced_services[ svc ].annual_cost_per_write_io = svc_price.annual_cost_per_write_io - priced_services[ - svc - ].annual_cost_per_core = svc_price.annual_cost_per_core + priced_services[svc].annual_cost_per_core = svc_price.annual_cost_per_core regions[region] = Hardware( instances=priced_instances, @@ -75,10 +74,10 @@ def load_hardware_from_disk( shape_path=os.environ.get("HARDWARE_SHAPES"), ) -> GlobalHardware: if price_path is not None and shape_path is not None: - with open(price_path) as pfd: + with open(price_path, encoding="utf-8") as pfd: pricing = load_pricing(json.load(pfd)) - with open(shape_path) as sfd: + with open(shape_path, encoding="utf-8") as sfd: hardware = load_hardware(json.load(sfd)) return price_hardware(hardware=hardware, pricing=pricing) diff --git a/service_capacity_modeling/interface.py b/service_capacity_modeling/interface.py index 9a065f8..81c56bb 100644 --- a/service_capacity_modeling/interface.py +++ b/service_capacity_modeling/interface.py @@ -5,13 +5,14 @@ from decimal import Decimal from enum import Enum from functools import lru_cache -from typing import Any, Union +from typing import Any from typing import cast from typing import Dict from typing import List from typing import Optional from typing import Sequence from typing import Tuple +from typing import Union import numpy as np from pydantic import BaseModel @@ -369,10 +370,10 @@ def annual_cost_gib(self, data_gib: float = 0): return self.annual_cost_per_gib * data_gib else: _annual_data = data_gib - transfer_costs = self.annual_cost_per_gib + transfer_costs = list(self.annual_cost_per_gib) annual_cost = 0.0 for transfer_cost in transfer_costs: - if not _annual_data > 0: + if _annual_data <= 0: break if transfer_cost[0] > 0: annual_cost += ( diff --git a/service_capacity_modeling/models/org/netflix/__init__.py b/service_capacity_modeling/models/org/netflix/__init__.py index 4d9b873..ce207c0 100644 --- a/service_capacity_modeling/models/org/netflix/__init__.py +++ b/service_capacity_modeling/models/org/netflix/__init__.py @@ -8,13 +8,13 @@ from .elasticsearch import nflx_elasticsearch_master_capacity_model from .entity import nflx_entity_capacity_model from .evcache import nflx_evcache_capacity_model +from .kafka import nflx_kafka_capacity_model from .key_value import nflx_key_value_capacity_model from .postgres import nflx_postgres_capacity_model from .rds import nflx_rds_capacity_model from .stateless_java import nflx_java_app_capacity_model from .time_series import nflx_time_series_capacity_model from .zookeeper import nflx_zookeeper_capacity_model -from .kafka import nflx_kafka_capacity_model def models(): diff --git a/service_capacity_modeling/models/org/netflix/crdb.py b/service_capacity_modeling/models/org/netflix/crdb.py index 4e81cc2..cf9b727 100644 --- a/service_capacity_modeling/models/org/netflix/crdb.py +++ b/service_capacity_modeling/models/org/netflix/crdb.py @@ -283,9 +283,14 @@ def capacity_plan( max_rps_to_disk: int = extra_model_arguments.get("max_rps_to_disk", 500) # Very large nodes are hard to recover max_local_disk_gib: int = extra_model_arguments.get("max_local_disk_gib", 2048) - # Cockroach Labs recommends a minimum of 8 vCPUs and strongly recommends no fewer than 4 vCPUs per node. - min_vcpu_per_instance: int = extra_model_arguments.get("min_vcpu_per_instance", 4) - license_fee_per_core: float = context.services["crdb_core_license"].annual_cost_per_core + # Cockroach Labs recommends a minimum of 8 vCPUs and strongly + # recommends no fewer than 4 vCPUs per node. + min_vcpu_per_instance: int = extra_model_arguments.get( + "min_vcpu_per_instance", 4 + ) + license_fee_per_core: float = context.services[ + "crdb_core_license" + ].annual_cost_per_core return _estimate_cockroachdb_cluster_zonal( instance=instance, diff --git a/service_capacity_modeling/models/org/netflix/ddb.py b/service_capacity_modeling/models/org/netflix/ddb.py index 35b4a26..6a7ecf1 100644 --- a/service_capacity_modeling/models/org/netflix/ddb.py +++ b/service_capacity_modeling/models/org/netflix/ddb.py @@ -6,11 +6,12 @@ from pydantic import BaseModel from pydantic import Field -from service_capacity_modeling.interface import AccessConsistency, certain_float +from service_capacity_modeling.interface import AccessConsistency from service_capacity_modeling.interface import AccessPattern from service_capacity_modeling.interface import CapacityDesires from service_capacity_modeling.interface import CapacityPlan from service_capacity_modeling.interface import CapacityRequirement +from service_capacity_modeling.interface import certain_float from service_capacity_modeling.interface import certain_int from service_capacity_modeling.interface import Clusters from service_capacity_modeling.interface import Consistency @@ -142,9 +143,9 @@ def _get_read_consistency_percentages( transactional_read_percent = 1.0 eventual_read_percent = 0.0 strong_read_percent = 0.0 - elif ( - access_consistency == AccessConsistency.read_your_writes - or access_consistency == AccessConsistency.linearizable + elif access_consistency in ( + AccessConsistency.read_your_writes, + AccessConsistency.linearizable, ): transactional_read_percent = 0.0 eventual_read_percent = 0.0 @@ -156,9 +157,10 @@ def _get_read_consistency_percentages( total_percent = ( eventual_read_percent + transactional_read_percent + strong_read_percent ) - assert ( - total_percent == 1 - ), "eventual_read_percent, transactional_read_percent, strong_read_percent should sum to 1" + assert total_percent == 1, ( + "eventual_read_percent, transactional_read_percent, strong_read_percent" + " should sum to 1" + ) return { "transactional_read_percent": transactional_read_percent, "eventual_read_percent": eventual_read_percent, @@ -433,9 +435,9 @@ def capacity_plan( target_max_annual_cost: float = extra_model_arguments.get( "target_max_annual_cost", 0 ) - target_utilization_percentage = 0.80 + target_util_percentage = 0.80 if desires.service_tier in _TIER_TARGET_UTILIZATION_MAPPING: - target_utilization_percentage = _TIER_TARGET_UTILIZATION_MAPPING[ + target_util_percentage = _TIER_TARGET_UTILIZATION_MAPPING[ desires.service_tier ] @@ -443,7 +445,7 @@ def capacity_plan( "read_capacity_units": read_plan.read_capacity_units, "write_capacity_units": write_plan.write_capacity_units, "data_transfer_gib": data_transfer_plan.total_data_transfer_gib, - "target_utilization_percentage": target_utilization_percentage, + "target_utilization_percentage": target_util_percentage, } requirement_context[ "replicated_write_capacity_units" @@ -506,7 +508,7 @@ def capacity_plan( "auto_scale": { "min": 1, "max": max_read_capacity_units, - "target_utilization_percentage": target_utilization_percentage, + "target_utilization_percentage": target_util_percentage, }, }, "write_capacity_units": { @@ -517,7 +519,7 @@ def capacity_plan( "auto_scale": { "min": 1, "max": max_write_capacity_units, - "target_utilization_percentage": target_utilization_percentage, + "target_utilization_percentage": target_util_percentage, }, }, }, diff --git a/service_capacity_modeling/models/org/netflix/entity.py b/service_capacity_modeling/models/org/netflix/entity.py index b52720f..3561167 100644 --- a/service_capacity_modeling/models/org/netflix/entity.py +++ b/service_capacity_modeling/models/org/netflix/entity.py @@ -2,7 +2,6 @@ from typing import Callable from typing import Dict from typing import Optional -from typing import Sequence from typing import Tuple from .stateless_java import nflx_java_app_capacity_model diff --git a/service_capacity_modeling/models/org/netflix/evcache.py b/service_capacity_modeling/models/org/netflix/evcache.py index a76c743..41b3bbf 100644 --- a/service_capacity_modeling/models/org/netflix/evcache.py +++ b/service_capacity_modeling/models/org/netflix/evcache.py @@ -57,10 +57,12 @@ def calculate_read_cpu_time_evcache_ms(read_size_bytes: float) -> float: # 40 KiB - 158 top of our curve # Fit a logistic curve, requiring it to go through first # point - read_latency_ms = \ - 979.4009 + (-0.06853492 - 979.4009)/math.pow((1 + math.pow(read_size_bytes/13061.23, 0.180864)), 0.0002819491) + read_latency_ms = 979.4009 + (-0.06853492 - 979.4009) / math.pow( + (1 + math.pow(read_size_bytes / 13061.23, 0.180864)), 0.0002819491 + ) return max(read_latency_ms, 0.005) + def calculate_spread_cost(cluster_size: int, max_cost=100000, min_cost=0.0) -> float: if cluster_size > 10: return min_cost @@ -70,11 +72,9 @@ def calculate_spread_cost(cluster_size: int, max_cost=100000, min_cost=0.0) -> f def _estimate_evcache_requirement( - instance: Instance, desires: CapacityDesires, working_set: Optional[float], copies_per_region: int, - zones_per_region: int = 3, ) -> Tuple[CapacityRequirement, Tuple[str, ...]]: """Estimate the capacity required for one zone given a regional desire @@ -88,7 +88,7 @@ def _estimate_evcache_requirement( # For tier 0, we double the number of cores to account for caution if desires.service_tier == 0: needed_cores = needed_cores * 2 - + # (Arun): Keep 20% of available bandwidth for cache warmer needed_network_mbps = simple_network_mbps(desires) * 1.25 @@ -108,11 +108,12 @@ def _estimate_evcache_requirement( if desires.query_pattern.estimated_mean_read_size_bytes.mid > 200.0: payload_greater_than_classic = True - - # (Arun): As of 2021 we are using ephemerals exclusively and do not # use cloud drives - if working_set is None or (desires.data_shape.estimated_state_size_gib.mid < 110.0 and payload_greater_than_classic): + if working_set is None or ( + desires.data_shape.estimated_state_size_gib.mid < 110.0 + and payload_greater_than_classic + ): # We can't currently store data on cloud drives, but we can put the # dataset into memory! needed_memory = float(needed_disk) @@ -159,7 +160,7 @@ def _upsert_params(cluster, params): # pylint: disable=too-many-locals -def _estimate_evcache_cluster_zonal( +def _estimate_evcache_cluster_zonal( # noqa: C901 instance: Instance, drive: Drive, desires: CapacityDesires, @@ -203,10 +204,8 @@ def _estimate_evcache_cluster_zonal( working_set = None requirement, regrets = _estimate_evcache_requirement( - instance=instance, desires=desires, working_set=working_set, - zones_per_region=zones_per_region, copies_per_region=copies_per_region, ) @@ -283,7 +282,10 @@ def reserve_memory(instance_mem_gib): spread_cost = calculate_spread_cost(cluster.count) # Account for the clusters and replication costs - evcache_costs = {"evcache.zonal-clusters": ec2_cost, "evcache.spread.cost": spread_cost} + evcache_costs = { + "evcache.zonal-clusters": ec2_cost, + "evcache.spread.cost": spread_cost, + } for s in services: evcache_costs[f"{s.service_type}"] = s.annual_cost @@ -402,7 +404,7 @@ def default_desires(user_desires, extra_model_arguments: Dict[str, Any]): "estimated_mean_read_size_bytes", user_desires.query_pattern.dict(exclude_unset=True).get( "estimated_mean_write_size_bytes", - dict(low=16, mid=1024, high=65536, confidence=0.95) + {"low": 16, "mid": 1024, "high": 65536, "confidence": 0.95}, ), ) ) diff --git a/service_capacity_modeling/models/org/netflix/key_value.py b/service_capacity_modeling/models/org/netflix/key_value.py index dda1775..dfb09ac 100644 --- a/service_capacity_modeling/models/org/netflix/key_value.py +++ b/service_capacity_modeling/models/org/netflix/key_value.py @@ -60,7 +60,9 @@ def compose_with( user_desires: CapacityDesires, extra_model_arguments: Dict[str, Any] ) -> Tuple[Tuple[str, Callable[[CapacityDesires], CapacityDesires]], ...]: query_pattern = user_desires.query_pattern - target_consistency = query_pattern.access_consistency.same_region.target_consistency + target_consistency = ( + query_pattern.access_consistency.same_region.target_consistency + ) rps_interval = query_pattern.estimated_read_per_second rps: float = rps_interval.mid wps: float = query_pattern.estimated_write_per_second.mid @@ -68,28 +70,45 @@ def compose_with( # Parameterizing this in case we want to configure it to something else later. # The read/write ratio should be relatively high to make EVCache effective. - evcache_rw_ratio_threshold: float = extra_model_arguments.get("kv_evcache_read_write_ratio_threshold", 0.9) - use_evcache = target_consistency in (AccessConsistency.eventual, AccessConsistency.best_effort) and \ - (rps > 250_000 or (rps > 100_000 and read_write_ratio > evcache_rw_ratio_threshold)) + evcache_rw_ratio_threshold: float = extra_model_arguments.get( + "kv_evcache_read_write_ratio_threshold", 0.9 + ) + use_evcache = target_consistency in ( + AccessConsistency.eventual, + AccessConsistency.best_effort, + ) and ( + rps > 250_000 + or (rps > 100_000 and read_write_ratio > evcache_rw_ratio_threshold) + ) if use_evcache: + def _modify_cassandra_desires( desires: CapacityDesires, ) -> CapacityDesires: relaxed = desires.copy(deep=True) - # This is an initial best guess. Parameterizing in case we want to configure it in the future. - estimated_kv_cache_hit_rate: float = extra_model_arguments.get("estimated_kv_cache_hit_rate", 0.8) - - # Scale down the Cassandra estimated rps since those reads will be serviced by EVCache. - relaxed.query_pattern.estimated_read_per_second = rps_interval.scale(1 - estimated_kv_cache_hit_rate) + # This is an initial best guess. Parameterizing in case we want to + # configure it in the future. + estimated_kv_cache_hit_rate: float = extra_model_arguments.get( + "estimated_kv_cache_hit_rate", 0.8 + ) + + # Scale down the Cassandra estimated rps since those reads will be + # serviced by EVCache. + relaxed.query_pattern.estimated_read_per_second = rps_interval.scale( + 1 - estimated_kv_cache_hit_rate + ) return relaxed def _modify_evcache_desires( desires: CapacityDesires, ) -> CapacityDesires: relaxed = desires.copy(deep=True) - relaxed.query_pattern.access_consistency.same_region.target_consistency = AccessConsistency.best_effort + access_consistency = relaxed.query_pattern.access_consistency + access_consistency.same_region.target_consistency = ( + AccessConsistency.best_effort + ) return relaxed return ( @@ -97,7 +116,7 @@ def _modify_evcache_desires( ("org.netflix.evcache", _modify_evcache_desires), ) else: - return ("org.netflix.cassandra", lambda x: x), + return (("org.netflix.cassandra", lambda x: x),) @staticmethod def default_desires(user_desires, extra_model_arguments): diff --git a/service_capacity_modeling/models/org/netflix/postgres.py b/service_capacity_modeling/models/org/netflix/postgres.py index 153ec99..6e80665 100644 --- a/service_capacity_modeling/models/org/netflix/postgres.py +++ b/service_capacity_modeling/models/org/netflix/postgres.py @@ -3,7 +3,9 @@ from typing import Optional from typing import Tuple -from service_capacity_modeling.interface import AccessConsistency, Platform +from .aurora import nflx_aurora_capacity_model +from .crdb import nflx_cockroachdb_capacity_model +from service_capacity_modeling.interface import AccessConsistency from service_capacity_modeling.interface import AccessPattern from service_capacity_modeling.interface import CapacityDesires from service_capacity_modeling.interface import CapacityPlan @@ -14,10 +16,10 @@ from service_capacity_modeling.interface import GlobalConsistency from service_capacity_modeling.interface import Instance from service_capacity_modeling.interface import Interval +from service_capacity_modeling.interface import Platform from service_capacity_modeling.interface import QueryPattern from service_capacity_modeling.interface import RegionContext from service_capacity_modeling.models import CapacityModel -from . import nflx_aurora_capacity_model, nflx_cockroachdb_capacity_model class NflxPostgresCapacityModel(CapacityModel): @@ -46,7 +48,9 @@ def capacity_plan( if plan is not None: return plan - if set(nflx_cockroachdb_capacity_model.allowed_platforms()).intersection(instance.platforms): + if set(nflx_cockroachdb_capacity_model.allowed_platforms()).intersection( + instance.platforms + ): plan = nflx_cockroachdb_capacity_model.capacity_plan( instance=instance, drive=drive, @@ -55,7 +59,8 @@ def capacity_plan( extra_model_arguments=extra_model_arguments, ) if plan is not None: - # We want to lower the rank so this plan will only be chosen when no other workaround + # We want to lower the rank so this plan will only be chosen when no other + # workaround plan.rank = 1 return plan @@ -88,14 +93,12 @@ def default_desires(user_desires, extra_model_arguments): estimated_mean_write_size_bytes=Interval( low=64, mid=512, high=2048, confidence=0.90 ), - estimated_mean_read_latency_ms=Interval( low=1, mid=4, high=100, confidence=0.90 ), estimated_mean_write_latency_ms=Interval( low=1, mid=6, high=200, confidence=0.90 ), - read_latency_slo_ms=FixedInterval( minimum_value=1, maximum_value=100, diff --git a/service_capacity_modeling/models/org/netflix/time_series_config.py b/service_capacity_modeling/models/org/netflix/time_series_config.py index 65776a2..29e2317 100644 --- a/service_capacity_modeling/models/org/netflix/time_series_config.py +++ b/service_capacity_modeling/models/org/netflix/time_series_config.py @@ -6,8 +6,8 @@ from service_capacity_modeling.models.org.netflix.iso_date_math import ( _iso_to_proto_duration, ) -from service_capacity_modeling.models.org.netflix.iso_date_math import iso_to_seconds from service_capacity_modeling.models.org.netflix.iso_date_math import _iso_to_timedelta +from service_capacity_modeling.models.org.netflix.iso_date_math import iso_to_seconds DURATION_1H = timedelta(hours=1) DURATION_4H = timedelta(hours=4) diff --git a/tests/netflix/test_elasticsearch.py b/tests/netflix/test_elasticsearch.py index 3a35f1f..5a3634a 100644 --- a/tests/netflix/test_elasticsearch.py +++ b/tests/netflix/test_elasticsearch.py @@ -88,13 +88,13 @@ def test_es_simple_mean_percentiles(): ) assert len(cap_plan.mean) > 0, "mean is empty" - assert ( - all(mean_plan for mean_plan in cap_plan.mean) + assert all( + mean_plan for mean_plan in cap_plan.mean ), "One or more mean plans are empty" assert len(cap_plan.percentiles) > 0, "percentiles are empty" - assert ( - all(percentile_plan for percentile_plan in cap_plan.percentiles.values()) + assert all( + percentile_plan for percentile_plan in cap_plan.percentiles.values() ), "One or more percentile plans are empty" @@ -123,9 +123,7 @@ def test_es_simple_certain(): ) assert len(cap_plan) > 0, "Resulting cap_plan is empty" - assert ( - all(plan for plan in cap_plan) - ), "One or more plans is empty" + assert all(plan for plan in cap_plan), "One or more plans is empty" def zonal_summary(zlr): diff --git a/tests/netflix/test_evcache.py b/tests/netflix/test_evcache.py index cf1d8b9..e8f46a8 100644 --- a/tests/netflix/test_evcache.py +++ b/tests/netflix/test_evcache.py @@ -60,12 +60,15 @@ def test_evcache_inmemory_low_latency_reads_cpu(): ) for candidate in plan: - total_cpu_power = candidate.candidate_clusters.zonal[0].count * \ - candidate.candidate_clusters.zonal[0].instance.cpu * \ - candidate.candidate_clusters.zonal[0].instance.cpu_ghz + total_cpu_power = ( + candidate.candidate_clusters.zonal[0].count + * candidate.candidate_clusters.zonal[0].instance.cpu + * candidate.candidate_clusters.zonal[0].instance.cpu_ghz + ) assert total_cpu_power > 1100 + def test_evcache_inmemory_medium_latency_reads_cpu(): inmemory_cluster_medium_latency_reads_qps = CapacityDesires( service_tier=0, @@ -92,15 +95,17 @@ def test_evcache_inmemory_medium_latency_reads_cpu(): ) plan = planner.plan_certain( - model_name="org.netflix.evcache", - region="us-east-1", - desires=inmemory_cluster_medium_latency_reads_qps, + model_name="org.netflix.evcache", + region="us-east-1", + desires=inmemory_cluster_medium_latency_reads_qps, ) for candidate in plan: - total_cpu_power = candidate.candidate_clusters.zonal[0].count * \ - candidate.candidate_clusters.zonal[0].instance.cpu * \ - candidate.candidate_clusters.zonal[0].instance.cpu_ghz + total_cpu_power = ( + candidate.candidate_clusters.zonal[0].count + * candidate.candidate_clusters.zonal[0].instance.cpu + * candidate.candidate_clusters.zonal[0].instance.cpu_ghz + ) assert total_cpu_power > 400 @@ -123,7 +128,9 @@ def test_evcache_inmemory_high_latency_reads_cpu(): ), ), data_shape=DataShape( - estimated_state_size_gib=Interval(low=1662, mid=1662, high=1662, confidence=1.0), + estimated_state_size_gib=Interval( + low=1662, mid=1662, high=1662, confidence=1.0 + ), estimated_state_item_count=Interval( low=750000000, mid=750000000, high=750000000 * 1.2, confidence=1.0 ), @@ -131,15 +138,17 @@ def test_evcache_inmemory_high_latency_reads_cpu(): ) plan = planner.plan_certain( - model_name="org.netflix.evcache", - region="us-east-1", - desires=inmemory_cluster_high_latency_reads_qps, + model_name="org.netflix.evcache", + region="us-east-1", + desires=inmemory_cluster_high_latency_reads_qps, ) for candidate in plan: - total_cpu_power = candidate.candidate_clusters.zonal[0].count * \ - candidate.candidate_clusters.zonal[0].instance.cpu * \ - candidate.candidate_clusters.zonal[0].instance.cpu_ghz + total_cpu_power = ( + candidate.candidate_clusters.zonal[0].count + * candidate.candidate_clusters.zonal[0].instance.cpu + * candidate.candidate_clusters.zonal[0].instance.cpu_ghz + ) assert total_cpu_power > 100 @@ -162,23 +171,30 @@ def test_evcache_ondisk_low_latency_reads_cpu(): ), ), data_shape=DataShape( - estimated_state_size_gib=Interval(low=2306867, mid=2306867, high=2306867, confidence=1.0), + estimated_state_size_gib=Interval( + low=2306867, mid=2306867, high=2306867, confidence=1.0 + ), estimated_state_item_count=Interval( - low=132000000000, mid=132000000000, high=132000000000 * 1.2, confidence=1.0 + low=132000000000, + mid=132000000000, + high=132000000000 * 1.2, + confidence=1.0, ), ), ) plan = planner.plan_certain( - model_name="org.netflix.evcache", - region="us-east-1", - desires=ondisk_cluster_low_latency_reads_qps, + model_name="org.netflix.evcache", + region="us-east-1", + desires=ondisk_cluster_low_latency_reads_qps, ) for candidate in plan: - total_cpu_power = candidate.candidate_clusters.zonal[0].count * \ - candidate.candidate_clusters.zonal[0].instance.cpu * \ - candidate.candidate_clusters.zonal[0].instance.cpu_ghz + total_cpu_power = ( + candidate.candidate_clusters.zonal[0].count + * candidate.candidate_clusters.zonal[0].instance.cpu + * candidate.candidate_clusters.zonal[0].instance.cpu_ghz + ) assert total_cpu_power > 8000 @@ -201,7 +217,9 @@ def test_evcache_ondisk_high_latency_reads_cpu(): ), ), data_shape=DataShape( - estimated_state_size_gib=Interval(low=281000, mid=281000, high=281000, confidence=1.0), + estimated_state_size_gib=Interval( + low=281000, mid=281000, high=281000, confidence=1.0 + ), estimated_state_item_count=Interval( low=8518318523, mid=8518318523, high=8518318523 * 1.2, confidence=1.0 ), @@ -209,21 +227,23 @@ def test_evcache_ondisk_high_latency_reads_cpu(): ) plan = planner.plan_certain( - model_name="org.netflix.evcache", - region="us-east-1", - desires=ondisk_cluster_high_latency_reads_qps, + model_name="org.netflix.evcache", + region="us-east-1", + desires=ondisk_cluster_high_latency_reads_qps, ) for candidate in plan: - total_cpu_power = candidate.candidate_clusters.zonal[0].count * \ - candidate.candidate_clusters.zonal[0].instance.cpu * \ - candidate.candidate_clusters.zonal[0].instance.cpu_ghz + total_cpu_power = ( + candidate.candidate_clusters.zonal[0].count + * candidate.candidate_clusters.zonal[0].instance.cpu + * candidate.candidate_clusters.zonal[0].instance.cpu_ghz + ) assert total_cpu_power > 800 def test_evcache_inmemory_ram_usage(): - inmemory_qps= CapacityDesires( + inmemory_qps = CapacityDesires( service_tier=1, query_pattern=QueryPattern( estimated_read_per_second=Interval( @@ -254,14 +274,16 @@ def test_evcache_inmemory_ram_usage(): ) for candidate in plan: - total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \ - candidate.candidate_clusters.zonal[0].count + total_ram = ( + candidate.candidate_clusters.zonal[0].instance.ram_gib + * candidate.candidate_clusters.zonal[0].count + ) assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid def test_evcache_ondisk_disk_usage(): - inmemory_qps= CapacityDesires( + inmemory_qps = CapacityDesires( service_tier=1, query_pattern=QueryPattern( estimated_read_per_second=Interval( @@ -292,11 +314,14 @@ def test_evcache_ondisk_disk_usage(): ) for candidate in plan: - total_ram = candidate.candidate_clusters.zonal[0].instance.ram_gib * \ - candidate.candidate_clusters.zonal[0].count + total_ram = ( + candidate.candidate_clusters.zonal[0].instance.ram_gib + * candidate.candidate_clusters.zonal[0].count + ) assert total_ram > inmemory_qps.data_shape.estimated_state_size_gib.mid + def test_evcache_ondisk_high_disk_usage(): high_disk_usage_rps = CapacityDesires( service_tier=0, @@ -315,9 +340,14 @@ def test_evcache_ondisk_high_disk_usage(): ), ), data_shape=DataShape( - estimated_state_size_gib=Interval(low=2306867, mid=2306867, high=2306867, confidence=1.0), + estimated_state_size_gib=Interval( + low=2306867, mid=2306867, high=2306867, confidence=1.0 + ), estimated_state_item_count=Interval( - low=132000000000, mid=132000000000, high=132000000000 * 1.2, confidence=1.0 + low=132000000000, + mid=132000000000, + high=132000000000 * 1.2, + confidence=1.0, ), ), ) @@ -330,10 +360,15 @@ def test_evcache_ondisk_high_disk_usage(): for candidate in plan: if candidate.candidate_clusters.zonal[0].instance.drive is not None: - total_disk = candidate.candidate_clusters.zonal[0].instance.drive.size_gib * \ - candidate.candidate_clusters.zonal[0].count + total_disk = ( + candidate.candidate_clusters.zonal[0].instance.drive.size_gib + * candidate.candidate_clusters.zonal[0].count + ) + + assert ( + total_disk > high_disk_usage_rps.data_shape.estimated_state_size_gib.mid + ) - assert total_disk > high_disk_usage_rps.data_shape.estimated_state_size_gib.mid def test_evcache_zero_item_count(): zero_item_count_rps = CapacityDesires( @@ -354,9 +389,7 @@ def test_evcache_zero_item_count(): ), data_shape=DataShape( estimated_state_size_gib=Interval(low=0, mid=0, high=0, confidence=1.0), - estimated_state_item_count=Interval( - low=0, mid=0, high=0, confidence=1.0 - ), + estimated_state_item_count=Interval(low=0, mid=0, high=0, confidence=1.0), ), ) @@ -368,7 +401,11 @@ def test_evcache_zero_item_count(): for candidate in plan: if candidate.candidate_clusters.zonal[0].instance.drive is not None: - total_ram = candidate.candidate_clusters.zonal[0].instance.drive.size_gib * \ - candidate.candidate_clusters.zonal[0].count - - assert total_ram > zero_item_count_rps.data_shape.estimated_state_size_gib.mid + total_ram = ( + candidate.candidate_clusters.zonal[0].instance.drive.size_gib + * candidate.candidate_clusters.zonal[0].count + ) + + assert ( + total_ram > zero_item_count_rps.data_shape.estimated_state_size_gib.mid + ) diff --git a/tests/netflix/test_postgres.py b/tests/netflix/test_postgres.py index 1a85269..05bc34e 100644 --- a/tests/netflix/test_postgres.py +++ b/tests/netflix/test_postgres.py @@ -1,10 +1,10 @@ from service_capacity_modeling.capacity_planner import planner from service_capacity_modeling.interface import CapacityDesires +from service_capacity_modeling.interface import certain_float +from service_capacity_modeling.interface import certain_int from service_capacity_modeling.interface import DataShape from service_capacity_modeling.interface import Interval from service_capacity_modeling.interface import QueryPattern -from service_capacity_modeling.interface import certain_float -from service_capacity_modeling.interface import certain_int tier_0 = CapacityDesires( service_tier=0, @@ -101,11 +101,7 @@ def test_small_footprint_multi_region(): ) assert cap_plan[0].candidate_clusters.zonal[0].instance.name == "m5d.xlarge" - assert ( - 2000 - < cap_plan[0].candidate_clusters.total_annual_cost - < 4000 - ) + assert 2000 < cap_plan[0].candidate_clusters.total_annual_cost < 4000 def test_small_footprint_plan_uncertain(): @@ -114,17 +110,13 @@ def test_small_footprint_plan_uncertain(): region="us-east-1", desires=small_footprint, num_regions=1, - simulations=256 + simulations=256, ) plan_a = cap_plan.least_regret[0] assert plan_a.candidate_clusters.regional[0].instance.name == "db.r5.large" - assert ( - 2000 - < plan_a.candidate_clusters.total_annual_cost - < 4000 - ) + assert 2000 < plan_a.candidate_clusters.total_annual_cost < 4000 def test_large_footprint(): @@ -138,11 +130,7 @@ def test_large_footprint(): assert cap_plan[0].candidate_clusters.zonal[0].instance.name == "i3.xlarge" assert cap_plan[0].candidate_clusters.zonal[0].count == 41 - assert ( - 100_000 - < cap_plan[0].candidate_clusters.total_annual_cost - < 150_000 - ) + assert 100_000 < cap_plan[0].candidate_clusters.total_annual_cost < 150_000 def test_tier_3(): diff --git a/tests/test_common.py b/tests/test_common.py index 123d605..8aecdc0 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -11,8 +11,9 @@ from service_capacity_modeling.interface import RegionContext from service_capacity_modeling.interface import Requirements from service_capacity_modeling.interface import ZoneClusterCapacity -from service_capacity_modeling.models.common import merge_plan, sqrt_staffed_cores +from service_capacity_modeling.models.common import merge_plan from service_capacity_modeling.models.common import network_services +from service_capacity_modeling.models.common import sqrt_staffed_cores def test_merge_plan(): @@ -189,4 +190,3 @@ def test_different_tier_qos(): cores = sqrt_staffed_cores(desires) assert cores >= prev_cores prev_cores = cores - diff --git a/tests/test_simulation.py b/tests/test_simulation.py index cf0635c..eafa2be 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -113,7 +113,7 @@ def test_simulate_interval_beta(): (alpha, beta, root), d = _beta_dist_from_interval(interval) # Check that the delta/uniform function has roughly equal values # since deltas and uniforms should just spread over the interval - if interval.mid == 100 or interval.mid == 1000: + if interval.mid in (100, 1000): assert abs(alpha - beta) < 0.01 assert root.success assert soln[0] < alpha < soln[1] diff --git a/tox.ini b/tox.ini index 1c5a541..8127404 100644 --- a/tox.ini +++ b/tox.ini @@ -7,7 +7,6 @@ deps = pytest pytest-xdist mypy - pylint ipdb isodate commands = @@ -26,8 +25,13 @@ commands = {posargs} skip_install = true deps = pre-commit + pylint + pydantic<2.0 + scipy + numpy + isodate commands = - pre-commit install + pre-commit run --all-files [testenv:mypy] skip_install = true