Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce carbon footprint estimation to total energy amount #80

Merged
merged 12 commits into from
Apr 12, 2024
31 changes: 11 additions & 20 deletions cats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from datetime import timedelta
from typing import Optional

from .carbonFootprint import Estimates, greenAlgorithmsCalculator
from .check_clean_arguments import validate_jobinfo
from .carbonFootprint import Estimates, get_footprint_reduction_estimate
from .CI_api_interface import InvalidLocationError
from .CI_api_query import get_CI_forecast # noqa: F401
from .configure import get_runtime_config
Expand Down Expand Up @@ -170,7 +169,7 @@ class CATSOutput:
emmissionEstimate: Optional[Estimates] = None

def __str__(self) -> str:
out = f"Best job start time: {self.carbonIntensityOptimal.start}\n"
out = f"Best job start time: {self.carbonIntensityOptimal.start}"

if self.emmissionEstimate:
out += (
Expand Down Expand Up @@ -215,7 +214,8 @@ def main(arguments=None) -> Optional[int]:
" specify the scheduler with the -s or --scheduler option"
)
return 1
config, CI_API_interface, location, duration = get_runtime_config(args)

CI_API_interface, location, duration, jobinfo, PUE = get_runtime_config(args)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The config dict representing the YAML config file can now be confined to the configure module and no longer appears in the __init__ module. The jobinfo list of (ndevice, power) tuples is all that is required for carbon footprint estimation down the line.


########################
## Obtain CI forecast ##
Expand Down Expand Up @@ -244,24 +244,15 @@ def main(arguments=None) -> Optional[int]:
## Calculate carbon footprint ##
################################

if args.jobinfo:
jobinfo = validate_jobinfo(
args.jobinfo, expected_partition_names=config["partitions"].keys()
if args.footprint:
output.emmissionEstimate = get_footprint_reduction_estimate(
PUE=PUE,
jobinfo=jobinfo,
runtime=timedelta(minutes=args.duration),
average_best_ci=best_avg.value,
average_now_ci=now_avg.value,
)

if not (jobinfo and config):
logging.warning(
"Not enough information to estimate total carbon footprint, "
"both --jobinfo and config files are needed.\n"
)
else:
output.emmissionEstimate = greenAlgorithmsCalculator(
config=config,
runtime=timedelta(minutes=args.duration),
averageBest_carbonIntensity=best_avg.value, # TODO replace with real carbon intensity
averageNow_carbonIntensity=now_avg.value,
**jobinfo,
).get_footprint()
if args.format == "json":
if isinstance(args.dateformat, str) and "%" not in args.dateformat:
dateformat = SCHEDULER_DATE_FORMAT.get(args.dateformat, "")
Expand Down
192 changes: 19 additions & 173 deletions cats/carbonFootprint.py
Original file line number Diff line number Diff line change
@@ -1,178 +1,24 @@
import datetime
from collections import namedtuple

import yaml

# Result record for a footprint estimate: the footprint when running now,
# the footprint when running at the best time, and the difference between them.
Estimates = namedtuple("Estimates", ["now", "best", "savings"])


class greenAlgorithmsCalculator:
    """Estimate the carbon footprint of a job from its resource request.

    Energy use is computed separately for CPUs, GPUs and memory, summed and
    scaled by the cluster PUE, then multiplied by a carbon intensity to get
    a footprint for running now and for running at the best time.
    """

    def __init__(
        self,
        config,
        partition,
        runtime,
        memory,
        cpus,
        gpus,
        averageBest_carbonIntensity,
        averageNow_carbonIntensity,
    ):
        """
        Store the job description and load the fixed parameters.

        :param config: [dict] cluster information; ``config["partitions"][partition]``
            and ``config["PUE"]`` are read when computing energies
        :param partition: [str] has to match one of the partitions in `config.yml`
        :param runtime: [datetime.timedelta]
        :param memory: [int] in GB
        :param cpus: [int]
        :param gpus: [int]
        :param averageBest_carbonIntensity: [float] in gCO2e/kWh
        :param averageNow_carbonIntensity: [float] in gCO2e/kWh
        """
        # The cluster description is passed in already parsed.
        self.cluster_info = config

        ### Load fixed parameters
        # The path is relative to the current working directory.
        with open("fixed_parameters.yaml", "r") as stream:
            try:
                self.fParams = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                # NOTE(review): on a parse error `self.fParams` is never set,
                # so calculate_energies() will later fail with AttributeError.
                print(exc)

        self.partition = partition
        self.runtime = runtime
        self.memory = memory
        self.cpus = cpus
        self.gpus = gpus
        self.averageBest_carbonIntensity = averageBest_carbonIntensity
        self.averageNow_carbonIntensity = averageNow_carbonIntensity

    def formatText_footprint(self, footprint_g):
        """
        Format the text to display the carbon footprint
        :param footprint_g: [float] carbon footprint, in gCO2e
        :return: [str] the text to display, in g, kg or tonnes of CO2e
        """
        if footprint_g < 1e3:
            return f"{footprint_g:,.0f} gCO2e"
        if footprint_g < 1e6:
            return f"{footprint_g / 1e3:,.0f} kgCO2e"
        # One tonne is 1e6 g; dividing by 1e3 here would report kilograms
        # under a tonnes label.
        return f"{footprint_g / 1e6:,.0f} TCO2e"

    def formatText_treemonths(self, tm_float):
        """
        Format the text to display the tree months
        :param tm_float: [float] tree-months
        :return: [str] the text to display
        """
        # Thresholds are compared against whole months (truncated).
        tm = int(tm_float)
        ty = int(tm / 12)
        if tm < 1:
            text_trees = f"{tm_float:.3f} tree-months"
        elif tm == 1:
            text_trees = f"{tm_float:.1f} tree-month"
        elif tm < 6:
            text_trees = f"{tm_float:.1f} tree-months"
        elif tm <= 24:
            text_trees = f"{tm} tree-months"
        elif tm < 120:
            text_trees = f"{ty} tree-years and {tm - ty * 12} tree-months"
        else:
            text_trees = f"{ty} tree-years"
        return text_trees

    def formatText_driving(self, dist):
        """
        Format the text to display the driving distance
        :param dist: [float] driving distance, in km
        :return: [str] text to display
        """
        # Short distances keep two decimals; longer ones are rounded to whole km.
        if dist < 10:
            return f"driving {dist:,.2f} km"
        return f"driving {dist:,.0f} km"

    def formatText_flying(self, footprint_g, fParams):
        """
        Format the text to display about flying
        :param footprint_g: [float] carbon footprint, in gCO2e
        :param fParams: [dict] Fixed parameters, from fixed_parameters.yaml
        :return: [str] text to display
        """
        # A longer reference flight is used once the footprint reaches half of it.
        if footprint_g < 0.5 * fParams["flight_NY_SF"]:
            text_flying = f"{footprint_g / fParams['flight_PAR_LON']:,.2f} flights between Paris and London"
        elif footprint_g < 0.5 * fParams["flight_NYC_MEL"]:
            text_flying = f"{footprint_g / fParams['flight_NY_SF']:,.2f} flights between New York and San Francisco"
        else:
            text_flying = f"{footprint_g / fParams['flight_NYC_MEL']:,.2f} flights between New York and Melbourne"
        return text_flying

    def calculate_energies(self):
        """
        Compute the energy used by each component and in total.

        :return: [dict] with keys ``energy_CPUs``, ``energy_GPUs``,
            ``energy_memory`` and ``total_energy`` (components summed and
            multiplied by the cluster PUE), in kWh
        """
        ### Power draw CPU and GPU
        partition_info = self.cluster_info["partitions"][self.partition]
        if partition_info["type"] == "CPU":
            TDP2use4CPU = partition_info["TDP"]
            TDP2use4GPU = 0
        else:
            # On non-CPU partitions "TDP" describes the GPU and "TDP_CPU" the host CPU.
            TDP2use4CPU = partition_info["TDP_CPU"]
            TDP2use4GPU = partition_info["TDP"]

        ### Energy usage
        runtime_hours = self.runtime.total_seconds() / 3600
        energies = {
            "energy_CPUs": runtime_hours * self.cpus * TDP2use4CPU / 1000,  # in kWh
            "energy_GPUs": runtime_hours * self.gpus * TDP2use4GPU / 1000,  # in kWh
            "energy_memory": runtime_hours
            * self.memory
            * self.fParams["power_memory_perGB"]
            / 1000,  # in kWh
        }

        energies["total_energy"] = self.cluster_info["PUE"] * (
            energies["energy_CPUs"]
            + energies["energy_GPUs"]
            + energies["energy_memory"]
        )

        return energies

    def calculate_CF(self, energies):
        """
        Convert energies into carbon footprints for both carbon intensities.

        :param energies: [dict] as returned by ``calculate_energies``
        :return: [tuple] ``(CF_best, CF_now)``, each a dict with keys
            ``CF_CPUs``, ``CF_GPUs``, ``CF_memory`` and ``total_CF``
        """
        CF_best = {
            "CF_CPUs": energies["energy_CPUs"] * self.averageBest_carbonIntensity,
            "CF_GPUs": energies["energy_GPUs"] * self.averageBest_carbonIntensity,
            "CF_memory": energies["energy_memory"] * self.averageBest_carbonIntensity,
            "total_CF": energies["total_energy"] * self.averageBest_carbonIntensity,
        }

        CF_now = {
            "CF_CPUs": energies["energy_CPUs"] * self.averageNow_carbonIntensity,
            "CF_GPUs": energies["energy_GPUs"] * self.averageNow_carbonIntensity,
            "CF_memory": energies["energy_memory"] * self.averageNow_carbonIntensity,
            "total_CF": energies["total_energy"] * self.averageNow_carbonIntensity,
        }

        return CF_best, CF_now

    def get_footprint(self):
        """
        Compute the total footprints and format them for display.

        :return: [Estimates] formatted text for the footprint now, the
            footprint at the best time, and the difference between them
        """
        energies = self.calculate_energies()
        CF_best, CF_now = self.calculate_CF(energies)
        best = CF_best["total_CF"]
        now = CF_now["total_CF"]

        return Estimates(
            *[self.formatText_footprint(e) for e in [now, best, now - best]]
        )
def get_footprint_reduction_estimate(
    PUE: float,
    jobinfo: list[tuple[int, float]],
    runtime: datetime.timedelta,
    average_best_ci: float,  # in gCO2/kWh
    average_now_ci: float,
) -> Estimates:
    """Estimate the footprint of a job run now versus at the best time.

    :param PUE: multiplier applied to the energy drawn by the devices
    :param jobinfo: ``(number of units, power per unit)`` pairs; power is
        presumably in watts, given the division by 1000 to reach kWh
    :param runtime: duration of the job
    :param average_best_ci: average carbon intensity at the best time, in gCO2/kWh
    :param average_now_ci: average carbon intensity when starting now
    :return: ``Estimates(now, best, savings)`` where savings is ``now - best``
    """
    hours = runtime.total_seconds() / 3600
    total_power = sum(nunits * power for nunits, power in jobinfo)
    # energy in kWh
    energy = PUE * hours * total_power / 1000

    footprint_now = energy * average_now_ci
    footprint_best = energy * average_best_ci
    return Estimates(footprint_now, footprint_best, footprint_now - footprint_best)
56 changes: 0 additions & 56 deletions cats/check_clean_arguments.py

This file was deleted.

2 changes: 1 addition & 1 deletion cats/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_runtime_config(args) -> tuple[dict, APIInterface, str, int]:
jobinfo = None
PUE = None

return configmapping, CI_API_interface, location, duration, jobinfo, PUE
return CI_API_interface, location, duration, jobinfo, PUE


def config_from_file(configpath="") -> Mapping[str, Any]:
Expand Down
22 changes: 0 additions & 22 deletions config.yml

This file was deleted.

Loading
Loading