From 7e87599a184604101b328cd7f06c53004dd5fbe1 Mon Sep 17 00:00:00 2001
From: codeflare-machine-account
Date: Mon, 13 May 2024 14:18:42 +0000
Subject: [PATCH] Changes in docs for release: v0.16.1

---
 .../cluster/cluster.html     | 158 +++++++++++-------
 .../cluster/config.html      |  82 ++++++---
 .../utils/generate_yaml.html |  72 +++++---
 .../utils/pretty_print.html  |   4 +-
 4 files changed, 212 insertions(+), 104 deletions(-)

diff --git a/docs/detailed-documentation/cluster/cluster.html b/docs/detailed-documentation/cluster/cluster.html
index eed1e1544..efb6ccfed 100644
--- a/docs/detailed-documentation/cluster/cluster.html
+++ b/docs/detailed-documentation/cluster/cluster.html
@@ -50,6 +50,7 @@

Module codeflare_sdk.cluster.cluster

 cluster setup queue, a list of all existing clusters, and the user's working namespace.
 """
+import re
 from time import sleep
 from typing import List, Optional, Tuple, Dict
@@ -73,11 +74,13 @@

Module codeflare_sdk.cluster.cluster

     RayClusterStatus,
 )
 from kubernetes import client, config
+from kubernetes.utils import parse_quantity
 import yaml
 import os
 import requests
 from kubernetes import config
+from kubernetes.client.rest import ApiException

 class Cluster:
@@ -216,6 +219,7 @@
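Note: the snippet below is an illustrative aside, not part of the patch. parse_quantity comes from the official kubernetes Python client and converts Kubernetes quantity strings (with suffixes such as Mi, G, or m for milli-CPU) into decimal.Decimal values, which is what makes it practical to keep CPU and memory as quantity strings rather than plain ints, as the later hunks do.

    from decimal import Decimal
    from kubernetes.utils import parse_quantity

    # Kubernetes quantities may use binary (Ki/Mi/Gi) or decimal (k/M/G) suffixes,
    # plus "m" for fractional CPU.
    print(parse_quantity("500Mi"))  # 524288000
    print(parse_quantity("2G"))     # 2000000000
    print(parse_quantity("250m"))   # 0.25
    assert parse_quantity("1Gi") == Decimal(1024**3)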

Module codeflare_sdk.cluster.cluster

         write_to_file = self.config.write_to_file
         verify_tls = self.config.verify_tls
         local_queue = self.config.local_queue
+        labels = self.config.labels
         return generate_appwrapper(
             name=name,
             namespace=namespace,
@@ -240,6 +244,7 @@

Module codeflare_sdk.cluster.cluster

             write_to_file=write_to_file,
             verify_tls=verify_tls,
             local_queue=local_queue,
+            labels=labels,
         )

     # creates a new cluster with the provided or default spec
@@ -248,6 +253,10 @@

Module codeflare_sdk.cluster.cluster

         Applies the AppWrapper yaml, pushing the resource request onto the MCAD queue.
         """
+
+        # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
+        self._throw_for_no_raycluster()
+
         namespace = self.config.namespace
         try:
@@ -278,12 +287,32 @@

Module codeflare_sdk.cluster.cluster

         except Exception as e:  # pragma: no cover
             return _kube_api_error_handling(e)

+    def _throw_for_no_raycluster(self):
+        api_instance = client.CustomObjectsApi(api_config_handler())
+        try:
+            api_instance.list_namespaced_custom_object(
+                group="ray.io",
+                version="v1",
+                namespace=self.config.namespace,
+                plural="rayclusters",
+            )
+        except ApiException as e:
+            if e.status == 404:
+                raise RuntimeError(
+                    "RayCluster CustomResourceDefinition unavailable contact your administrator."
+                )
+            else:
+                raise RuntimeError(
+                    "Failed to get RayCluster CustomResourceDefinition: " + str(e)
+                )
+
     def down(self):
         """
         Deletes the AppWrapper yaml, scaling-down and deleting all resources
         associated with the cluster.
         """
         namespace = self.config.namespace
+        self._throw_for_no_raycluster()
         try:
             config_check()
             api_instance = client.CustomObjectsApi(api_config_handler())
@@ -520,26 +549,18 @@
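Note: the sketch below is illustrative and not part of the patch; it assumes an authenticated Kubernetes or OpenShift context. It shows how the new guard surfaces to SDK users: up() and down() now fail fast with a RuntimeError when the rayclusters.ray.io CustomResourceDefinition is not installed.

    from codeflare_sdk.cluster.cluster import Cluster
    from codeflare_sdk.cluster.config import ClusterConfiguration

    cluster = Cluster(ClusterConfiguration(name="demo", namespace="default"))

    try:
        cluster.up()  # _throw_for_no_raycluster() runs before anything is created
    except RuntimeError as err:
        # Raised on HTTP 404 (CRD missing) or on any other failure listing rayclusters.
        print(f"Cannot bring the cluster up: {err}")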

Module codeflare_sdk.cluster.cluster

             namespace=rc["metadata"]["namespace"],
             machine_types=machine_types,
             num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
-            min_cpus=int(
-                rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
-                    "resources"
-                ]["requests"]["cpu"]
-            ),
-            max_cpus=int(
-                rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
-                    "resources"
-                ]["limits"]["cpu"]
-            ),
-            min_memory=int(
-                rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
-                    "resources"
-                ]["requests"]["memory"][:-1]
-            ),
-            max_memory=int(
-                rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
-                    "resources"
-                ]["limits"]["memory"][:-1]
-            ),
+            min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
+                "containers"
+            ][0]["resources"]["requests"]["cpu"],
+            max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
+                "containers"
+            ][0]["resources"]["limits"]["cpu"],
+            min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
+                "containers"
+            ][0]["resources"]["requests"]["memory"],
+            max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
+                "containers"
+            ][0]["resources"]["limits"]["memory"],
             num_gpus=int(
                 rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
                     "resources"
@@ -1265,6 +1286,7 @@
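Note: with the int() casts removed, get_cluster() now reports whatever quantity strings the RayCluster spec contains (for example "500m" or "4Gi") instead of truncating them or failing on non-integer values. The snippet below is illustrative only; callers that need numbers can convert with parse_quantity.

    from kubernetes.utils import parse_quantity

    # Hypothetical values as they might appear in a RayCluster workerGroupSpec.
    requests = {"cpu": "500m", "memory": "4Gi"}

    min_cpus = requests["cpu"]        # kept as-is: "500m"
    min_memory = requests["memory"]   # kept as-is: "4Gi"

    # Convert only where a numeric value is actually required.
    print(parse_quantity(min_cpus))    # 0.5
    print(parse_quantity(min_memory))  # 4294967296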

Classes

write_to_file = self.config.write_to_file verify_tls = self.config.verify_tls local_queue = self.config.local_queue + labels = self.config.labels return generate_appwrapper( name=name, namespace=namespace, @@ -1289,6 +1311,7 @@

Classes

write_to_file=write_to_file, verify_tls=verify_tls, local_queue=local_queue, + labels=labels, ) # creates a new cluster with the provided or default spec @@ -1297,6 +1320,10 @@

Classes

Applies the AppWrapper yaml, pushing the resource request onto the MCAD queue. """ + + # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError + self._throw_for_no_raycluster() + namespace = self.config.namespace try: @@ -1327,12 +1354,32 @@

Classes

except Exception as e: # pragma: no cover return _kube_api_error_handling(e) + def _throw_for_no_raycluster(self): + api_instance = client.CustomObjectsApi(api_config_handler()) + try: + api_instance.list_namespaced_custom_object( + group="ray.io", + version="v1", + namespace=self.config.namespace, + plural="rayclusters", + ) + except ApiException as e: + if e.status == 404: + raise RuntimeError( + "RayCluster CustomResourceDefinition unavailable contact your administrator." + ) + else: + raise RuntimeError( + "Failed to get RayCluster CustomResourceDefinition: " + str(e) + ) + def down(self): """ Deletes the AppWrapper yaml, scaling-down and deleting all resources associated with the cluster. """ namespace = self.config.namespace + self._throw_for_no_raycluster() try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -1569,26 +1616,18 @@

Classes

namespace=rc["metadata"]["namespace"], machine_types=machine_types, num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"], - min_cpus=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["requests"]["cpu"] - ), - max_cpus=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["limits"]["cpu"] - ), - min_memory=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["requests"]["memory"][:-1] - ), - max_memory=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["limits"]["memory"][:-1] - ), + min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"], + max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["limits"]["cpu"], + min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["memory"], + max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["limits"]["memory"], num_gpus=int( rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ "resources" @@ -1811,6 +1850,7 @@

Methods

write_to_file = self.config.write_to_file verify_tls = self.config.verify_tls local_queue = self.config.local_queue + labels = self.config.labels return generate_appwrapper( name=name, namespace=namespace, @@ -1835,6 +1875,7 @@

Methods

write_to_file=write_to_file, verify_tls=verify_tls, local_queue=local_queue, + labels=labels, ) @@ -1870,6 +1911,7 @@

Methods

associated with the cluster. """ namespace = self.config.namespace + self._throw_for_no_raycluster() try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -1944,26 +1986,18 @@

Methods

namespace=rc["metadata"]["namespace"], machine_types=machine_types, num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"], - min_cpus=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["requests"]["cpu"] - ), - max_cpus=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["limits"]["cpu"] - ), - min_memory=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["requests"]["memory"][:-1] - ), - max_memory=int( - rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ - "resources" - ]["limits"]["memory"][:-1] - ), + min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"], + max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["limits"]["cpu"], + min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["memory"], + max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["limits"]["memory"], num_gpus=int( rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ "resources" @@ -2168,6 +2202,10 @@

Methods

         Applies the AppWrapper yaml, pushing the resource request onto the MCAD queue.
         """
+
+        # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
+        self._throw_for_no_raycluster()
+
         namespace = self.config.namespace
         try:

diff --git a/docs/detailed-documentation/cluster/config.html b/docs/detailed-documentation/cluster/config.html
index c7b9ccfcd..1879afaf0 100644
--- a/docs/detailed-documentation/cluster/config.html
+++ b/docs/detailed-documentation/cluster/config.html
@@ -52,6 +52,7 @@

Module codeflare_sdk.cluster.config

 from dataclasses import dataclass, field
 import pathlib
+import typing

 dir = pathlib.Path(__file__).parent.parent.resolve()
@@ -66,15 +67,15 @@

Module codeflare_sdk.cluster.config

     name: str
     namespace: str = None
     head_info: list = field(default_factory=list)
-    head_cpus: int = 2
-    head_memory: int = 8
+    head_cpus: typing.Union[int, str] = 2
+    head_memory: typing.Union[int, str] = 8
     head_gpus: int = 0
     machine_types: list = field(default_factory=list)  # ["m4.xlarge", "g4dn.xlarge"]
-    min_cpus: int = 1
-    max_cpus: int = 1
+    min_cpus: typing.Union[int, str] = 1
+    max_cpus: typing.Union[int, str] = 1
     num_workers: int = 1
-    min_memory: int = 2
-    max_memory: int = 2
+    min_memory: typing.Union[int, str] = 2
+    max_memory: typing.Union[int, str] = 2
     num_gpus: int = 0
     template: str = f"{dir}/templates/base-template.yaml"
     instascale: bool = False
@@ -85,12 +86,31 @@

Module codeflare_sdk.cluster.config

     dispatch_priority: str = None
     write_to_file: bool = False
     verify_tls: bool = True
+    labels: dict = field(default_factory=dict)

     def __post_init__(self):
         if not self.verify_tls:
             print(
                 "Warning: TLS verification has been disabled - Endpoint checks will be bypassed"
             )
+        self._memory_to_string()
+        self._str_mem_no_unit_add_GB()
+
+    def _str_mem_no_unit_add_GB(self):
+        if isinstance(self.head_memory, str) and self.head_memory.isdecimal():
+            self.head_memory = f"{self.head_memory}G"
+        if isinstance(self.min_memory, str) and self.min_memory.isdecimal():
+            self.min_memory = f"{self.min_memory}G"
+        if isinstance(self.max_memory, str) and self.max_memory.isdecimal():
+            self.max_memory = f"{self.max_memory}G"
+
+    def _memory_to_string(self):
+        if isinstance(self.head_memory, int):
+            self.head_memory = f"{self.head_memory}G"
+        if isinstance(self.min_memory, int):
+            self.min_memory = f"{self.min_memory}G"
+        if isinstance(self.max_memory, int):
+            self.max_memory = f"{self.max_memory}G"

     local_queue: str = None
@@ -106,7 +126,7 @@
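Note: the example below is illustrative and not part of the patch. The two __post_init__ hooks above normalise plain integers and bare digit strings to a "<n>G" quantity, while strings that already carry a unit pass through untouched; labels is stored as-is and later merged into the generated resources.

    from codeflare_sdk.cluster.config import ClusterConfiguration

    cfg = ClusterConfiguration(
        name="demo",
        min_memory=4,        # int            -> "4G"  (_memory_to_string)
        max_memory="8",      # bare digits    -> "8G"  (_str_mem_no_unit_add_GB)
        head_memory="16Gi",  # already a unit -> left as "16Gi"
        labels={"team": "ml", "env": "dev"},
    )

    print(cfg.min_memory, cfg.max_memory, cfg.head_memory)  # 4G 8G 16Gi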

Classes

class ClusterConfiguration -(name: str, namespace: str = None, head_info: list = <factory>, head_cpus: int = 2, head_memory: int = 8, head_gpus: int = 0, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, mcad: bool = False, envs: dict = <factory>, image: str = '', image_pull_secrets: list = <factory>, dispatch_priority: str = None, write_to_file: bool = False, verify_tls: bool = True, local_queue: str = None) +(name: str, namespace: str = None, head_info: list = <factory>, head_cpus: Union[int, str] = 2, head_memory: Union[int, str] = 8, head_gpus: int = 0, machine_types: list = <factory>, min_cpus: Union[int, str] = 1, max_cpus: Union[int, str] = 1, num_workers: int = 1, min_memory: Union[int, str] = 2, max_memory: Union[int, str] = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, mcad: bool = False, envs: dict = <factory>, image: str = '', image_pull_secrets: list = <factory>, dispatch_priority: str = None, write_to_file: bool = False, verify_tls: bool = True, labels: dict = <factory>, local_queue: str = None)

This dataclass is used to specify resource requirements and other details, and @@ -124,15 +144,15 @@

Classes

name: str namespace: str = None head_info: list = field(default_factory=list) - head_cpus: int = 2 - head_memory: int = 8 + head_cpus: typing.Union[int, str] = 2 + head_memory: typing.Union[int, str] = 8 head_gpus: int = 0 machine_types: list = field(default_factory=list) # ["m4.xlarge", "g4dn.xlarge"] - min_cpus: int = 1 - max_cpus: int = 1 + min_cpus: typing.Union[int, str] = 1 + max_cpus: typing.Union[int, str] = 1 num_workers: int = 1 - min_memory: int = 2 - max_memory: int = 2 + min_memory: typing.Union[int, str] = 2 + max_memory: typing.Union[int, str] = 2 num_gpus: int = 0 template: str = f"{dir}/templates/base-template.yaml" instascale: bool = False @@ -143,12 +163,31 @@

Classes

dispatch_priority: str = None write_to_file: bool = False verify_tls: bool = True + labels: dict = field(default_factory=dict) def __post_init__(self): if not self.verify_tls: print( "Warning: TLS verification has been disabled - Endpoint checks will be bypassed" ) + self._memory_to_string() + self._str_mem_no_unit_add_GB() + + def _str_mem_no_unit_add_GB(self): + if isinstance(self.head_memory, str) and self.head_memory.isdecimal(): + self.head_memory = f"{self.head_memory}G" + if isinstance(self.min_memory, str) and self.min_memory.isdecimal(): + self.min_memory = f"{self.min_memory}G" + if isinstance(self.max_memory, str) and self.max_memory.isdecimal(): + self.max_memory = f"{self.max_memory}G" + + def _memory_to_string(self): + if isinstance(self.head_memory, int): + self.head_memory = f"{self.head_memory}G" + if isinstance(self.min_memory, int): + self.min_memory = f"{self.min_memory}G" + if isinstance(self.max_memory, int): + self.max_memory = f"{self.max_memory}G" local_queue: str = None @@ -162,7 +201,7 @@

Class variables

-
var head_cpus : int
+
var head_cpus : Union[int, str]
@@ -174,7 +213,7 @@

Class variables

-
var head_memory : int
+
var head_memory : Union[int, str]
@@ -190,6 +229,10 @@

Class variables

+
var labels : dict
+
+
+
var local_queue : str
@@ -198,11 +241,11 @@

Class variables

-
var max_cpus : int
+
var max_cpus : Union[int, str]
-
var max_memory : int
+
var max_memory : Union[int, str]
@@ -210,11 +253,11 @@

Class variables

-
var min_cpus : int
+
var min_cpus : Union[int, str]
-
var min_memory : int
+
var min_memory : Union[int, str]
@@ -276,6 +319,7 @@

image
  • image_pull_secrets
  • instascale
  • +
  • labels
  • local_queue
  • machine_types
  • max_cpus
  • diff --git a/docs/detailed-documentation/utils/generate_yaml.html b/docs/detailed-documentation/utils/generate_yaml.html index fc7147800..141901016 100644 --- a/docs/detailed-documentation/utils/generate_yaml.html +++ b/docs/detailed-documentation/utils/generate_yaml.html @@ -171,8 +171,8 @@

    Module codeflare_sdk.utils.generate_yaml

     # Leave head node resources as template default
     resource["requests"]["cpu"] = head_cpus
     resource["limits"]["cpu"] = head_cpus
-    resource["requests"]["memory"] = str(head_memory) + "G"
-    resource["limits"]["memory"] = str(head_memory) + "G"
+    resource["requests"]["memory"] = head_memory
+    resource["limits"]["memory"] = head_memory
     resource["requests"]["nvidia.com/gpu"] = head_gpus
     resource["limits"]["nvidia.com/gpu"] = head_gpus
@@ -189,9 +189,9 @@

    Module codeflare_sdk.utils.generate_yaml

                         resource[k][spec] = min_cpu
                 if spec == "memory":
                     if k == "limits":
-                        resource[k][spec] = str(max_memory) + "G"
+                        resource[k][spec] = max_memory
                     else:
-                        resource[k][spec] = str(min_memory) + "G"
+                        resource[k][spec] = min_memory
                 if spec == "nvidia.com/gpu":
                     if i == 0:
                         resource[k][spec] = 0
@@ -244,12 +244,12 @@

    Module codeflare_sdk.utils.generate_yaml

         requests = resource.get("resources").get("requests")
         if requests is not None:
             requests["cpu"] = min_cpu
-            requests["memory"] = str(min_memory) + "G"
+            requests["memory"] = min_memory
             requests["nvidia.com/gpu"] = gpu
         limits = resource.get("resources").get("limits")
         if limits is not None:
             limits["cpu"] = max_cpu
-            limits["memory"] = str(max_memory) + "G"
+            limits["memory"] = max_memory
             limits["nvidia.com/gpu"] = gpu
@@ -340,7 +340,7 @@
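Note: illustrative only, not part of the patch. With the str(...) + "G" concatenation gone, the generated pod resources carry the memory quantities exactly as configured, since the values already arrive unit-suffixed from ClusterConfiguration.

    # Hypothetical container resources block from the base template.
    resource = {"resources": {"requests": {}, "limits": {}}}
    min_cpu, max_cpu = 1, 2
    min_memory, max_memory = "4G", "8G"  # already normalised by ClusterConfiguration
    gpu = 0

    requests = resource.get("resources").get("requests")
    if requests is not None:
        requests["cpu"] = min_cpu
        requests["memory"] = min_memory  # no further string surgery needed
        requests["nvidia.com/gpu"] = gpu

    limits = resource.get("resources").get("limits")
    if limits is not None:
        limits["cpu"] = max_cpu
        limits["memory"] = max_memory
        limits["nvidia.com/gpu"] = gpu

    print(resource)
    # {'resources': {'requests': {'cpu': 1, 'memory': '4G', 'nvidia.com/gpu': 0},
    #                'limits': {'cpu': 2, 'memory': '8G', 'nvidia.com/gpu': 0}}}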

    Module codeflare_sdk.utils.generate_yaml

 def write_components(
-    user_yaml: dict, output_file_name: str, namespace: str, local_queue: Optional[str]
+    user_yaml: dict,
+    output_file_name: str,
+    namespace: str,
+    local_queue: Optional[str],
+    labels: dict,
 ):
     # Create the directory if it doesn't exist
     directory_path = os.path.dirname(output_file_name)
@@ -350,6 +354,7 @@

    Module codeflare_sdk.utils.generate_yaml

     components = user_yaml.get("spec", "resources")["resources"].get("GenericItems")
     open(output_file_name, "w").close()
     lq_name = local_queue or get_default_kueue_name(namespace)
+    cluster_labels = labels
     with open(output_file_name, "a") as outfile:
         for component in components:
             if "generictemplate" in component:
@@ -362,6 +367,7 @@

    Module codeflare_sdk.utils.generate_yaml

                 ]
                 labels = component["generictemplate"]["metadata"]["labels"]
                 labels.update({"kueue.x-k8s.io/queue-name": lq_name})
+                labels.update(cluster_labels)
                 outfile.write("---\n")
                 yaml.dump(
                     component["generictemplate"], outfile, default_flow_style=False
@@ -370,11 +376,16 @@

    Module codeflare_sdk.utils.generate_yaml

 def load_components(
-    user_yaml: dict, name: str, namespace: str, local_queue: Optional[str]
+    user_yaml: dict,
+    name: str,
+    namespace: str,
+    local_queue: Optional[str],
+    labels: dict,
 ):
     component_list = []
     components = user_yaml.get("spec", "resources")["resources"].get("GenericItems")
     lq_name = local_queue or get_default_kueue_name(namespace)
+    cluster_labels = labels
     for component in components:
         if "generictemplate" in component:
             if (
@@ -386,6 +397,7 @@

    Module codeflare_sdk.utils.generate_yaml

                 ]
                 labels = component["generictemplate"]["metadata"]["labels"]
                 labels.update({"kueue.x-k8s.io/queue-name": lq_name})
+                labels.update(cluster_labels)
                 component_list.append(component["generictemplate"])

     resources = "---\n" + "---\n".join(
@@ -426,6 +438,7 @@
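Note: illustrative only, not part of the patch. Both write_components and load_components now apply the same merge to every generictemplate's metadata: the Kueue queue-name label first, then the user-supplied cluster labels.

    # Hypothetical generictemplate entry from an AppWrapper's GenericItems list.
    component = {
        "generictemplate": {
            "metadata": {"labels": {"workload.codeflare.dev/appwrapper": "demo"}}
        }
    }
    lq_name = "team-a-queue"                       # local_queue or the namespace default
    cluster_labels = {"team": "ml", "env": "dev"}  # ClusterConfiguration.labels

    labels = component["generictemplate"]["metadata"]["labels"]
    labels.update({"kueue.x-k8s.io/queue-name": lq_name})
    labels.update(cluster_labels)

    print(labels)
    # {'workload.codeflare.dev/appwrapper': 'demo',
    #  'kueue.x-k8s.io/queue-name': 'team-a-queue', 'team': 'ml', 'env': 'dev'}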

    Module codeflare_sdk.utils.generate_yaml

     write_to_file: bool,
     verify_tls: bool,
     local_queue: Optional[str],
+    labels,
 ):
     user_yaml = read_template(template)
     appwrapper_name, cluster_name = gen_names(name)
@@ -477,13 +490,13 @@

    Module codeflare_sdk.utils.generate_yaml

         if mcad:
             write_user_appwrapper(user_yaml, outfile)
         else:
-            write_components(user_yaml, outfile, namespace, local_queue)
+            write_components(user_yaml, outfile, namespace, local_queue, labels)
         return outfile
     else:
         if mcad:
             user_yaml = load_appwrapper(user_yaml, name)
         else:
-            user_yaml = load_components(user_yaml, name, namespace, local_queue)
+            user_yaml = load_components(user_yaml, name, namespace, local_queue, labels)
         return user_yaml
@@ -527,7 +540,7 @@

    Functions

    -def generate_appwrapper(name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, mcad: bool, instance_types: list, env, image_pull_secrets: list, dispatch_priority: str, priority_val: int, write_to_file: bool, verify_tls: bool, local_queue: Optional[str]) +def generate_appwrapper(name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, mcad: bool, instance_types: list, env, image_pull_secrets: list, dispatch_priority: str, priority_val: int, write_to_file: bool, verify_tls: bool, local_queue: Optional[str], labels)
    @@ -559,6 +572,7 @@

    Functions

    write_to_file: bool, verify_tls: bool, local_queue: Optional[str], + labels, ): user_yaml = read_template(template) appwrapper_name, cluster_name = gen_names(name) @@ -610,13 +624,13 @@

    Functions

    if mcad: write_user_appwrapper(user_yaml, outfile) else: - write_components(user_yaml, outfile, namespace, local_queue) + write_components(user_yaml, outfile, namespace, local_queue, labels) return outfile else: if mcad: user_yaml = load_appwrapper(user_yaml, name) else: - user_yaml = load_components(user_yaml, name, namespace, local_queue) + user_yaml = load_components(user_yaml, name, namespace, local_queue, labels) return user_yaml
    @@ -715,7 +729,7 @@

    Functions

    -def load_components(user_yaml: dict, name: str, namespace: str, local_queue: Optional[str]) +def load_components(user_yaml: dict, name: str, namespace: str, local_queue: Optional[str], labels: dict)
    @@ -724,11 +738,16 @@

    Functions

    Expand source code
    def load_components(
    -    user_yaml: dict, name: str, namespace: str, local_queue: Optional[str]
    +    user_yaml: dict,
    +    name: str,
    +    namespace: str,
    +    local_queue: Optional[str],
    +    labels: dict,
     ):
         component_list = []
         components = user_yaml.get("spec", "resources")["resources"].get("GenericItems")
         lq_name = local_queue or get_default_kueue_name(namespace)
    +    cluster_labels = labels
         for component in components:
             if "generictemplate" in component:
                 if (
    @@ -740,6 +759,7 @@ 

    Functions

    ] labels = component["generictemplate"]["metadata"]["labels"] labels.update({"kueue.x-k8s.io/queue-name": lq_name}) + labels.update(cluster_labels) component_list.append(component["generictemplate"]) resources = "---\n" + "---\n".join( @@ -819,8 +839,8 @@

    Functions

    # Leave head node resources as template default resource["requests"]["cpu"] = head_cpus resource["limits"]["cpu"] = head_cpus - resource["requests"]["memory"] = str(head_memory) + "G" - resource["limits"]["memory"] = str(head_memory) + "G" + resource["requests"]["memory"] = head_memory + resource["limits"]["memory"] = head_memory resource["requests"]["nvidia.com/gpu"] = head_gpus resource["limits"]["nvidia.com/gpu"] = head_gpus @@ -837,9 +857,9 @@

    Functions

    resource[k][spec] = min_cpu if spec == "memory": if k == "limits": - resource[k][spec] = str(max_memory) + "G" + resource[k][spec] = max_memory else: - resource[k][spec] = str(min_memory) + "G" + resource[k][spec] = min_memory if spec == "nvidia.com/gpu": if i == 0: resource[k][spec] = 0 @@ -1037,17 +1057,17 @@

    Functions

    requests = resource.get("resources").get("requests") if requests is not None: requests["cpu"] = min_cpu - requests["memory"] = str(min_memory) + "G" + requests["memory"] = min_memory requests["nvidia.com/gpu"] = gpu limits = resource.get("resources").get("limits") if limits is not None: limits["cpu"] = max_cpu - limits["memory"] = str(max_memory) + "G" + limits["memory"] = max_memory limits["nvidia.com/gpu"] = gpu
    -def write_components(user_yaml: dict, output_file_name: str, namespace: str, local_queue: Optional[str]) +def write_components(user_yaml: dict, output_file_name: str, namespace: str, local_queue: Optional[str], labels: dict)
    @@ -1056,7 +1076,11 @@

    Functions

    Expand source code
    def write_components(
    -    user_yaml: dict, output_file_name: str, namespace: str, local_queue: Optional[str]
    +    user_yaml: dict,
    +    output_file_name: str,
    +    namespace: str,
    +    local_queue: Optional[str],
    +    labels: dict,
     ):
         # Create the directory if it doesn't exist
         directory_path = os.path.dirname(output_file_name)
    @@ -1066,6 +1090,7 @@ 

    Functions

    components = user_yaml.get("spec", "resources")["resources"].get("GenericItems") open(output_file_name, "w").close() lq_name = local_queue or get_default_kueue_name(namespace) + cluster_labels = labels with open(output_file_name, "a") as outfile: for component in components: if "generictemplate" in component: @@ -1078,6 +1103,7 @@

    Functions

                 ]
                 labels = component["generictemplate"]["metadata"]["labels"]
                 labels.update({"kueue.x-k8s.io/queue-name": lq_name})
+                labels.update(cluster_labels)
                 outfile.write("---\n")
                 yaml.dump(
                     component["generictemplate"], outfile, default_flow_style=False

diff --git a/docs/detailed-documentation/utils/pretty_print.html b/docs/detailed-documentation/utils/pretty_print.html
index e7c108256..2e7a69b81 100644
--- a/docs/detailed-documentation/utils/pretty_print.html
+++ b/docs/detailed-documentation/utils/pretty_print.html
@@ -167,7 +167,7 @@

    Module codeflare_sdk.utils.pretty_print

     name = cluster.name
     dashboard = cluster.dashboard
     workers = str(cluster.workers)
-    memory = str(cluster.worker_mem_min) + "~" + str(cluster.worker_mem_max)
+    memory = f"{cluster.worker_mem_min}~{cluster.worker_mem_max}"
     cpu = str(cluster.worker_cpu)
     gpu = str(cluster.worker_gpu)
@@ -345,7 +345,7 @@
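Note: illustrative only, not part of the patch. Since the worker memory fields are now quantity strings, the status table simply joins them with an f-string.

    worker_mem_min, worker_mem_max = "4G", "8G"
    memory = f"{worker_mem_min}~{worker_mem_max}"
    print(memory)  # 4G~8G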

    Functions

     name = cluster.name
     dashboard = cluster.dashboard
     workers = str(cluster.workers)
-    memory = str(cluster.worker_mem_min) + "~" + str(cluster.worker_mem_max)
+    memory = f"{cluster.worker_mem_min}~{cluster.worker_mem_max}"
    cpu = str(cluster.worker_cpu)
    gpu = str(cluster.worker_gpu)