diff --git a/SETUP.md b/SETUP.md index 5d091ee..fa5616f 100644 --- a/SETUP.md +++ b/SETUP.md @@ -34,6 +34,7 @@ Instructions are provided for the following OpenShift AI ***fast*** releases: + OpenShift AI 2.11 + [RHOAI 2.11 Cluster Setup](./setup.RHOAI-v2.11/CLUSTER-SETUP.md) + [RHOAI 2.11 Team Setup](./setup.RHOAI-v2.11/TEAM-SETUP.md) + + [UPGRADING from RHOAI 2.10](./setup.RHOAI-v2.11/UPGRADE.md) + [RHOAI 2.11 Uninstall](./setup.RHOAI-v2.11/UNINSTALL.md) ## Kubernetes diff --git a/setup.RHOAI-v2.11/UPGRADE.md b/setup.RHOAI-v2.11/UPGRADE.md new file mode 100644 index 0000000..13821a3 --- /dev/null +++ b/setup.RHOAI-v2.11/UPGRADE.md @@ -0,0 +1,31 @@ +# Upgrading from RHOAI 2.10 + +These instructions assume you installed and configured RHOAI 2.10 following +the MLBatch [install instructions for RHOAI-v2.10](../setup.RHOAI-v2.10/CLUSTER-SETUP.md). + +Your subscription will have automatically created an unapproved +install plan to upgrade to RHOAI 2.11. + +Before beginning, verify that the expected install plan exists: +```sh +oc get ip -n redhat-ods-operator +``` +Typical output would be: +```sh +NAME CSV APPROVAL APPROVED +install-nqrbp rhods-operator.2.10.0 Manual true +install-st8vh rhods-operator.2.11.0 Manual false +``` + +Assuming the install plan exists, you can begin the upgrade process. + +First, update the MLBatch modifications to the default RHOAI configuration maps. 
+```sh +oc apply -f setup.RHOAI-v2.11/mlbatch-upgrade-configmaps.yaml +``` + +Second, approve the install plan, replacing the example plan name below with the actual +value on your cluster: +```sh +oc patch ip -n redhat-ods-operator --type merge --patch '{"spec":{"approved":true}}' install-st8vh +``` diff --git a/setup.RHOAI-v2.11/mlbatch-upgrade-configmaps.yaml b/setup.RHOAI-v2.11/mlbatch-upgrade-configmaps.yaml new file mode 100644 index 0000000..c4e2397 --- /dev/null +++ b/setup.RHOAI-v2.11/mlbatch-upgrade-configmaps.yaml @@ -0,0 +1,102 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: codeflare-operator-config + namespace: redhat-ods-applications +data: + config.yaml: | + appwrapper: + Config: + manageJobsWithoutQueueName: true + userRBACAdmissionCheck: false + schedulerName: scheduler-plugins-scheduler + defaultQueueName: default-queue + enabled: true +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: mlbatch-kueue + namespace: redhat-ods-operator +data: + controller_manager_config.yaml: | + apiVersion: config.kueue.x-k8s.io/v1beta1 + kind: Configuration + health: + healthProbeBindAddress: :8081 + metrics: + bindAddress: :8080 + # enableClusterQueueResources: true + webhook: + port: 9443 + leaderElection: + leaderElect: true + resourceName: c1f6bfd2.kueue.x-k8s.io + controller: + groupKindConcurrency: + Job.batch: 5 + Pod: 5 + Workload.kueue.x-k8s.io: 5 + LocalQueue.kueue.x-k8s.io: 1 + ClusterQueue.kueue.x-k8s.io: 1 + ResourceFlavor.kueue.x-k8s.io: 1 + clientConnection: + qps: 50 + burst: 100 + #pprofBindAddress: :8082 + waitForPodsReady: + enable: false + blockAdmission: false + manageJobsWithoutQueueName: true + #internalCertManagement: + # enable: false + # webhookServiceName: "" + # webhookSecretName: "" + integrations: + frameworks: + # - "batch/job" + - "kubeflow.org/mpijob" + - "ray.io/rayjob" + - "ray.io/raycluster" + - "jobset.x-k8s.io/jobset" + - "kubeflow.org/mxjob" + - "kubeflow.org/paddlejob" + - "kubeflow.org/pytorchjob" + - 
"kubeflow.org/tfjob" + - "kubeflow.org/xgboostjob" + # - "pod" + externalFrameworks: + - "AppWrapper.v1beta2.workload.codeflare.dev" + # podOptions: + # namespaceSelector: + # matchExpressions: + # - key: kubernetes.io/metadata.name + # operator: NotIn + # values: [ kube-system, kueue-system ] + manager_config_patch.yaml: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: controller-manager + namespace: system + spec: + template: + spec: + priorityClassName: system-node-critical + containers: + - name: manager + image: $(image) + args: + - "--config=/controller_manager_config.yaml" + - "--zap-log-level=2" + - "--feature-gates=LendingLimit=true" + volumeMounts: + - name: manager-config + mountPath: /controller_manager_config.yaml + subPath: controller_manager_config.yaml + volumes: + - name: manager-config + configMap: + name: manager-config +---