Skip to content

Commit dbdc4c5

Browse files
committed
fix helm chart for ray integration
1 parent 6e36675 commit dbdc4c5

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

applications/hcc/modules/nemo/main.tf

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,18 @@ resource "helm_release" "kuberay_operator" {
9999
version = "1.1.0"
100100
namespace = "default"
101101
reset_values = true
102+
set {
103+
name = "tolerations[0].key"
104+
value = "nvidia.com/gpu"
105+
}
106+
set {
107+
name = "tolerations[0].operator"
108+
value = "Exists"
109+
}
110+
set {
111+
name = "tolerations[0].effect"
112+
value = "NoSchedule"
113+
}
102114
}
103115

104116
resource "helm_release" "raycluster" {
@@ -108,7 +120,44 @@ resource "helm_release" "raycluster" {
108120
chart = "ray-cluster"
109121
version = "1.1.0"
110122
namespace = "default"
123+
reset_values = true
111124
depends_on = [
112125
helm_release.kuberay_operator
113126
]
127+
set {
128+
name = "tolerations[0].key"
129+
value = "nvidia.com/gpu"
130+
}
131+
set {
132+
name = "tolerations[0].operator"
133+
value = "Exists"
134+
}
135+
set {
136+
name = "tolerations[0].effect"
137+
value = "NoSchedule"
138+
}
139+
set {
140+
name = "head.tolerations[0].key"
141+
value = "nvidia.com/gpu"
142+
}
143+
set {
144+
name = "head.tolerations[0].operator"
145+
value = "Exists"
146+
}
147+
set {
148+
name = "head.tolerations[0].effect"
149+
value = "NoSchedule"
150+
}
151+
set {
152+
name = "worker.tolerations[0].key"
153+
value = "nvidia.com/gpu"
154+
}
155+
set {
156+
name = "worker.tolerations[0].operator"
157+
value = "Exists"
158+
}
159+
set {
160+
name = "worker.tolerations[0].effect"
161+
value = "NoSchedule"
162+
}
114163
}

applications/hcc/variables.tf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,14 @@ variable "a3_mega_consumption_model" {
118118
}
119119

120120
locals {
121-
placement_policy_valid = var.gpu_type != "A3 Mega" || local.recipe == "gke" || length(var.placement_policy_name) > 0
121+
placement_policy_valid = var.gpu_type != "A3 Mega" || local.recipe == "gke" || local.recipe == "gke-ray" || length(var.placement_policy_name) > 0
122122
a3_consumption_model_check = length(var.a3_ultra_consumption_model) > 0 || length(var.a3_mega_consumption_model) > 0
123123
recipe = {
124124
"A3 Mega" = var.a3mega_recipe
125125
"A3 Ultra" = var.a3ultra_recipe
126126
}[var.gpu_type]
127127
recipes_not_empty = length(local.recipe) > 0
128-
reservation_valid = !(local.recipe != "gke") || length(var.reservation) > 0
128+
reservation_valid = local.recipe == "gke" || local.recipe == "gke-ray" || length(var.reservation) > 0
129129
}
130130

131131
resource "null_resource" "input_validation" {
@@ -146,7 +146,7 @@ resource "null_resource" "input_validation" {
146146
}
147147
precondition {
148148
condition = local.reservation_valid
149-
error_message = "The 'reservation' variable must not be empty when recipe is not 'gke'."
149+
error_message = "The 'reservation' variable must not be empty when recipe is not 'gke' or 'gke-ray'."
150150
}
151151
}
152152
}

0 commit comments

Comments
 (0)