Skip to content

Commit 1a4b7f8

Browse files
feat(google_container_node_pool): support fast socket (#8884) (#15872)
Signed-off-by: Modular Magician <[email protected]>
1 parent 6efd3fc commit 1a4b7f8

File tree

5 files changed

+150
-0
lines changed

5 files changed

+150
-0
lines changed

.changelog/8884.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:enhancement
2+
container: added `node_config.fast_socket` field to `google_container_node_pool`
3+
```

google/services/container/node_config.go

+33
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,21 @@ func schemaNodeConfig() *schema.Schema {
589589
},
590590
},
591591
},
592+
"fast_socket": {
593+
Type: schema.TypeList,
594+
Optional: true,
595+
MaxItems: 1,
596+
Description: `Enable or disable NCCL Fast Socket in the node pool.`,
597+
Elem: &schema.Resource{
598+
Schema: map[string]*schema.Schema{
599+
"enabled": {
600+
Type: schema.TypeBool,
601+
Required: true,
602+
Description: `Whether or not NCCL Fast Socket is enabled`,
603+
},
604+
},
605+
},
606+
},
592607
},
593608
},
594609
}
@@ -710,6 +725,13 @@ func expandNodeConfig(v interface{}) *container.NodeConfig {
710725
}
711726
}
712727

728+
if v, ok := nodeConfig["fast_socket"]; ok && len(v.([]interface{})) > 0 {
729+
conf := v.([]interface{})[0].(map[string]interface{})
730+
nc.FastSocket = &container.FastSocket{
731+
Enabled: conf["enabled"].(bool),
732+
}
733+
}
734+
713735
if v, ok := nodeConfig["reservation_affinity"]; ok && len(v.([]interface{})) > 0 {
714736
conf := v.([]interface{})[0].(map[string]interface{})
715737
valuesSet := conf["values"].(*schema.Set)
@@ -1007,6 +1029,7 @@ func flattenNodeConfig(c *container.NodeConfig) []map[string]interface{} {
10071029
"node_group": c.NodeGroup,
10081030
"advanced_machine_features": flattenAdvancedMachineFeaturesConfig(c.AdvancedMachineFeatures),
10091031
"sole_tenant_config": flattenSoleTenantConfig(c.SoleTenantConfig),
1032+
"fast_socket": flattenFastSocket(c.FastSocket),
10101033
})
10111034

10121035
if len(c.OauthScopes) > 0 {
@@ -1197,3 +1220,13 @@ func flattenSoleTenantConfig(c *container.SoleTenantConfig) []map[string]interfa
11971220
"node_affinity": affinities,
11981221
})
11991222
}
1223+
1224+
// flattenFastSocket converts the API FastSocket message into the list-of-maps
// form stored under the "fast_socket" key of the flattened node config.
// A nil input yields an empty (non-nil) slice; otherwise the slice holds a
// single map whose "enabled" entry mirrors c.Enabled.
func flattenFastSocket(c *container.FastSocket) []map[string]interface{} {
1225+
result := []map[string]interface{}{}
1226+
if c != nil {
1227+
result = append(result, map[string]interface{}{
1228+
"enabled": c.Enabled,
1229+
})
1230+
}
1231+
return result
1232+
}

google/services/container/resource_container_node_pool.go

+34
Original file line numberDiff line numberDiff line change
@@ -1518,7 +1518,41 @@ func nodePoolUpdate(d *schema.ResourceData, meta interface{}, nodePoolInfo *Node
15181518

15191519
log.Printf("[INFO] Updated linux_node_config for node pool %s", name)
15201520
}
1521+
if d.HasChange(prefix + "node_config.0.fast_socket") {
1522+
req := &container.UpdateNodePoolRequest{
1523+
NodePoolId: name,
1524+
FastSocket: &container.FastSocket{},
1525+
}
1526+
if v, ok := d.GetOk(prefix + "node_config.0.fast_socket"); ok {
1527+
fastSocket := v.([]interface{})[0].(map[string]interface{})
1528+
req.FastSocket = &container.FastSocket{
1529+
Enabled: fastSocket["enabled"].(bool),
1530+
}
1531+
}
1532+
updateF := func() error {
1533+
clusterNodePoolsUpdateCall := config.NewContainerClient(userAgent).Projects.Locations.Clusters.NodePools.Update(nodePoolInfo.fullyQualifiedName(name), req)
1534+
if config.UserProjectOverride {
1535+
clusterNodePoolsUpdateCall.Header().Add("X-Goog-User-Project", nodePoolInfo.project)
1536+
}
1537+
op, err := clusterNodePoolsUpdateCall.Do()
1538+
if err != nil {
1539+
return err
1540+
}
1541+
1542+
// Wait until it's updated
1543+
return ContainerOperationWait(config, op,
1544+
nodePoolInfo.project,
1545+
nodePoolInfo.location,
1546+
"updating GKE node pool fast_socket", userAgent,
1547+
timeout)
1548+
}
1549+
1550+
if err := tpgresource.RetryWhileIncompatibleOperation(timeout, npLockKey, updateF); err != nil {
1551+
return err
1552+
}
15211553

1554+
log.Printf("[INFO] Updated fast_socket for node pool %s", name)
1555+
}
15221556
}
15231557

15241558
if d.HasChange(prefix + "node_count") {

google/services/container/resource_container_node_pool_test.go

+72
Original file line numberDiff line numberDiff line change
@@ -1345,6 +1345,78 @@ resource "google_container_node_pool" "np" {
13451345
`, cluster, np)
13461346
}
13471347

1348+
// TestAccContainerNodePool_fastSocket is an acceptance test for the
// node_config.fast_socket block. It applies a node pool config with
// fast_socket enabled, then the same config with it disabled, checking the
// "node_config.0.fast_socket.0.enabled" attribute after each apply. Every
// apply is followed by an import step with ImportStateVerify set.
func TestAccContainerNodePool_fastSocket(t *testing.T) {
1349+
t.Parallel()
1350+
1351+
// Cluster and node pool names carry a 10-character suffix from acctest.RandString.
cluster := fmt.Sprintf("tf-test-cluster-%s", acctest.RandString(t, 10))
1352+
np := fmt.Sprintf("tf-test-nodepool-%s", acctest.RandString(t, 10))
1353+
1354+
acctest.VcrTest(t, resource.TestCase{
1355+
PreCheck: func() { acctest.AccTestPreCheck(t) },
1356+
ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t),
1357+
CheckDestroy: testAccCheckContainerNodePoolDestroyProducer(t),
1358+
Steps: []resource.TestStep{
1359+
{
1360+
// Apply with fast_socket.enabled = true and check the attribute.
Config: testAccContainerNodePool_fastSocket(cluster, np, true),
1361+
Check: resource.ComposeTestCheckFunc(
1362+
resource.TestCheckResourceAttr("google_container_node_pool.np",
1363+
"node_config.0.fast_socket.0.enabled", "true"),
1364+
),
1365+
},
1366+
{
1367+
// Import the node pool and verify the imported state.
ResourceName: "google_container_node_pool.np",
1368+
ImportState: true,
1369+
ImportStateVerify: true,
1370+
},
1371+
{
1372+
// Re-apply with fast_socket.enabled = false and check the attribute.
Config: testAccContainerNodePool_fastSocket(cluster, np, false),
1373+
Check: resource.ComposeTestCheckFunc(
1374+
resource.TestCheckResourceAttr("google_container_node_pool.np",
1375+
"node_config.0.fast_socket.0.enabled", "false"),
1376+
),
1377+
},
1378+
{
1379+
// Import again after the change and verify the imported state.
ResourceName: "google_container_node_pool.np",
1380+
ImportState: true,
1381+
ImportStateVerify: true,
1382+
},
1383+
},
1384+
})
1385+
}
1386+
1387+
// testAccContainerNodePool_fastSocket renders the Terraform configuration used
// by the fast_socket acceptance test. The template declares a cluster named
// `cluster` in us-central1-f with min_master_version "1.25", and a node pool
// named `np` on that cluster whose node_config uses machine type
// n1-standard-8, image type COS_CONTAINERD, one nvidia-tesla-p100 accelerator,
// gvnic enabled, and a fast_socket block whose `enabled` value is taken from
// the enabled argument.
func testAccContainerNodePool_fastSocket(cluster, np string, enabled bool) string {
1388+
return fmt.Sprintf(`
1389+
resource "google_container_cluster" "cluster" {
1390+
name = "%s"
1391+
location = "us-central1-f"
1392+
initial_node_count = 1
1393+
min_master_version = "1.25"
1394+
}
1395+
1396+
resource "google_container_node_pool" "np" {
1397+
name = "%s"
1398+
location = "us-central1-f"
1399+
cluster = google_container_cluster.cluster.name
1400+
initial_node_count = 1
1401+
1402+
node_config {
1403+
machine_type = "n1-standard-8"
1404+
image_type = "COS_CONTAINERD"
1405+
guest_accelerator {
1406+
type = "nvidia-tesla-p100"
1407+
count = 1
1408+
}
1409+
gvnic {
1410+
enabled = true
1411+
}
1412+
fast_socket {
1413+
enabled = %t
1414+
}
1415+
}
1416+
}
1417+
`, cluster, np, enabled)
1418+
}
1419+
13481420
func TestAccContainerNodePool_compactPlacement(t *testing.T) {
13491421
t.Parallel()
13501422

website/docs/r/container_cluster.html.markdown

+8
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,10 @@ ephemeral_storage_local_ssd_config {
795795
local_ssd_count = 2
796796
}
797797
```
798+
* `fast_socket` - (Optional) Parameters for the NCCL Fast Socket feature. If unspecified, NCCL Fast Socket will not be enabled on the node pool.
799+
The node pool must have gVNIC enabled.
800+
Requires GKE version 1.25.2-gke.1700 or later.
801+
Structure is [documented below](#nested_fast_socket).
798802

799803
* `local_nvme_ssd_block_config` - (Optional) Parameters for the local NVMe SSDs. Structure is [documented below](#nested_local_nvme_ssd_block_config).
800804

@@ -965,6 +969,10 @@ sole_tenant_config {
965969

966970
* `local_ssd_count` (Required) - Number of local SSDs to use to back ephemeral storage. Uses NVMe interfaces. Each local SSD is 375 GB in size. If zero, it means to disable using local SSDs as ephemeral storage.
967971

972+
<a name="nested_fast_socket"></a>The `fast_socket` block supports:
973+
974+
* `enabled` (Required) - Whether or not the NCCL Fast Socket is enabled.
975+
968976
<a name="nested_local_nvme_ssd_block_config"></a>The `local_nvme_ssd_block_config` block supports:
969977

970978
* `local_ssd_count` (Required) - Number of raw-block local NVMe SSD disks to be attached to the node. Each local SSD is 375 GB in size. If zero, it means no raw-block local NVMe SSD disks to be attached to the node.

0 commit comments

Comments
 (0)