
Commit a7df7d3

zli82016 authored and anavada committed
Add resource Dataproc batch (GoogleCloudPlatform#11750)
1 parent 3d433da commit a7df7d3

11 files changed: +890 -4 lines

mmv1/products/dataproc/Batch.yaml

+539
Large diffs are not rendered by default.

mmv1/products/dataproc/product.yaml

-2
@@ -17,7 +17,5 @@ display_name: 'Dataproc'
 versions:
   - name: 'ga'
     base_url: 'https://dataproc.googleapis.com/v1/'
-  - name: 'beta'
-    base_url: 'https://dataproc.googleapis.com/v1beta2/'
 scopes:
   - 'https://www.googleapis.com/auth/cloud-identity'
@@ -0,0 +1,15 @@
/*
 * The Dataproc Batch API appends a subminor version to the provided
 * runtime version. We suppress this server-generated subminor version.
 */
func CloudDataprocBatchRuntimeConfigVersionDiffSuppressFunc(old, new string) bool {
	if old != "" && strings.HasPrefix(new, old) || (new != "" && strings.HasPrefix(old, new)) {
		return true
	}

	return old == new
}

func CloudDataprocBatchRuntimeConfigVersionDiffSuppress(_, old, new string, d *schema.ResourceData) bool {
	return CloudDataprocBatchRuntimeConfigVersionDiffSuppressFunc(old, new)
}
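
For context, a minimal standalone sketch of how this suppression plays out; the "2.2.27" server value is a hypothetical subminor, not taken from the API:

package main

import (
	"fmt"
	"strings"
)

// Same prefix check as the suppress function above: treat the two
// versions as equal when either one is a prefix of the other.
func suppress(old, new string) bool {
	if old != "" && strings.HasPrefix(new, old) || (new != "" && strings.HasPrefix(old, new)) {
		return true
	}
	return old == new
}

func main() {
	fmt.Println(suppress("2.2.27", "2.2")) // true: configured "2.2", server stored "2.2.27"
	fmt.Println(suppress("2.2", "2.1"))    // false: a genuine version change still produces a diff
}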
@@ -0,0 +1,37 @@
{{/*
	The license inside this block applies to this file
	Copyright 2024 Google Inc.
	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
*/ -}}
if obj1, ok := res["runtimeConfig"]; ok {
	if rconfig, ok := obj1.(map[string]interface{}); ok {
		if obj2, ok := rconfig["properties"]; ok {
			if properties, ok := obj2.(map[string]interface{}); ok {
				// Update effective_properties to include both server-set and client-set properties
				propertiesCopy := make(map[string]interface{})
				for k, v := range properties {
					propertiesCopy[k] = v
				}
				rconfig["effectiveProperties"] = propertiesCopy

				// Update properties back to the original client-set properties
				originalPropertiesCopy := make(map[string]interface{})
				originalProperties := d.Get("runtime_config.0.properties").(interface{}).(map[string]interface{})
				for k, v := range originalProperties {
					originalPropertiesCopy[k] = v
				}
				rconfig["properties"] = originalPropertiesCopy
				return res, nil
			}
		}
	}
}

return res, nil
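
To make the intent of this decoder concrete: the API returns client-set and server-default Spark properties merged into a single runtimeConfig.properties map, so the decoder preserves the merged map as effectiveProperties and resets properties to what the configuration declared, avoiding a permanent plan diff. A minimal sketch with hypothetical property values (the spark.app.name default is illustrative, not taken from the API):

package main

import "fmt"

func main() {
	// Merged map as returned by the API: client-set plus server defaults.
	serverProps := map[string]interface{}{
		"spark.dynamicAllocation.enabled": "false",     // client-set
		"spark.app.name":                  "batch-123", // server default (illustrative)
	}
	// What the Terraform configuration actually declared.
	clientProps := map[string]interface{}{
		"spark.dynamicAllocation.enabled": "false",
	}

	// Mirror of the decoder above: effective_properties gets the merged
	// map, while properties is reset to the client-declared values.
	effective := make(map[string]interface{})
	for k, v := range serverProps {
		effective[k] = v
	}
	fmt.Println("effective_properties:", effective)
	fmt.Println("properties:", clientProps)
}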
@@ -0,0 +1,28 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {
  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
    }
  }

  pyspark_batch {
    main_python_file_uri = "https://storage.googleapis.com/terraform-batches/test_util.py"
    args                 = ["10"]
    jar_file_uris        = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    python_file_uris     = ["gs://dataproc-examples/pyspark/hello-world/hello-world.py"]
    archive_uris         = [
      "https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked",
      "https://storage.googleapis.com/terraform-batches/animals.txt.jar",
      "https://storage.googleapis.com/terraform-batches/animals.txt"
    ]
    file_uris = ["https://storage.googleapis.com/terraform-batches/people.txt"]
  }
}
@@ -0,0 +1,25 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {

  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"
  labels   = {"batch_test": "terraform"}

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
      ttl            = "3600s"
      network_tags   = ["tag1"]
    }
  }

  spark_batch {
    main_class    = "org.apache.spark.examples.SparkPi"
    args          = ["10"]
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }
}
@@ -0,0 +1,114 @@
data "google_project" "project" {
}

data "google_storage_project_service_account" "gcs_account" {
}

resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {
  batch_id = "{{index $.Vars "dataproc_batch"}}"
  location = "us-central1"
  labels   = {"batch_test": "terraform"}

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
    version    = "2.2"
  }

  environment_config {
    execution_config {
      ttl             = "3600s"
      network_tags    = ["tag1"]
      kms_key         = google_kms_crypto_key.crypto_key.id
      network_uri     = "default"
      service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com"
      staging_bucket  = google_storage_bucket.bucket.name
    }
    peripherals_config {
      metastore_service = google_dataproc_metastore_service.ms.name
      spark_history_server_config {
        dataproc_cluster = google_dataproc_cluster.basic.id
      }
    }
  }

  spark_batch {
    main_class    = "org.apache.spark.examples.SparkPi"
    args          = ["10"]
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }

  depends_on = [
    google_kms_crypto_key_iam_member.crypto_key_member_1,
  ]
}

resource "google_storage_bucket" "bucket" {
  uniform_bucket_level_access = true
  name                        = "{{index $.Vars "bucket_name"}}"
  location                    = "US"
  force_destroy               = true
}

resource "google_kms_crypto_key" "crypto_key" {
  name     = "{{index $.Vars "key_name"}}"
  key_ring = google_kms_key_ring.key_ring.id
  purpose  = "ENCRYPT_DECRYPT"
}

resource "google_kms_key_ring" "key_ring" {
  name     = "{{index $.Vars "keyring_name"}}"
  location = "us-central1"
}

resource "google_kms_crypto_key_iam_member" "crypto_key_member_1" {
  crypto_key_id = google_kms_crypto_key.crypto_key.id
  role          = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
  member        = "serviceAccount:service-${data.google_project.project.number}@dataproc-accounts.iam.gserviceaccount.com"
}

resource "google_dataproc_cluster" "basic" {
  name   = "{{index $.Vars "dataproc_batch"}}"
  region = "us-central1"

  cluster_config {
    # Keep the costs down with the smallest config we can get away with
    software_config {
      override_properties = {
        "dataproc:dataproc.allow.zero.workers" = "true"
        "spark:spark.history.fs.logDirectory"  = "gs://${google_storage_bucket.bucket.name}/*/spark-job-history"
      }
    }

    endpoint_config {
      enable_http_port_access = true
    }

    master_config {
      num_instances = 1
      machine_type  = "e2-standard-2"
      disk_config {
        boot_disk_size_gb = 35
      }
    }

    metastore_config {
      dataproc_metastore_service = google_dataproc_metastore_service.ms.name
    }
  }
}

resource "google_dataproc_metastore_service" "ms" {
  service_id = "{{index $.Vars "dataproc_batch"}}"
  location   = "us-central1"
  port       = 9080
  tier       = "DEVELOPER"

  maintenance_window {
    hour_of_day = 2
    day_of_week = "SUNDAY"
  }

  hive_metastore_config {
    version = "3.1.2"
  }
}
@@ -0,0 +1,24 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {

  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"
  labels   = {"batch_test": "terraform"}

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
      ttl            = "3600s"
      network_tags   = ["tag1"]
    }
  }

  spark_r_batch {
    main_r_file_uri = "https://storage.googleapis.com/terraform-batches/spark-r-flights.r"
    args            = ["https://storage.googleapis.com/terraform-batches/flights.csv"]
  }
}
@@ -0,0 +1,24 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {

  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
    }
  }

  spark_sql_batch {
    query_file_uri  = "gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql"
    jar_file_uris   = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    query_variables = {
      name = "value"
    }
  }
}

mmv1/third_party/terraform/acctest/bootstrap_test_utils.go

+25 -2 lines changed

@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"maps"
 	"os"
 	"strings"
 	"testing"

@@ -910,7 +911,25 @@ func BootstrapSharedCaPoolInLocation(t *testing.T, location string) string {
 	return poolName
 }

+func BootstrapSubnetForDataprocBatches(t *testing.T, subnetName string, networkName string) string {
+	subnetOptions := map[string]interface{}{
+		"privateIpGoogleAccess": true,
+	}
+	return BootstrapSubnetWithOverrides(t, subnetName, networkName, subnetOptions)
+}
+
 func BootstrapSubnet(t *testing.T, subnetName string, networkName string) string {
+	return BootstrapSubnetWithOverrides(t, subnetName, networkName, make(map[string]interface{}))
+}
+
+func BootstrapSubnetWithFirewallForDataprocBatches(t *testing.T, testId string, subnetName string) string {
+	networkName := BootstrapSharedTestNetwork(t, testId)
+	subnetworkName := BootstrapSubnetForDataprocBatches(t, subnetName, networkName)
+	BootstrapFirewallForDataprocSharedNetwork(t, subnetName, networkName)
+	return subnetworkName
+}
+
+func BootstrapSubnetWithOverrides(t *testing.T, subnetName string, networkName string, subnetOptions map[string]interface{}) string {
 	projectID := envvar.GetTestProjectFromEnv()
 	region := envvar.GetTestRegionFromEnv()

@@ -932,20 +951,24 @@ func BootstrapSubnet(t *testing.T, subnetName string, networkName string) string
 	networkUrl := fmt.Sprintf("%sprojects/%s/global/networks/%s", config.ComputeBasePath, projectID, networkName)
 	url := fmt.Sprintf("%sprojects/%s/regions/%s/subnetworks", config.ComputeBasePath, projectID, region)

-	subnetObj := map[string]interface{}{
+	defaultSubnetObj := map[string]interface{}{
 		"name":        subnetName,
 		"region ":     region,
 		"network":     networkUrl,
 		"ipCidrRange": "10.77.0.0/20",
 	}

+	if len(subnetOptions) != 0 {
+		maps.Copy(defaultSubnetObj, subnetOptions)
+	}
+
 	res, err := transport_tpg.SendRequest(transport_tpg.SendRequestOptions{
 		Config:    config,
 		Method:    "POST",
 		Project:   projectID,
 		RawURL:    url,
 		UserAgent: config.UserAgent,
-		Body:      subnetObj,
+		Body:      defaultSubnetObj,
 		Timeout:   4 * time.Minute,
 	})
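For readers skimming the bootstrap change: BootstrapSubnetWithOverrides builds a default subnetwork request body and overlays any caller-supplied fields with maps.Copy (Go 1.21+), which is how BootstrapSubnetForDataprocBatches injects privateIpGoogleAccess. A standalone sketch of that merge, with illustrative values; only the key names come from the diff:

package main

import (
	"fmt"
	"maps"
)

func main() {
	// Default request body, as built by BootstrapSubnetWithOverrides.
	defaultSubnetObj := map[string]interface{}{
		"name":        "test-subnet",
		"ipCidrRange": "10.77.0.0/20",
	}
	// Caller-supplied overrides, as passed by BootstrapSubnetForDataprocBatches.
	subnetOptions := map[string]interface{}{
		"privateIpGoogleAccess": true,
	}
	// maps.Copy overlays the overrides onto the defaults in place,
	// replacing values for any keys that collide.
	maps.Copy(defaultSubnetObj, subnetOptions)
	fmt.Println(defaultSubnetObj)
	// map[ipCidrRange:10.77.0.0/20 name:test-subnet privateIpGoogleAccess:true]
}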