
Commit a7df7d3

zli82016 authored and anavada committed
Add resource Dataproc batch (GoogleCloudPlatform#11750)
1 parent 3d433da commit a7df7d3

11 files changed: +890 -4 lines

mmv1/products/dataproc/Batch.yaml

+539
Large diffs are not rendered by default.

mmv1/products/dataproc/product.yaml

-2
@@ -17,7 +17,5 @@ display_name: 'Dataproc'
 versions:
   - name: 'ga'
     base_url: 'https://dataproc.googleapis.com/v1/'
-  - name: 'beta'
-    base_url: 'https://dataproc.googleapis.com/v1beta2/'
 scopes:
   - 'https://www.googleapis.com/auth/cloud-identity'
@@ -0,0 +1,15 @@
/*
 * The Dataproc Batch API appends a subminor version to the provided
 * runtime version. We suppress this server-generated subminor version.
 */
func CloudDataprocBatchRuntimeConfigVersionDiffSuppressFunc(old, new string) bool {
	if old != "" && strings.HasPrefix(new, old) || (new != "" && strings.HasPrefix(old, new)) {
		return true
	}

	return old == new
}

func CloudDataprocBatchRuntimeConfigVersionDiffSuppress(_, old, new string, d *schema.ResourceData) bool {
	return CloudDataprocBatchRuntimeConfigVersionDiffSuppressFunc(old, new)
}
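
For context, a minimal standalone sketch of how this suppression plays out; the "2.2.27" server value is a hypothetical subminor, not taken from the API:

package main

import (
	"fmt"
	"strings"
)

// Same prefix check as the suppress function above: treat the two
// versions as equal when either one is a prefix of the other.
func suppress(old, new string) bool {
	if old != "" && strings.HasPrefix(new, old) || (new != "" && strings.HasPrefix(old, new)) {
		return true
	}
	return old == new
}

func main() {
	fmt.Println(suppress("2.2.27", "2.2")) // true: configured "2.2", server stored "2.2.27"
	fmt.Println(suppress("2.2", "2.1"))    // false: a genuine version change still produces a diff
}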
@@ -0,0 +1,37 @@
{{/*
	The license inside this block applies to this file
	Copyright 2024 Google Inc.
	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
*/ -}}
if obj1, ok := res["runtimeConfig"]; ok {
	if rconfig, ok := obj1.(map[string]interface{}); ok {
		if obj2, ok := rconfig["properties"]; ok {
			if properties, ok := obj2.(map[string]interface{}); ok {
				// Update effective_properties to include both server-set and client-set properties
				propertiesCopy := make(map[string]interface{})
				for k, v := range properties {
					propertiesCopy[k] = v
				}
				rconfig["effectiveProperties"] = propertiesCopy

				// Update properties back to the original client-set properties
				originalPropertiesCopy := make(map[string]interface{})
				originalProperties := d.Get("runtime_config.0.properties").(interface{}).(map[string]interface{})
				for k, v := range originalProperties {
					originalPropertiesCopy[k] = v
				}
				rconfig["properties"] = originalPropertiesCopy
				return res, nil
			}
		}
	}
}

return res, nil
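
To make the intent of this decoder concrete: the API returns client-set and server-default Spark properties merged into a single runtimeConfig.properties map, so the decoder preserves the merged map as effectiveProperties and resets properties to what the configuration declared, avoiding a permanent plan diff. A minimal sketch with hypothetical property values (the spark.app.name default is illustrative, not taken from the API):

package main

import "fmt"

func main() {
	// Merged map as returned by the API: client-set plus server defaults.
	serverProps := map[string]interface{}{
		"spark.dynamicAllocation.enabled": "false",     // client-set
		"spark.app.name":                  "batch-123", // server default (illustrative)
	}
	// What the Terraform configuration actually declared.
	clientProps := map[string]interface{}{
		"spark.dynamicAllocation.enabled": "false",
	}

	// Mirror of the decoder above: effective_properties gets the merged
	// map, while properties is reset to the client-declared values.
	effective := make(map[string]interface{})
	for k, v := range serverProps {
		effective[k] = v
	}
	fmt.Println("effective_properties:", effective)
	fmt.Println("properties:", clientProps)
}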
@@ -0,0 +1,28 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {
  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
    }
  }

  pyspark_batch {
    main_python_file_uri = "https://storage.googleapis.com/terraform-batches/test_util.py"
    args                 = ["10"]
    jar_file_uris        = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    python_file_uris     = ["gs://dataproc-examples/pyspark/hello-world/hello-world.py"]
    archive_uris         = [
      "https://storage.googleapis.com/terraform-batches/animals.txt.tar.gz#unpacked",
      "https://storage.googleapis.com/terraform-batches/animals.txt.jar",
      "https://storage.googleapis.com/terraform-batches/animals.txt"
    ]
    file_uris = ["https://storage.googleapis.com/terraform-batches/people.txt"]
  }
}
@@ -0,0 +1,25 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {

  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"
  labels   = {"batch_test": "terraform"}

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
      ttl            = "3600s"
      network_tags   = ["tag1"]
    }
  }

  spark_batch {
    main_class    = "org.apache.spark.examples.SparkPi"
    args          = ["10"]
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }
}
@@ -0,0 +1,114 @@
data "google_project" "project" {
}

data "google_storage_project_service_account" "gcs_account" {
}

resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {
  batch_id = "{{index $.Vars "dataproc_batch"}}"
  location = "us-central1"
  labels   = {"batch_test": "terraform"}

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
    version    = "2.2"
  }

  environment_config {
    execution_config {
      ttl             = "3600s"
      network_tags    = ["tag1"]
      kms_key         = google_kms_crypto_key.crypto_key.id
      network_uri     = "default"
      service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com"
      staging_bucket  = google_storage_bucket.bucket.name
    }
    peripherals_config {
      metastore_service = google_dataproc_metastore_service.ms.name
      spark_history_server_config {
        dataproc_cluster = google_dataproc_cluster.basic.id
      }
    }
  }

  spark_batch {
    main_class    = "org.apache.spark.examples.SparkPi"
    args          = ["10"]
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }

  depends_on = [
    google_kms_crypto_key_iam_member.crypto_key_member_1,
  ]
}

resource "google_storage_bucket" "bucket" {
  uniform_bucket_level_access = true
  name                        = "{{index $.Vars "bucket_name"}}"
  location                    = "US"
  force_destroy               = true
}

resource "google_kms_crypto_key" "crypto_key" {
  name     = "{{index $.Vars "key_name"}}"
  key_ring = google_kms_key_ring.key_ring.id
  purpose  = "ENCRYPT_DECRYPT"
}

resource "google_kms_key_ring" "key_ring" {
  name     = "{{index $.Vars "keyring_name"}}"
  location = "us-central1"
}

resource "google_kms_crypto_key_iam_member" "crypto_key_member_1" {
  crypto_key_id = google_kms_crypto_key.crypto_key.id
  role          = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
  member        = "serviceAccount:service-${data.google_project.project.number}@dataproc-accounts.iam.gserviceaccount.com"
}

resource "google_dataproc_cluster" "basic" {
  name   = "{{index $.Vars "dataproc_batch"}}"
  region = "us-central1"

  cluster_config {
    # Keep the costs down with the smallest config we can get away with
    software_config {
      override_properties = {
        "dataproc:dataproc.allow.zero.workers" = "true"
        "spark:spark.history.fs.logDirectory"  = "gs://${google_storage_bucket.bucket.name}/*/spark-job-history"
      }
    }

    endpoint_config {
      enable_http_port_access = true
    }

    master_config {
      num_instances = 1
      machine_type  = "e2-standard-2"
      disk_config {
        boot_disk_size_gb = 35
      }
    }

    metastore_config {
      dataproc_metastore_service = google_dataproc_metastore_service.ms.name
    }
  }
}

resource "google_dataproc_metastore_service" "ms" {
  service_id = "{{index $.Vars "dataproc_batch"}}"
  location   = "us-central1"
  port       = 9080
  tier       = "DEVELOPER"

  maintenance_window {
    hour_of_day = 2
    day_of_week = "SUNDAY"
  }

  hive_metastore_config {
    version = "3.1.2"
  }
}
@@ -0,0 +1,24 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {

  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"
  labels   = {"batch_test": "terraform"}

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
      ttl            = "3600s"
      network_tags   = ["tag1"]
    }
  }

  spark_r_batch {
    main_r_file_uri = "https://storage.googleapis.com/terraform-batches/spark-r-flights.r"
    args            = ["https://storage.googleapis.com/terraform-batches/flights.csv"]
  }
}
@@ -0,0 +1,24 @@
resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {

  batch_id = "tf-test-batch%{random_suffix}"
  location = "us-central1"

  runtime_config {
    properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
  }

  environment_config {
    execution_config {
      subnetwork_uri = "{{index $.Vars "subnetwork_name"}}"
    }
  }

  spark_sql_batch {
    query_file_uri  = "gs://dataproc-examples/spark-sql/natality/cigarette_correlations.sql"
    jar_file_uris   = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    query_variables = {
      name = "value"
    }
  }
}

mmv1/third_party/terraform/acctest/bootstrap_test_utils.go

+25 -2 lines changed

@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"maps"
 	"os"
 	"strings"
 	"testing"

@@ -910,7 +911,25 @@ func BootstrapSharedCaPoolInLocation(t *testing.T, location string) string {
 	return poolName
 }

+func BootstrapSubnetForDataprocBatches(t *testing.T, subnetName string, networkName string) string {
+	subnetOptions := map[string]interface{}{
+		"privateIpGoogleAccess": true,
+	}
+	return BootstrapSubnetWithOverrides(t, subnetName, networkName, subnetOptions)
+}
+
 func BootstrapSubnet(t *testing.T, subnetName string, networkName string) string {
+	return BootstrapSubnetWithOverrides(t, subnetName, networkName, make(map[string]interface{}))
+}
+
+func BootstrapSubnetWithFirewallForDataprocBatches(t *testing.T, testId string, subnetName string) string {
+	networkName := BootstrapSharedTestNetwork(t, testId)
+	subnetworkName := BootstrapSubnetForDataprocBatches(t, subnetName, networkName)
+	BootstrapFirewallForDataprocSharedNetwork(t, subnetName, networkName)
+	return subnetworkName
+}
+
+func BootstrapSubnetWithOverrides(t *testing.T, subnetName string, networkName string, subnetOptions map[string]interface{}) string {
 	projectID := envvar.GetTestProjectFromEnv()
 	region := envvar.GetTestRegionFromEnv()

@@ -932,20 +951,24 @@ func BootstrapSubnet(t *testing.T, subnetName string, networkName string) string
 	networkUrl := fmt.Sprintf("%sprojects/%s/global/networks/%s", config.ComputeBasePath, projectID, networkName)
 	url := fmt.Sprintf("%sprojects/%s/regions/%s/subnetworks", config.ComputeBasePath, projectID, region)

-	subnetObj := map[string]interface{}{
+	defaultSubnetObj := map[string]interface{}{
 		"name":        subnetName,
 		"region ":     region,
 		"network":     networkUrl,
 		"ipCidrRange": "10.77.0.0/20",
 	}

+	if len(subnetOptions) != 0 {
+		maps.Copy(defaultSubnetObj, subnetOptions)
+	}
+
 	res, err := transport_tpg.SendRequest(transport_tpg.SendRequestOptions{
 		Config:    config,
 		Method:    "POST",
 		Project:   projectID,
 		RawURL:    url,
 		UserAgent: config.UserAgent,
-		Body:      subnetObj,
+		Body:      defaultSubnetObj,
 		Timeout:   4 * time.Minute,
 	})
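For readers skimming the bootstrap change: BootstrapSubnetWithOverrides builds a default subnetwork request body and overlays any caller-supplied fields with maps.Copy (Go 1.21+), which is how BootstrapSubnetForDataprocBatches injects privateIpGoogleAccess. A standalone sketch of that merge, with illustrative values; only the key names come from the diff:

package main

import (
	"fmt"
	"maps"
)

func main() {
	// Default request body, as built by BootstrapSubnetWithOverrides.
	defaultSubnetObj := map[string]interface{}{
		"name":        "test-subnet",
		"ipCidrRange": "10.77.0.0/20",
	}
	// Caller-supplied overrides, as passed by BootstrapSubnetForDataprocBatches.
	subnetOptions := map[string]interface{}{
		"privateIpGoogleAccess": true,
	}
	// maps.Copy overlays the overrides onto the defaults in place,
	// replacing values for any keys that collide.
	maps.Copy(defaultSubnetObj, subnetOptions)
	fmt.Println(defaultSubnetObj)
	// map[ipCidrRange:10.77.0.0/20 name:test-subnet privateIpGoogleAccess:true]
}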