+ data "google_project" "project" {
+ }
+
+ data "google_storage_project_service_account" "gcs_account" {
+ }
+
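+ # A serverless Dataproc batch running a Spark workload, with CMEK encryption,
+ # a custom staging bucket, a Dataproc Metastore, and a Spark History Server cluster.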
+ resource "google_dataproc_batch" "{{$.PrimaryResourceId}}" {
+   batch_id = "{{index $.Vars "dataproc_batch"}}"
+   location = "us-central1"
+   labels   = {"batch_test": "terraform"}
+
+   runtime_config {
+     properties = { "spark.dynamicAllocation.enabled": "false", "spark.executor.instances": "2" }
+     version    = "2.2"
+   }
+
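+   # Execution environment: TTL, network placement, CMEK key, and staging bucket.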
+   environment_config {
+     execution_config {
+       ttl          = "3600s"
+       network_tags = ["tag1"]
+       kms_key      = google_kms_crypto_key.crypto_key.id
+       network_uri  = "default"
+       # Runs as the project's default Compute Engine service account.
+       service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com"
+       staging_bucket  = google_storage_bucket.bucket.name
+     }
+     peripherals_config {
+       metastore_service = google_dataproc_metastore_service.ms.name
+       spark_history_server_config {
+         dataproc_cluster = google_dataproc_cluster.basic.id
+       }
+     }
+   }
+
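+   # Spark workload: the SparkPi example from the jar shipped on the runtime image.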
+   spark_batch {
+     main_class    = "org.apache.spark.examples.SparkPi"
+     args          = ["10"]
+     jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
+   }
+
+   # Create the batch only after the Dataproc service agent can use the KMS key.
+   depends_on = [
+     google_kms_crypto_key_iam_member.crypto_key_member_1,
+   ]
+ }
+
+ resource "google_storage_bucket" "bucket" {
+   uniform_bucket_level_access = true
+   name                        = "{{index $.Vars "bucket_name"}}"
+   location                    = "US"
+   force_destroy               = true
+ }
+
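+ # CMEK resources: a key ring, a key, and the IAM grant that lets the
+ # Dataproc service agent encrypt and decrypt with the key.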
+ resource "google_kms_crypto_key" "crypto_key" {
+   name     = "{{index $.Vars "key_name"}}"
+   key_ring = google_kms_key_ring.key_ring.id
+   purpose  = "ENCRYPT_DECRYPT"
+ }
+
+ resource "google_kms_key_ring" "key_ring" {
+   name     = "{{index $.Vars "keyring_name"}}"
+   location = "us-central1"
+ }
+
+ resource "google_kms_crypto_key_iam_member" "crypto_key_member_1" {
+   crypto_key_id = google_kms_crypto_key.crypto_key.id
+   role          = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
+   member        = "serviceAccount:service-${data.google_project.project.number}@dataproc-accounts.iam.gserviceaccount.com"
+ }
+
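+ # Minimal cluster (zero workers allowed) that backs the batch's Spark History
+ # Server; its Spark job history is written to the staging bucket above.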
+ resource "google_dataproc_cluster" "basic" {
70
+ name = "{{index $.Vars "dataproc_batch"}}"
71
+ region = "us-central1"
72
+
73
+ cluster_config {
74
+ # Keep the costs down with smallest config we can get away with
75
+ software_config {
76
+ override_properties = {
77
+ "dataproc:dataproc.allow.zero.workers" = "true"
78
+ "spark:spark.history.fs.logDirectory" = "gs://${google_storage_bucket.bucket.name}/*/spark-job-history"
79
+ }
80
+ }
81
+
82
+ endpoint_config {
83
+ enable_http_port_access = true
84
+ }
85
+
86
+ master_config {
87
+ num_instances = 1
88
+ machine_type = "e2-standard-2"
89
+ disk_config {
90
+ boot_disk_size_gb = 35
91
+ }
92
+ }
93
+
94
+ metastore_config {
95
+ dataproc_metastore_service = google_dataproc_metastore_service.ms.name
96
+ }
97
+ }
98
+ }
+
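+ # Dataproc Metastore shared by the batch (peripherals_config) and the cluster.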
+ resource "google_dataproc_metastore_service" "ms" {
101
+ service_id = "{{index $.Vars "dataproc_batch"}}"
102
+ location = "us-central1"
103
+ port = 9080
104
+ tier = "DEVELOPER"
105
+
106
+ maintenance_window {
107
+ hour_of_day = 2
108
+ day_of_week = "SUNDAY"
109
+ }
110
+
111
+ hive_metastore_config {
112
+ version = "3.1.2"
113
+ }
114
+ }