
Commit 19d251d

Jerry Ding committed
This PR adds support for the Dataproc on GDC SparkApplication resource to the provider.
```release-note:new-resource
`google_dataproc_gdc_spark_application`
```
1 parent f6544a1 commit 19d251d

6 files changed (+393, -0 lines)
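For orientation before the diffs: a minimal configuration of the new resource, assembled from the schema added in this commit, would look roughly like the sketch below. The project, service instance, and jar values are placeholders, not values taken from this PR.

```hcl
# Minimal sketch of the new resource, assuming an existing Dataproc on GDC
# service instance; every identifier below is a placeholder.
resource "google_dataproc_gdc_spark_application" "example" {
  spark_application_id = "my-spark-app"
  serviceinstance      = "my-service-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"

  spark_application_config {
    main_class    = "org.apache.spark.examples.SparkPi"
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    args          = ["1000"]
  }
}
```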
@@ -0,0 +1,302 @@
name: SparkApplication
description: A Spark application is a single Spark workload run on a GDC cluster.
guides:
  'Dataproc Intro': 'https://cloud.google.com/dataproc/'
api: 'https://cloud.google.com/dataproc-gdc/docs/reference/rest/v1/projects.locations.serviceInstances.sparkApplications'
parameters:
  - name: location
    type: String
    description: 'The location of the spark application.'
    url_param_only: true
    required: true
    immutable: true
  - name: serviceinstance
    type: String
    description: 'The id of the service instance to which this spark application belongs.'
    url_param_only: true
    required: true
    immutable: true
  - name: sparkApplicationId
    type: String
    description: 'The id of the application.'
    url_param_only: true
    required: true
    immutable: true
async:
  actions: ['create', 'delete']
  type: OpAsync
  operation:
    base_url: "{{op_id}}"
examples:
  - name: "dataprocgdc_sparkapplication_basic"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app-basic"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app"
      application_environment_id: "tf-e2e-spark-app-env"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_pyspark"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-pyspark-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparkr"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparkr-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparksql"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparksql-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
base_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications
create_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications?sparkApplicationId={{spark_application_id}}
self_link: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
id_format: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
import_format:
  - projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
update_verb: PATCH
update_mask: true
autogen_async: true
properties:
  - name: pysparkApplicationConfig
    type: NestedObject
    properties:
      - name: mainPythonFileUri
        type: String
        description: "The HCFS URI of the main Python file to use as the driver.
          Must be a .py file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments,
          such as `--conf`, that can be set as job properties, since a collision may
          occur that causes an incorrect job submission."
      - name: pythonFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS file URIs of Python files to pass to the PySpark framework.
          Supported file types: .py, .egg, and .zip."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the CLASSPATHs of the Python
          driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz,
          and .zip."
    description: 'Represents the PySparkApplicationConfig.'
  - name: sparkApplicationConfig
    type: NestedObject
    properties:
      - name: mainJarFileUri
        type: String
        description: 'The HCFS URI of the jar file that contains the main class.'
      - name: mainClass
        type: String
        description: "The name of the driver main class. The jar file that contains
          the class must be in the classpath or specified in `jar_file_uris`."
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments
          that can be set as application properties, such as `--conf`, since a collision
          can occur that causes an incorrect application submission."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the classpath of the Spark
          driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`,
          `.tgz`, and `.zip`."
    description: 'Represents the SparkApplicationConfig.'
  - name: sparkRApplicationConfig
    type: NestedObject
    properties:
      - name: mainRFileUri
        type: String
        description: "The HCFS URI of the main R file to use as the driver. Must
          be a .R file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments,
          such as `--conf`, that can be set as job properties, since a collision may
          occur that causes an incorrect job submission."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz,
          and .zip."
    description: 'Represents the SparkRApplicationConfig.'
  - name: sparkSqlApplicationConfig
    type: NestedObject
    properties:
      - name: queryFileUri
        type: String
        description: 'The HCFS URI of the script that contains SQL queries.'
      - name: queryList
        type: NestedObject
        properties:
          - name: queries
            type: Array
            item_type:
              type: String
            description: 'The queries to run.'
            required: true
        description: 'Represents a list of queries.'
      - name: scriptVariables
        type: KeyValuePairs
        description: "Mapping of query variable names to values (equivalent
          to the Spark SQL command: SET `name=\"value\";`)."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: 'HCFS URIs of jar files to be added to the Spark CLASSPATH.'
    description: 'Represents the SparkSqlApplicationConfig.'
  - name: name
    type: String
    description: "Identifier. The name of the application. Format:
      projects/{project}/locations/{location}/serviceInstances/{service_instance}/sparkApplications/{application}"
    output: true
  - name: uid
    type: String
    description: "System generated unique identifier for this application,
      formatted as UUID4."
    output: true
  - name: displayName
    type: String
    description: 'User-provided human-readable name to be used in user interfaces.'
  - name: createTime
    type: String
    description: 'The timestamp when the resource was created.'
    output: true
  - name: updateTime
    type: String
    description: 'The timestamp when the resource was most recently updated.'
    output: true
  - name: requestedState
    type: String
    description: |
      The intended state to which the application is reconciling.
      Possible values:
      * `PENDING`
      * `RUNNING`
      * `CANCELLING`
      * `CANCELLED`
      * `SUCCEEDED`
      * `FAILED`
  - name: state
    type: String
    description: |
      The current state.
      Possible values:
      * `STATE_UNSPECIFIED`
      * `PENDING`
      * `RUNNING`
      * `CANCELLING`
      * `CANCELLED`
      * `SUCCEEDED`
      * `FAILED`
    output: true
  - name: reconciling
    type: Boolean
    description: "Whether the application is currently reconciling. True
      if the current state of the resource does not match the intended state, and the
      system is working to reconcile them, whether or not the change was user initiated."
    output: true
  - name: labels
    type: KeyValueLabels
    description: "The labels to associate with this application. Labels may
      be used for filtering and billing tracking."
  - name: annotations
    type: KeyValueAnnotations
    description: "The annotations to associate with this application. Annotations
      may be used to store client information, but are not used by the server."
  - name: outputUri
    type: String
    description: "An HCFS URI pointing to the location of stdout and stderr of
      the application. Mainly useful for Pantheon and gcloud. Not in scope for private GA."
    output: true
  - name: monitoringEndpoint
    type: String
    description: "URL for a monitoring UI for this application (for eventual
      Spark PHS/UI support). Out of scope for private GA."
    output: true
  - name: properties
    type: KeyValuePairs
    description: 'Application-specific properties.'
  - name: stateMessage
    type: String
    description: 'A message explaining the current state.'
    output: true
  - name: version
    type: String
    description: 'The Dataproc version of this application.'
  - name: applicationEnvironment
    type: String
    description: 'An ApplicationEnvironment from which to inherit configuration
      properties.'
  - name: namespace
    type: String
    description: "The Kubernetes namespace in which to create the application.
      This namespace must already exist on the cluster."
  - name: dependencyImages
    type: Array
    item_type:
      type: String
    description: "List of container image uris for additional file dependencies.
      Dependent files are sequentially copied from each image. If a file with the
      same name exists in two images, the file from the later image is used."
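Since the resource declares an `import_format`, existing applications should be importable by their full resource path. A sketch using a Terraform 1.5+ `import` block, with placeholder identifiers:

```hcl
# Hedged sketch: import an existing application using the id_format above.
# Requires Terraform >= 1.5; all identifiers are placeholders.
import {
  to = google_dataproc_gdc_spark_application.example
  id = "projects/my-project/locations/us-west2/serviceInstances/my-service-instance/sparkApplications/my-spark-app"
}
```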
@@ -0,0 +1,32 @@
resource "google_dataproc_gdc_application_environment" "app_env" {
  application_environment_id = "{{index $.Vars "application_environment_id"}}"
  serviceinstance            = "do-not-delete-dataproc-gdc-instance"
  project                    = "{{index $.Vars "project"}}"
  location                   = "us-west2"
  namespace                  = "default"
}

resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  labels = {
    "test-label" : "label-value"
  }
  annotations = {
    "an_annotation" : "annotation_value"
  }
  properties = {
    "spark.executor.instances" : "2"
  }
  application_environment = google_dataproc_gdc_application_environment.app_env.name
  version                 = "1.2"
  spark_application_config {
    main_jar_file_uri = "file:///usr/lib/spark/examples/jars/spark-examples.jar"
    jar_file_uris     = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    archive_uris      = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    file_uris         = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }
}
@@ -0,0 +1,12 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  spark_application_config {
    main_class    = "org.apache.spark.examples.SparkPi"
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    args          = ["10000"]
  }
}
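The schema marks fields such as `state`, `stateMessage`, and `outputUri` as `output: true`, so they surface as computed attributes. A sketch of reading them, assuming the provider's usual camelCase-to-snake_case attribute mapping and a resource named `example`:

```hcl
# Hedged sketch: computed attributes implied by the `output: true` fields.
output "spark_app_state" {
  value = google_dataproc_gdc_spark_application.example.state
}

output "spark_app_output_uri" {
  value = google_dataproc_gdc_spark_application.example.output_uri
}
```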
@@ -0,0 +1,16 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  display_name         = "A PySpark application for a Terraform create test"
  pyspark_application_config {
    main_python_file_uri = "gs://goog-dataproc-initialization-actions-us-west2/conda/test_conda.py"
    jar_file_uris        = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    python_file_uris     = ["gs://goog-dataproc-initialization-actions-us-west2/conda/get-sys-exec.py"]
    file_uris            = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    archive_uris         = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    args                 = ["10"]
  }
}
@@ -0,0 +1,14 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  display_name         = "A SparkR application for a Terraform create test"
  spark_r_application_config {
    main_r_file_uri = "gs://some-bucket/something.R"
    file_uris       = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    archive_uris    = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    args            = ["10"]
  }
}
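The `examples` list in the YAML also registers a `dataprocgdc_sparkapplication_sparksql` case, but its template is not among the hunks shown here. Based on the `sparkSqlApplicationConfig` schema (query list, script variables, jar URIs), such a template might look roughly like this sketch; the query text is a placeholder, not content from this commit:

```hcl
resource "google_dataproc_gdc_spark_application" "sparksql" {
  spark_application_id = "tf-e2e-sparksql-app"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"
  spark_sql_application_config {
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    query_list {
      queries = ["show tables;"]
    }
  }
}
```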
