
Commit 0f91de0

Author: Jerry Ding
This PR adds support for the Dataproc on GDC SparkApplication resource to the provider.
```release-note:new-resource
`google_dataproc_gdc_spark_application`
```
1 parent f6544a1 commit 0f91de0

7 files changed (+441, −0 lines)
@@ -0,0 +1,334 @@
# Copyright 2024 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
name: SparkApplication
description: A Spark application is a single Spark workload run on a GDC cluster.
references:
  guides:
    'Dataproc Intro': 'https://cloud.google.com/dataproc/'
  api: 'https://cloud.google.com/dataproc-gdc/docs/reference/rest/v1/projects.locations.serviceInstances.sparkApplications'
parameters:
  - name: location
    type: String
    description: 'The location of the Spark application.'
    url_param_only: true
    required: true
    immutable: true
  - name: serviceinstance
    type: String
    description: 'The id of the service instance to which this Spark application belongs.'
    url_param_only: true
    required: true
    immutable: true
  - name: sparkApplicationId
    type: String
    description: 'The id of the application.'
    url_param_only: true
    required: true
    immutable: true
async:
  actions: ['create', 'delete']
  type: OpAsync
  operation:
    base_url: "{{op_id}}"
immutable: true
examples:
  - name: "dataprocgdc_sparkapplication_basic"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app-basic"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app"
      application_environment_id: "tf-e2e-spark-app-env"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_pyspark"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-pyspark-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparkr"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparkr-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparksql"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparksql-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparksql_query_file"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparksql-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
base_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications
create_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications?sparkApplicationId={{spark_application_id}}
self_link: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
id_format: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
import_format:
  - projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
update_verb: PATCH
update_mask: true
autogen_async: true
properties:
  - name: pysparkApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: mainPythonFileUri
        type: String
        description: "The HCFS URI of the main Python file to use as the driver. Must be a .py file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission."
      - name: pythonFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip."
    description: 'Represents the PySparkApplicationConfig.'
  - name: sparkApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: mainJarFileUri
        type: String
        description: 'The HCFS URI of the jar file that contains the main class.'
      - name: mainClass
        type: String
        description: "The name of the driver main class. The jar file that contains the class must be in the classpath or specified in `jar_file_uris`."
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments that can be set as application properties, such as `--conf`, since a collision can occur that causes an incorrect application submission."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the classpath of the Spark driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory of each executor."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`."
    description: 'Represents the SparkApplicationConfig.'
  - name: sparkRApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: mainRFileUri
        type: String
        description: "The HCFS URI of the main R file to use as the driver. Must be a .R file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip."
    description: 'Represents the SparkRApplicationConfig.'
  - name: sparkSqlApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: queryFileUri
        type: String
        description: 'The HCFS URI of the script that contains SQL queries.'
      - name: queryList
        type: NestedObject
        properties:
          - name: queries
            type: Array
            item_type:
              type: String
            description: 'The queries to run.'
            required: true
        description: 'Represents a list of queries.'
      - name: scriptVariables
        type: KeyValuePairs
        description: "Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name=\"value\";`)."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: 'HCFS URIs of jar files to be added to the Spark CLASSPATH.'
    description: 'Represents the SparkSqlApplicationConfig.'
  - name: name
    type: String
    description: "Identifier. The name of the application. Format: projects/{project}/locations/{location}/serviceInstances/{service_instance}/sparkApplications/{application}"
    output: true
  - name: uid
    type: String
    description: "System generated unique identifier for this application, formatted as UUID4."
    output: true
  - name: displayName
    type: String
    description: 'User-provided human-readable name to be used in user interfaces.'
  - name: createTime
    type: String
    description: 'The timestamp when the resource was created.'
    output: true
  - name: updateTime
    type: String
    description: 'The timestamp when the resource was most recently updated.'
    output: true
  - name: state
    type: String
    description: |
      The current state.
      Possible values:
      * `STATE_UNSPECIFIED`
      * `PENDING`
      * `RUNNING`
      * `CANCELLING`
      * `CANCELLED`
      * `SUCCEEDED`
      * `FAILED`
    output: true
  - name: reconciling
    type: Boolean
    description: "Whether the application is currently reconciling. True if the current state of the resource does not match the intended state, and the system is working to reconcile them, whether or not the change was user initiated."
    output: true
  - name: labels
    type: KeyValueLabels
    description: "The labels to associate with this application. Labels may be used for filtering and billing tracking."
  - name: annotations
    type: KeyValueAnnotations
    description: "The annotations to associate with this application. Annotations may be used to store client information, but are not used by the server."
  - name: outputUri
    type: String
    description: "An HCFS URI pointing to the location of stdout and stderr of the application. Mainly useful for Pantheon and gcloud. Not in scope for private GA."
    output: true
  - name: monitoringEndpoint
    type: String
    description: "URL for a monitoring UI for this application (for eventual Spark PHS/UI support). Out of scope for private GA."
    output: true
  - name: properties
    type: KeyValuePairs
    description: 'Application-specific properties.'
  - name: stateMessage
    type: String
    description: 'A message explaining the current state.'
    output: true
  - name: version
    type: String
    description: 'The Dataproc version of this application.'
  - name: applicationEnvironment
    type: String
    description: 'An ApplicationEnvironment from which to inherit configuration properties.'
  - name: namespace
    type: String
    description: "The Kubernetes namespace in which to create the application. This namespace must already exist on the cluster."
  - name: dependencyImages
    type: Array
    item_type:
      type: String
    description: "List of container image URIs for additional file dependencies. Dependent files are sequentially copied from each image. If a file with the same name exists in two images, the file from the later image is used."
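Note: the `examples` list above also registers PySpark, SparkR, and SparkSQL variants, but only the Spark example templates appear in this excerpt. For orientation, a minimal PySpark configuration might look like the following sketch. The attribute names are the snake_case forms of the `pysparkApplicationConfig` schema above (matching the `exactly_one_of` entries), and the `gs://example-bucket/...` URIs are placeholders, not taken from this commit.

```hcl
resource "google_dataproc_gdc_spark_application" "pyspark" {
  spark_application_id = "tf-e2e-pyspark-app"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"
  pyspark_application_config {
    # Required: the driver entry point must be a .py file.
    main_python_file_uri = "gs://example-bucket/wordcount.py" # placeholder URI
    # Optional: extra .py/.egg/.zip files passed to the PySpark framework.
    python_file_uris = ["gs://example-bucket/helpers.py"] # placeholder URI
  }
}
```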
@@ -0,0 +1,32 @@
resource "google_dataproc_gdc_application_environment" "app_env" {
  application_environment_id = "{{index $.Vars "application_environment_id"}}"
  serviceinstance            = "do-not-delete-dataproc-gdc-instance"
  project                    = "{{index $.Vars "project"}}"
  location                   = "us-west2"
  namespace                  = "default"
}

resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  labels = {
    "test-label" : "label-value"
  }
  annotations = {
    "an_annotation" : "annotation_value"
  }
  properties = {
    "spark.executor.instances" : "2"
  }
  application_environment = google_dataproc_gdc_application_environment.app_env.name
  version                 = "1.2"
  spark_application_config {
    main_jar_file_uri = "file:///usr/lib/spark/examples/jars/spark-examples.jar"
    jar_file_uris     = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    archive_uris      = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    file_uris         = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }
}
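The registered `dataprocgdc_sparkapplication_sparkr` example is likewise not shown in this excerpt. A minimal sketch, using the snake_case form of the `sparkRApplicationConfig` schema and placeholder URIs (not taken from this commit), could be:

```hcl
resource "google_dataproc_gdc_spark_application" "sparkr" {
  spark_application_id = "tf-e2e-sparkr-app"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"
  spark_r_application_config {
    # Required: the driver entry point must be a .R file.
    main_r_file_uri = "gs://example-bucket/summarize.R" # placeholder URI
    # Optional: files copied into each executor's working directory.
    file_uris = ["gs://example-bucket/data.csv"] # placeholder URI
  }
}
```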
@@ -0,0 +1,12 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  spark_application_config {
    main_class    = "org.apache.spark.examples.SparkPi"
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    args          = ["10000"]
  }
}
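Finally, the schema's `sparkSqlApplicationConfig` accepts either inline queries (`query_list`) or a script (`query_file_uri`), and the four config blocks are mutually exclusive via `exactly_one_of`. A rough sketch of the inline-query form follows; the queries and the `script_variables` entry are hypothetical, and `query_list` is assumed to be a nested block in the generated resource.

```hcl
resource "google_dataproc_gdc_spark_application" "sparksql" {
  spark_application_id = "tf-e2e-sparksql-app"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"
  spark_sql_application_config {
    # Inline queries; query_file_uri is the mutually exclusive alternative.
    query_list {
      queries = ["SHOW TABLES;", "SELECT 1;"] # hypothetical queries
    }
    # Equivalent to the Spark SQL command `SET name="value";`.
    script_variables = {
      "run_date" = "2024-01-01" # hypothetical variable
    }
  }
}
```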
