
Commit 19d251d

Jerry Ding committed
This PR adds support for the Dataproc on GDC SparkApplication resource to the provider.
```release-note:new-resource
`google_dataproc_gdc_spark_application`
```
1 parent f6544a1 commit 19d251d

6 files changed (+393, -0 lines)
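For orientation before the diffs: a minimal configuration of the new resource, assembled from the schema added in this commit, would look roughly like the sketch below. The project, service instance, and jar values are placeholders, not values taken from this PR.

```hcl
# Minimal sketch of the new resource, assuming an existing Dataproc on GDC
# service instance; every identifier below is a placeholder.
resource "google_dataproc_gdc_spark_application" "example" {
  spark_application_id = "my-spark-app"
  serviceinstance      = "my-service-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"

  spark_application_config {
    main_class    = "org.apache.spark.examples.SparkPi"
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    args          = ["1000"]
  }
}
```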
@@ -0,0 +1,302 @@
name: SparkApplication
description: A Spark application is a single Spark workload run on a GDC cluster.
guides:
  'Dataproc Intro': 'https://cloud.google.com/dataproc/'
api: 'https://cloud.google.com/dataproc-gdc/docs/reference/rest/v1/projects.locations.serviceInstances.sparkApplications'
parameters:
  - name: location
    type: String
    description: 'The location of the spark application.'
    url_param_only: true
    required: true
    immutable: true
  - name: serviceinstance
    type: String
    description: 'The id of the service instance to which this spark application belongs.'
    url_param_only: true
    required: true
    immutable: true
  - name: sparkApplicationId
    type: String
    description: 'The id of the application.'
    url_param_only: true
    required: true
    immutable: true
async:
  actions: ['create', 'delete']
  type: OpAsync
  operation:
    base_url: "{{op_id}}"
examples:
  - name: "dataprocgdc_sparkapplication_basic"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app-basic"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app"
      application_environment_id: "tf-e2e-spark-app-env"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_pyspark"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-pyspark-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparkr"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparkr-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparksql"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparksql-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
base_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications
create_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications?sparkApplicationId={{spark_application_id}}
self_link: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
id_format: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
import_format:
  - projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
update_verb: PATCH
update_mask: true
autogen_async: true
properties:
  - name: pysparkApplicationConfig
    type: NestedObject
    properties:
      - name: mainPythonFileUri
        type: String
        description: "The HCFS URI of the main Python file to use as the driver.
          Must be a .py file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments,
          such as `--conf`, that can be set as job properties, since a collision may
          occur that causes an incorrect job submission."
      - name: pythonFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS file URIs of Python files to pass to the PySpark framework.
          Supported file types: .py, .egg, and .zip."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the CLASSPATHs of the Python
          driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz,
          and .zip."
    description: 'Represents the PySparkApplicationConfig.'
  - name: sparkApplicationConfig
    type: NestedObject
    properties:
      - name: mainJarFileUri
        type: String
        description: 'The HCFS URI of the jar file that contains the main class.'
      - name: mainClass
        type: String
        description: "The name of the driver main class. The jar file that contains
          the class must be in the classpath or specified in `jar_file_uris`."
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments
          that can be set as application properties, such as `--conf`, since a collision
          can occur that causes an incorrect application submission."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the classpath of the Spark
          driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`,
          `.tgz`, and `.zip`."
    description: 'Represents the SparkApplicationConfig.'
  - name: sparkRApplicationConfig
    type: NestedObject
    properties:
      - name: mainRFileUri
        type: String
        description: "The HCFS URI of the main R file to use as the driver. Must
          be a .R file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments,
          such as `--conf`, that can be set as job properties, since a collision may
          occur that causes an incorrect job submission."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz,
          and .zip."
    description: 'Represents the SparkRApplicationConfig.'
  - name: sparkSqlApplicationConfig
    type: NestedObject
    properties:
      - name: queryFileUri
        type: String
        description: 'The HCFS URI of the script that contains SQL queries.'
      - name: queryList
        type: NestedObject
        properties:
          - name: queries
            type: Array
            item_type:
              type: String
            description: 'The queries to run.'
            required: true
        description: 'Represents a list of queries.'
      - name: scriptVariables
        type: KeyValuePairs
        description: "Mapping of query variable names to values (equivalent
          to the Spark SQL command: SET `name=\"value\";`)."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: 'HCFS URIs of jar files to be added to the Spark CLASSPATH.'
    description: 'Represents the SparkSqlApplicationConfig.'
  - name: name
    type: String
    description: "Identifier. The name of the application. Format:
      projects/{project}/locations/{location}/serviceInstances/{service_instance}/sparkApplications/{application}"
    output: true
  - name: uid
    type: String
    description: "System generated unique identifier for this application,
      formatted as UUID4."
    output: true
  - name: displayName
    type: String
    description: 'User-provided human-readable name to be used in user interfaces.'
  - name: createTime
    type: String
    description: 'The timestamp when the resource was created.'
    output: true
  - name: updateTime
    type: String
    description: 'The timestamp when the resource was most recently updated.'
    output: true
  - name: requestedState
    type: String
    description: |
      The intended state to which the application is reconciling.
      Possible values:
      * `PENDING`
      * `RUNNING`
      * `CANCELLING`
      * `CANCELLED`
      * `SUCCEEDED`
      * `FAILED`
  - name: state
    type: String
    description: |
      The current state.
      Possible values:
      * `STATE_UNSPECIFIED`
      * `PENDING`
      * `RUNNING`
      * `CANCELLING`
      * `CANCELLED`
      * `SUCCEEDED`
      * `FAILED`
    output: true
  - name: reconciling
    type: Boolean
    description: "Whether the application is currently reconciling. True
      if the current state of the resource does not match the intended state, and the
      system is working to reconcile them, whether or not the change was user initiated."
    output: true
  - name: labels
    type: KeyValueLabels
    description: "The labels to associate with this application. Labels may
      be used for filtering and billing tracking."
  - name: annotations
    type: KeyValueAnnotations
    description: "The annotations to associate with this application. Annotations
      may be used to store client information, but are not used by the server."
  - name: outputUri
    type: String
    description: "An HCFS URI pointing to the location of stdout and stderr of
      the application. Mainly useful for Pantheon and gcloud. Not in scope for private GA."
    output: true
  - name: monitoringEndpoint
    type: String
    description: "URL for a monitoring UI for this application (for eventual
      Spark PHS/UI support). Out of scope for private GA."
    output: true
  - name: properties
    type: KeyValuePairs
    description: 'Application-specific properties.'
  - name: stateMessage
    type: String
    description: 'A message explaining the current state.'
    output: true
  - name: version
    type: String
    description: 'The Dataproc version of this application.'
  - name: applicationEnvironment
    type: String
    description: 'An ApplicationEnvironment from which to inherit configuration
      properties.'
  - name: namespace
    type: String
    description: "The Kubernetes namespace in which to create the application.
      This namespace must already exist on the cluster."
  - name: dependencyImages
    type: Array
    item_type:
      type: String
    description: "List of container image uris for additional file dependencies.
      Dependent files are sequentially copied from each image. If a file with the
      same name exists in two images, the file from the later image is used."
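Since the resource declares an `import_format`, existing applications should be importable by their full resource path. A sketch using a Terraform 1.5+ `import` block, with placeholder identifiers:

```hcl
# Hedged sketch: import an existing application using the id_format above.
# Requires Terraform >= 1.5; all identifiers are placeholders.
import {
  to = google_dataproc_gdc_spark_application.example
  id = "projects/my-project/locations/us-west2/serviceInstances/my-service-instance/sparkApplications/my-spark-app"
}
```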
@@ -0,0 +1,32 @@
resource "google_dataproc_gdc_application_environment" "app_env" {
  application_environment_id = "{{index $.Vars "application_environment_id"}}"
  serviceinstance            = "do-not-delete-dataproc-gdc-instance"
  project                    = "{{index $.Vars "project"}}"
  location                   = "us-west2"
  namespace                  = "default"
}

resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  labels = {
    "test-label" : "label-value"
  }
  annotations = {
    "an_annotation" : "annotation_value"
  }
  properties = {
    "spark.executor.instances" : "2"
  }
  application_environment = google_dataproc_gdc_application_environment.app_env.name
  version                 = "1.2"
  spark_application_config {
    main_jar_file_uri = "file:///usr/lib/spark/examples/jars/spark-examples.jar"
    jar_file_uris     = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    archive_uris      = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    file_uris         = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
  }
}
@@ -0,0 +1,12 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  spark_application_config {
    main_class    = "org.apache.spark.examples.SparkPi"
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    args          = ["10000"]
  }
}
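The schema marks fields such as `state`, `stateMessage`, and `outputUri` as `output: true`, so they surface as computed attributes. A sketch of reading them, assuming the provider's usual camelCase-to-snake_case attribute mapping and a resource named `example`:

```hcl
# Hedged sketch: computed attributes implied by the `output: true` fields.
output "spark_app_state" {
  value = google_dataproc_gdc_spark_application.example.state
}

output "spark_app_output_uri" {
  value = google_dataproc_gdc_spark_application.example.output_uri
}
```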
@@ -0,0 +1,16 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  display_name         = "A PySpark application for a Terraform create test"
  pyspark_application_config {
    main_python_file_uri = "gs://goog-dataproc-initialization-actions-us-west2/conda/test_conda.py"
    jar_file_uris        = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    python_file_uris     = ["gs://goog-dataproc-initialization-actions-us-west2/conda/get-sys-exec.py"]
    file_uris            = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    archive_uris         = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    args                 = ["10"]
  }
}
@@ -0,0 +1,14 @@
resource "google_dataproc_gdc_spark_application" "{{$.PrimaryResourceId}}" {
  spark_application_id = "{{index $.Vars "spark_application_id"}}"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "{{index $.Vars "project"}}"
  location             = "us-west2"
  namespace            = "default"
  display_name         = "A SparkR application for a Terraform create test"
  spark_r_application_config {
    main_r_file_uri = "gs://some-bucket/something.R"
    file_uris       = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    archive_uris    = ["file:///usr/lib/spark/examples/spark-examples.jar"]
    args            = ["10"]
  }
}
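The `examples` list in the YAML also registers a `dataprocgdc_sparkapplication_sparksql` case, but its template is not among the hunks shown here. Based on the `sparkSqlApplicationConfig` schema (query list, script variables, jar URIs), such a template might look roughly like this sketch; the query text is a placeholder, not content from this commit:

```hcl
resource "google_dataproc_gdc_spark_application" "sparksql" {
  spark_application_id = "tf-e2e-sparksql-app"
  serviceinstance      = "do-not-delete-dataproc-gdc-instance"
  project              = "my-project"
  location             = "us-west2"
  namespace            = "default"
  spark_sql_application_config {
    jar_file_uris = ["file:///usr/lib/spark/examples/jars/spark-examples.jar"]
    query_list {
      queries = ["show tables;"]
    }
  }
}
```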
