# Copyright 2024 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
name: SparkApplication
description: A Spark application is a single Spark workload run on a GDC cluster.
references:
  guides:
    'Dataproc Intro': 'https://cloud.google.com/dataproc/'
  api: 'https://cloud.google.com/dataproc-gdc/docs/reference/rest/v1/projects.locations.serviceInstances.sparkApplications'
parameters:
  - name: location
    type: String
    description: 'The location of the Spark application.'
    url_param_only: true
    required: true
    immutable: true
  - name: serviceinstance
    type: String
    description: 'The ID of the service instance to which this Spark application belongs.'
    url_param_only: true
    required: true
    immutable: true
  - name: sparkApplicationId
    type: String
    description: 'The ID of the application.'
    url_param_only: true
    required: true
    immutable: true
async:
  actions: ['create', 'delete']
  type: OpAsync
  operation:
    base_url: "{{op_id}}"
immutable: true
examples:
  - name: "dataprocgdc_sparkapplication_basic"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app-basic"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-spark-app"
      application_environment_id: "tf-e2e-spark-app-env"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_pyspark"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-pyspark-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparkr"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparkr-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparksql"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparksql-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
  - name: "dataprocgdc_sparkapplication_sparksql_query_file"
    primary_resource_id: "spark-application"
    vars:
      spark_application_id: "tf-e2e-sparksql-app"
      project: "my-project"
    test_vars_overrides:
      'project': '"gdce-cluster-monitoring"'
base_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications
create_url: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications?sparkApplicationId={{spark_application_id}}
self_link: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
id_format: projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
import_format:
  - projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}
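# A usage sketch, not part of the schema: given the import format above, an
# import command would look roughly like the following. The resource type name
# `google_dataproc_gdc_spark_application` is an assumption inferred from this
# spec's naming, and every path segment below is a placeholder.
#
#   terraform import google_dataproc_gdc_spark_application.spark-application \
#     projects/{{project}}/locations/{{location}}/serviceInstances/{{serviceinstance}}/sparkApplications/{{spark_application_id}}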
autogen_async: true
properties:
  - name: pysparkApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: mainPythonFileUri
        type: String
        description: "The HCFS URI of the main Python file to use as the driver.
          Must be a .py file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments, such
          as `--conf`, that can be set as job properties, since a collision may occur
          that causes an incorrect job submission."
      - name: pythonFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS file URIs of Python files to pass to the PySpark framework.
          Supported file types: .py, .egg, and .zip."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the CLASSPATHs of the Python
          driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz,
          and .zip."
    description: 'Represents the PySparkApplicationConfig.'
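  # A hedged illustration, not part of the schema: in generated Terraform
  # configuration this block would look roughly as follows. The resource type
  # name and snake_case field names are inferred from this spec; all values
  # are placeholders.
  #
  #   resource "google_dataproc_gdc_spark_application" "spark-application" {
  #     spark_application_id = "my-pyspark-app"        # placeholder
  #     serviceinstance      = "my-service-instance"   # placeholder
  #     project              = "my-project"            # placeholder
  #     location             = "us-west2"              # placeholder
  #     pyspark_application_config {
  #       main_python_file_uri = "gs://my-bucket/main.py"  # placeholder URI
  #     }
  #   }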
  - name: sparkApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: mainJarFileUri
        type: String
        description: 'The HCFS URI of the jar file that contains the main class.'
      - name: mainClass
        type: String
        description: "The name of the driver main class. The jar file that contains the
          class must be in the classpath or specified in `jar_file_uris`."
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments that
          can be set as application properties, such as `--conf`, since a collision can
          occur that causes an incorrect application submission."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of jar files to add to the classpath of the Spark
          driver and tasks."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`,
          `.tgz`, and `.zip`."
    description: 'Represents the SparkApplicationConfig.'
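  # Hedged illustration (same naming assumptions as above). Per the field
  # descriptions, a `main_class` driver needs its jar on the classpath or in
  # `jar_file_uris`; values are placeholders.
  #
  #   spark_application_config {
  #     main_class    = "com.example.Main"          # placeholder
  #     jar_file_uris = ["gs://my-bucket/app.jar"]  # placeholder
  #   }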
  - name: sparkRApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: mainRFileUri
        type: String
        description: "The HCFS URI of the main R file to use as the driver. Must
          be a .R file."
        required: true
      - name: args
        type: Array
        item_type:
          type: String
        description: "The arguments to pass to the driver. Do not include arguments, such
          as `--conf`, that can be set as job properties, since a collision may occur
          that causes an incorrect job submission."
      - name: fileUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of files to be placed in the working directory
          of each executor. Useful for naively parallel tasks."
      - name: archiveUris
        type: Array
        item_type:
          type: String
        description: "HCFS URIs of archives to be extracted into the working
          directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz,
          and .zip."
    description: 'Represents the SparkRApplicationConfig.'
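  # Hedged illustration (same naming assumptions as above); the URI is a
  # placeholder:
  #
  #   spark_r_application_config {
  #     main_r_file_uri = "gs://my-bucket/main.R"  # placeholder
  #   }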
  - name: sparkSqlApplicationConfig
    type: NestedObject
    exactly_one_of:
      - 'pyspark_application_config'
      - 'spark_application_config'
      - 'spark_sql_application_config'
      - 'spark_r_application_config'
    properties:
      - name: queryFileUri
        type: String
        description: 'The HCFS URI of the script that contains SQL queries.'
      - name: queryList
        type: NestedObject
        properties:
          - name: queries
            type: Array
            item_type:
              type: String
            description: 'The queries to run.'
            required: true
        description: 'Represents a list of queries.'
      - name: scriptVariables
        type: KeyValuePairs
        description: "Mapping of query variable names to values (equivalent
          to the Spark SQL command: SET `name=\"value\";`)."
      - name: jarFileUris
        type: Array
        item_type:
          type: String
        description: 'HCFS URIs of jar files to be added to the Spark CLASSPATH.'
    description: 'Represents the SparkSqlApplicationConfig.'
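  # Hedged illustration (same naming assumptions as above). The SQL can come
  # from `query_file_uri` or an inline `query_list`; this sketch uses the
  # latter with a placeholder query:
  #
  #   spark_sql_application_config {
  #     query_list {
  #       queries = ["SHOW TABLES;"]  # placeholder
  #     }
  #   }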
  - name: name
    type: String
    description: "Identifier. The name of the application. Format:
      projects/{project}/locations/{location}/serviceInstances/{service_instance}/sparkApplications/{application}"
    output: true
  - name: uid
    type: String
    description: "System generated unique identifier for this application,
      formatted as UUID4."
    output: true
  - name: displayName
    type: String
    description: 'User-provided human-readable name to be used in user interfaces.'
  - name: createTime
    type: String
    description: 'The timestamp when the resource was created.'
    output: true
  - name: updateTime
    type: String
    description: 'The timestamp when the resource was most recently updated.'
    output: true
  - name: state
    type: String
    description: |
      The current state.
      Possible values:
      * `STATE_UNSPECIFIED`
      * `PENDING`
      * `RUNNING`
      * `CANCELLING`
      * `CANCELLED`
      * `SUCCEEDED`
      * `FAILED`
    output: true
  - name: reconciling
    type: Boolean
    description: "Whether the application is currently reconciling. True
      if the current state of the resource does not match the intended state, and the
      system is working to reconcile them, whether or not the change was user initiated."
    output: true
  - name: labels
    type: KeyValueLabels
    description: "The labels to associate with this application. Labels may
      be used for filtering and billing tracking."
  - name: annotations
    type: KeyValueAnnotations
    description: "The annotations to associate with this application. Annotations
      may be used to store client information, but are not used by the server."
  - name: outputUri
    type: String
    description: "An HCFS URI pointing to the location of stdout and stderr
      of the application. Mainly useful for Pantheon and gcloud. Not in scope for private GA."
    output: true
  - name: monitoringEndpoint
    type: String
    description: "URL for a monitoring UI for this application (for eventual
      Spark PHS/UI support). Out of scope for private GA."
    output: true
  - name: properties
    type: KeyValuePairs
    description: 'Application-specific properties.'
  - name: stateMessage
    type: String
    description: 'A message explaining the current state.'
    output: true
  - name: version
    type: String
    description: 'The Dataproc version of this application.'
  - name: applicationEnvironment
    type: String
    description: 'An ApplicationEnvironment from which to inherit configuration
      properties.'
  - name: namespace
    type: String
    description: "The Kubernetes namespace in which to create the application. This
      namespace must already exist on the cluster."
  - name: dependencyImages
    type: Array
    item_type:
      type: String
    description: "List of container image URIs for additional file dependencies. Dependent
      files are sequentially copied from each image. If a file with the same name exists
      in two images, the file from the later image is used."