io.kestra.plugin.gcp.dataproc.batches.PySparkSubmit

package io.kestra.plugin.gcp.dataproc.batches;

import com.google.cloud.dataproc.v1.Batch;
import com.google.cloud.dataproc.v1.PySparkBatch;
import io.kestra.core.exceptions.IllegalVariableEvaluationException;
import io.kestra.core.models.annotations.Example;
import io.kestra.core.models.annotations.Plugin;
import io.kestra.core.models.annotations.PluginProperty;
import io.kestra.core.runners.RunContext;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.ToString;
import lombok.experimental.SuperBuilder;

import jakarta.validation.constraints.NotNull;

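/**
 * Kestra task that submits a PySpark batch workload to Google Cloud Dataproc,
 * building on the common Spark batch properties declared in {@link AbstractSparkSubmit}.
 */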
@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
@Schema(
    title = "Submit an [Apache PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/) batch workload."
)
@Plugin(
    examples = @Example(
        full = true,
        code = """
            id: gcp_dataproc_py_spark_submit
            namespace: company.team
            tasks:
              - id: py_spark_submit
                type: io.kestra.plugin.gcp.dataproc.batches.PySparkSubmit
                mainPythonFileUri: 'gs://spark-jobs-kestra/pi.py'
                name: test-pyspark
                region: europe-west3
            """
    )
)
public class PySparkSubmit extends AbstractSparkSubmit {
    @Schema(
        title = "The HCFS URI of the main Python file to use as the Spark driver. Must be a .py file.",
        description = "Hadoop Compatible File System (HCFS) URIs should be accessible from the cluster. Can be a GCS file with the gs:// prefix, an HDFS file on the cluster with the hdfs:// prefix, or a local file on the cluster with the file:// prefix."
    )
    @PluginProperty(dynamic = true)
    @NotNull
    protected String mainPythonFileUri;
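
    // Illustrative HCFS URIs for mainPythonFileUri (hypothetical paths; any of these schemes works):
    //   gs://my-bucket/jobs/pi.py      Cloud Storage object
    //   hdfs:///jobs/pi.py             file on the cluster's HDFS
    //   file:///opt/jobs/pi.py         local file on the cluster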

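    /**
     * Builds the {@link PySparkBatch} for the Dataproc {@link Batch}: renders the main Python file URI
     * and adds the optional jar, file, and archive URIs as well as the driver arguments when they are set.
     */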
    @Override
    protected void buildBatch(Batch.Builder builder, RunContext runContext) throws IllegalVariableEvaluationException {
        PySparkBatch.Builder sparkBuilder = PySparkBatch.newBuilder();

        sparkBuilder.setMainPythonFileUri(runContext.render(mainPythonFileUri));

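        // Optional lists inherited from AbstractSparkSubmit are rendered and only added when present.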
        if (this.jarFileUris != null) {
            sparkBuilder.addAllJarFileUris(runContext.render(this.jarFileUris));
        }

        if (this.fileUris != null) {
            sparkBuilder.addAllFileUris(runContext.render(this.fileUris));
        }

        if (this.archiveUris != null) {
            sparkBuilder.addAllArchiveUris(runContext.render(this.archiveUris));
        }

        if (this.args != null) {
            sparkBuilder.addAllArgs(runContext.render(this.args));
        }

        builder.setPysparkBatch(sparkBuilder.build());
    }
}
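
For reference, a sketch of a flow that also sets the optional properties handled in buildBatch (jarFileUris, fileUris, archiveUris, args). The bucket paths and argument values below are hypothetical placeholders:

id: gcp_dataproc_py_spark_submit_full
namespace: company.team
tasks:
  - id: py_spark_submit
    type: io.kestra.plugin.gcp.dataproc.batches.PySparkSubmit
    mainPythonFileUri: 'gs://spark-jobs-kestra/pi.py'
    jarFileUris:
      - 'gs://spark-jobs-kestra/libs/helper.jar'
    fileUris:
      - 'gs://spark-jobs-kestra/conf/job.conf'
    archiveUris:
      - 'gs://spark-jobs-kestra/deps/env.tar.gz'
    args:
      - '1000'
    name: test-pyspark
    region: europe-west3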



