package io.kestra.plugin.gcp.dataproc.batches;

import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.cloud.dataproc.v1.*;
import io.kestra.core.exceptions.IllegalVariableEvaluationException;
import io.kestra.core.models.annotations.PluginProperty;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.runners.RunContext;
import io.kestra.core.utils.Slugify;
import io.kestra.plugin.gcp.AbstractTask;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.*;
import lombok.experimental.SuperBuilder;
import org.slf4j.Logger;

import java.util.AbstractMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import jakarta.validation.constraints.NotNull;

import static io.kestra.core.utils.Rethrow.throwFunction;

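/**
 * Base class for Dataproc batch workload tasks. Subclasses populate the
 * workload-specific configuration via {@link #buildBatch(Batch.Builder, RunContext)};
 * this class applies the shared execution, peripherals and runtime configuration,
 * then submits the batch through the regional {@link BatchControllerClient}.
 */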
@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
public abstract class AbstractBatch extends AbstractTask implements RunnableTask<AbstractBatch.Output> {
    @Schema(
        title = "The region in which to run the batch."
    )
    @PluginProperty(dynamic = true)
    @NotNull
    private String region;

    @Schema(
        title = "The batch name; the execution id is appended to build the batch ID."
    )
    @PluginProperty(dynamic = true)
    @NotNull
    private String name;

    @Schema(
        title = "Execution configuration for a workload."
    )
    @PluginProperty(dynamic = true)
    private AbstractBatch.ExecutionConfiguration execution;

    @Schema(
        title = "Peripherals configuration for a workload."
    )
    @PluginProperty(dynamic = true)
    private AbstractBatch.PeripheralsConfiguration peripherals;

    @Schema(
        title = "Runtime configuration for a workload."
    )
    @PluginProperty(dynamic = true)
    private AbstractBatch.RuntimeConfiguration runtime;

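    /**
     * Populates the workload-specific part of the {@link Batch} (for example a
     * Spark or PySpark configuration). Called by {@link #run(RunContext)} before
     * the shared environment and runtime configuration is applied.
     */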
    protected abstract void buildBatch(Batch.Builder builder, RunContext runContext) throws IllegalVariableEvaluationException;

    @Override
    public Output run(RunContext runContext) throws Exception {
        Logger logger = runContext.logger();
        String region = runContext.render(this.region);
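        // Dataproc batch IDs only allow lowercase letters, digits and hyphens,
        // and are capped at 63 characters, hence the slugify and truncate below.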
        String batchId = Slugify.of(runContext.render(name + "-{{ execution.id }}"));
        if (batchId.length() > 63) {
            batchId = batchId.substring(0, 63);
        }

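        // Batch workloads must be submitted through the regional Dataproc endpoint.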
        BatchControllerSettings batchControllerSettings = BatchControllerSettings.newBuilder()
            .setCredentialsProvider(FixedCredentialsProvider.create(this.credentials(runContext)))
            .setEndpoint(region + "-dataproc.googleapis.com:443")
            .build();

        try (BatchControllerClient batchControllerClient = BatchControllerClient.create(batchControllerSettings)) {
            Batch.Builder builder = Batch.newBuilder();
            this.buildBatch(builder, runContext);

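            // Optional execution configuration: network, service account and KMS key.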
            ExecutionConfig.Builder executionConfig = ExecutionConfig.newBuilder();

            if (this.execution != null) {
                if (this.execution.networkUri != null) {
                    executionConfig.setNetworkUri(runContext.render(this.execution.networkUri));
                }

                if (this.execution.subnetworkUri != null) {
                    executionConfig.setSubnetworkUri(runContext.render(this.execution.subnetworkUri));
                }

                if (this.execution.networkTags != null) {
                    for (String networkTag : this.execution.networkTags) {
                        executionConfig.addNetworkTags(runContext.render(networkTag));
                    }
                }

                if (this.execution.serviceAccountEmail != null) {
                    executionConfig.setServiceAccount(runContext.render(this.execution.serviceAccountEmail));
                }

                if (this.execution.kmsKey != null) {
                    executionConfig.setKmsKey(runContext.render(this.execution.kmsKey));
                }
            }

            EnvironmentConfig.Builder environmentConfig = EnvironmentConfig.newBuilder()
                .setExecutionConfig(executionConfig.build());

            if (this.peripherals != null) {
                PeripheralsConfig.Builder peripheralsConfig = PeripheralsConfig.newBuilder();

                if (this.peripherals.metastoreService != null) {
                    peripheralsConfig.setMetastoreService(runContext.render(this.peripherals.metastoreService));
                }

                if (this.peripherals.sparkHistoryServer != null) {
                    peripheralsConfig.setSparkHistoryServerConfig(SparkHistoryServerConfig.newBuilder()
                        .setDataprocCluster(runContext.render(this.peripherals.sparkHistoryServer.dataprocCluster))
                        .build()
                    );
                }

                environmentConfig.setPeripheralsConfig(peripheralsConfig.build());
            }

            Batch.Builder batchBuilder = builder.setEnvironmentConfig(environmentConfig.build());

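            // Optional runtime configuration: container image, runtime version and properties.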
            if (this.runtime != null) {
                RuntimeConfig.Builder runtimeConfig = RuntimeConfig.newBuilder();

                if (this.runtime.containerImage != null) {
                    runtimeConfig.setContainerImage(runContext.render(this.runtime.containerImage));
                }

                if (this.runtime.version != null) {
                    runtimeConfig.setVersion(runContext.render(this.runtime.version));
                }

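                // Render both keys and values, since either side may contain expressions.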
                if (this.runtime.properties != null) {
                    runtimeConfig.putAllProperties(this.runtime.properties
                        .entrySet()
                        .stream()
                        .map(throwFunction(entry -> new AbstractMap.SimpleEntry<>(
                            runContext.render(entry.getKey()),
                            runContext.render(entry.getValue())
                        )))
                        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
                    );
                }

                batchBuilder.setRuntimeConfig(runtimeConfig.build());
            }

            Batch batch = batchBuilder.build();

            logger.info("Submitting batch '{}'", batchId);

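            // Parent resource name, i.e. projects/{project}/locations/{region}.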
            CreateBatchRequest request = CreateBatchRequest.newBuilder()
                .setParent(LocationName.of(runContext.render(this.projectId), region).toString())
                .setBatch(batch)
                .setBatchId(batchId)
                .build();
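            // createBatchAsync returns a long-running operation; get() blocks until it completes.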
            Batch response = batchControllerClient.createBatchAsync(request).get();

            return Output.builder().state(response.getState()).build();
        }
    }

    @Builder
    @Getter
    public static class ExecutionConfiguration {
        @Schema(
            title = "Network URI to connect workload to."
        )
        @PluginProperty(dynamic = true)
        private String networkUri;

        @Schema(
            title = "Subnetwork URI to connect workload to."
        )
        @PluginProperty(dynamic = true)
        private String subnetworkUri;

        @Schema(
            title = "Tags used for network traffic control."
        )
        @PluginProperty(dynamic = true)
        private List<String> networkTags;

        @Schema(
            title = "Service account used to execute workload."
        )
        @PluginProperty(dynamic = true)
        private String serviceAccountEmail;

        @Schema(
            title = "The Cloud KMS key to use for encryption."
        )
        @PluginProperty(dynamic = true)
        private String kmsKey;
    }

    @Builder
    @Getter
    public static class PeripheralsConfiguration {
        @Schema(
            title = "Resource name of an existing Dataproc Metastore service.",
            description = "Example: `projects/[project_id]/locations/[region]/services/[service_id]`"
        )
        @PluginProperty(dynamic = true)
        private String metastoreService;

        @Schema(
            title = "Spark History Server configuration for the workload."
        )
        @PluginProperty(dynamic = true)
        private SparkHistoryServerConfiguration sparkHistoryServer;
    }

    @Builder
    @Getter
    public static class SparkHistoryServerConfiguration {
        @Schema(
            title = "Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload.",
            description = "Example: `projects/[project_id]/regions/[region]/clusters/[cluster_name]`"
        )
        @PluginProperty(dynamic = true)
        private String dataprocCluster;
    }

    @Builder
    @Getter
    public static class RuntimeConfiguration {
        @Schema(
            title = "Optional custom container image for the job runtime environment.",
            description = "If not specified, a default container image will be used."
        )
        @PluginProperty(dynamic = true)
        private String containerImage;

        @Schema(
            title = "Version of the batch runtime."
        )
        @PluginProperty(dynamic = true)
        private String version;

        @Schema(
            title = "Properties used to configure the workload execution, as a map of key/value pairs."
        )
        @PluginProperty(dynamic = true)
        private Map<String, String> properties;
    }

    @Builder
    @Getter
    public static class Output implements io.kestra.core.models.tasks.Output {
        @Schema(
            title = "The state of the batch."
        )
        private final com.google.cloud.dataproc.v1.Batch.State state;
    }
}
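
/*
 * A minimal sketch of a concrete subclass (hypothetical, for illustration only;
 * the task and property names below are not part of this file). A subclass only
 * needs to populate the workload-specific part of the Batch builder:
 *
 * @SuperBuilder
 * @Getter
 * @NoArgsConstructor
 * public class SparkSubmit extends AbstractBatch {
 *     @Schema(title = "The main class of the Spark workload.")
 *     @PluginProperty(dynamic = true)
 *     private String mainClass;
 *
 *     @Override
 *     protected void buildBatch(Batch.Builder builder, RunContext runContext) throws IllegalVariableEvaluationException {
 *         builder.setSparkBatch(SparkBatch.newBuilder()
 *             .setMainClass(runContext.render(this.mainClass))
 *             .build());
 *     }
 * }
 */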