package io.kestra.plugin.gcp.bigquery;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.TableId;
import io.kestra.core.models.annotations.Example;
import io.kestra.core.models.annotations.Metric;
import io.kestra.core.models.annotations.Plugin;
import io.kestra.core.models.executions.metrics.Counter;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.runners.RunContext;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.*;
import lombok.experimental.SuperBuilder;
import org.slf4j.Logger;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static io.kestra.core.utils.Rethrow.throwFunction;
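
/**
 * Copies the partitions of a BigQuery table that fall within a given time
 * interval to another table by delegating the actual copy to the {@link Copy} task.
 */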

@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
@Schema(
    title = "Copy partitions within a given interval to another table."
)
@Plugin(
    examples = {
        @Example(
            full = true,
            code = """
                id: gcp_bq_copy_partitions
                namespace: company.team

                tasks:
                  - id: copy_partitions
                    type: io.kestra.plugin.gcp.bigquery.CopyPartitions
                    projectId: my-project
                    dataset: my-dataset
                    table: my-table
                    destinationTable: my-dest-table
                    partitionType: DAY
                    from: "{{ now() | dateAdd(-30, 'DAYS') }}"
                    to: "{{ now() | dateAdd(-7, 'DAYS') }}"
                """
        )
    },
    metrics = {
        @Metric(name = "size", type = Counter.TYPE, description = "The number of partitions copied.")
    }
)
public class CopyPartitions extends AbstractPartition implements RunnableTask<CopyPartitions.Output>, AbstractJobInterface {
    // The options below are forwarded as-is to the delegated Copy task.
    protected String destinationTable;

    protected JobInfo.WriteDisposition writeDisposition;

    protected JobInfo.CreateDisposition createDisposition;

    protected Duration jobTimeout;

    protected Map<String, String> labels;

    @Builder.Default
    protected Boolean dryRun = false;
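
    /**
     * Resolves the partitions of the source table that fall within the configured
     * interval and copies them to {@code destinationTable} via the {@link Copy} task.
     */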

    @Override
    public CopyPartitions.Output run(RunContext runContext) throws Exception {
        BigQuery connection = this.connection(runContext);
        Logger logger = runContext.logger();

        TableId tableId = this.tableId(runContext);

        // Resolve the partitions of the source table that fall within the configured interval.
        List<String> partitionToCopy = this.listPartitions(runContext, connection, tableId);

        logger.debug("Copying partitions '{}'", partitionToCopy);
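        // Report the number of partitions to copy as the `size` metric.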
        runContext.metric(Counter.of("size", partitionToCopy.size()));

        // Delegate the actual copy to the Copy task, passing each partition as a
        // fully-qualified source table reference ([project.]dataset.table).
        Copy task = Copy.builder()
            .sourceTables(partitionToCopy
                .stream()
                .map(throwFunction(s -> {
                    TableId current = this.tableId(runContext, s);
                    List<String> source = new ArrayList<>();
                    if (current.getProject() != null) {
                        source.add(current.getProject());
                    }

                    source.add(current.getDataset());
                    source.add(current.getTable());

                    return String.join(".", source);
                }))
                .collect(Collectors.toList())
            )
            .destinationTable(this.destinationTable)
            .writeDisposition(this.writeDisposition)
            .createDisposition(this.createDisposition)
            .jobTimeout(this.jobTimeout)
            .labels(this.labels)
            .dryRun(this.dryRun)
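            // Forward location, retry and credential settings taken from the task's inherited configuration.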
            .location(this.location)
            .retryAuto(this.retryAuto)
            .retryReasons(this.retryReasons)
            .retryMessages(this.retryMessages)
            .projectId(this.projectId)
            .serviceAccount(this.serviceAccount)
            .scopes(this.scopes)
            .build();

        // Execute the delegated copy job and surface its job id in this task's output.
        Copy.Output run = task.run(runContext);

        return Output.of(tableId, partitionToCopy, run.getJobId());
    }
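
    /**
     * Output of the copy: the resolved source table coordinates, the copied
     * partitions, and the id of the underlying BigQuery copy job.
     */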

    @Getter
    @Builder
    public static class Output implements io.kestra.core.models.tasks.Output {
        @Schema(
            title = "The project's id"
        )
        private final String projectId;

        @Schema(
            title = "The dataset's id"
        )
        private final String datasetId;

        @Schema(
            title = "The table name"
        )
        private final String table;

        @Schema(
            title = "Partitions copied"
        )
        private final List<String> partitions;

        @Schema(
            title = "The job id"
        )
        private final String jobId;

        public static Output of(TableId table, List<String> partitions, String jobId) {
            return Output.builder()
                .projectId(table.getProject())
                .datasetId(table.getDataset())
                .table(table.getTable())
                .partitions(partitions)
                .jobId(jobId)
                .build();
        }
    }
}