
io.kestra.plugin.gcp.gcs.DeleteList Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of plugin-gcp Show documentation
Show all versions of plugin-gcp Show documentation
Integrate Google Cloud Platform services with Kestra data workflows.
package io.kestra.plugin.gcp.gcs;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Storage;
import io.kestra.core.models.annotations.Example;
import io.kestra.core.models.annotations.Plugin;
import io.kestra.core.models.annotations.PluginProperty;
import io.kestra.core.models.executions.metrics.Counter;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.runners.RunContext;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.*;
import lombok.experimental.SuperBuilder;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import java.net.URI;
import java.util.NoSuchElementException;
import java.util.function.Function;
import jakarta.validation.constraints.Min;
import reactor.core.publisher.Flux;
import reactor.core.publisher.FluxSink;
import reactor.core.scheduler.Schedulers;
/**
 * Task that enumerates the blobs found under {@code from}, keeps the ones accepted by
 * {@link #filter(com.google.cloud.storage.Blob, String)}, deletes each of them and reports
 * how many blobs were deleted together with their cumulated size.
 *
 * <p>When {@code concurrent} is set, the filter and delete steps run on that many parallel rails;
 * otherwise blobs are processed one after another on the calling thread.
 */
@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
@Plugin(
    examples = {
        @Example(
            full = true,
            code = """
                id: gcp_gcs_delete_list
                namespace: company.team
                tasks:
                  - id: delete_list
                    type: io.kestra.plugin.gcp.gcs.DeleteList
                    from: "gs://my_bucket/dir/"
                """
        )
    }
)
@Schema(
    title = "Delete all file to a GCS bucket."
)
public class DeleteList extends AbstractList implements RunnableTask<DeleteList.Output>, ListInterface {
    @Schema(
        title = "raise an error if the file is not found"
    )
    @PluginProperty(dynamic = true)
    @Builder.Default
    private final Boolean errorOnEmpty = false;

    @Min(2)
    @Schema(
        title = "Number of concurrent parallel deletions"
    )
    @PluginProperty
    private Integer concurrent;

    /**
     * Lists, filters and deletes the blobs, then publishes the {@code count} and {@code size} metrics.
     *
     * @param runContext the execution context used to obtain the connection, the logger,
     *                   rendered properties and the metric sink
     * @return the number of deleted blobs and the sum of the sizes reported by the delete step
     * @throws NoSuchElementException if {@code errorOnEmpty} is {@code true} and no blob was processed
     * @throws Exception if rendering, listing or deleting fails
     */
    @Override
    public Output run(RunContext runContext) throws Exception {
        Storage connection = this.connection(runContext);
        Logger logger = runContext.logger();
        URI from = encode(runContext, this.from);
        String regExp = runContext.render(this.regExp);

        // Bridge the listing iterator into a Flux; BUFFER keeps every emitted blob until it is consumed.
        Flux<Blob> flowable = Flux.<Blob>create(emitter -> {
            this.iterator(connection, from)
                .forEachRemaining(emitter::next);
            emitter.complete();
        }, FluxSink.OverflowStrategy.BUFFER);

        Flux<Long> result;
        if (this.concurrent != null) {
            // Fan out over `concurrent` rails, then merge the per-blob sizes back into one stream.
            result = flowable
                .parallel(this.concurrent)
                .runOn(Schedulers.boundedElastic())
                .filter(blob -> this.filter(blob, regExp))
                .map(delete(logger, connection))
                .sequential();
        } else {
            result = flowable
                .filter(blob -> this.filter(blob, regExp))
                .map(delete(logger, connection));
        }

        // left = number of blobs that went through the delete step, right = cumulated size in bytes
        Pair<Long, Long> finalResult = result
            .reduce(Pair.of(0L, 0L), (pair, size) -> Pair.of(pair.getLeft() + 1, pair.getRight() + size))
            .block();

        runContext.metric(Counter.of("count", finalResult.getLeft()));
        runContext.metric(Counter.of("size", finalResult.getRight()));

        // Boolean.TRUE.equals avoids an unboxing NullPointerException when the property is explicitly null.
        if (Boolean.TRUE.equals(this.errorOnEmpty) && finalResult.getLeft() == 0) {
            throw new NoSuchElementException("Unable to find any files to delete on '" + from + "'");
        }

        logger.info("Deleted {} files for {} bytes", finalResult.getLeft(), finalResult.getRight());

        return Output
            .builder()
            .count(finalResult.getLeft())
            .size(finalResult.getRight())
            .build();
    }

    /**
     * Rejects directory entries, then applies the inherited filter.
     *
     * @param blob   the candidate blob
     * @param regExp the rendered regular expression handed to the parent filter
     * @return {@code true} when the blob is not a directory and the parent filter accepts it
     */
    protected boolean filter(com.google.cloud.storage.Blob blob, String regExp) {
        return !blob.isDirectory() && super.filter(blob, regExp);
    }

    /**
     * Builds the per-blob delete step.
     *
     * @param logger     logger receiving one debug line per blob
     * @param connection storage client used to issue the delete call
     * @return a function yielding the blob size when the delete call returns {@code true},
     *         and {@code 0} otherwise
     */
    private static Function<Blob, Long> delete(Logger logger, Storage connection) {
        return o -> {
            logger.debug("Deleting '{}'", io.kestra.plugin.gcp.gcs.models.Blob.uri(o));

            if (connection.delete(BlobId.of(o.getBucket(), o.getName()))) {
                return o.getSize();
            } else {
                return 0L;
            }
        };
    }

    /**
     * Result of a {@link DeleteList} run.
     */
    @Builder
    @Getter
    public static class Output implements io.kestra.core.models.tasks.Output {
        @Builder.Default
        @Schema(
            title = "The count of blobs deleted"
        )
        private final long count = 0;

        @Builder.Default
        @Schema(
            title = "The size of all blobs deleted"
        )
        private final long size = 0;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy