io.kestra.plugin.elasticsearch.AbstractLoad Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of plugin-elasticsearch Show documentation
Connect Elasticsearch search and analytics engine to Kestra workflows.
There is a newer version: 0.19.0
package io.kestra.plugin.elasticsearch;

import io.kestra.core.models.annotations.PluginProperty;
import io.kestra.core.models.executions.metrics.Counter;
import io.kestra.core.models.executions.metrics.Timer;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.runners.RunContext;
import io.reactivex.Flowable;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.*;
import lombok.experimental.SuperBuilder;
import org.opensearch.action.DocWriteRequest;
import org.opensearch.action.bulk.BulkRequest;
import org.opensearch.action.bulk.BulkResponse;
import org.opensearch.client.RequestOptions;
import org.opensearch.client.RestHighLevelClient;
import org.slf4j.Logger;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.concurrent.atomic.AtomicLong;
import javax.validation.constraints.NotNull;

@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
public abstract class AbstractLoad extends AbstractTask implements RunnableTask {
    @Schema(
        title = "The source file"
    )
    @PluginProperty(dynamic = true)
    @NotNull
    private String from;

    @Schema(
        title = "The size of chunk for every bulk request"
    )
    @PluginProperty(dynamic = true)
    @Builder.Default
    private Integer chunk = 1000;

    abstract protected Flowable> source(RunContext runContext, BufferedReader inputStream);

    @Override
    public AbstractLoad.Output run(RunContext runContext) throws Exception {
        Logger logger = runContext.logger();
        URI from = new URI(runContext.render(this.from));

        try (
            RestHighLevelClient client = this.connection.client(runContext);
            BufferedReader inputStream = new BufferedReader(new InputStreamReader(runContext.uriToInputStream(from)))
        ) {
            AtomicLong count = new AtomicLong();
            AtomicLong duration = new AtomicLong();

            Flowable flowable = this.source(runContext, inputStream)
                .doOnNext(docWriteRequest -> {
                    count.incrementAndGet();
                })
                .buffer(this.chunk, this.chunk)
                .map(indexRequests -> {
                    BulkRequest bulkRequest = new BulkRequest();
                    indexRequests.forEach(bulkRequest::add);

                    return client.bulk(bulkRequest, RequestOptions.DEFAULT);
                })
                .doOnNext(bulkItemResponse -> {
                    duration.addAndGet(bulkItemResponse.getTook().nanos());

                    if (bulkItemResponse.hasFailures()) {
                        throw new IOException("Indexer failed bulk '" + bulkItemResponse.buildFailureMessage() + "'");
                    }
                });

            // metrics & finalize
            Long requestCount = flowable.count().blockingGet();
            runContext.metric(Counter.of("requests.count", requestCount));
            runContext.metric(Counter.of("records", count.get()));
            runContext.metric(Timer.of("requests.duration", Duration.ofNanos(duration.get())));

            logger.info(
                "Successfully send {} requests for {} records in {}",
                requestCount,
                count.get(),
                Duration.ofNanos(duration.get())
            );

            return Output.builder()
                .size(count.get())
                .build();
        }
    }

    @Builder
    @Getter
    public static class Output implements io.kestra.core.models.tasks.Output {
        @Schema(
            title = "The size of the rows fetch"
        )
        private Long size;
    }
}