All downloads are free. Search and download functionality uses the official Maven repository.

io.kestra.plugin.elasticsearch.Search Maven / Gradle / Ivy

The newest version!
package io.kestra.plugin.elasticsearch;

import io.kestra.core.models.annotations.Example;
import io.kestra.core.models.annotations.Plugin;
import io.kestra.core.models.annotations.PluginProperty;
import io.kestra.core.models.executions.metrics.Counter;
import io.kestra.core.models.executions.metrics.Timer;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.models.tasks.common.FetchType;
import io.kestra.core.runners.RunContext;
import io.kestra.core.serializers.FileSerde;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.*;
import lombok.experimental.SuperBuilder;
import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.client.opensearch.OpenSearchClient;
import org.opensearch.client.opensearch.core.SearchRequest;
import org.opensearch.client.opensearch.core.SearchResponse;
import org.opensearch.client.transport.rest_client.RestClientTransport;
import org.slf4j.Logger;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

import java.io.*;
import java.net.URI;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static io.kestra.core.utils.Rethrow.throwConsumer;

@SuperBuilder
@ToString
@EqualsAndHashCode
@Getter
@NoArgsConstructor
@Schema(
    title = "Send a search request.",
    description = "Get all documents from a search request and store it as outputs."
)
@Plugin(
    examples = {
        @Example(
            full = true,
            code = """
                id: elasticsearch_search
                namespace: company.team
                
                tasks:
                  - id: search
                    type: io.kestra.plugin.elasticsearch.Search
                    connection:
                      hosts: 
                        - "http://localhost:9200"
                    indexes:
                      - "my_index"
                    request:
                      query: 
                        term:
                          name:
                            value: 'john'
                """
        )
    }
)
public class Search extends AbstractSearch implements RunnableTask {
    @Schema(
        title = "The way you want to store the data.",
        description = "FETCH_ONE output the first row, "
            + "FETCH output all the rows, "
            + "STORE store all rows in a file, "
            + "NONE do nothing."
    )
    @Builder.Default
    @PluginProperty
    private FetchType fetchType = FetchType.FETCH;

    @Override
    public Search.Output run(RunContext runContext) throws Exception {
        Logger logger = runContext.logger();

        try (RestClientTransport transport = this.connection.client(runContext)) {
            OpenSearchClient client = new OpenSearchClient(transport);
            // build request
            SearchRequest.Builder request = this.request(runContext, transport);
            logger.debug("Starting query: {}", request);

            SearchResponse searchResponse = client.search(request.build(), Map.class);

            Output.OutputBuilder outputBuilder = Search.Output.builder();

            switch (fetchType) {
                case FETCH:
                    Pair>, Integer> fetch = this.fetch(searchResponse);
                    outputBuilder
                        .rows(fetch.getLeft())
                        .size(fetch.getRight());
                    break;

                case FETCH_ONE:
                    var o = this.fetchOne(searchResponse);

                    outputBuilder
                        .row(o)
                        .size(o != null ? 1 : 0);
                    break;

                case STORE:
                    Pair store = this.store(runContext, searchResponse);
                    outputBuilder
                        .uri(store.getLeft())
                        .size(store.getRight().intValue());
                    break;
            }

            // metrics
            runContext.metric(Counter.of("requests.count", 1));
            runContext.metric(Counter.of("records", searchResponse.hits().hits().size()));
            runContext.metric(Timer.of("requests.duration", Duration.ofNanos(searchResponse.took())));

            // outputs
            return outputBuilder
                .total(searchResponse.hits().total().value())
                .build();
        }
    }


    protected Pair store(RunContext runContext, SearchResponse searchResponse) throws IOException {
        File tempFile = runContext.workingDir().createTempFile(".ion").toFile();

        try (var output = new BufferedWriter(new FileWriter(tempFile), FileSerde.BUFFER_SIZE)) {
            Flux hitFlux = Flux.fromIterable(searchResponse.hits().hits()).map(hit -> hit.source());
            Long count = FileSerde.writeAll(output, hitFlux).block();

            return Pair.of(
                runContext.storage().putFile(tempFile),
                count
            );
        }
    }

    protected Pair>, Integer> fetch(SearchResponse searchResponse) {
        List> result = new ArrayList<>();

        searchResponse.hits().hits()
            .forEach(throwConsumer(docs -> result.add(docs.source())));

        return Pair.of(result, searchResponse.hits().hits().size());
    }

    protected Map fetchOne(SearchResponse searchResponse) {
        if (searchResponse.hits().hits().isEmpty()) {
            return null;
        }

        return searchResponse.hits().hits().getFirst().source();
    }

    @Builder
    @Getter
    public static class Output implements io.kestra.core.models.tasks.Output {
        @Schema(
            title = "The size of the rows fetched."
        )
        private Integer size;

        @Schema(
            title = "The total of the rows fetched without pagination."
        )
        private Long total;

        @Schema(
            title = "List containing the fetched data.",
            description = "Only populated if using `fetchType=FETCH`."
        )
        private List> rows;

        @Schema(
            title = "Map containing the first row of fetched data.",
            description = "Only populated if using `fetchType=FETCH_ONE`."
        )
        private Map row;

        @Schema(
            title = "The URI of the stored data.",
            description = "Only populated if using `fetchType=STORE`."
        )
        private URI uri;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy