All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.elastic.ElasticSinkBuilder Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright 2023 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.elastic;

import com.hazelcast.function.FunctionEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.JetException;
import com.hazelcast.jet.pipeline.Sink;
import com.hazelcast.jet.pipeline.SinkBuilder;
import com.hazelcast.logging.ILogger;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.Serializable;

import static com.hazelcast.jet.elastic.impl.RetryUtils.withRetry;
import static com.hazelcast.jet.impl.util.Util.checkNonNullAndSerializable;
import static java.util.Objects.requireNonNull;

/**
 * Builder for Elasticsearch Sink
 * 

* The Sink first maps items from the pipeline using the provided * {@link #mapToRequestFn(FunctionEx)} and then using {@link BulkRequest}. *

* {@link BulkRequest#BulkRequest()} is used by default, it can be * modified by providing custom {@link #bulkRequestFn(SupplierEx)} * *

* Usage: *

{@code
 * Sink> elasticSink = new ElasticSinkBuilder>()
 *   .clientFn(() -> ElasticClients.client(host, port))
 *   .mapToRequestFn(item -> new IndexRequest("my-index").source(item))
 *   .build();
 * }
*

* Requires {@link #clientFn(SupplierEx)} and {@link #mapToRequestFn(FunctionEx)}. * * @param * @since Jet 4.2 */ public final class ElasticSinkBuilder implements Serializable { private static final long serialVersionUID = 1L; private static final String DEFAULT_NAME = "elasticSink"; private static final int DEFAULT_LOCAL_PARALLELISM = 2; private static final int DEFAULT_RETRIES = 5; private SupplierEx clientFn; private SupplierEx bulkRequestFn = BulkRequest::new; private FunctionEx> mapToRequestFn; private FunctionEx optionsFn = (request) -> RequestOptions.DEFAULT; private int retries = DEFAULT_RETRIES; /** * Set the client supplier function *

* The connector uses the returned instance to access Elasticsearch. * Also see {@link ElasticClients} for convenience * factory methods. *

* For example, to provide an authenticated client: *

{@code
     * builder.clientFn(() -> client(host, port, username, password))
     * }
* * This parameter is required. * * @param clientFn supplier function returning configured Elasticsearch * REST client */ @Nonnull public ElasticSinkBuilder clientFn(@Nonnull SupplierEx clientFn) { this.clientFn = checkNonNullAndSerializable(clientFn, "clientFn"); return this; } /** * Set the supplier function for BulkRequest, defaults to new {@link BulkRequest#BulkRequest()} *

* For example, to modify the BulkRequest used to index documents: *

{@code
     * builder.bulkRequestFn(() -> new BulkRequest().setRefreshPolicy(IMMEDIATE))
     * }
* * @param bulkRequestFn supplier function for the bulk request * @see * Bulk indexing usage in Elastic documentation */ @Nonnull public ElasticSinkBuilder bulkRequestFn(@Nonnull SupplierEx bulkRequestFn) { this.bulkRequestFn = checkNonNullAndSerializable(bulkRequestFn, "bulkRequestFn"); return this; } /** * Set the function mapping the item from a pipeline item to an index * request *

* For example, to create an IndexRequest for a versioned document: *

{@code
     * builder.mapToRequestFn((mapItem) ->
     *                      new IndexRequest("my-index")
     *                              .source(map)
     *                              .version((Long) map.get("version"))
     * }
* * This parameter is required. * * @param mapToRequestFn maps an item from the stream to an * {@link org.elasticsearch.action.index.IndexRequest}, * {@link org.elasticsearch.action.update.UpdateRequest} * or {@link org.elasticsearch.action.delete.DeleteRequest} * @param type of the items from the pipeline */ @Nonnull @SuppressWarnings("unchecked") public ElasticSinkBuilder mapToRequestFn( @Nonnull FunctionEx> mapToRequestFn ) { ElasticSinkBuilder newThis = (ElasticSinkBuilder) this; newThis.mapToRequestFn = checkNonNullAndSerializable(mapToRequestFn, "mapToRequestFn"); return newThis; } /** * Set the function that provides {@link RequestOptions} *

* It can either return a constant value or a value based on provided request. *

* For example, to provide a custom authentication header: *

{@code
     * sinkBuilder.optionsFn((request) -> {
     *     RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
     *     builder.addHeader("Authorization", "Bearer " + TOKEN);
     *     return builder.build();
     * })
     * }
* * @param optionsFn function that provides {@link RequestOptions} * @see * RequestOptions in Elastic documentation */ @Nonnull public ElasticSinkBuilder optionsFn(@Nonnull FunctionEx optionsFn) { this.optionsFn = checkNonNullAndSerializable(optionsFn, "optionsFn"); return this; } /** * Number of retries the connector will do in addition to Elastic client * retries * * Elastic client tries to connect to a node only once for each request. * When a request fails the node is marked dead and is not retried again * for the request. This causes problems with single node clusters or in a * situation where whole cluster becomes unavailable at the same time (e.g. * due to a network issue). * * The initial delay is 2s, increasing by factor of 2 with each retry (4s, * 8s, 16s, ..). * * @param retries number of retries, defaults to 5 */ @Nonnull public ElasticSinkBuilder retries(int retries) { if (retries < 0) { throw new IllegalArgumentException("retries must be positive"); } this.retries = retries; return this; } /** * Create a sink that writes data into Elasticsearch based on this builder configuration */ @Nonnull public Sink build() { requireNonNull(clientFn, "clientFn is not set"); requireNonNull(mapToRequestFn, "mapToRequestFn is not set"); return SinkBuilder .sinkBuilder(DEFAULT_NAME, ctx -> new BulkContext(new RestHighLevelClient(clientFn.get()), bulkRequestFn, optionsFn, retries, ctx.logger())) .receiveFn((bulkContext, item) -> bulkContext.add(mapToRequestFn.apply(item))) .flushFn(BulkContext::flush) .destroyFn(BulkContext::close) .preferredLocalParallelism(DEFAULT_LOCAL_PARALLELISM) .build(); } static final class BulkContext { private final RestHighLevelClient client; private final SupplierEx bulkRequestSupplier; private final FunctionEx optionsFn; private final int retries; private BulkRequest bulkRequest; private final ILogger logger; BulkContext( RestHighLevelClient client, SupplierEx bulkRequestSupplier, FunctionEx optionsFn, int retries, ILogger logger ) { this.client = client; this.bulkRequestSupplier = bulkRequestSupplier; this.optionsFn = optionsFn; this.bulkRequest = bulkRequestSupplier.get(); this.retries = retries; this.logger = logger; } void add(DocWriteRequest request) { bulkRequest.add(request); } void flush() throws IOException { if (!bulkRequest.requests().isEmpty()) { withRetry( () -> { BulkResponse response = client.bulk(bulkRequest, optionsFn.apply(bulkRequest)); if (response.hasFailures()) { throw new JetException(response.buildFailureMessage()); } if (logger.isFineEnabled()) { logger.fine("BulkRequest with " + bulkRequest.requests().size() + " requests succeeded"); } return response; }, retries, IOException.class, JetException.class ); bulkRequest = bulkRequestSupplier.get(); } } void close() throws IOException { logger.fine("Closing BulkContext"); try { flush(); } finally { client.close(); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy