// com.hazelcast.jet.elastic.ElasticSourceBuilder
// Artifact: hazelcast-jet-elasticsearch-7
/*
* Copyright 2023 Hazelcast Inc.
*
* Licensed under the Hazelcast Community License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://hazelcast.com/hazelcast-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.elastic;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.elastic.impl.ElasticSourceConfiguration;
import com.hazelcast.jet.elastic.impl.ElasticSourcePMetaSupplier;
import com.hazelcast.jet.pipeline.BatchSource;
import com.hazelcast.jet.pipeline.Sources;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;
import javax.annotation.Nonnull;
import static com.hazelcast.jet.impl.util.Util.checkNonNullAndSerializable;
import static com.hazelcast.jet.impl.util.Util.checkSerializable;
import static java.util.Objects.requireNonNull;
/**
* Builder for Elasticsearch source which reads data from Elasticsearch and
* converts SearchHits using provided {@code mapToItemFn}
*
* Usage:
*
{@code
* BatchSource source = new ElasticSourceBuilder()
* .clientFn(() -> client(host, port))
* .searchRequestFn(() -> new SearchRequest("my-index"))
* .mapToItemFn(SearchHit::getSourceAsString)
* .build();
*
* BatchStage stage = p.readFrom(source);
* }
*
* Requires {@link #clientFn(SupplierEx)},
* {@link #searchRequestFn(SupplierEx)} and {@link #mapToItemFn(FunctionEx)}.
*
* @param type of the output of the mapping function from {@link SearchHit} -> T
* @since Jet 4.2
*/
public final class ElasticSourceBuilder {
private static final String DEFAULT_NAME = "elasticSource";
private static final int DEFAULT_RETRIES = 5;
private SupplierEx clientFn;
private SupplierEx searchRequestFn;
private FunctionEx super ActionRequest, RequestOptions> optionsFn = request -> RequestOptions.DEFAULT;
private FunctionEx super SearchHit, T> mapToItemFn;
private boolean slicing;
private boolean coLocatedReading;
private String scrollKeepAlive = "1m"; // Using String because it needs to be Serializable
private int retries = DEFAULT_RETRIES;
/**
* Build Elasticsearch {@link BatchSource} with supplied parameters
*
* @return configured source which is to be used in the pipeline
*/
@Nonnull
public BatchSource build() {
requireNonNull(clientFn, "clientFn must be set");
requireNonNull(searchRequestFn, "searchRequestFn must be set");
requireNonNull(mapToItemFn, "mapToItemFn must be set");
ElasticSourceConfiguration configuration = new ElasticSourceConfiguration<>(
restHighLevelClientFn(clientFn),
searchRequestFn, optionsFn, mapToItemFn, slicing, coLocatedReading,
scrollKeepAlive, retries
);
ElasticSourcePMetaSupplier metaSupplier = new ElasticSourcePMetaSupplier<>(configuration);
return Sources.batchFromProcessor(DEFAULT_NAME, metaSupplier);
}
// Don't inline - it would capture this.clientFn and would need to serialize whole builder instance
private SupplierEx restHighLevelClientFn(SupplierEx clientFn) {
return () -> new RestHighLevelClient(clientFn.get());
}
/**
* Set the client supplier function
*
* The connector uses the returned instance to access Elasticsearch.
* Also see {@link ElasticClients} for convenience
* factory methods.
*
* For example, to provide an authenticated client:
*
{@code
* builder.clientFn(() -> client(host, port, username, password))
* }
*
* This parameter is required.
*
* @param clientFn supplier function returning configured Elasticsearch
* REST client
*/
@Nonnull
public ElasticSourceBuilder clientFn(@Nonnull SupplierEx clientFn) {
this.clientFn = checkNonNullAndSerializable(clientFn, "clientFn");
return this;
}
/**
* Set the search request supplier function
*
* The connector executes this search request to retrieve documents
* from Elasticsearch.
*
* For example, to create SearchRequest limited to an index `logs`:
*
{@code
* builder.searchRequestFn(() -> new SearchRequest("logs"))
* }
*
* This parameter is required.
*
* @param searchRequestFn search request supplier function
*/
@Nonnull
public ElasticSourceBuilder searchRequestFn(@Nonnull SupplierEx searchRequestFn) {
this.searchRequestFn = checkSerializable(searchRequestFn, "searchRequestFn");
return this;
}
/**
* Set the function to map SearchHit to a pipeline item
*
* For example, to map a SearchHit to a value of a field `productId`:
*
{@code
* builder.mapToItemFn(hit -> (String) hit.getSourceAsMap().get("productId"))
* }
*
* This parameter is required.
*
* @param mapToItemFn maps search hits to output items
*/
@Nonnull
@SuppressWarnings("unchecked")
public ElasticSourceBuilder mapToItemFn(@Nonnull FunctionEx super SearchHit, T_NEW> mapToItemFn) {
ElasticSourceBuilder newThis = (ElasticSourceBuilder) this;
newThis.mapToItemFn = checkSerializable(mapToItemFn, "mapToItemFn");
return newThis;
}
/**
* Set the function that provides {@link RequestOptions}
*
* It can either return a constant value or a value based on provided request.
*
* For example, use this to provide a custom authentication header:
*
{@code
* sourceBuilder.optionsFn((request) -> {
* RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
* builder.addHeader("Authorization", "Bearer " + TOKEN);
* return builder.build();
* })
* }
*
* @param optionsFn function that provides {@link RequestOptions}
* @see
* RequestOptions in Elastic documentation
*/
@Nonnull
public ElasticSourceBuilder optionsFn(@Nonnull FunctionEx super ActionRequest, RequestOptions> optionsFn) {
this.optionsFn = checkSerializable(optionsFn, "optionsFn");
return this;
}
/**
* Enable slicing
*
* Number of slices is equal to {@code globalParallelism
* (localParallelism * numberOfNodes)} when only slicing is enabled. When
* co-located reading is enabled as well then number of slices for
* particular node is equal to {@code localParallelism}.
*
* Use this option to read from multiple shards in parallel. It can
* also be used on single shard, but it may increase initial latency.
* See Elastic documentation for
*
* Sliced Scroll for details.
*/
@Nonnull
public ElasticSourceBuilder enableSlicing() {
this.slicing = true;
return this;
}
/**
* Enable co-located reading
*
* Jet cluster member must run exactly on the same nodes as Elastic cluster.
*/
@Nonnull
public ElasticSourceBuilder enableCoLocatedReading() {
this.coLocatedReading = true;
return this;
}
/**
* Set the keepAlive for Elastic search scroll
*
* The value must be in Elastic time unit format, e.g. 500ms for 500 milliseconds, 30s for 30 seconds,
* 5m for 5 minutes. See {@link SearchRequest#scroll(String)}.
*
* @param scrollKeepAlive keepAlive value, this must be high enough to
* process all results from a single scroll, default
* value 1m
*/
@Nonnull
public ElasticSourceBuilder scrollKeepAlive(@Nonnull String scrollKeepAlive) {
this.scrollKeepAlive = requireNonNull(scrollKeepAlive, scrollKeepAlive);
return this;
}
/**
* Number of retries the connector will do in addition to Elastic
* client retries
*
* Elastic client tries to connect to a node only once for each
* request. When a request fails the node is marked dead and is
* not retried again for the request. This causes problems with
* single node clusters or in a situation where whole cluster
* becomes unavailable at the same time (e.g. due to a network
* issue).
*
* The initial delay is 2s, increasing by factor of 2 with each retry (4s, 8s, 16s, ..).
*
* @param retries number of retries, defaults to 5
*/
@Nonnull
public ElasticSourceBuilder retries(int retries) {
if (retries < 0) {
throw new IllegalArgumentException("retries must be positive");
}
this.retries = retries;
return this;
}
}