All downloads are FREE. Search and download functionality uses the official Maven repository.

com.hazelcast.jet.elastic.ElasticSourceBuilder Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright 2023 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.elastic;

import com.hazelcast.function.FunctionEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.elastic.impl.ElasticSourceConfiguration;
import com.hazelcast.jet.elastic.impl.ElasticSourcePMetaSupplier;
import com.hazelcast.jet.pipeline.BatchSource;
import com.hazelcast.jet.pipeline.Sources;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;

import javax.annotation.Nonnull;

import static com.hazelcast.jet.impl.util.Util.checkNonNullAndSerializable;
import static com.hazelcast.jet.impl.util.Util.checkSerializable;
import static java.util.Objects.requireNonNull;

/**
 * Builder for Elasticsearch source which reads data from Elasticsearch and
 * converts SearchHits using provided {@code mapToItemFn}
 * 

* Usage: *

{@code
 * BatchSource source = new ElasticSourceBuilder()
 *   .clientFn(() -> client(host, port))
 *   .searchRequestFn(() -> new SearchRequest("my-index"))
 *   .mapToItemFn(SearchHit::getSourceAsString)
 *   .build();
 *
 * BatchStage stage = p.readFrom(source);
 * }
* * Requires {@link #clientFn(SupplierEx)}, * {@link #searchRequestFn(SupplierEx)} and {@link #mapToItemFn(FunctionEx)}. * * @param type of the output of the mapping function from {@link SearchHit} -> T * @since Jet 4.2 */ public final class ElasticSourceBuilder { private static final String DEFAULT_NAME = "elasticSource"; private static final int DEFAULT_RETRIES = 5; private SupplierEx clientFn; private SupplierEx searchRequestFn; private FunctionEx optionsFn = request -> RequestOptions.DEFAULT; private FunctionEx mapToItemFn; private boolean slicing; private boolean coLocatedReading; private String scrollKeepAlive = "1m"; // Using String because it needs to be Serializable private int retries = DEFAULT_RETRIES; /** * Build Elasticsearch {@link BatchSource} with supplied parameters * * @return configured source which is to be used in the pipeline */ @Nonnull public BatchSource build() { requireNonNull(clientFn, "clientFn must be set"); requireNonNull(searchRequestFn, "searchRequestFn must be set"); requireNonNull(mapToItemFn, "mapToItemFn must be set"); ElasticSourceConfiguration configuration = new ElasticSourceConfiguration<>( restHighLevelClientFn(clientFn), searchRequestFn, optionsFn, mapToItemFn, slicing, coLocatedReading, scrollKeepAlive, retries ); ElasticSourcePMetaSupplier metaSupplier = new ElasticSourcePMetaSupplier<>(configuration); return Sources.batchFromProcessor(DEFAULT_NAME, metaSupplier); } // Don't inline - it would capture this.clientFn and would need to serialize whole builder instance private SupplierEx restHighLevelClientFn(SupplierEx clientFn) { return () -> new RestHighLevelClient(clientFn.get()); } /** * Set the client supplier function *

* The connector uses the returned instance to access Elasticsearch. * Also see {@link ElasticClients} for convenience * factory methods. *

* For example, to provide an authenticated client: *

{@code
     * builder.clientFn(() -> client(host, port, username, password))
     * }
* * This parameter is required. * * @param clientFn supplier function returning configured Elasticsearch * REST client */ @Nonnull public ElasticSourceBuilder clientFn(@Nonnull SupplierEx clientFn) { this.clientFn = checkNonNullAndSerializable(clientFn, "clientFn"); return this; } /** * Set the search request supplier function *

* The connector executes this search request to retrieve documents * from Elasticsearch. *

* For example, to create SearchRequest limited to an index `logs`: *

{@code
     * builder.searchRequestFn(() -> new SearchRequest("logs"))
     * }
* * This parameter is required. * * @param searchRequestFn search request supplier function */ @Nonnull public ElasticSourceBuilder searchRequestFn(@Nonnull SupplierEx searchRequestFn) { this.searchRequestFn = checkSerializable(searchRequestFn, "searchRequestFn"); return this; } /** * Set the function to map SearchHit to a pipeline item *

* For example, to map a SearchHit to a value of a field `productId`: *

{@code
     * builder.mapToItemFn(hit -> (String) hit.getSourceAsMap().get("productId"))
     * }
* * This parameter is required. * * @param mapToItemFn maps search hits to output items */ @Nonnull @SuppressWarnings("unchecked") public ElasticSourceBuilder mapToItemFn(@Nonnull FunctionEx mapToItemFn) { ElasticSourceBuilder newThis = (ElasticSourceBuilder) this; newThis.mapToItemFn = checkSerializable(mapToItemFn, "mapToItemFn"); return newThis; } /** * Set the function that provides {@link RequestOptions} *

* It can either return a constant value or a value based on provided request. *

* For example, use this to provide a custom authentication header: *

{@code
     * sourceBuilder.optionsFn((request) -> {
     *     RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
     *     builder.addHeader("Authorization", "Bearer " + TOKEN);
     *     return builder.build();
     * })
     * }
* * @param optionsFn function that provides {@link RequestOptions} * @see * RequestOptions in Elastic documentation */ @Nonnull public ElasticSourceBuilder optionsFn(@Nonnull FunctionEx optionsFn) { this.optionsFn = checkSerializable(optionsFn, "optionsFn"); return this; } /** * Enable slicing *

* Number of slices is equal to {@code globalParallelism * (localParallelism * numberOfNodes)} when only slicing is enabled. When * co-located reading is enabled as well then number of slices for * particular node is equal to {@code localParallelism}. *

* Use this option to read from multiple shards in parallel. It can * also be used on single shard, but it may increase initial latency. * See Elastic documentation for * * Sliced Scroll for details. */ @Nonnull public ElasticSourceBuilder enableSlicing() { this.slicing = true; return this; } /** * Enable co-located reading * * Jet cluster member must run exactly on the same nodes as Elastic cluster. */ @Nonnull public ElasticSourceBuilder enableCoLocatedReading() { this.coLocatedReading = true; return this; } /** * Set the keepAlive for Elastic search scroll *

* The value must be in Elastic time unit format, e.g. 500ms for 500 milliseconds, 30s for 30 seconds, * 5m for 5 minutes. See {@link SearchRequest#scroll(String)}. * * @param scrollKeepAlive keepAlive value, this must be high enough to * process all results from a single scroll, default * value 1m */ @Nonnull public ElasticSourceBuilder scrollKeepAlive(@Nonnull String scrollKeepAlive) { this.scrollKeepAlive = requireNonNull(scrollKeepAlive, scrollKeepAlive); return this; } /** * Number of retries the connector will do in addition to Elastic * client retries * * Elastic client tries to connect to a node only once for each * request. When a request fails the node is marked dead and is * not retried again for the request. This causes problems with * single node clusters or in a situation where whole cluster * becomes unavailable at the same time (e.g. due to a network * issue). * * The initial delay is 2s, increasing by factor of 2 with each retry (4s, 8s, 16s, ..). * * @param retries number of retries, defaults to 5 */ @Nonnull public ElasticSourceBuilder retries(int retries) { if (retries < 0) { throw new IllegalArgumentException("retries must be positive"); } this.retries = retries; return this; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy