// All downloads are free. Search and download functionality uses the official Maven repository.

// com.arakelian.elastic.bulk.BulkIngester - Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.arakelian.elastic.bulk;

import static com.arakelian.elastic.bulk.BulkOperation.Action.DELETE;
import static com.arakelian.elastic.bulk.BulkOperation.Action.INDEX;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.RejectedExecutionException;
import java.util.function.Function;

import com.arakelian.elastic.bulk.BulkOperation.Action;
import com.arakelian.elastic.model.BulkResponse;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.util.concurrent.ListenableFuture;

/**
 * Indexes or deletes a group of documents from one or more Elastic indexes using the Elastic Bulk
 * Index API.
 */
public class BulkIngester {
    /** Name of default indexer **/
    private static final String DEFAULT_INDEXER = "default";

    /** Bulk indexers by name **/
    private final Map bulkIndexers;

    /** Bulk operation factory **/
    private final BulkOperationFactory bulkOperationFactory;

    /** Function that takes an index name and returns the indexer name that should be used **/
    private final Function indexToIndexer;

    public BulkIngester(final BulkOperationFactory factory, final BulkIndexer bulkIndexer) {
        this(factory, ImmutableMap.of(DEFAULT_INDEXER, bulkIndexer), (index) -> DEFAULT_INDEXER);
    }

    public BulkIngester(
            final BulkOperationFactory bulkOperationFactory,
            final Map bulkIndexers,
            final Function indexToIndexer) {
        this.bulkIndexers = Preconditions.checkNotNull(bulkIndexers, "bulkIndexers must be non-null");
        this.bulkOperationFactory = Preconditions
                .checkNotNull(bulkOperationFactory, "bulkOperationFactory must be non-null");
        this.indexToIndexer = Preconditions.checkNotNull(indexToIndexer, "indexToIndexer must be non-null");
        Preconditions.checkArgument(this.bulkIndexers.size() != 0, "Must have at least one bulkIndexer");
    }

    /**
     * Deletes a list of documents from their respective Elastic indexes.
     *
     * @param documents
     *            list of documents to be removed
     * @return an optional future for retrieving bulk responses associated with this request
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> delete(final Collection documents)
            throws RejectedExecutionException, IOException {
        return delete(documents, false);
    }

    /**
     * Deletes a list of documents from their respective Elastic indexes, and optionally flushes
     * those deletes to Elastic immediately.
     *
     * @param documents
     *            list of documents to be removed
     * @param forceFlush
     *            true to flush associated index so that deletes are immediately processed
     * @return an optional future for retrieving bulk responses associated with this request
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> delete(
            final Collection documents,
            final boolean forceFlush) throws RejectedExecutionException, IOException {

        if (documents == null || documents.size() == 0) {
            return ImmutableMap.of();
        }

        Multimap batches = null;
        for (final Object document : documents) {
            batches = makeBatch(document, DELETE, batches);
        }

        return dispatch(batches, forceFlush);
    }

    /**
     * Delete specified document from Elastic index.
     *
     * @param document
     *            document to be deleted
     * @return an optional future for retrieving bulk responses associated with this request
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> delete(final Object document)
            throws RejectedExecutionException, IOException {
        return delete(document, false);
    }

    /**
     * Delete specified document from Elastic index.
     *
     * @param document
     *            document to be deleted
     * @param forceFlush
     *            true to force an immediate flush of data to Elastic
     * @return an optional future for retrieving bulk responses associated with this request
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> delete(
            final Object document,
            final boolean forceFlush) throws RejectedExecutionException, IOException {

        return dispatch(makeBatch(document, DELETE, null), forceFlush);
    }

    public Map getBulkIndexers() {
        return bulkIndexers;
    }

    public BulkOperationFactory getBulkOperationFactory() {
        return bulkOperationFactory;
    }

    /**
     * Adds a list of documents to the Elastic index without immediate flush.
     *
     * @param documents
     *            list of documents to index
     * @return an optional Future for retrieving bulk responses associated with this request.
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> index(final Collection documents)
            throws RejectedExecutionException, IOException {
        return index(documents, false);
    }

    /**
     * Adds a list of documents to the Elastic index with optional flush.
     *
     * @param documents
     *            list of documents to index
     * @param forceFlush
     *            true to flush indexer after adding documents
     * @return an optional Future for retrieving bulk responses associated with this request.
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> index(
            final Collection documents,
            final boolean forceFlush) throws RejectedExecutionException, IOException {
        if (documents == null || documents.size() == 0) {
            return ImmutableMap.of();
        }

        Multimap batches = null;
        for (final Object document : documents) {
            batches = makeBatch(document, INDEX, batches);
        }

        return dispatch(batches, forceFlush);
    }

    /**
     * Adds a document to the Elastic index.
     *
     * @param document
     *            document to be indexed
     * @return an optional Future for retrieving bulk responses associated with this request.
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> index(final Object document)
            throws RejectedExecutionException, IOException {
        return index(document, false);
    }

    /**
     * Adds a document to the Elastic index.
     *
     * @param document
     *            document to be indexed
     * @param forceFlush
     *            true to force an immediate flush of data to Elastic
     * @return an optional Future for retrieving bulk responses associated with this request.
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    public Map>>> index(
            final Object document,
            final boolean forceFlush) throws RejectedExecutionException, IOException {

        return dispatch(makeBatch(document, INDEX, null), forceFlush);
    }

    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this) //
                .omitNullValues() //
                .toString();
    }

    private Map>>> dispatch(
            final Multimap batches,
            final boolean forceFlush) {
        final ImmutableMap.Builder>>> map = ImmutableMap
                .builder();
        for (final String indexerName : batches.keySet()) {
            final BulkIndexer bulkIndexer = bulkIndexers.get(indexerName);
            final Collection batch = batches.get(indexerName);
            map.put(indexerName, bulkIndexer.add(ImmutableList.copyOf(batch), forceFlush));
        }
        return map.build();
    }

    /**
     * Adds a bulk operation to the queue, using the given document and specified action.
     *
     * @param document
     *            document
     * @param action
     *            action to be performed on document
     * @param forceFlush
     *            true to force an immediate flush of data to Elastic
     * @throws RejectedExecutionException
     *             if indexer is closed or background queue is full
     * @throws IOException
     *             if document could not be serialized
     */
    private Multimap makeBatch(
            final Object document,
            final Action action,
            Multimap batches) throws RejectedExecutionException, IOException {
        if (document == null) {
            return batches;
        }

        // a document may be indexed to multiple places
        if (!bulkOperationFactory.supports(document)) {
            throw new IOException("Unsupported document: " + document);
        }

        if (batches == null) {
            batches = LinkedListMultimap.create();
        }

        final List ops = bulkOperationFactory.createBulkOperations(document, action);
        for (final BulkOperation op : ops) {
            final String indexerName = indexToIndexer.apply(op.getIndex().getName());
            batches.put(indexerName, op);
        }
        return batches;
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy