org.elasticsearch.river.mongodb.MongoDBRiverBulkProcessor

package org.elasticsearch.river.mongodb;

import static org.elasticsearch.client.Requests.deleteRequest;
import static org.elasticsearch.client.Requests.indexRequest;

import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkProcessor.Listener;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.river.mongodb.util.MongoDBRiverHelper;
import org.elasticsearch.threadpool.ThreadPool.Info;
import org.elasticsearch.threadpool.ThreadPoolStats.Stats;

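/**
 * Wraps an Elasticsearch {@link BulkProcessor} for the MongoDB river: it batches index and delete
 * requests, tracks per-bulk statistics, throttles when the bulk thread pool queue is nearly full,
 * and handles the special "drop collection" marker by dropping and recreating the type mapping
 * before letting the remaining requests proceed.
 *
 * A minimal usage sketch (assuming a configured river, definition, client and a sourceMap of
 * document fields are available; names here are placeholders):
 *
 *   MongoDBRiverBulkProcessor processor =
 *           new MongoDBRiverBulkProcessor.Builder(river, definition, client, "my-index", "my-type").build();
 *   processor.addBulkRequest("1", sourceMap, null, null);
 */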
public class MongoDBRiverBulkProcessor {

    public static final long DEFAULT_BULK_QUEUE_SIZE = 50;
    public static final Map<String, Object> DROP_INDEX = ImmutableMap.<String, Object> of("dropIndex", Boolean.TRUE);
    private final ESLogger logger = ESLoggerFactory.getLogger(this.getClass().getName());
    private final MongoDBRiver river;
    private final MongoDBRiverDefinition definition;
    private final Client client;
    private final BulkProcessor bulkProcessor;
    private final String index;
    private final String type;

    private final AtomicBoolean flushBulkProcessor = new AtomicBoolean();
    private final AtomicInteger deletedDocuments = new AtomicInteger();
    private final AtomicInteger insertedDocuments = new AtomicInteger();
    private final AtomicInteger updatedDocuments = new AtomicInteger();
    private final AtomicLong documentCount = new AtomicLong();
    private static final Semaphore semaphore = new Semaphore(1);

    private final long bulkQueueSize;
    
    public static class Builder {

        private final MongoDBRiver river;
        private final MongoDBRiverDefinition definition;
        private final Client client;
        private String index;
        private String type;

        public Builder(MongoDBRiver river, MongoDBRiverDefinition definition, Client client, String index, String type) {
            this.river = river;
            this.definition = definition;
            this.client = client;
            this.index = index;
            this.type = type;
        }

        public MongoDBRiverBulkProcessor build() {
            return new MongoDBRiverBulkProcessor(river, definition, client, index, type);
        }
    }

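    /**
     * Bulk lifecycle listener: beforeBulk waits for queue headroom and, when a drop-collection
     * flush is pending, discards the now obsolete requests and recreates the mapping; afterBulk
     * logs statistics on success and marks the river as failed and shuts it down on failure.
     */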
    private final BulkProcessor.Listener listener = new Listener() {

        @Override
        public void beforeBulk(long executionId, BulkRequest request) {
            checkBulkProcessorAvailability();
            logger.trace("beforeBulk - new bulk [{}] of items [{}]", executionId, request.numberOfActions());
            if (flushBulkProcessor.get()) {
                logger.info("About to flush bulk request index[{}] - type[{}]", index, type);
                int dropCollectionIndex = findLastDropCollection(request.requests());
                request.requests().subList(0, dropCollectionIndex + 1).clear();
                try {
                    dropRecreateMapping();
                    deletedDocuments.set(0);
                    updatedDocuments.set(0);
                    insertedDocuments.set(0);
                    flushBulkProcessor.set(false);
                } catch (Throwable t) {
                    logger.error("Drop collection operation failed", t);
                    MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.IMPORT_FAILED);
                    request.requests().clear();
                    bulkProcessor.close();
                    river.close();
                }
            }
        }

        @SuppressWarnings("rawtypes")
        private int findLastDropCollection(List<ActionRequest> request) {
            int index = 0;
            for (int i = 0; i < request.size(); i++) {
                ActionRequest action = request.get(i);
                if (action instanceof IndexRequest) {
                    Map<String, Object> source = ((IndexRequest) action).sourceAsMap();
                    if (source.equals(DROP_INDEX)) {
                        index = i;
                    }
                }
            }
            return index;
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            if (failure.getClass().equals(ActionRequestValidationException.class)) {
                if (logger.isTraceEnabled()) {
                    logger.trace("Ignore ActionRequestValidationException : {}", failure);
                }
            } else {
                logger.error("afterBulk - Bulk request failed: {} - {} - {}", executionId, request, failure);
                MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.IMPORT_FAILED);
                request.requests().clear();
                bulkProcessor.close();
                river.close();
            }
        }

        @Override
        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
            if (response.hasFailures()) {
                logger.error("Bulk processor failed. {}", response.buildFailureMessage());
                MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.IMPORT_FAILED);
                request.requests().clear();
                bulkProcessor.close();
                river.close();
            } else {
                documentCount.addAndGet(response.getItems().length);
                logStatistics(response.getTookInMillis());
                deletedDocuments.set(0);
                updatedDocuments.set(0);
                insertedDocuments.set(0);
                if (logger.isTraceEnabled()) {
                    logger.trace("afterBulk - bulk [{}] success [{} items] [{} ms] total [{}]", executionId, response.getItems().length,
                            response.getTookInMillis(), documentCount.get());
                }
            }
        }
    };

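    /** Builds the underlying {@link BulkProcessor} from the bulk settings of the river definition. */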
    MongoDBRiverBulkProcessor(MongoDBRiver river, MongoDBRiverDefinition definition, Client client, String index, String type) {
        this.river = river;
        this.bulkProcessor = BulkProcessor.builder(client, listener).setBulkActions(definition.getBulk().getBulkActions())
                .setConcurrentRequests(definition.getBulk().getConcurrentRequests())
                .setFlushInterval(definition.getBulk().getFlushInterval()).setBulkSize(definition.getBulk().getBulkSize()).build();
        this.definition = definition;
        this.client = client;
        this.index = index;
        this.type = type;
        this.bulkQueueSize = getBulkQueueSize();
    }

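    /**
     * Enqueues the {@link #DROP_INDEX} marker document and flags the processor so that the next
     * bulk execution drops and recreates the type mapping.
     */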
    public void dropIndex() {
        addBulkRequest(null, DROP_INDEX, null, null);
        flushBulkProcessor.set(true);
    }

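    /** Queues an index request for the given document source; both overloads below count as insertions. */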
    public void addBulkRequest(String id, Map<String, Object> source, String routing, String parent) {
        bulkProcessor.add(indexRequest(index).type(type).id(id).source(source).routing(routing).parent(parent));
        insertedDocuments.incrementAndGet();
    }

    public void addBulkRequest(String id, XContentBuilder source, String routing, String parent) {
        bulkProcessor.add(indexRequest(index).type(type).id(id).source(source).routing(routing).parent(parent));
        insertedDocuments.incrementAndGet();
    }

    // public void updateBulkRequest(String id, XContentBuilder source, String routing, String parent) {
    //     deleteBulkRequest(id, routing, parent);
    //     bulkProcessor.add(indexRequest(index).type(type).id(id).source(source).routing(routing).parent(parent));
    //     updatedDocuments.incrementAndGet();
    // }

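    /** Queues a delete request for the given document id. */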
    public void deleteBulkRequest(String id, String routing, String parent) {
        logger.trace("deleteBulkRequest - id: {} - index: {} - type: {} - routing: {} - parent: {}", id, index, type, routing, parent);
        bulkProcessor.add(deleteRequest(index).type(type).id(id).routing(routing).parent(parent));
        deletedDocuments.incrementAndGet();
    }

    public BulkProcessor getBulkProcessor() {
        return bulkProcessor;
    }

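    /** Blocks, polling every 2 seconds, until the bulk thread pool queue has enough headroom. */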
    private void checkBulkProcessorAvailability() {
        while (!isBulkProcessorAvailable()) {
            try {
                if (logger.isInfoEnabled()) {
                    logger.info("Waiting for bulk queue to empty...");
                }
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                logger.warn("checkBulkProcessorAvailability interrupted", e);
            }
        }
    }

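    /**
     * Reads the configured size of the "bulk" thread pool queue from the nodes info API, falling
     * back to {@link #DEFAULT_BULK_QUEUE_SIZE} when it cannot be determined.
     */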
    private long getBulkQueueSize() {
        NodesInfoResponse response = client.admin().cluster().prepareNodesInfo().setThreadPool(true).get();
        for (NodeInfo node : response.getNodes()) {
            Iterator<Info> iterator = node.getThreadPool().iterator();
            while (iterator.hasNext()) {
                Info info = iterator.next();
                if ("bulk".equals(info.getName())) {
                    return info.getQueueSize().getSingles();
                }
            }
        }
        return DEFAULT_BULK_QUEUE_SIZE;
    }
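
    /**
     * Returns true when more than 10% of the bulk thread pool queue is free on the first node
     * that reports a "bulk" pool, or when no such statistics are available.
     */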
    private boolean isBulkProcessorAvailable() {
        NodesStatsResponse response = client.admin().cluster().prepareNodesStats().setThreadPool(true).get();
        for (NodeStats nodeStats : response.getNodes()) {
            Iterator<Stats> iterator = nodeStats.getThreadPool().iterator();
            while (iterator.hasNext()) {
                Stats stats = iterator.next();
                if ("bulk".equals(stats.getName())) {
                    int queue = stats.getQueue();
                    logger.trace("bulkQueueSize [{}] - queue [{}] - availability [{}]", bulkQueueSize, queue,
                            1 - ((double) queue / bulkQueueSize));
                    return 1 - ((double) queue / bulkQueueSize) > 0.1;
                }
            }
        }
        return true;
    }

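    /**
     * Deletes the type mapping and immediately puts it back from the captured metadata, so a
     * custom mapping survives a MongoDB drop-collection (see Issue #105). Guarded by a semaphore
     * so only one drop/recreate runs at a time.
     */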
    private void dropRecreateMapping() throws IOException, InterruptedException {
        try {
            semaphore.acquire();
            logger.trace("dropRecreateMapping index[{}] - type[{}]", index, type);
            client.admin().indices().prepareRefresh(index).get();
            ImmutableOpenMap<String, MappingMetaData> mappings = client.admin().cluster().prepareState().get().getState().getMetaData()
                    .index(index).mappings();
            logger.trace("mappings contains type {}: {}", type, mappings.containsKey(type));
            if (mappings.containsKey(type)) {
                /*
                 * Issue #105 - Mapping changes from a custom mapping to dynamic
                 * when drop_collection = true. Capture the existing mapping
                 * metadata (in case it has been customized) before deleting it.
                 */
                MappingMetaData mapping = mappings.get(type);
                if (client.admin().indices().prepareDeleteMapping(index).setType(type).get().isAcknowledged()) {
                    PutMappingResponse pmr = client.admin().indices().preparePutMapping(index).setType(type)
                            .setSource(mapping.getSourceAsMap()).get();
                    if (!pmr.isAcknowledged()) {
                        logger.error("Failed to put mapping {} / {} / {}.", index, type, mapping.source());
                    } else {
                        logger.info("Delete and recreate for index / type [{}] [{}] successfully executed.", index, type);
                    }
                } else {
                    logger.warn("Delete of type[{}] on index[{}] was not acknowledged", type, index);
                }
            } else {
                logger.info("type[{}] does not exist in index[{}]. No need to remove mapping.", type, index);
            }
        } finally {
            semaphore.release();
        }
    }

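    /** When statistics storage is enabled, indexes a per-bulk statistics document into the configured statistics index/type. */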
    private void logStatistics(long duration) {
        if (definition.isStoreStatistics()) {
            long totalDocuments = deletedDocuments.get() + insertedDocuments.get();
            logger.debug("Indexed {} documents, {} insertions, {} updates, {} deletions", totalDocuments, insertedDocuments.get(),
                    updatedDocuments.get(), deletedDocuments.get());
            Map<String, Object> source = new HashMap<String, Object>();
            Map<String, Object> statistics = Maps.newHashMap();
            statistics.put("duration", duration);
            statistics.put("date", new Date());
            statistics.put("index", index);
            statistics.put("type", type);
            statistics.put("documents.inserted", insertedDocuments.get());
            statistics.put("documents.updated", updatedDocuments.get());
            statistics.put("documents.deleted", deletedDocuments.get());
            statistics.put("documents.total", documentCount.get());
            source.put("statistics", statistics);
            client.prepareIndex(definition.getStatisticsIndexName(), definition.getStatisticsTypeName()).setSource(source).get();
        }
    }
}