org.elasticsearch.river.mongodb.MongoDBRiverBulkProcessor
MongoDB River for ElasticSearch
package org.elasticsearch.river.mongodb;
import static org.elasticsearch.client.Requests.deleteRequest;
import static org.elasticsearch.client.Requests.indexRequest;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkProcessor.Listener;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.river.mongodb.util.MongoDBRiverHelper;
import org.elasticsearch.threadpool.ThreadPool.Info;
import org.elasticsearch.threadpool.ThreadPoolStats.Stats;
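/**
 * Wraps an Elasticsearch BulkProcessor for the MongoDB river: it queues index and delete requests,
 * throttles while the target node's bulk thread pool queue is close to full, and handles the
 * "drop collection" marker by deleting and recreating the type mapping.
 */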
public class MongoDBRiverBulkProcessor {
public static final long DEFAULT_BULK_QUEUE_SIZE = 50;
public static final Map<String, Object> DROP_INDEX = ImmutableMap.<String, Object>of("dropIndex", Boolean.TRUE);
private final ESLogger logger = ESLoggerFactory.getLogger(this.getClass().getName());
private final MongoDBRiver river;
private final MongoDBRiverDefinition definition;
private final Client client;
private final BulkProcessor bulkProcessor;
private final String index;
private final String type;
private final AtomicBoolean flushBulkProcessor = new AtomicBoolean();
private final AtomicInteger deletedDocuments = new AtomicInteger();
private final AtomicInteger insertedDocuments = new AtomicInteger();
private final AtomicInteger updatedDocuments = new AtomicInteger();
private final AtomicLong documentCount = new AtomicLong();
private final static Semaphore semaphore = new Semaphore(1);
private final long bulkQueueSize;
public static class Builder {
private final MongoDBRiver river;
private final MongoDBRiverDefinition definition;
private final Client client;
private String index;
private String type;
public Builder(MongoDBRiver river, MongoDBRiverDefinition definition, Client client, String index, String type) {
this.river = river;
this.definition = definition;
this.client = client;
this.index = index;
this.type = type;
}
public MongoDBRiverBulkProcessor build() {
return new MongoDBRiverBulkProcessor(river, definition, client, index, type);
}
}
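/**
 * Bulk listener. Before each bulk it waits for queue capacity and, if a drop-collection marker was
 * queued, discards the requests up to that marker and recreates the mapping. After each bulk it
 * records statistics, or marks the river IMPORT_FAILED and shuts it down on failure.
 */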
private final BulkProcessor.Listener listener = new Listener() {
@Override
public void beforeBulk(long executionId, BulkRequest request) {
checkBulkProcessorAvailability();
logger.trace("beforeBulk - new bulk [{}] of items [{}]", executionId, request.numberOfActions());
if (flushBulkProcessor.get()) {
logger.info("About to flush bulk request index[{}] - type[{}]", index, type);
int dropCollectionIndex = findLastDropCollection(request.requests());
request.requests().subList(0, dropCollectionIndex + 1).clear();
try {
dropRecreateMapping();
deletedDocuments.set(0);
updatedDocuments.set(0);
insertedDocuments.set(0);
flushBulkProcessor.set(false);
} catch (Throwable t) {
logger.error("Drop collection operation failed", t);
MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.IMPORT_FAILED);
request.requests().clear();
bulkProcessor.close();
river.close();
}
}
}
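/**
 * Returns the position of the last queued DROP_INDEX marker, so that every request up to and
 * including it can be discarded before the mapping is recreated.
 */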
@SuppressWarnings("rawtypes")
private int findLastDropCollection(List<ActionRequest> request) {
int index = 0;
for (int i = 0; i < request.size(); i++) {
ActionRequest<?> action = request.get(i);
if (action instanceof IndexRequest) {
Map<String, Object> source = ((IndexRequest) action).sourceAsMap();
if (source.equals(DROP_INDEX)) {
index = i;
}
}
}
return index;
}
@Override
public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
if (failure.getClass().equals(ActionRequestValidationException.class)) {
if (logger.isTraceEnabled()) {
logger.trace("Ignore ActionRequestValidationException : {}", failure);
}
} else {
logger.error("afterBulk - Bulk request failed: {} - {} - {}", executionId, request, failure);
MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.IMPORT_FAILED);
request.requests().clear();
bulkProcessor.close();
river.close();
}
}
@Override
public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
if (response.hasFailures()) {
logger.error("Bulk processor failed. {}", response.buildFailureMessage());
MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.IMPORT_FAILED);
request.requests().clear();
bulkProcessor.close();
river.close();
} else {
documentCount.addAndGet(response.getItems().length);
logStatistics(response.getTookInMillis());
deletedDocuments.set(0);
updatedDocuments.set(0);
insertedDocuments.set(0);
if (logger.isTraceEnabled()) {
logger.trace("afterBulk - bulk [{}] success [{} items] [{} ms] total [{}]", executionId, response.getItems().length,
response.getTookInMillis(), documentCount.get());
}
}
}
};
MongoDBRiverBulkProcessor(MongoDBRiver river, MongoDBRiverDefinition definition, Client client, String index, String type) {
this.river = river;
this.bulkProcessor = BulkProcessor.builder(client, listener).setBulkActions(definition.getBulk().getBulkActions())
.setConcurrentRequests(definition.getBulk().getConcurrentRequests())
.setFlushInterval(definition.getBulk().getFlushInterval()).setBulkSize(definition.getBulk().getBulkSize()).build();
this.definition = definition;
this.client = client;
this.index = index;
this.type = type;
this.bulkQueueSize = getBulkQueueSize();
}
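/**
 * Queues the DROP_INDEX marker and flags the processor so that the next bulk flush drops and
 * recreates the type mapping (see beforeBulk above).
 */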
public void dropIndex() {
addBulkRequest(null, DROP_INDEX, null, null);
flushBulkProcessor.set(true);
}
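/**
 * Queues an index request for the document (map or XContentBuilder source) and counts it as an
 * insertion.
 */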
public void addBulkRequest(String id, Map<?, ?> source, String routing, String parent) {
bulkProcessor.add(indexRequest(index).type(type).id(id).source(source).routing(routing).parent(parent));
insertedDocuments.incrementAndGet();
}
public void addBulkRequest(String id, XContentBuilder source, String routing, String parent) {
bulkProcessor.add(indexRequest(index).type(type).id(id).source(source).routing(routing).parent(parent));
insertedDocuments.incrementAndGet();
}
// public void updateBulkRequest(String id, XContentBuilder source, String
// routing, String parent) {
// deleteBulkRequest(id, routing, parent);
// bulkProcessor.add(indexRequest(index).type(type).id(id).source(source).routing(routing).parent(parent));
// updatedDocuments.incrementAndGet();
// }
public void deleteBulkRequest(String id, String routing, String parent) {
logger.trace("deleteBulkRequest - id: {} - index: {} - type: {} - routing: {} - parent: {}", id, index, type, routing, parent);
bulkProcessor.add(deleteRequest(index).type(type).id(id).routing(routing).parent(parent));
deletedDocuments.incrementAndGet();
}
public BulkProcessor getBulkProcessor() {
return bulkProcessor;
}
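/**
 * Blocks, polling every two seconds, until the bulk thread pool queue has enough free capacity.
 */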
private void checkBulkProcessorAvailability() {
while (!isBulkProcessorAvailable()) {
try {
if (logger.isInfoEnabled()) {
logger.info("Waiting for bulk queue to empty...");
}
Thread.sleep(2000);
} catch (InterruptedException e) {
logger.warn("checkIndexStatistics interrupted", e);
}
}
}
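/**
 * Reads the configured capacity of the "bulk" thread pool queue from the node info, falling back to
 * DEFAULT_BULK_QUEUE_SIZE when it cannot be determined.
 */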
private long getBulkQueueSize() {
NodesInfoResponse response = client.admin().cluster().prepareNodesInfo().setThreadPool(true).get();
for (NodeInfo node : response.getNodes()) {
Iterator<Info> iterator = node.getThreadPool().iterator();
while (iterator.hasNext()) {
Info info = iterator.next();
if ("bulk".equals(info.getName())) {
return info.getQueueSize().getSingles();
}
}
}
return DEFAULT_BULK_QUEUE_SIZE;
}
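/**
 * Returns true while the "bulk" thread pool queue still has more than 10% free capacity, i.e. the
 * river can keep queueing requests without overloading the node.
 */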
private boolean isBulkProcessorAvailable() {
NodesStatsResponse response = client.admin().cluster().prepareNodesStats().setThreadPool(true).get();
for (NodeStats nodeStats : response.getNodes()) {
Iterator<Stats> iterator = nodeStats.getThreadPool().iterator();
while (iterator.hasNext()) {
Stats stats = iterator.next();
if ("bulk".equals(stats.getName())) {
int queue = stats.getQueue();
logger.trace("bulkQueueSize [{}] - queue [{}] - availability [{}]", bulkQueueSize, queue, 1 - (queue / bulkQueueSize));
return 1 - (queue / bulkQueueSize) > 0.1;
}
}
}
return true;
}
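/**
 * Deletes the type mapping and immediately recreates it from its previous source, so a dropped
 * MongoDB collection does not leave stale documents while any custom mapping is preserved. A
 * semaphore ensures only one drop/recreate runs at a time.
 */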
private void dropRecreateMapping() throws IOException, InterruptedException {
try {
semaphore.acquire();
logger.trace("dropRecreateMapping index[{}] - type[{}]", index, type);
client.admin().indices().prepareRefresh(index).get();
ImmutableOpenMap<String, MappingMetaData> mappings = client.admin().cluster().prepareState().get().getState().getMetaData()
.index(index).mappings();
logger.trace("mappings contains type {}: {}", type, mappings.containsKey(type));
if (mappings.containsKey(type)) {
/*
* Issue #105 - Mapping changing from custom mapping to dynamic
* when drop_collection = true. Capture the existing mapping
* metadata (in case it has been customized) before deleting it.
*/
MappingMetaData mapping = mappings.get(type);
if (client.admin().indices().prepareDeleteMapping(index).setType(type).get().isAcknowledged()) {
PutMappingResponse pmr = client.admin().indices().preparePutMapping(index).setType(type)
.setSource(mapping.getSourceAsMap()).get();
if (!pmr.isAcknowledged()) {
logger.error("Failed to put mapping {} / {} / {}.", index, type, mapping.source());
} else {
logger.info("Delete and recreate for index / type [{}] [{}] successfully executed.", index, type);
}
} else {
logger.warn("Delete type[{}] on index[{}] return aknowledge false", type, index);
}
} else {
logger.info("type[{}] does not exist in index[{}]. No need to remove mapping.", index, type);
}
} finally {
semaphore.release();
}
}
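/**
 * When statistics are enabled in the river definition, indexes a document with the bulk duration
 * and the insert/update/delete counters into the configured statistics index.
 */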
private void logStatistics(long duration) {
if (definition.isStoreStatistics()) {
long totalDocuments = deletedDocuments.get() + insertedDocuments.get();
logger.debug("Indexed {} documents, {} insertions, {} updates, {} deletions", totalDocuments, insertedDocuments.get(),
updatedDocuments.get(), deletedDocuments.get());
Map<String, Object> source = new HashMap<String, Object>();
Map<String, Object> statistics = Maps.newHashMap();
statistics.put("duration", duration);
statistics.put("date", new Date());
statistics.put("index", index);
statistics.put("type", type);
statistics.put("documents.inserted", insertedDocuments.get());
statistics.put("documents.updated", updatedDocuments.get());
statistics.put("documents.deleted", deletedDocuments.get());
statistics.put("documents.total", documentCount.get());
source.put("statistics", statistics);
client.prepareIndex(definition.getStatisticsIndexName(), definition.getStatisticsTypeName()).setSource(source).get();
}
}
}
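A minimal usage sketch, assuming the river is already running: the river, definition and client variables below stand in for objects the river normally supplies, and the index, type and document values are illustrative only.

MongoDBRiverBulkProcessor processor = new MongoDBRiverBulkProcessor.Builder(river, definition, client, "myindex", "mytype").build();
processor.addBulkRequest("5426b1c51e5a54d8a2d6c8d1", ImmutableMap.of("title", "example"), null, null); // counted as an insertion
processor.deleteBulkRequest("5426b1c51e5a54d8a2d6c8d1", null, null); // counted as a deletion
processor.getBulkProcessor().close(); // flushes any queued requests and shuts the processor down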