
// org.sonar.server.es.BulkIndexer Maven / Gradle / Ivy
/*
* SonarQube
* Copyright (C) 2009-2016 SonarSource SA
* mailto:contact AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.server.es;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import java.util.Map;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequestBuilder;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.sort.SortOrder;
import org.picocontainer.Startable;
import org.sonar.api.utils.log.Logger;
import org.sonar.api.utils.log.Loggers;
import org.sonar.core.util.ProgressLogger;
import static java.lang.String.format;
/**
* Helper to bulk requests in an efficient way :
*
* - bulk request is sent on the wire when its estimated size reaches the flush threshold (1 MB by default, see setFlushByteSize)
* - on large table indexing, replicas and automatic refresh can be temporarily disabled
* - index refresh is optional (enabled by default)
*
*/
public class BulkIndexer implements Startable {
private static final Logger LOGGER = Loggers.get(BulkIndexer.class);
private static final long FLUSH_BYTE_SIZE = new ByteSizeValue(1, ByteSizeUnit.MB).bytes();
private static final String REFRESH_INTERVAL_SETTING = "index.refresh_interval";
private static final String ALREADY_STARTED_MESSAGE = "Bulk indexing is already started";
private final EsClient client;
private final String indexName;
private boolean large = false;
private long flushByteSize = FLUSH_BYTE_SIZE;
private BulkRequestBuilder bulkRequest = null;
private Map largeInitialSettings = null;
private final AtomicLong counter = new AtomicLong(0L);
private final int concurrentRequests;
private final Semaphore semaphore;
private final ProgressLogger progress;
public BulkIndexer(EsClient client, String indexName) {
this.client = client;
this.indexName = indexName;
this.progress = new ProgressLogger(format("Progress[BulkIndexer[%s]]", indexName), counter, LOGGER)
.setPluralLabel("requests");
// see https://jira.sonarsource.com/browse/SONAR-8075
this.concurrentRequests = Math.max(1, Runtime.getRuntime().availableProcessors() / 5);
this.semaphore = new Semaphore(concurrentRequests);
}
/**
* Large indexing is an heavy operation that populates an index generally from scratch. Replicas and
* automatic refresh are disabled during bulk indexing and lucene segments are optimized at the end.
*/
public BulkIndexer setLarge(boolean b) {
Preconditions.checkState(bulkRequest == null, ALREADY_STARTED_MESSAGE);
this.large = b;
return this;
}
public BulkIndexer setFlushByteSize(long flushByteSize) {
this.flushByteSize = flushByteSize;
return this;
}
@Override
public void start() {
Preconditions.checkState(bulkRequest == null, ALREADY_STARTED_MESSAGE);
if (large) {
largeInitialSettings = Maps.newHashMap();
Map bulkSettings = Maps.newHashMap();
GetSettingsResponse settingsResp = client.nativeClient().admin().indices().prepareGetSettings(indexName).get();
// deactivate replicas
int initialReplicas = Integer.parseInt(settingsResp.getSetting(indexName, IndexMetaData.SETTING_NUMBER_OF_REPLICAS));
if (initialReplicas > 0) {
largeInitialSettings.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, initialReplicas);
bulkSettings.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0);
}
// deactivate periodical refresh
String refreshInterval = settingsResp.getSetting(indexName, REFRESH_INTERVAL_SETTING);
largeInitialSettings.put(REFRESH_INTERVAL_SETTING, refreshInterval);
bulkSettings.put(REFRESH_INTERVAL_SETTING, "-1");
updateSettings(bulkSettings);
}
bulkRequest = client.prepareBulk().setRefresh(false);
counter.set(0L);
progress.start();
}
public void add(ActionRequest request) {
bulkRequest.request().add(request);
if (bulkRequest.request().estimatedSizeInBytes() >= flushByteSize) {
executeBulk();
}
}
public void addDeletion(SearchRequestBuilder searchRequest) {
searchRequest
.addSort("_doc", SortOrder.ASC)
.setScroll(TimeValue.timeValueMinutes(5))
.setSize(100)
// load only doc ids, not _source fields
.setFetchSource(false);
// this search is synchronous. An optimization would be to be non-blocking,
// but it requires to tracking pending requests in close().
// Same semaphore can't be reused because of potential deadlock (requires to acquire
// two locks)
SearchResponse searchResponse = searchRequest.get();
while (true) {
SearchHit[] hits = searchResponse.getHits().getHits();
for (SearchHit hit : hits) {
DeleteRequestBuilder deleteRequestBuilder = client.prepareDelete(hit.index(), hit.type(), hit.getId());
SearchHitField routing = hit.field("_routing");
if (routing != null) {
deleteRequestBuilder.setRouting(routing.getValue());
}
add(deleteRequestBuilder.request());
}
String scrollId = searchResponse.getScrollId();
searchResponse = client.prepareSearchScroll(scrollId).setScroll(TimeValue.timeValueMinutes(5)).get();
if (hits.length == 0) {
client.nativeClient().prepareClearScroll().addScrollId(scrollId).get();
break;
}
}
}
/**
* Delete all the documents matching the given search request. This method is blocking.
* Index is refreshed, so docs are not searchable as soon as method is executed.
*
* Note that the parameter indexName could be removed if progress logs are not needed.
*/
public static void delete(EsClient client, String indexName, SearchRequestBuilder searchRequest) {
BulkIndexer bulk = new BulkIndexer(client, indexName);
bulk.start();
bulk.addDeletion(searchRequest);
bulk.stop();
}
@Override
public void stop() {
if (bulkRequest.numberOfActions() > 0) {
executeBulk();
}
try {
if (semaphore.tryAcquire(concurrentRequests, 10, TimeUnit.MINUTES)) {
semaphore.release(concurrentRequests);
}
} catch (InterruptedException e) {
throw new IllegalStateException("Elasticsearch bulk requests still being executed after 10 minutes", e);
}
progress.stop();
client.prepareRefresh(indexName).get();
if (large) {
// optimize lucene segments and revert index settings
// Optimization must be done before re-applying replicas:
// http://www.elasticsearch.org/blog/performance-considerations-elasticsearch-indexing/
client.prepareForceMerge(indexName).get();
updateSettings(largeInitialSettings);
}
bulkRequest = null;
}
private void updateSettings(Map settings) {
UpdateSettingsRequestBuilder req = client.nativeClient().admin().indices().prepareUpdateSettings(indexName);
req.setSettings(settings);
req.get();
}
private void executeBulk() {
final BulkRequestBuilder req = this.bulkRequest;
this.bulkRequest = client.prepareBulk().setRefresh(false);
semaphore.acquireUninterruptibly();
req.execute(new BulkResponseActionListener(req));
}
private class BulkResponseActionListener implements ActionListener {
private final BulkRequestBuilder req;
BulkResponseActionListener(BulkRequestBuilder req) {
this.req = req;
}
@Override
public void onResponse(BulkResponse response) {
semaphore.release();
counter.addAndGet(response.getItems().length);
for (BulkItemResponse item : response.getItems()) {
if (item.isFailed()) {
LOGGER.error("index [{}], type [{}], id [{}], message [{}]", item.getIndex(), item.getType(), item.getId(), item.getFailureMessage());
}
}
}
@Override
public void onFailure(Throwable e) {
semaphore.release();
LOGGER.error("Fail to execute bulk index request: " + req, e);
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy