org.elasticsearch.gateway.GatewayService Maven / Gradle / Ivy
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.gateway;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.cluster.*;
import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.discovery.Discovery;
import org.elasticsearch.discovery.DiscoveryService;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.threadpool.ThreadPool;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.elasticsearch.cluster.ClusterState.newClusterStateBuilder;
import static org.elasticsearch.cluster.metadata.MetaData.newMetaDataBuilder;
/**
*
*/
public class GatewayService extends AbstractLifecycleComponent implements ClusterStateListener {
public static final ClusterBlock STATE_NOT_RECOVERED_BLOCK = new ClusterBlock(1, "state not recovered / initialized", true, true, RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL);
private final Gateway gateway;
private final ThreadPool threadPool;
private final AllocationService allocationService;
private final ClusterService clusterService;
private final DiscoveryService discoveryService;
private final TimeValue recoverAfterTime;
private final int recoverAfterNodes;
private final int expectedNodes;
private final int recoverAfterDataNodes;
private final int expectedDataNodes;
private final int recoverAfterMasterNodes;
private final int expectedMasterNodes;
private final AtomicBoolean recovered = new AtomicBoolean();
private final AtomicBoolean scheduledRecovery = new AtomicBoolean();
@Inject
public GatewayService(Settings settings, Gateway gateway, AllocationService allocationService, ClusterService clusterService, DiscoveryService discoveryService, ThreadPool threadPool) {
super(settings);
this.gateway = gateway;
this.allocationService = allocationService;
this.clusterService = clusterService;
this.discoveryService = discoveryService;
this.threadPool = threadPool;
// allow to control a delay of when indices will get created
this.recoverAfterTime = componentSettings.getAsTime("recover_after_time", null);
this.recoverAfterNodes = componentSettings.getAsInt("recover_after_nodes", -1);
this.expectedNodes = componentSettings.getAsInt("expected_nodes", -1);
this.recoverAfterDataNodes = componentSettings.getAsInt("recover_after_data_nodes", -1);
this.expectedDataNodes = componentSettings.getAsInt("expected_data_nodes", -1);
// default the recover after master nodes to the minimum master nodes in the discovery
this.recoverAfterMasterNodes = componentSettings.getAsInt("recover_after_master_nodes", settings.getAsInt("discovery.zen.minimum_master_nodes", -1));
this.expectedMasterNodes = componentSettings.getAsInt("expected_master_nodes", -1);
// Add the not recovered as initial state block, we don't allow anything until
this.clusterService.addInitialStateBlock(STATE_NOT_RECOVERED_BLOCK);
}
@Override
protected void doStart() throws ElasticSearchException {
gateway.start();
// if we received initial state, see if we can recover within the start phase, so we hold the
// node from starting until we recovered properly
if (discoveryService.initialStateReceived()) {
ClusterState clusterState = clusterService.state();
DiscoveryNodes nodes = clusterState.nodes();
if (clusterState.nodes().localNodeMaster() && clusterState.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) {
if (clusterState.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) {
logger.debug("not recovering from gateway, no master elected yet");
} else if (recoverAfterNodes != -1 && (nodes.masterAndDataNodes().size()) < recoverAfterNodes) {
logger.debug("not recovering from gateway, nodes_size (data+master) [" + nodes.masterAndDataNodes().size() + "] < recover_after_nodes [" + recoverAfterNodes + "]");
} else if (recoverAfterDataNodes != -1 && nodes.dataNodes().size() < recoverAfterDataNodes) {
logger.debug("not recovering from gateway, nodes_size (data) [" + nodes.dataNodes().size() + "] < recover_after_data_nodes [" + recoverAfterDataNodes + "]");
} else if (recoverAfterMasterNodes != -1 && nodes.masterNodes().size() < recoverAfterMasterNodes) {
logger.debug("not recovering from gateway, nodes_size (master) [" + nodes.masterNodes().size() + "] < recover_after_master_nodes [" + recoverAfterMasterNodes + "]");
} else {
boolean ignoreRecoverAfterTime;
if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
// no expected is set, don't ignore the timeout
ignoreRecoverAfterTime = false;
} else {
// one of the expected is set, see if all of them meet the need, and ignore the timeout in this case
ignoreRecoverAfterTime = true;
if (expectedNodes != -1 && (nodes.masterAndDataNodes().size() < expectedNodes)) { // does not meet the expected...
ignoreRecoverAfterTime = false;
}
if (expectedMasterNodes != -1 && (nodes.masterNodes().size() < expectedMasterNodes)) { // does not meet the expected...
ignoreRecoverAfterTime = false;
}
if (expectedDataNodes != -1 && (nodes.dataNodes().size() < expectedDataNodes)) { // does not meet the expected...
ignoreRecoverAfterTime = false;
}
}
performStateRecovery(ignoreRecoverAfterTime);
}
}
} else {
logger.debug("can't wait on start for (possibly) reading state from gateway, will do it asynchronously");
}
clusterService.addLast(this);
}
@Override
protected void doStop() throws ElasticSearchException {
clusterService.remove(this);
gateway.stop();
}
@Override
protected void doClose() throws ElasticSearchException {
gateway.close();
}
@Override
public void clusterChanged(final ClusterChangedEvent event) {
if (lifecycle.stoppedOrClosed()) {
return;
}
if (event.state().blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) {
// we need to clear those flags, since we might need to recover again in case we disconnect
// from the cluster and then reconnect
recovered.set(false);
scheduledRecovery.set(false);
}
if (event.localNodeMaster() && event.state().blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) {
ClusterState clusterState = event.state();
DiscoveryNodes nodes = clusterState.nodes();
if (event.state().blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) {
logger.debug("not recovering from gateway, no master elected yet");
} else if (recoverAfterNodes != -1 && (nodes.masterAndDataNodes().size()) < recoverAfterNodes) {
logger.debug("not recovering from gateway, nodes_size (data+master) [" + nodes.masterAndDataNodes().size() + "] < recover_after_nodes [" + recoverAfterNodes + "]");
} else if (recoverAfterDataNodes != -1 && nodes.dataNodes().size() < recoverAfterDataNodes) {
logger.debug("not recovering from gateway, nodes_size (data) [" + nodes.dataNodes().size() + "] < recover_after_data_nodes [" + recoverAfterDataNodes + "]");
} else if (recoverAfterMasterNodes != -1 && nodes.masterNodes().size() < recoverAfterMasterNodes) {
logger.debug("not recovering from gateway, nodes_size (master) [" + nodes.masterNodes().size() + "] < recover_after_master_nodes [" + recoverAfterMasterNodes + "]");
} else {
boolean ignoreRecoverAfterTime;
if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
// no expected is set, don't ignore the timeout
ignoreRecoverAfterTime = false;
} else {
// one of the expected is set, see if all of them meet the need, and ignore the timeout in this case
ignoreRecoverAfterTime = true;
if (expectedNodes != -1 && (nodes.masterAndDataNodes().size() < expectedNodes)) { // does not meet the expected...
ignoreRecoverAfterTime = false;
}
if (expectedMasterNodes != -1 && (nodes.masterNodes().size() < expectedMasterNodes)) { // does not meet the expected...
ignoreRecoverAfterTime = false;
}
if (expectedDataNodes != -1 && (nodes.dataNodes().size() < expectedDataNodes)) { // does not meet the expected...
ignoreRecoverAfterTime = false;
}
}
final boolean fIgnoreRecoverAfterTime = ignoreRecoverAfterTime;
threadPool.generic().execute(new Runnable() {
@Override
public void run() {
performStateRecovery(fIgnoreRecoverAfterTime);
}
});
}
}
}
private void performStateRecovery(boolean ignoreRecoverAfterTime) {
final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener(new CountDownLatch(1));
if (!ignoreRecoverAfterTime && recoverAfterTime != null) {
if (scheduledRecovery.compareAndSet(false, true)) {
logger.debug("delaying initial state recovery for [{}]", recoverAfterTime);
threadPool.schedule(recoverAfterTime, ThreadPool.Names.GENERIC, new Runnable() {
@Override
public void run() {
if (recovered.compareAndSet(false, true)) {
logger.trace("performing state recovery...");
gateway.performStateRecovery(recoveryListener);
}
}
});
}
} else {
if (recovered.compareAndSet(false, true)) {
logger.trace("performing state recovery...");
gateway.performStateRecovery(recoveryListener);
}
}
}
class GatewayRecoveryListener implements Gateway.GatewayStateRecoveredListener {
private final CountDownLatch latch;
GatewayRecoveryListener(CountDownLatch latch) {
this.latch = latch;
}
@Override
public void onSuccess(final ClusterState recoveredState) {
logger.trace("successful state recovery, importing cluster state...");
clusterService.submitStateUpdateTask("local-gateway-elected-state", new ProcessedClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
assert currentState.metaData().indices().isEmpty();
// remove the block, since we recovered from gateway
ClusterBlocks.Builder blocks = ClusterBlocks.builder()
.blocks(currentState.blocks())
.blocks(recoveredState.blocks())
.removeGlobalBlock(STATE_NOT_RECOVERED_BLOCK);
MetaData.Builder metaDataBuilder = newMetaDataBuilder()
.metaData(recoveredState.metaData());
if (recoveredState.metaData().settings().getAsBoolean(MetaData.SETTING_READ_ONLY, false) || currentState.metaData().settings().getAsBoolean(MetaData.SETTING_READ_ONLY, false)) {
blocks.addGlobalBlock(MetaData.CLUSTER_READ_ONLY_BLOCK);
}
for (IndexMetaData indexMetaData : recoveredState.metaData()) {
metaDataBuilder.put(indexMetaData, false);
blocks.addBlocks(indexMetaData);
}
// update the state to reflect the new metadata and routing
ClusterState updatedState = newClusterStateBuilder().state(currentState)
.blocks(blocks)
.metaData(metaDataBuilder)
.build();
// initialize all index routing tables as empty
RoutingTable.Builder routingTableBuilder = RoutingTable.builder().routingTable(updatedState.routingTable());
for (IndexMetaData indexMetaData : updatedState.metaData().indices().values()) {
routingTableBuilder.addAsRecovery(indexMetaData);
}
// start with 0 based versions for routing table
routingTableBuilder.version(0);
// now, reroute
RoutingAllocation.Result routingResult = allocationService.reroute(newClusterStateBuilder().state(updatedState).routingTable(routingTableBuilder).build());
return newClusterStateBuilder().state(updatedState).routingResult(routingResult).build();
}
@Override
public void clusterStateProcessed(ClusterState clusterState) {
logger.info("recovered [{}] indices into cluster_state", clusterState.metaData().indices().size());
latch.countDown();
}
});
}
@Override
public void onFailure(String message) {
recovered.set(false);
scheduledRecovery.set(false);
// don't remove the block here, we don't want to allow anything in such a case
logger.info("metadata state not restored, reason: {}", message);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy