Downloading a project consumes significant server resources. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import static org.apache.solr.common.params.CommonParams.ID;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.ClusterStateProvider;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler;
import org.apache.solr.cloud.autoscaling.OverseerTriggerThread;
import org.apache.solr.cloud.overseer.ClusterStateMutator;
import org.apache.solr.cloud.overseer.CollectionMutator;
import org.apache.solr.cloud.overseer.NodeMutator;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.cloud.overseer.ReplicaMutator;
import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.cloud.overseer.ZkStateWriter;
import org.apache.solr.cloud.overseer.ZkWriteCommand;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrCloseable;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.ConnectionManager;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.CloudConfig;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.handler.admin.CollectionsHandler;
import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.Timer;
/**
* Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
* collections, shards, replicas and setting various properties.
*/
public class Overseer implements SolrCloseable {
// Key under which a queued message carries the name of the operation to perform.
public static final String QUEUE_OPERATION = "operation";
// System properties are used in tests to make them run fast
// Mirrors the delay constant declared on ZkStateReader.
public static final int STATE_UPDATE_DELAY = ZkStateReader.STATE_UPDATE_DELAY;
// Overridable through the "solr.OverseerStateUpdateBatchSize" system property; defaults to 10000.
public static final int STATE_UPDATE_BATCH_SIZE = Integer.getInteger("solr.OverseerStateUpdateBatchSize", 10000);
// NOTE(review): presumably an upper bound on the state update queue length - not referenced in this part of the file, confirm at the usage site.
public static final int STATE_UPDATE_MAX_QUEUE = 20000;
// NOTE(review): presumably the number of task responses retained - not referenced in this part of the file, confirm at the usage site.
public static final int NUM_RESPONSES_TO_STORE = 10000;
// ZooKeeper path prefix of the overseer election; the current leader's data is read from OVERSEER_ELECT + "/leader".
public static final String OVERSEER_ELECT = "/overseer_elect";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// Outcome of a leadership check: DONT_KNOW means the check could not be completed and should be repeated.
enum LeaderStatus {DONT_KNOW, NO, YES}
private class ClusterStateUpdater implements Runnable, Closeable {
// Reader used to refresh and obtain the cluster state.
private final ZkStateReader reader;
// ZooKeeper client obtained from the reader; used for leader checks and leader-node cleanup.
private final SolrZkClient zkClient;
// Election id of this overseer, compared against the id stored in the leader node.
private final String myId;
//queue where everybody can throw tasks
private final ZkDistributedQueue stateUpdateQueue;
//TODO remove in 9.0, we do not push message into this queue anymore
//Internal queue where overseer stores events that have not yet been published into cloudstate
//If Overseer dies while extracting the main queue a new overseer will start from this queue
private final ZkDistributedQueue workQueue;
// Internal map which holds the information about running tasks.
private final DistributedMap runningMap;
// Internal map which holds the information about successfully completed tasks.
private final DistributedMap completedMap;
// Internal map which holds the information about failed tasks.
private final DistributedMap failureMap;
// Stats instance handed to the queues created in the constructor.
private final Stats zkStats;
// Set by close() and polled by the run() loop. close() may be invoked from a different
// thread than the one executing run(), so the flag must be volatile for the write to be
// guaranteed visible to the loop.
private volatile boolean isClosed = false;
public ClusterStateUpdater(final ZkStateReader reader, final String myId, Stats zkStats) {
this.zkClient = reader.getZkClient();
this.zkStats = zkStats;
this.stateUpdateQueue = getStateUpdateQueue(zkStats);
this.workQueue = getInternalWorkQueue(zkClient, zkStats);
this.failureMap = getFailureMap(zkClient);
this.runningMap = getRunningMap(zkClient);
this.completedMap = getCompletedMap(zkClient);
this.myId = myId;
this.reader = reader;
}
/**
 * @return the stats object exposed by the state update queue
 */
public Stats getStateUpdateQueueStats() {
  final Stats queueStats = stateUpdateQueue.getZkStats();
  return queueStats;
}
/**
 * @return the stats object exposed by the internal work queue
 */
public Stats getWorkQueueStats() {
  final Stats queueStats = workQueue.getZkStats();
  return queueStats;
}
/**
 * Main loop of the cluster state updater.
 *
 * <p>After the leadership status is known, the loop repeats the following until this updater
 * is closed or {@link #amILeader()} reports {@code NO}:
 * <ol>
 *   <li>When a refresh is flagged (first iteration, or after a failure), the cluster state is
 *       re-read, a new {@link ZkStateWriter} is created, and the current fallback queue is
 *       drained one message at a time with batching disabled.</li>
 *   <li>A batch of messages is peeked from the state update queue and processed with batching
 *       enabled; processed nodes are removed from the queue once their updates are written.</li>
 * </ol>
 *
 * <p>On exit, a separate thread runs {@link #checkIfIamStillLeader()}.
 */
@Override
public void run() {
  MDCLoggingContext.setNode(zkController.getNodeName() );
  LeaderStatus isLeader = amILeader();
  while (isLeader == LeaderStatus.DONT_KNOW) {
    log.debug("am_i_leader unclear {}", isLeader);
    isLeader = amILeader(); // not a no, not a yes, try ask again
  }
  log.info("Starting to work on the main queue : {}", LeaderElector.getNodeName(myId));
  try {
    ZkStateWriter zkStateWriter = null;
    ClusterState clusterState = null;
    boolean refreshClusterState = true; // let's refresh in the first iteration
    // we write updates in batch, but if an exception is thrown when writing new clusterstate,
    // we cannot be sure which message is the bad one, therefore we re-process nodes one by one
    int fallbackQueueSize = Integer.MAX_VALUE;
    ZkDistributedQueue fallbackQueue = workQueue;
    while (!this.isClosed) {
      isLeader = amILeader();
      if (LeaderStatus.NO == isLeader) {
        break;
      }
      else if (LeaderStatus.YES != isLeader) {
        log.debug("am_i_leader unclear {}", isLeader);
        continue; // not a no, not a yes, try ask again
      }
      //TODO consider removing 'refreshClusterState' and simply check if clusterState is null
      if (refreshClusterState) {
        try {
          reader.forciblyRefreshAllClusterStateSlow();
          clusterState = reader.getClusterState();
          zkStateWriter = new ZkStateWriter(reader, stats);
          refreshClusterState = false;
          // if there were any errors while processing
          // the state queue, items would have been left in the
          // work queue so let's process those first
          byte[] data = fallbackQueue.peek();
          while (fallbackQueueSize > 0 && data != null) {
            final ZkNodeProps message = ZkNodeProps.load(data);
            log.debug("processMessage: fallbackQueueSize: {}, message = {}", fallbackQueue.getZkStats().getQueueLength(), message);
            // force flush to ZK after each message because there is no fallback if workQueue items
            // are removed from workQueue but fail to be written to ZK
            try {
              clusterState = processQueueItem(message, clusterState, zkStateWriter, false, null);
            } catch (Exception e) {
              if (isBadMessage(e)) {
                log.warn("Exception when process message = {}, consider as bad message and poll out from the queue", message);
                fallbackQueue.poll();
              }
              // rethrown so the outer handler below flags another refresh
              throw e;
            }
            fallbackQueue.poll(); // poll-ing removes the element we got by peek-ing
            data = fallbackQueue.peek();
            fallbackQueueSize--;
          }
          // force flush at the end of the loop, if there are no pending updates, this is a no op call
          clusterState = zkStateWriter.writePendingUpdates();
          // the workQueue is empty now, use stateUpdateQueue as fallback queue
          fallbackQueue = stateUpdateQueue;
          fallbackQueueSize = 0;
        } catch (AlreadyClosedException e) {
          return;
        } catch (KeeperException.SessionExpiredException e) {
          log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
          return;
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          return;
        } catch (Exception e) {
          log.error("Exception in Overseer when process message from work queue, retrying", e);
          refreshClusterState = true;
          continue;
        }
      }
      // Each element pairs a queue node name (first) with that node's payload (second).
      LinkedList<Pair<String, byte[]>> queue = null;
      try {
        // We do not need to filter any nodes here cause all processed nodes are removed once we flush clusterstate
        queue = new LinkedList<>(stateUpdateQueue.peekElements(1000, 3000L, (x) -> true));
      } catch (KeeperException.SessionExpiredException e) {
        log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
        return;
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      } catch (AlreadyClosedException e) {
        // deliberately ignored: queue stays null and the loop condition is re-evaluated
      } catch (Exception e) {
        log.error("Exception in Overseer main queue loop", e);
      }
      try {
        // Names of queue nodes that were processed but not yet removed from the queue.
        Set<String> processedNodes = new HashSet<>();
        while (queue != null && !queue.isEmpty()) {
          for (Pair<String, byte[]> head : queue) {
            byte[] data = head.second();
            final ZkNodeProps message = ZkNodeProps.load(data);
            log.debug("processMessage: queueSize: {}, message = {} current state version: {}", stateUpdateQueue.getZkStats().getQueueLength(), message, clusterState.getZkClusterStateVersion());
            processedNodes.add(head.first());
            fallbackQueueSize = processedNodes.size();
            // The callback always be called on this thread
            clusterState = processQueueItem(message, clusterState, zkStateWriter, true, () -> {
              stateUpdateQueue.remove(processedNodes);
              processedNodes.clear();
            });
          }
          if (isClosed) break;
          // if an event comes in the next 100ms batch it together
          queue = new LinkedList<>(stateUpdateQueue.peekElements(1000, 100, node -> !processedNodes.contains(node)));
        }
        fallbackQueueSize = processedNodes.size();
        // we should force write all pending updates because the next iteration might sleep until there
        // are more items in the main queue
        clusterState = zkStateWriter.writePendingUpdates();
        // clean work queue
        stateUpdateQueue.remove(processedNodes);
        processedNodes.clear();
      } catch (KeeperException.SessionExpiredException e) {
        log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
        return;
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      } catch (AlreadyClosedException e) {
        // deliberately ignored: the loop condition is re-evaluated on the next iteration
      } catch (Exception e) {
        log.error("Exception in Overseer main queue loop", e);
        refreshClusterState = true; // it might have been a bad version error
      }
    }
  } finally {
    log.info("Overseer Loop exiting : {}", LeaderElector.getNodeName(myId));
    //do this in a separate thread because any wait is interrupted in this main thread
    new Thread(this::checkIfIamStillLeader, "OverseerExitThread").start();
  }
}
/**
 * Decides whether an exception raised while processing a queue message indicates an invalid
 * state or a 'bad' message, in which case the message should be removed from the queue.
 *
 * @param e the exception raised during processing
 * @return for a {@link KeeperException}, {@code true} only when its code is NONODE or
 *         NODEEXISTS; for any other exception, {@code true} unless it is an
 *         {@link InterruptedException}
 */
private boolean isBadMessage(Exception e) {
  if (!(e instanceof KeeperException)) {
    return !(e instanceof InterruptedException);
  }
  final KeeperException.Code code = ((KeeperException) e).code();
  return code == KeeperException.Code.NONODE || code == KeeperException.Code.NODEEXISTS;
}
/**
 * Processes a single queue message and enqueues the resulting write commands.
 *
 * <p>A failure inside {@link #processMessage} is logged and counted as an error for the
 * operation; the message is then skipped and the given cluster state is returned unchanged.
 * Failures while enqueueing or writing updates propagate to the caller.
 *
 * @param message        the queue message; must carry a {@link #QUEUE_OPERATION} entry
 * @param clusterState   the cluster state the message is applied to
 * @param zkStateWriter  writer that receives the resulting commands
 * @param enableBatching when {@code false}, pending updates are written immediately after
 *                       being enqueued
 * @param callback       callback handed to the writer together with the commands; may be null
 * @return the cluster state returned by the writer, or {@code clusterState} if nothing was enqueued
 * @throws SolrException if the message has no operation
 * @throws Exception     if enqueueing or writing the updates fails
 */
private ClusterState processQueueItem(ZkNodeProps message, ClusterState clusterState, ZkStateWriter zkStateWriter, boolean enableBatching, ZkStateWriter.ZkWriteCallback callback) throws Exception {
  final String operation = message.getStr(QUEUE_OPERATION);
  if (operation == null) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Message missing " + QUEUE_OPERATION + ":" + message);
  }
  List<ZkWriteCommand> zkWriteCommands = null;
  final Timer.Context timerContext = stats.time(operation);
  try {
    zkWriteCommands = processMessage(clusterState, message, operation);
    stats.success(operation);
  } catch (Exception e) {
    // generally there is nothing we can do - in most cases, we have
    // an issue that will fail again on retry or we cannot communicate with a
    // ZooKeeper in which case another Overseer should take over
    // TODO: if ordering for the message is not important, we could
    // track retries and put it back on the end of the queue
    log.error("Overseer could not process the current clusterstate state update message, skipping the message: {}", message, e);
    stats.error(operation);
  } finally {
    timerContext.stop();
  }
  if (zkWriteCommands != null) {
    clusterState = zkStateWriter.enqueueUpdate(clusterState, zkWriteCommands, callback);
    if (!enableBatching) {
      clusterState = zkStateWriter.writePendingUpdates();
    }
  }
  return clusterState;
}
/**
 * Runs when the updater loop exits. If the leader node still carries this overseer's id, the
 * node is deleted (guarded by the version that was read); afterwards, unless the core container
 * is shut down, the overseer election is rejoined.
 *
 * <p>Nothing is done when the controller reports that it is shutting down or closed, or when
 * the leader node cannot be read.
 */
private void checkIfIamStillLeader() {
  if (zkController != null) {
    final boolean shuttingDown = zkController.getCoreContainer().isShutDown() || zkController.isClosed();
    if (shuttingDown) {
      return;//shutting down no need to go further
    }
  }
  final org.apache.zookeeper.data.Stat leaderStat = new org.apache.zookeeper.data.Stat();
  final String leaderPath = OVERSEER_ELECT + "/leader";
  byte[] leaderData;
  try {
    leaderData = zkClient.getData(leaderPath, null, leaderStat, true);
  } catch (AlreadyClosedException e) {
    return;
  } catch (Exception e) {
    log.warn("Error communicating with ZooKeeper", e);
    return;
  }
  try {
    final Map leaderProps = (Map) Utils.fromJSON(leaderData);
    final String leaderId = (String) leaderProps.get(ID);
    if (!overseerCollectionConfigSetProcessor.getId().equals(leaderId)) {
      log.info("somebody else (id={}) has already taken up the overseer position", leaderId);
    } else {
      try {
        log.warn("I (id={}) am exiting, but I'm still the leader",
            overseerCollectionConfigSetProcessor.getId());
        // version-guarded delete: fails with BadVersionException if the node changed since it was read
        zkClient.delete(leaderPath, leaderStat.getVersion(), true);
      } catch (KeeperException.BadVersionException e) {
        //no problem ignore it some other Overseer has already taken over
      } catch (Exception e) {
        log.error("Could not delete my leader node "+leaderPath, e);
      }
    }
  } finally {
    //if I am not shutting down, Then I need to rejoin election
    try {
      if (zkController != null && !zkController.getCoreContainer().isShutDown()) {
        zkController.rejoinOverseerElection(null, false);
      }
    } catch (Exception e) {
      log.warn("Unable to rejoinElection ",e);
    }
  }
}
/**
 * Translates a queue message into the write commands that apply it to the cluster state.
 *
 * <p>The operation name is first looked up as a collection action and, if it is not one, as an
 * overseer action. Operations that produce no state change (a BALANCESHARDUNIQUE that balanced
 * nothing, or a QUIT message) yield a single {@link ZkStateWriter#NO_OP} command.
 *
 * @param clusterState the cluster state the message is applied to
 * @param message      the queue message
 * @param operation    the operation name taken from the message
 * @return the write commands for this message
 * @throws RuntimeException if the operation is unknown or not handled here
 */
private List<ZkWriteCommand> processMessage(ClusterState clusterState,
    final ZkNodeProps message, final String operation) {
  CollectionParams.CollectionAction collectionAction = CollectionParams.CollectionAction.get(operation);
  if (collectionAction != null) {
    switch (collectionAction) {
      case CREATE:
        return Collections.singletonList(new ClusterStateMutator(getSolrCloudManager()).createCollection(clusterState, message));
      case DELETE:
        return Collections.singletonList(new ClusterStateMutator(getSolrCloudManager()).deleteCollection(clusterState, message));
      case CREATESHARD:
        return Collections.singletonList(new CollectionMutator(getSolrCloudManager()).createShard(clusterState, message));
      case DELETESHARD:
        return Collections.singletonList(new CollectionMutator(getSolrCloudManager()).deleteShard(clusterState, message));
      case ADDREPLICA:
        return Collections.singletonList(new SliceMutator(getSolrCloudManager()).addReplica(clusterState, message));
      case ADDREPLICAPROP:
        return Collections.singletonList(new ReplicaMutator(getSolrCloudManager()).addReplicaProperty(clusterState, message));
      case DELETEREPLICAPROP:
        return Collections.singletonList(new ReplicaMutator(getSolrCloudManager()).deleteReplicaProperty(clusterState, message));
      case BALANCESHARDUNIQUE:
        ExclusiveSliceProperty dProp = new ExclusiveSliceProperty(clusterState, message);
        if (dProp.balanceProperty()) {
          String collName = message.getStr(ZkStateReader.COLLECTION_PROP);
          return Collections.singletonList(new ZkWriteCommand(collName, dProp.getDocCollection()));
        }
        // nothing was balanced: fall out of the switch to the NO_OP result
        break;
      case MODIFYCOLLECTION:
        CollectionsHandler.verifyRuleParams(zkController.getCoreContainer() ,message.getProperties());
        return Collections.singletonList(new CollectionMutator(getSolrCloudManager()).modifyCollection(clusterState,message));
      case MIGRATESTATEFORMAT:
        return Collections.singletonList(new ClusterStateMutator(getSolrCloudManager()).migrateStateFormat(clusterState, message));
      default:
        throw new RuntimeException("unknown operation:" + operation
            + " contents:" + message.getProperties());
    }
  } else {
    OverseerAction overseerAction = OverseerAction.get(operation);
    if (overseerAction == null) {
      throw new RuntimeException("unknown operation:" + operation + " contents:" + message.getProperties());
    }
    switch (overseerAction) {
      case STATE:
        return Collections.singletonList(new ReplicaMutator(getSolrCloudManager()).setState(clusterState, message));
      case LEADER:
        return Collections.singletonList(new SliceMutator(getSolrCloudManager()).setShardLeader(clusterState, message));
      case DELETECORE:
        return Collections.singletonList(new SliceMutator(getSolrCloudManager()).removeReplica(clusterState, message));
      case ADDROUTINGRULE:
        return Collections.singletonList(new SliceMutator(getSolrCloudManager()).addRoutingRule(clusterState, message));
      case REMOVEROUTINGRULE:
        return Collections.singletonList(new SliceMutator(getSolrCloudManager()).removeRoutingRule(clusterState, message));
      case UPDATESHARDSTATE:
        return Collections.singletonList(new SliceMutator(getSolrCloudManager()).updateShardState(clusterState, message));
      case QUIT:
        // only honour a QUIT that is addressed to this overseer
        if (myId.equals(message.get(ID))) {
          log.info("Quit command received {} {}", message, LeaderElector.getNodeName(myId));
          overseerCollectionConfigSetProcessor.close();
          close();
        } else {
          log.warn("Overseer received wrong QUIT message {}", message);
        }
        break;
      case DOWNNODE:
        return new NodeMutator().downNode(clusterState, message);
      default:
        throw new RuntimeException("unknown operation:" + operation + " contents:" + message.getProperties());
    }
  }
  return Collections.singletonList(ZkStateWriter.NO_OP);
}
/**
 * Checks the overseer leader node to find out whether this overseer is still the leader.
 *
 * <p>The call is timed and recorded under the {@code am_i_leader} stat: as an error when
 * reading the leader node raised an exception, as a success otherwise.
 *
 * @return {@code YES} when the leader node carries this overseer's id, {@code DONT_KNOW} when
 *         the read failed with a connection loss, {@code NO} in every other case
 */
private LeaderStatus amILeader() {
  final Timer.Context timer = stats.time("am_i_leader");
  boolean failed = false;
  String leaderId = null;
  try {
    final byte[] leaderData = zkClient.getData(OVERSEER_ELECT + "/leader", null, null, true);
    leaderId = ZkNodeProps.load(leaderData).getStr(ID);
    if (myId.equals(leaderId)) {
      return LeaderStatus.YES;
    }
  } catch (KeeperException e) {
    failed = true;
    final KeeperException.Code code = e.code();
    if (code == KeeperException.Code.CONNECTIONLOSS) {
      log.error("", e);
      return LeaderStatus.DONT_KNOW;
    }
    if (code == KeeperException.Code.SESSIONEXPIRED) {
      log.debug("", e);
    } else {
      log.warn("", e);
    }
  } catch (InterruptedException e) {
    failed = true;
    Thread.currentThread().interrupt();
  } catch (AlreadyClosedException e) {
    failed = true;
  } catch (Exception e) {
    failed = true;
    log.warn("Unexpected exception", e);
  } finally {
    timer.stop();
    if (failed) {
      stats.error("am_i_leader");
    } else {
      stats.success("am_i_leader");
    }
  }
  log.info("According to ZK I (id={}) am no longer a leader. propsId={}", myId, leaderId);
  return LeaderStatus.NO;
}
/**
 * Marks this updater as closed; the {@code run()} loop checks the flag and stops iterating.
 */
@Override
public void close() {
  isClosed = true;
}
}
/**
 * A thread that runs a task which is both {@link Runnable} and {@link Closeable}, and that
 * closes the task when the thread itself is closed.
 *
 * <p>The task is passed as a {@link Closeable} and cast to {@link Runnable}; a task that is
 * not a {@code Runnable} makes the constructor fail with a {@link ClassCastException}.
 */
public static class OverseerThread extends Thread implements Closeable {

  /** Becomes {@code true} once {@link #close()} has closed the wrapped task. */
  protected volatile boolean isClosed;

  /** The wrapped task; also the target this thread runs. */
  private Closeable thread;

  /**
   * @param tg     thread group for the new thread
   * @param thread task to run; must also implement {@link Runnable}
   */
  public OverseerThread(ThreadGroup tg, Closeable thread) {
    super(tg, (Runnable) thread);
    this.thread = thread;
  }

  /**
   * @param ccTg   thread group for the new thread
   * @param thread task to run; must also implement {@link Runnable}
   * @param name   name of the new thread
   */
  public OverseerThread(ThreadGroup ccTg, Closeable thread, String name) {
    super(ccTg, (Runnable) thread, name);
    this.thread = thread;
  }

  /** @return the wrapped task */
  public Closeable getThread() {
    return thread;
  }

  /** @return whether {@link #close()} has completed */
  public boolean isClosed() {
    return isClosed;
  }

  /**
   * Closes the wrapped task and then marks this thread as closed. If closing the task throws,
   * the closed flag is left untouched.
   */
  @Override
  public void close() throws IOException {
    thread.close();
    isClosed = true;
  }
}
// Thread running the OverseerCollectionConfigSetProcessor; created in start().
private OverseerThread ccThread;
// Thread running the ClusterStateUpdater; created in start().
private OverseerThread updaterThread;
// Thread running the OverseerTriggerThread (autoscaling triggers); created in start().
private OverseerThread triggerThread;
// State reader supplied by the creator of this Overseer.
private final ZkStateReader reader;
// Shard handler passed on to the collection/config-set processor and the node prioritizer.
private final HttpShardHandler shardHandler;
// Supplies the default HTTP client used by the node prioritizer and the compat check.
private final UpdateShardHandler updateShardHandler;
// Admin path passed on to the collection/config-set processor and the node prioritizer.
private final String adminPath;
// Created in start(); its id is compared against the leader node when the updater loop exits.
private OverseerCollectionConfigSetProcessor overseerCollectionConfigSetProcessor;
// Owning controller; start() tolerates null only when setting the logging context.
private ZkController zkController;
// Replaced with a fresh instance on every start().
private Stats stats;
// Election id given to the most recent start() call.
private String id;
// Reset to false at the beginning of start().
private volatile boolean closed;
// True until doCompatCheck() has run once; prevents the check from running again.
private volatile boolean systemCollCompatCheck = true;
// Cloud configuration handed to the autoscaling trigger thread.
private CloudConfig config;
/**
 * Creates an Overseer that is not yet running; the worker threads are created by
 * {@code start(String)}. The overseer is not responsible for closing {@code reader}.
 *
 * @param shardHandler       shard handler handed to the components created on start
 * @param updateShardHandler source of the default HTTP client
 * @param adminPath          admin path handed to the components created on start
 * @param reader             state reader; remains owned by the caller
 * @param zkController       owning controller
 * @param config             cloud configuration
 */
public Overseer(HttpShardHandler shardHandler,
    UpdateShardHandler updateShardHandler, String adminPath,
    final ZkStateReader reader, ZkController zkController, CloudConfig config)
    throws KeeperException, InterruptedException {
  this.shardHandler = shardHandler;
  this.updateShardHandler = updateShardHandler;
  this.adminPath = adminPath;
  this.reader = reader;
  this.zkController = zkController;
  this.config = config;
  this.stats = new Stats();
}
/**
 * Starts this overseer under the given election id.
 *
 * <p>Any previously started state is closed first and the stats are reset. Then the overseer
 * node is created and three threads are created and started: the cluster state updater, the
 * collection/config-set processor, and the autoscaling trigger thread. Finally a compatibility
 * check of the {@code .system} collection is requested; each reported issue is logged as a
 * warning, preceded once by a header line.
 *
 * @param id election id of this overseer
 */
public synchronized void start(String id) {
  MDCLoggingContext.setNode(zkController == null ?
      null :
      zkController.getNodeName());
  this.id = id;
  closed = false;
  doClose();
  stats = new Stats();
  log.info("Overseer (id={}) starting", id);
  createOverseerNode(reader.getZkClient());
  //launch cluster state updater thread
  ThreadGroup tg = new ThreadGroup("Overseer state updater.");
  updaterThread = new OverseerThread(tg, new ClusterStateUpdater(reader, id, stats), "OverseerStateUpdate-" + id);
  updaterThread.setDaemon(true);
  ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
  OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, getStateUpdateQueue(), adminPath, shardHandler.getShardHandlerFactory(), updateShardHandler.getDefaultHttpClient());
  overseerCollectionConfigSetProcessor = new OverseerCollectionConfigSetProcessor(reader, id, shardHandler, adminPath, stats, Overseer.this, overseerPrioritizer);
  ccThread = new OverseerThread(ccTg, overseerCollectionConfigSetProcessor, "OverseerCollectionConfigSetProcessor-" + id);
  ccThread.setDaemon(true);
  ThreadGroup triggerThreadGroup = new ThreadGroup("Overseer autoscaling triggers");
  OverseerTriggerThread trigger = new OverseerTriggerThread(zkController.getCoreContainer().getResourceLoader(),
      zkController.getSolrCloudManager(), config);
  triggerThread = new OverseerThread(triggerThreadGroup, trigger, "OverseerAutoScalingTriggerThread-" + id);
  updaterThread.start();
  ccThread.start();
  triggerThread.start();
  // The type arguments are required: against a raw BiConsumer, accept(String, Object)
  // would not override accept(Object, Object) and the @Override below would not compile.
  systemCollectionCompatCheck(new BiConsumer<String, Object>() {
    // true until the header line has been logged
    boolean firstPair = true;
    @Override
    public void accept(String s, Object o) {
      if (firstPair) {
        log.warn("WARNING: Collection '.system' may need re-indexing due to compatibility issues listed below. See REINDEXCOLLECTION documentation for more details.");
        firstPair = false;
      }
      log.warn("WARNING: *\t{}:\t{}", s, o);
    }
  });
  assert ObjectReleaseTracker.track(this);
}
/**
 * Runs the compatibility check of the {@code .system} collection once all leaders of its
 * active shards are active.
 *
 * <p>Returns without doing anything when no cluster state is available (a warning is logged)
 * or when the collection does not exist. If every active shard already has an active leader
 * the check runs immediately; otherwise a collection state watcher is registered that runs the
 * check as soon as that condition holds.
 *
 * @param consumer receives the findings of the check
 */
public void systemCollectionCompatCheck(final BiConsumer consumer) {
  final ClusterState currentState = zkController.getClusterState();
  if (currentState == null) {
    log.warn("Unable to check back-compat of .system collection - can't obtain ClusterState.");
    return;
  }
  final DocCollection systemColl = currentState.getCollectionOrNull(CollectionAdminParams.SYSTEM_COLL);
  if (systemColl == null) {
    return;
  }
  // check that all shard leaders are active
  boolean leaderMissing = false;
  for (Slice slice : systemColl.getActiveSlices()) {
    if (slice.getLeader() == null || !slice.getLeader().isActive(currentState.getLiveNodes())) {
      leaderMissing = true;
      break;
    }
  }
  if (!leaderMissing) {
    doCompatCheck(consumer);
    return;
  }
  // wait for all leaders to become active and then check
  zkController.zkStateReader.registerCollectionStateWatcher(CollectionAdminParams.SYSTEM_COLL, (liveNodes, state) -> {
    if (state == null || liveNodes.isEmpty()) {
      // returns true without running the check
      return true;
    }
    for (Slice slice : state.getActiveSlices()) {
      if (slice.getLeader() == null || !slice.getLeader().isActive(liveNodes)) {
        // a shard leader is still missing or inactive: do not run the check yet
        return false;
      }
    }
    doCompatCheck(consumer);
    return true;
  });
}
private void doCompatCheck(BiConsumer consumer) {
if (systemCollCompatCheck) {
systemCollCompatCheck = false;
} else {
return;
}
try (CloudSolrClient client = new CloudSolrClient.Builder(Collections.singletonList(getZkController().getZkServerAddress()), Optional.empty())
.withSocketTimeout(30000).withConnectionTimeout(15000)
.withHttpClient(updateShardHandler.getDefaultHttpClient()).build()) {
CollectionAdminRequest.ColStatus req = CollectionAdminRequest.collectionStatus(CollectionAdminParams.SYSTEM_COLL)
.setWithSegments(true)
.setWithFieldInfo(true);
CollectionAdminResponse rsp = req.process(client);
NamedList