org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud.api.collections;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.cloud.LockTree;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.OverseerMessageHandler;
import org.apache.solr.cloud.OverseerNodePrioritizer;
import org.apache.solr.cloud.OverseerSolrResponse;
import org.apache.solr.cloud.OverseerTaskProcessor;
import org.apache.solr.cloud.Stats;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.SolrCloseable;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkConfigManager;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.RTimer;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY;
import static org.apache.solr.common.cloud.DocCollection.SNITCH;
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.ELECTION_NODE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_VALUE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REJOIN_AT_HEAD_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
import static org.apache.solr.common.params.CollectionAdminParams.COLOCATED_WITH;
import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
import static org.apache.solr.common.util.Utils.makeMap;
/**
* A {@link OverseerMessageHandler} that handles Collections API related
* overseer messages.
*/
public class OverseerCollectionMessageHandler implements OverseerMessageHandler, SolrCloseable {
// ---- Parameter / property names used in Collections API messages ----
/** Name of the "number of shards" parameter ({@code "numShards"}). */
public static final String NUM_SLICES = "numShards";
/** Default applied when {@link #CREATE_NODE_SET_SHUFFLE} is not supplied. */
public static final boolean CREATE_NODE_SET_SHUFFLE_DEFAULT = true;
/** Alias for {@link CollectionAdminParams#CREATE_NODE_SET_SHUFFLE_PARAM}. */
public static final String CREATE_NODE_SET_SHUFFLE = CollectionAdminParams.CREATE_NODE_SET_SHUFFLE_PARAM;
/** Literal {@code "EMPTY"}; presumably a sentinel value for {@link #CREATE_NODE_SET} - confirm against the create command. */
public static final String CREATE_NODE_SET_EMPTY = "EMPTY";
/** Alias for {@link CollectionAdminParams#CREATE_NODE_SET_PARAM}. */
public static final String CREATE_NODE_SET = CollectionAdminParams.CREATE_NODE_SET_PARAM;
public static final String ROUTER = "router";
public static final String SHARDS_PROP = "shards";
public static final String REQUESTID = "requestid";
/** Prefix ({@code "property."}) for collection property parameters. */
public static final String COLL_PROP_PREFIX = "property.";
public static final String ONLY_IF_DOWN = "onlyIfDown";
public static final String SHARD_UNIQUE = "shardUnique";
public static final String ONLY_ACTIVE_NODES = "onlyactivenodes";
static final String SKIP_CREATE_REPLICA_IN_CLUSTER_STATE = "skipCreateReplicaInClusterState";
/**
 * Unmodifiable map from collection property name to its default value.
 * A {@code null} value means the property has no default.
 */
public static final Map COLLECTION_PROPS_AND_DEFAULTS = Collections.unmodifiableMap(makeMap(
ROUTER, DocRouter.DEFAULT_NAME,
ZkStateReader.REPLICATION_FACTOR, "1",
ZkStateReader.NRT_REPLICAS, "1",
ZkStateReader.TLOG_REPLICAS, "0",
ZkStateReader.PULL_REPLICAS, "0",
ZkStateReader.MAX_SHARDS_PER_NODE, "1",
ZkStateReader.AUTO_ADD_REPLICAS, "false",
DocCollection.RULE, null,
POLICY, null,
SNITCH, null,
WITH_COLLECTION, null,
COLOCATED_WITH, null));
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/** Key {@code "failure"}; presumably used in result lists by code outside this excerpt - confirm. */
public static final String FAILURE_FIELD = "failure";
/** Key {@code "success"}; presumably used in result lists by code outside this excerpt - confirm. */
public static final String SUCCESS_FIELD = "success";
// ---- Collaborators; all are assigned in the constructor ----
Overseer overseer;
HttpShardHandlerFactory shardHandlerFactory;
// Core admin handler path; sent as the "qt" parameter of shard requests (see processRebalanceLeaders).
String adminPath;
ZkStateReader zkStateReader;
// Obtained from overseer.getSolrCloudManager() in the constructor.
SolrCloudManager cloudManager;
String myId;
Stats stats;
// Obtained from cloudManager.getTimeSource() in the constructor.
TimeSource timeSource;
// Set that tracks collections that are currently being processed by a running task.
// This is used for handling mutual exclusion of the tasks.
final private LockTree lockTree = new LockTree();
// Executor created eagerly with the handler. NOTE(review): the arguments appear to follow the
// java.util.concurrent.ThreadPoolExecutor constructor order (core=5, max=10, keep-alive=0ms,
// direct hand-off queue) - confirm against ExecutorUtil.MDCAwareThreadPoolExecutor.
ExecutorService tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, 10, 0L, TimeUnit.MILLISECONDS,
new SynchronousQueue<>(),
new DefaultSolrThreadFactory("OverseerCollectionMessageHandlerThreadFactory"));
/**
 * Shared random source. When the {@code tests.seed} system property is set, the generator is
 * seeded from that string's hash code so test runs are reproducible; otherwise it is unseeded.
 */
protected static final Random RANDOM;
static {
  // The property is read once, at class initialisation.
  final String testSeed = System.getProperty("tests.seed");
  RANDOM = (testSeed != null) ? new Random(testSeed.hashCode()) : new Random();
}
// Dispatch table built once in the constructor: CollectionAction -> handler (a Cmd object or a
// method reference). processMessage looks the handler up here by action.
final Map commandMap;
// Set to false in the constructor; where it is flipped is not visible in this excerpt.
private volatile boolean isClosed;
/**
 * Creates the handler, stores its collaborators and builds the immutable action-to-command
 * dispatch table.
 *
 * @param zkStateReader reader stored for later cluster-state access
 * @param myId identifier stored in {@code myId}
 * @param shardHandlerFactory factory used to obtain shard handlers for core admin requests
 * @param adminPath core admin handler path stored in {@code adminPath}
 * @param stats stats holder stored in {@code stats}
 * @param overseer owning overseer; must not be null because it is dereferenced here to obtain
 *        the {@link SolrCloudManager}
 * @param overseerPrioritizer passed only to the ADDROLE / REMOVEROLE commands
 */
public OverseerCollectionMessageHandler(ZkStateReader zkStateReader, String myId,
final HttpShardHandlerFactory shardHandlerFactory,
String adminPath,
Stats stats,
Overseer overseer,
OverseerNodePrioritizer overseerPrioritizer) {
this.zkStateReader = zkStateReader;
this.shardHandlerFactory = shardHandlerFactory;
this.adminPath = adminPath;
this.myId = myId;
this.stats = stats;
this.overseer = overseer;
// cloudManager and timeSource are derived from the overseer rather than passed in.
this.cloudManager = overseer.getSolrCloudManager();
this.timeSource = cloudManager.getTimeSource();
this.isClosed = false;
// One entry per supported CollectionAction. Entries are either dedicated Cmd objects, which
// receive this handler, or method references to private methods of this class.
commandMap = new ImmutableMap.Builder()
.put(REPLACENODE, new ReplaceNodeCmd(this))
.put(DELETENODE, new DeleteNodeCmd(this))
.put(BACKUP, new BackupCmd(this))
.put(RESTORE, new RestoreCmd(this))
.put(CREATESNAPSHOT, new CreateSnapshotCmd(this))
.put(DELETESNAPSHOT, new DeleteSnapshotCmd(this))
.put(SPLITSHARD, new SplitShardCmd(this))
.put(ADDROLE, new OverseerRoleCmd(this, ADDROLE, overseerPrioritizer))
.put(REMOVEROLE, new OverseerRoleCmd(this, REMOVEROLE, overseerPrioritizer))
// The three mock task actions share one implementation (test-only, see mockOperation).
.put(MOCK_COLL_TASK, this::mockOperation)
.put(MOCK_SHARD_TASK, this::mockOperation)
.put(MOCK_REPLICA_TASK, this::mockOperation)
.put(MIGRATESTATEFORMAT, this::migrateStateFormat)
.put(CREATESHARD, new CreateShardCmd(this))
.put(MIGRATE, new MigrateCmd(this))
.put(CREATE, new CreateCollectionCmd(this))
.put(MODIFYCOLLECTION, this::modifyCollection)
.put(ADDREPLICAPROP, this::processReplicaAddPropertyCommand)
.put(DELETEREPLICAPROP, this::processReplicaDeletePropertyCommand)
.put(BALANCESHARDUNIQUE, this::balanceProperty)
.put(REBALANCELEADERS, this::processRebalanceLeaders)
.put(RELOAD, this::reloadCollection)
.put(DELETE, new DeleteCollectionCmd(this))
.put(CREATEALIAS, new CreateAliasCmd(this))
.put(DELETEALIAS, new DeleteAliasCmd(this))
.put(ALIASPROP, new SetAliasPropCmd(this))
.put(MAINTAINROUTEDALIAS, new MaintainRoutedAliasCmd(this))
.put(OVERSEERSTATUS, new OverseerStatusCmd(this))
.put(DELETESHARD, new DeleteShardCmd(this))
.put(DELETEREPLICA, new DeleteReplicaCmd(this))
.put(ADDREPLICA, new AddReplicaCmd(this))
.put(MOVEREPLICA, new MoveReplicaCmd(this))
.put(REINDEXCOLLECTION, new ReindexCollectionCmd(this))
.put(UTILIZENODE, new UtilizeNodeCmd(this))
.put(RENAME, new RenameCmd(this))
.build()
;
}
/**
 * Dispatches one Collections API message to the command registered for {@code operation}.
 *
 * <p>Failures are never propagated: any exception raised while resolving or running the command
 * is logged and recorded in the response under {@code "exception"} (with {@code msg} and
 * {@code rspCode}, the latter being {@code -1} for anything that is not a {@link SolrException}).
 *
 * @param message the message to process; its collection, shard and replica are put into the MDC
 * @param operation name of the requested collection action
 * @return a response wrapping the result list, whether or not the command succeeded
 */
@Override
@SuppressWarnings("unchecked")
public SolrResponse processMessage(ZkNodeProps message, String operation) {
  MDCLoggingContext.setCollection(message.getStr(COLLECTION));
  MDCLoggingContext.setShard(message.getStr(SHARD_ID_PROP));
  MDCLoggingContext.setReplica(message.getStr(REPLICA_PROP));
  log.debug("OverseerCollectionMessageHandler.processMessage : {} , {}", operation, message);

  NamedList results = new NamedList();
  try {
    CollectionAction action = getCollectionAction(operation);
    Cmd command = commandMap.get(action);
    if (command == null) {
      // A known action with no registered handler is reported like an unknown one.
      throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown operation:"
          + operation);
    }
    command.call(cloudManager.getClusterStateProvider().getClusterState(), message, results);
  } catch (Exception e) {
    // Prefer the explicit collection property and fall back to the generic name property.
    String collName = message.getStr("collection");
    if (collName == null) {
      collName = message.getStr(NAME);
    }
    final String logMessage = (collName != null)
        ? "Collection: " + collName + " operation: " + operation + " failed"
        : "Operation " + operation + " failed";
    SolrException.log(log, logMessage, e);

    results.add("Operation " + operation + " caused exception:", e);
    int rspCode = -1;
    if (e instanceof SolrException) {
      rspCode = ((SolrException) e).code();
    }
    SimpleOrderedMap nl = new SimpleOrderedMap();
    nl.add("msg", e.getMessage());
    nl.add("rspCode", rspCode);
    results.add("exception", nl);
  }
  return new OverseerSolrResponse(results);
}
/**
 * Test-only command: sleeps for the number of milliseconds given by the message's
 * {@code "sleep"} property (default 1), logs the message and records the completion time
 * under {@code "MOCK_FINISHED"}.
 *
 * @throws InterruptedException if the sleeping thread is interrupted
 */
@SuppressForbidden(reason = "Needs currentTimeMillis for mock requests")
private void mockOperation(ClusterState state, ZkNodeProps message, NamedList results) throws InterruptedException {
  final long pauseMillis = message.getInt("sleep", 1);
  Thread.sleep(pauseMillis);

  final long executedAt = System.currentTimeMillis();
  final String payload = Utils.toJSONString(message);
  log.info("MOCK_TASK_EXECUTED time {} data {}", executedAt, payload);

  results.add("MOCK_FINISHED", System.currentTimeMillis());
}
/**
 * Resolves an operation name to its {@link CollectionAction}.
 *
 * @param operation the operation name taken from the message
 * @return the matching action, never {@code null}
 * @throws SolrException with {@code BAD_REQUEST} if no action matches
 */
private CollectionAction getCollectionAction(String operation) {
  final CollectionAction resolved = CollectionAction.get(operation);
  if (resolved != null) {
    return resolved;
  }
  throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown operation:" + operation);
}
/**
 * Handles RELOAD by issuing a core admin RELOAD through {@code collectionCmd}, restricted to
 * replicas in the {@code ACTIVE} state and tagged with the message's async id, if any.
 */
private void reloadCollection(ClusterState clusterState, ZkNodeProps message, NamedList results) {
  final ModifiableSolrParams reloadParams = new ModifiableSolrParams();
  reloadParams.set(CoreAdminParams.ACTION, CoreAdminAction.RELOAD.toString());
  collectionCmd(message, reloadParams, results, Replica.State.ACTIVE, message.getStr(ASYNC));
}
/**
 * Handles REBALANCELEADERS by submitting a REJOINLEADERELECTION core admin request to the node
 * at the message's base URL. The request is only submitted here; this method does not wait for
 * or inspect a response.
 *
 * @throws SolrException with {@code BAD_REQUEST} if any required property is missing
 */
@SuppressWarnings("unchecked")
private void processRebalanceLeaders(ClusterState clusterState, ZkNodeProps message, NamedList results)
    throws Exception {
  checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, CORE_NAME_PROP, ELECTION_NODE_PROP,
      CORE_NODE_NAME_PROP, BASE_URL_PROP, REJOIN_AT_HEAD_PROP);

  // Copy the pass-through properties in the same order as before, with ACTION in between.
  final ModifiableSolrParams rejoinParams = new ModifiableSolrParams();
  for (String key : new String[] {COLLECTION_PROP, SHARD_ID_PROP, REJOIN_AT_HEAD_PROP}) {
    rejoinParams.set(key, message.getStr(key));
  }
  rejoinParams.set(CoreAdminParams.ACTION, CoreAdminAction.REJOINLEADERELECTION.toString());
  for (String key : new String[] {CORE_NAME_PROP, CORE_NODE_NAME_PROP, ELECTION_NODE_PROP, BASE_URL_PROP}) {
    rejoinParams.set(key, message.getStr(key));
  }
  // yes, they must use same admin handler path everywhere...
  rejoinParams.set("qt", adminPath);

  final String targetUrl = message.getStr(BASE_URL_PROP);
  final ShardRequest request = new ShardRequest();
  request.nodeName = message.getStr(ZkStateReader.CORE_NAME_PROP);
  request.purpose = ShardRequest.PURPOSE_PRIVATE;
  request.shards = new String[] {targetUrl};
  request.actualShards = request.shards;
  request.params = rejoinParams;

  final ShardHandler handler = shardHandlerFactory.getShardHandler(
      overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
  handler.submit(request, targetUrl, request.params);
}
/**
 * Handles ADDREPLICAPROP by forwarding the request to the overseer's state-update queue.
 *
 * <p>The queued message contains the operation name plus every property of {@code message}.
 * Message properties are copied last, so a property stored under the same key as
 * {@link Overseer#QUEUE_OPERATION} would replace the operation name.
 *
 * @param clusterState unused
 * @param message must contain collection, shard, replica, property and property value
 * @param results unused
 * @throws SolrException with {@code BAD_REQUEST} if a required property is missing
 * @throws Exception if the state update cannot be offered
 */
@SuppressWarnings("unchecked")
private void processReplicaAddPropertyCommand(ClusterState clusterState, ZkNodeProps message, NamedList results)
    throws Exception {
  checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP, PROPERTY_VALUE_PROP);
  Map<String, Object> propMap = new HashMap<>();
  propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICAPROP.toLower());
  propMap.putAll(message.getProperties());
  ZkNodeProps m = new ZkNodeProps(propMap);
  overseer.offerStateUpdate(Utils.toJSON(m));
}
/**
 * Handles DELETEREPLICAPROP by forwarding the request to the overseer's state-update queue.
 *
 * <p>The queued message contains the operation name plus every property of {@code message}.
 * Message properties are copied last, so a property stored under the same key as
 * {@link Overseer#QUEUE_OPERATION} would replace the operation name.
 *
 * @param clusterState unused
 * @param message must contain collection, shard, replica and property
 * @param results unused
 * @throws SolrException with {@code BAD_REQUEST} if a required property is missing
 * @throws Exception if the state update cannot be offered
 */
private void processReplicaDeletePropertyCommand(ClusterState clusterState, ZkNodeProps message, NamedList results)
    throws Exception {
  checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP);
  Map<String, Object> propMap = new HashMap<>();
  propMap.put(Overseer.QUEUE_OPERATION, DELETEREPLICAPROP.toLower());
  propMap.putAll(message.getProperties());
  ZkNodeProps m = new ZkNodeProps(propMap);
  overseer.offerStateUpdate(Utils.toJSON(m));
}
/**
 * Handles BALANCESHARDUNIQUE by forwarding the request to the overseer's state-update queue.
 *
 * <p>The queued message contains the operation name plus every property of {@code message}.
 * Message properties are copied last, so a property stored under the same key as
 * {@link Overseer#QUEUE_OPERATION} would replace the operation name.
 *
 * @param clusterState unused
 * @param message must contain non-blank collection and property values
 * @param results unused
 * @throws SolrException with {@code BAD_REQUEST} if collection or property is blank
 * @throws Exception if the state update cannot be offered
 */
private void balanceProperty(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
  if (StringUtils.isBlank(message.getStr(COLLECTION_PROP)) || StringUtils.isBlank(message.getStr(PROPERTY_PROP))) {
    throw new SolrException(ErrorCode.BAD_REQUEST,
        "The '" + COLLECTION_PROP + "' and '" + PROPERTY_PROP +
            "' parameters are required for the BALANCESHARDUNIQUE operation, no action taken");
  }
  Map<String, Object> m = new HashMap<>();
  m.put(Overseer.QUEUE_OPERATION, BALANCESHARDUNIQUE.toLower());
  m.putAll(message.getProperties());
  overseer.offerStateUpdate(Utils.toJSON(m));
}
/**
 * Gets the collection status from cluster state, optionally restricted to the given shards.
 *
 * <p>When shards are requested, the {@code "shards"} entry of {@code collection} is replaced
 * in place with a map holding only those shards. The map is left untouched if any requested
 * shard is missing.
 *
 * @param collection collection map parsed from JSON-serialized {@link ClusterState}
 * @param name collection name, used only in error messages
 * @param requestedShards the set of shards to be returned in the status;
 *        an empty or {@code null} value indicates all shards
 * @return map of collection properties (the same instance as {@code collection})
 * @throws SolrException with {@code BAD_REQUEST} if {@code collection} is {@code null} or a
 *         requested shard is not present in it
 */
@SuppressWarnings("unchecked")
private Map<String, Object> getCollectionStatus(Map<String, Object> collection, String name, Set<String> requestedShards) {
  if (collection == null) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "Collection: " + name + " not found");
  }
  if (requestedShards == null || requestedShards.isEmpty()) {
    return collection;
  }
  Map<String, Object> shards = (Map<String, Object>) collection.get("shards");
  Map<String, Object> selected = new HashMap<>();
  for (String selectedShard : requestedShards) {
    // A collection without a "shards" entry cannot contain any requested shard.
    if (shards == null || !shards.containsKey(selectedShard)) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Collection: " + name + " shard: " + selectedShard + " not found");
    }
    selected.put(selectedShard, shards.get(selectedShard));
  }
  // Replace the shard map once, and only after every requested shard has been validated.
  collection.put("shards", selected);
  return collection;
}
/**
 * Delegates to the {@link DeleteReplicaCmd} registered under DELETEREPLICA in the command map.
 *
 * @param onComplete callback passed through unchanged to the command
 */
@SuppressWarnings("unchecked")
void deleteReplica(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete)
    throws Exception {
  final DeleteReplicaCmd deleteCmd = (DeleteReplicaCmd) commandMap.get(DELETEREPLICA);
  deleteCmd.deleteReplica(clusterState, message, results, onComplete);
}
/**
 * Waits until the given replica is no longer present in the collection state.
 *
 * <p>The replica counts as gone when the collection state is {@code null}, the shard does not
 * exist, or the shard has no replica with that name.
 *
 * @param timeoutms maximum time to wait, in milliseconds
 * @return {@code true} if the replica disappeared in time, {@code false} on timeout
 * @throws InterruptedException if interrupted while waiting
 */
boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException {
  try {
    zkStateReader.waitForState(collectionName, timeoutms, TimeUnit.MILLISECONDS, collectionState -> {
      if (collectionState == null) {
        return true;
      }
      final Slice owningSlice = collectionState.getSlice(shard);
      return owningSlice == null || owningSlice.getReplica(replicaName) == null;
    });
    return true;
  } catch (TimeoutException e) {
    return false;
  }
}
/**
 * Offers a DELETECORE state update for the given replica to the overseer.
 *
 * @param collectionName collection that owns the replica
 * @param replicaName sent as the core node name
 * @param replica supplies the node name and base URL for the message
 * @param core sent as the core name
 */
void deleteCoreNode(String collectionName, String replicaName, Replica replica, String core) throws Exception {
  final String nodeName = replica.getStr(ZkStateReader.NODE_NAME_PROP);
  final String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
  final ZkNodeProps deleteCoreMessage = new ZkNodeProps(
      Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
      ZkStateReader.CORE_NAME_PROP, core,
      ZkStateReader.NODE_NAME_PROP, nodeName,
      ZkStateReader.COLLECTION_PROP, collectionName,
      ZkStateReader.CORE_NODE_NAME_PROP, replicaName,
      ZkStateReader.BASE_URL_PROP, baseUrl);
  overseer.offerStateUpdate(Utils.toJSON(deleteCoreMessage));
}
/**
 * Verifies that every listed property is present in the message.
 *
 * @param message the message to inspect
 * @param props names of the properties that must be present
 * @throws SolrException with {@code BAD_REQUEST} on the first missing property; the error text
 *         lists all of {@code props}, not just the missing one
 */
void checkRequired(ZkNodeProps message, String... props) {
  for (String required : props) {
    if (message.get(required) != null) {
      continue;
    }
    throw new SolrException(ErrorCode.BAD_REQUEST, StrUtils.join(Arrays.asList(props),',') +" are required params" );
  }
}
void checkResults(String label, NamedList