Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
oracle.kv.impl.admin.Admin Maven / Gradle / Ivy
Go to download
NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.
/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.admin;
import static oracle.kv.impl.util.contextlogger.ContextUtils.fineWithCtx;
import static oracle.kv.impl.util.contextlogger.ContextUtils.isLoggableWithCtx;
import static oracle.kv.impl.util.contextlogger.ContextUtils.warningWithCtx;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.lang.Thread.UncaughtExceptionHandler;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.rmi.NotBoundException;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import oracle.kv.KVSecurityConstants;
import oracle.kv.KVStore;
import oracle.kv.KVStoreConfig;
import oracle.kv.KVStoreFactory;
import oracle.kv.KVVersion;
import oracle.kv.LoginCredentials;
import oracle.kv.impl.admin.client.PoolCommand;
import oracle.kv.impl.admin.criticalevent.CriticalEvent;
import oracle.kv.impl.admin.criticalevent.EventRecorder;
import oracle.kv.impl.admin.criticalevent.EventRecorder.LatestEventTimestamps;
import oracle.kv.impl.admin.param.AdminParams;
import oracle.kv.impl.admin.param.ArbNodeParams;
import oracle.kv.impl.admin.param.DatacenterParams;
import oracle.kv.impl.admin.param.GlobalParams;
import oracle.kv.impl.admin.param.Parameters;
import oracle.kv.impl.admin.param.RepNodeParams;
import oracle.kv.impl.admin.param.StorageNodeParams;
import oracle.kv.impl.admin.param.StorageNodePool;
import oracle.kv.impl.admin.plan.DeployDatacenterPlan;
import oracle.kv.impl.admin.plan.DeploymentInfo;
import oracle.kv.impl.admin.plan.ExecutionState.ExceptionTransfer;
import oracle.kv.impl.admin.plan.Plan;
import oracle.kv.impl.admin.plan.PlanRun;
import oracle.kv.impl.admin.plan.Planner;
import oracle.kv.impl.admin.plan.StatusReport;
import oracle.kv.impl.admin.plan.VerifyDataPlan;
import oracle.kv.impl.admin.plan.task.UpdateMetadata;
import oracle.kv.impl.admin.topo.RealizedTopology;
import oracle.kv.impl.admin.topo.Rules;
import oracle.kv.impl.admin.topo.Rules.Results;
import oracle.kv.impl.admin.topo.TopologyBuilder;
import oracle.kv.impl.admin.topo.TopologyCandidate;
import oracle.kv.impl.admin.topo.TopologyDiff;
import oracle.kv.impl.admin.topo.Validations.RulesProblem;
import oracle.kv.impl.api.table.TableLimits;
import oracle.kv.impl.client.admin.ExecutionInfo;
import oracle.kv.impl.client.admin.ExecutionInfoImpl;
import oracle.kv.impl.fault.CommandFaultException;
import oracle.kv.impl.fault.DatabaseNotReadyException;
import oracle.kv.impl.fault.OperationFaultException;
import oracle.kv.impl.fault.ProcessExitCode;
import oracle.kv.impl.metadata.Metadata;
import oracle.kv.impl.metadata.Metadata.MetadataType;
import oracle.kv.impl.metadata.MetadataInfo;
import oracle.kv.impl.monitor.Monitor;
import oracle.kv.impl.monitor.MonitorKeeper;
import oracle.kv.impl.param.LoadParameters;
import oracle.kv.impl.param.Parameter;
import oracle.kv.impl.param.ParameterListener;
import oracle.kv.impl.param.ParameterMap;
import oracle.kv.impl.param.ParameterState;
import oracle.kv.impl.param.ParameterTracker;
import oracle.kv.impl.param.ParameterUtils;
import oracle.kv.impl.param.SizeParameter;
import oracle.kv.impl.query.QueryException;
import oracle.kv.impl.query.QueryStateException;
import oracle.kv.impl.security.AccessChecker;
import oracle.kv.impl.security.ClientProxyCredentials;
import oracle.kv.impl.security.KVStoreUserPrincipal;
import oracle.kv.impl.security.RoleResolver;
import oracle.kv.impl.security.login.LoginManager;
import oracle.kv.impl.security.login.LoginUpdater;
import oracle.kv.impl.security.metadata.SecurityMDListener;
import oracle.kv.impl.security.metadata.SecurityMDTracker;
import oracle.kv.impl.security.metadata.SecurityMDUpdater;
import oracle.kv.impl.security.metadata.SecurityMetadata;
import oracle.kv.impl.security.oauth.IDCSOAuthAuthenticator;
import oracle.kv.impl.sna.StorageNodeAgentAPI;
import oracle.kv.impl.sna.StorageNodeStatus;
import oracle.kv.impl.test.TestHook;
import oracle.kv.impl.test.TestHookExecute;
import oracle.kv.impl.test.TestStatus;
import oracle.kv.impl.topo.AdminId;
import oracle.kv.impl.topo.AdminType;
import oracle.kv.impl.topo.ArbNodeId;
import oracle.kv.impl.topo.Datacenter;
import oracle.kv.impl.topo.DatacenterId;
import oracle.kv.impl.topo.DatacenterType;
import oracle.kv.impl.topo.PartitionId;
import oracle.kv.impl.topo.RepGroupId;
import oracle.kv.impl.topo.RepNode;
import oracle.kv.impl.topo.RepNodeId;
import oracle.kv.impl.topo.ResourceId;
import oracle.kv.impl.topo.StorageNode;
import oracle.kv.impl.topo.StorageNodeId;
import oracle.kv.impl.topo.StorageNodeMap;
import oracle.kv.impl.topo.Topology;
import oracle.kv.impl.util.ConfigUtils;
import oracle.kv.impl.util.ConfigurableService.ServiceStatus;
import oracle.kv.impl.util.DatabaseUtils;
import oracle.kv.impl.util.FileNames;
import oracle.kv.impl.util.FileUtils;
import oracle.kv.impl.util.JsonUtils;
import oracle.kv.impl.util.SerialVersion;
import oracle.kv.impl.util.ServiceUtils;
import oracle.kv.impl.util.StateTracker;
import oracle.kv.impl.util.TopologyPrinter;
import oracle.kv.impl.util.TxnUtil;
import oracle.kv.impl.util.UserDataControl;
import oracle.kv.impl.util.VersionUtil;
import oracle.kv.impl.util.contextlogger.LogContext;
import oracle.kv.impl.util.registry.ClientSocketFactory;
import oracle.kv.impl.util.registry.RegistryUtils;
import oracle.kv.impl.util.server.JENotifyHooks.LogRewriteListener;
import oracle.kv.impl.util.server.JENotifyHooks.RecoveryListener;
import oracle.kv.impl.util.server.JENotifyHooks.RedirectHandler;
import oracle.kv.impl.util.server.JENotifyHooks.SyncupListener;
import oracle.kv.impl.util.server.LoggerUtils;
import oracle.kv.util.ErrorMessage;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Durability;
import com.sleepycat.je.Durability.ReplicaAckPolicy;
import com.sleepycat.je.Durability.SyncPolicy;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.LockConflictException;
import com.sleepycat.je.StatsConfig;
import com.sleepycat.je.ThreadInterruptedException;
import com.sleepycat.je.Transaction;
import com.sleepycat.je.TransactionConfig;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.rep.InsufficientAcksException;
import com.sleepycat.je.rep.InsufficientLogException;
import com.sleepycat.je.rep.InsufficientReplicasException;
import com.sleepycat.je.rep.MemberNotFoundException;
import com.sleepycat.je.rep.NetworkRestore;
import com.sleepycat.je.rep.NetworkRestoreConfig;
import com.sleepycat.je.rep.NodeType;
import com.sleepycat.je.rep.RepInternal;
import com.sleepycat.je.rep.ReplicaConsistencyException;
import com.sleepycat.je.rep.ReplicaWriteException;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.rep.ReplicatedEnvironment.State;
import com.sleepycat.je.rep.ReplicationConfig;
import com.sleepycat.je.rep.ReplicationGroup;
import com.sleepycat.je.rep.ReplicationNetworkConfig;
import com.sleepycat.je.rep.ReplicationNode;
import com.sleepycat.je.rep.RollbackException;
import com.sleepycat.je.rep.StateChangeEvent;
import com.sleepycat.je.rep.StateChangeListener;
import com.sleepycat.je.rep.UnknownMasterException;
import com.sleepycat.je.rep.impl.RepImpl;
import com.sleepycat.je.rep.impl.RepParams;
import com.sleepycat.je.rep.util.ReplicationGroupAdmin;
import com.sleepycat.je.rep.utilint.HostPortPair;
import com.sleepycat.je.utilint.JVMSystemUtils;
import com.sleepycat.je.utilint.StoppableThread;
import com.sleepycat.persist.model.Entity;
import com.sleepycat.persist.model.PrimaryKey;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
/**
* An instance of the Admin class provides the general API underlying the
* administrative interface. It also owns the global Topology, System
* Metadata, Parameters and Monitor instances, and provides interfaces
* for fetching and storing them.
*/
public class Admin
implements MonitorKeeper, ParameterListener, StateChangeListener {
public static final String CAUSE_CREATE = "after create";
public static final String CAUSE_APPROVE = "after approval";
private static final String CAUSE_CANCEL = "after cancel";
private static final String CAUSE_INTERRUPT = "after interrupt";
public static final String CAUSE_INTERRUPT_REQUEST =
"after interrupt requested";
public static final String CAUSE_EXEC = "after execution";
private static final long ADMIN_JE_CACHE_SIZE = 0;
private final AdminId adminId;
private AdminId masterId;
/* For testing only */
public static TestHook EXECUTE_HOOK;
/**
* For testing only. Called at the start of the shutdown method with the
* admin object as an argument.
*/
public static volatile TestHook beginShutdownHook;
/**
* This is the maximum number of change history objects that are allowed
* to be stored with each metadata objects in the Admin database.
*/
private static final int MAX_MD_CHANGE_HISTORY = 1000;
/*
* The parameters used to configure the Admin instance.
*/
private final AdminServiceParams myParams;
/*
* Reference to planner. The planner is present only when the Admin is the
* master, otherwise the field is null. Access must be synchronized
* on the Admin instance.
*/
private volatile Planner planner;
/*
* Track listeners for parameter changes.
*/
private final ParameterTracker parameterTracker;
/*
* Track listeners for global parameter changes.
*/
private final ParameterTracker globalParameterTracker;
/*
* Track listeners for security metadata change.
*/
private final SecurityMDTracker securityMDTracker;
/*
* Track listeners for topology and parameter changes.
*/
private final SNParameterChangeTracker parameterChangeTracker;
/*
* Monitoring services in the store.
*/
private Monitor monitor;
/*
* SN consistency checker
*/
private ParamConsistencyChecker metadataAdminChecker;
private SoftwareVersionUpdater versionUpdater;
/*
* The minimum release that supports the metadata consistency and
* version updater utility threads.
*/
private KVVersion MIN_UTIL_THREAD_VERSION = KVVersion.R18_1;
/*
* Subsystem for recording significant monitor events.
*/
private EventRecorder eventRecorder;
private final Logger logger;
private Parameters parameters;
private Memo memo;
private final EnvironmentConfig envConfig;
private final ReplicationConfig repConfig;
private final File envDir;
private final File snapshotDir;
private final StateTracker stateTracker;
private final ReplicatedEnvironment environment;
private final AdminService owner; /* May be null. */
private final UncaughtExceptionHandler exceptionHandler;
/*
* startupStatus is used to coordinate admin initialization between the
* current thread and the thread that has been spawned to asynchronously do
* the longer-running init tasks.
*/
private final StartupStatus startupStatus;
private volatile boolean closing = false;
private int eventStoreCounter = 0;
private int eventStoreAgingFrequency = 100;
/*
* This is the minimum version that the store is operating at. This
* starts at the current release prerequisite since this admin should
* not be able to be installed if the prereq was not met.
*/
private volatile KVVersion storeVersion = KVVersion.PREREQUISITE_VERSION;
/*
* This is the minimum version the whole admin group is operating at.
*/
private volatile KVVersion adminVersion = KVVersion.PREREQUISITE_VERSION;
private final AdminStores stores;
/* Thread to monotor Admin upgrade. */
private StoppableThread upgradeMonitor;
/* Thread to creat/upgrade system tables. */
private SysTableMonitor sysTableMonitor;
/**
* Admin je.maxDisk default value. This is an estimated maximum based on
* creation of a large number of plans and tables. If it is insufficient
* it can be overridden by specifying configParameters for the admin. In
* the future we expect to reduce this size using metadata pruning.
*/
private static final long ADMIN_DEFAULT_MAX_DISK =
3 * 1024L * 1024L * 1024L;
/*
* Public constructor for unit test.
*/
public Admin(AdminServiceParams params) {
this(params, null);
}
/**
* Constructor.
*/
Admin(AdminServiceParams params, AdminService owner) {
this.owner = owner;
parameters = null;
myParams = params;
exceptionHandler = new AdminExceptionHandler();
parameterTracker = new ParameterTracker();
globalParameterTracker = new ParameterTracker();
securityMDTracker = new SecurityMDTracker();
parameterChangeTracker = new SNParameterChangeTracker();
final AdminParams adminParams = myParams.getAdminParams();
final StorageNodeParams snParams = myParams.getStorageNodeParams();
this.adminId = adminParams.getAdminId();
/*
* adminMountMap contains single entry corresponding to
* individual admin storage directory.
*/
ParameterMap adminMountMap = snParams.getAdminDirMap();
String adminDirName = null;
long adminDirSize = 0L;
if (adminMountMap != null) {
for (Parameter adminDir : adminMountMap) {
adminDirName = adminDir.getName();
adminDirSize = SizeParameter.getSize(adminDir);
}
}
/*
* TODO : We are doing validate[Rep/Arb]NodeDirectory and also
* raising warnings for not mentioning size before following steps
* for storage node. These checks need to be considered for admindir.
*/
if (adminDirName != null) {
/*
* We have dedicated admin mount point specified.
*/
envDir = FileNames.getAdminEnvDir(adminDirName, adminId);
snapshotDir = FileNames.getAdminSnapshotDir(adminDirName, adminId);
} else {
envDir =
FileNames.getEnvDir
(snParams.getRootDirPath(),
myParams.getGlobalParams().getKVStoreName(),
null, snParams.getStorageNodeId(), adminId);
snapshotDir =
FileNames.getSnapshotDir
(snParams.getRootDirPath(),
myParams.getGlobalParams().getKVStoreName(),
null, snParams.getStorageNodeId(), adminId);
}
final boolean created = FileNames.makeDir(envDir);
/*
* Add a log handler to send logging messages on the Admin process to
* the Monitor. This must happen before the first monitor is
* instantiated. This registration lasts the lifetime of the Admin.
*/
LoggerUtils.registerMonitorAdminHandler
(myParams.getGlobalParams().getKVStoreName(), adminId, this);
/*
* Monitor should exist before the planner, and preferably before
* any loggers, so that Admin logging can be sent to the store-wide
* view.
*/
monitor = new Monitor(myParams, this, getLoginManager());
/*
* By creating the event recorder early in the bootstrapping process, we
* put a stake in the ground with the Trackers to avoid the pruning of
* early events. However, we can't start up the recorder until after
* the persistent environment is viable.
*/
eventRecorder = new EventRecorder(this);
addParameterListener(monitor);
addParameterListener(UserDataControl.getParamListener());
addParameterListener(this);
addParameterListener(new KVMetadataAdminThreadParameterListenter());
addParameterChangeListener(new KVAdminParameterListener());
/* Logger must be created after the registering the monitor handler */
logger = LoggerUtils.getLogger(this.getClass(), myParams);
/*
* Any socket timeouts observed by the ClientSocketFactory in this
* process will be logged to this logger.
*/
ClientSocketFactory.setTimeoutLogger(logger);
if (isAdminThreadEnabled(
adminParams.isMetadataAdminThreadEnabled())) {
startMetadataAdminThread(
adminParams.getKVMetadataAdminCheckInterval().toMillis(),
adminParams.getKVMetadataAdminMaxPlanWait().toMillis());
}
if (isAdminThreadEnabled(adminParams.isVersionThreadEnabled())) {
startVersionUpdaterThread(
adminParams.getVersionCheckInterval().toMillis());
}
logger.info("Initializing " + myParams.getAdminParams().getType() +
" Admin for store: " +
myParams.getGlobalParams().getKVStoreName());
if (created) {
logger.info("Created new admin environment dir: " + envDir);
}
/*
* maxDisk should always be set for Admin.
*
* If admindirsize is not specified, then check diskSize for admindir,
* if specified, or kvroot, if admindir is not specified. If the
* kvroot or admindir diskSize is more than 3 GB, then set upper 3 GB
* limit as maxDisk.
*/
if (adminDirSize == 0L) {
adminDirSize = (adminDirName == null) ?
FileUtils.getDirectorySize(snParams.getRootDirPath()) :
FileUtils.getDirectorySize(adminDirName);
if (adminDirSize > ADMIN_DEFAULT_MAX_DISK) {
adminDirSize = ADMIN_DEFAULT_MAX_DISK;
}
}
Properties props = new ParameterUtils(adminParams.getMap()).
createProperties(true, true, adminDirSize);
logger.info("JVM Runtime maxMemory (bytes): " +
JVMSystemUtils.getRuntimeMaxMemory());
logger.info("Non-default JE properties for environment: " + props);
envConfig = createEnvironmentConfig(props);
/*
* Use the service name as the JE log file prefix so that multiple
* services can store their log files in the same directory
*/
envConfig.setConfigParam
(EnvironmentConfig.FILE_LOGGING_PREFIX,
adminId.getFullName());
/*
* Logging je.info, je.stat, je.config file in kvroot
* log directory.
*/
String dirName = myParams.getStorageNodeParams().getRootDirPath() +
"/" + myParams.getGlobalParams().getKVStoreName() +
"/log";
envConfig.setConfigParam(EnvironmentConfig.FILE_LOGGING_DIRECTORY,
dirName);
repConfig = createReplicationConfig();
stateTracker = new AdminStateTracker(logger);
stateTracker.start();
maybeResetRepGroup();
environment = openEnv();
stores = new AdminStores(this);
/*
* Some initialization of the Admin takes place in a separate thread
* triggered by the StateChangeListener callback. We'll wait here to
* be notified that the initialization is complete.
*/
startupStatus = new StartupStatus();
environment.setStateChangeListener(this);
startupStatus.waitForIsReady(this);
logger.info("Replicated environment handle established." +
" Cache size: " + environment.getConfig().getCacheSize() +
", State: " + environment.getState());
}
public Logger getLogger() {
return logger;
}
/**
* Gets the Admin's exception handler.
*
* @return the Admin's exception handler
*/
public UncaughtExceptionHandler getExceptionHandler() {
return exceptionHandler;
}
/**
* Check whether the Admin is master or not. If the admin is not master,
* AdminNotReadyException is thrown.
*/
private void checkIfReadonlyAdmin() {
if (parameters == null) {
/* This Admin is not master when parameter is null */
throw new AdminNotReadyException
("Cannot service request, admin is not the master");
}
}
/**
* Returns the set of plans that were not in a terminal state when the Admin
* failover happened.
*
* 1.RUNNING plans ->INTERRUPT_REQUESTED -> INTERRUPTED, and
* will be restarted.
* 2.INTERRUPT_REQUESTED plans -> INTERRUPTED and are not restarted. The
* failover is as if the cleanup phase was interrupted by the user.
* 3.INTERRUPTED plans are left as is.
*
* interrupted, and retry them.
*/
private Set readAndRecoverPlans() {
assert Thread.holdsLock(this);
assert planner != null;
final Map activePlans =
new RunTransaction>
(environment, RunTransaction.readOnly, logger) {
@Override
protected Map doTransaction(Transaction txn) {
final Map plans =
stores.getPlanStore().getActivePlans(txn,
planner,
myParams);
stores.getTopologyStore().initCachedStartTime(txn);
logger.log(Level.FINE, "Fetched {0} plans.", plans.size());
return plans;
}
}.run();
/* Recover any plans that are not in a terminal state. */
final Set restartSet = new HashSet<>();
for (Plan p : activePlans.values()) {
final Plan restart = planner.recover(p);
if (restart != null) {
restartSet.add(restart.getId());
}
}
/*
* Protect against the possibility of restarting multiple exclusive
* plans. Allow only the latest-created exlusive plan to be restarted,
* if more than one exists. Others will remain in INTERRUPTED state.
* [#26303]
*/
Plan latestExclPlan = null;
final Set removals = new HashSet<>();
for (int planId : restartSet) {
Plan p = getPlanById(planId);
if (p.isExclusive()) {
if (latestExclPlan == null) {
latestExclPlan = p;
} else if (latestExclPlan.getCreateTime().
compareTo(p.getCreateTime()) < 0) {
removals.add(latestExclPlan.getId());
latestExclPlan = p;
} else {
removals.add(planId);
}
}
}
restartSet.removeAll(removals);
return restartSet;
}
/**
* Restart plans that were RUNNING before the Admin failed over.
*/
private void restartPlans(Set restartSet) {
assert Thread.holdsLock(this);
for (Integer planId : restartSet) {
logger.info("Restarting plan " + planId);
try {
executePlan(planId, false);
} catch (IllegalCommandException ice) {
logger.log(Level.WARNING,
"Unable to restart plan " + planId, ice);
}
}
}
/**
* Create an EnvironmentConfig specific for the Admin.
*/
private EnvironmentConfig createEnvironmentConfig(Properties props) {
final EnvironmentConfig config = new EnvironmentConfig(props);
config.setAllowCreate(true);
config.setTransactional(true);
config.setCacheSize(ADMIN_JE_CACHE_SIZE);
return config;
}
/**
* Create a ReplicationConfig specific for the Admin.
*/
private ReplicationConfig createReplicationConfig() {
final AdminParams ap = myParams.getAdminParams();
final ReplicationConfig config =
new ParameterUtils(ap.getMap()).getAdminRepEnvConfig();
config.setGroupName
(getAdminRepGroupName(myParams.getGlobalParams().getKVStoreName()));
config.setNodeName(getAdminRepNodeName(ap.getAdminId()));
config.setNodeType(getNodeType(ap.getType()));
config.setElectableGroupSizeOverride(
ap.getElectableGroupSizeOverride());
config.setLogFileRewriteListener
(new AdminLogRewriteListener(snapshotDir, myParams));
if (myParams.getSecurityParams() != null) {
final Properties haProps =
myParams.getSecurityParams().getJEHAProperties();
logger.info("DataChannelFactory: " +
haProps.getProperty(
ReplicationNetworkConfig.CHANNEL_TYPE));
config.setRepNetConfig(ReplicationNetworkConfig.create(haProps));
}
/**
* Reduce the message queue size, to ensure that the Admin can operate
* in a small heap.
*/
config.setConfigParam(RepParams.REPLICA_MESSAGE_QUEUE_SIZE.getName(),
"10");
return config;
}
/*
* Converts the specified Admin type into a JE replication node type.
*/
private NodeType getNodeType(AdminType type) {
switch (type) {
case PRIMARY : return NodeType.ELECTABLE;
case SECONDARY : return NodeType.SECONDARY;
}
throw new IllegalStateException("Unknown Admin type: " + type);
}
ReplicationNetworkConfig getRepNetConfig() {
return repConfig.getRepNetConfig();
}
public static String getAdminRepGroupName(String kvstoreName) {
return kvstoreName + "Admin";
}
public static String getAdminRepNodeName(AdminId adminId) {
return (Integer.toString(adminId.getAdminInstanceId()));
}
/** Reset the JE replication group, if requested. */
private void maybeResetRepGroup() {
final AdminParams adminParams = myParams.getAdminParams();
if (!adminParams.getResetRepGroup()) {
return;
}
logger.info("Resetting replication group");
final EnvironmentConfig resetEnvConfig = envConfig.clone();
final ReplicationConfig resetRepConfig = repConfig.clone();
/*
* Make the reset retain the current rep group UUID so that existing
* nodes can rejoin the group when they restart
*/
resetRepConfig.setConfigParam(
RepParams.RESET_REP_GROUP_RETAIN_UUID.getName(), "true");
DatabaseUtils.resetRepGroup(envDir, resetEnvConfig, resetRepConfig);
}
/**
* Returns a replicated environment handle, dealing with any recoverable
* exceptions in the process.
*
* @return the replicated environment handle. The handle may be in the
* Master, Replica, or Unknown state.
*/
private ReplicatedEnvironment openEnv() {
final boolean networkRestoreDone = false;
/*
* Plumb JE environment logging and progress listening output to
* KVStore monitoring.
*/
envConfig.setLoggingHandler(new AdminRedirectHandler(myParams));
envConfig.setRecoveryProgressListener
(new AdminRecoveryListener(myParams));
repConfig.setSyncupProgressListener(new AdminSyncupListener(myParams));
while (true) {
try {
return new ReplicatedEnvironment(envDir, repConfig, envConfig);
} catch (InsufficientLogException ile) {
if (networkRestoreDone) {
/*
* Should have made progress after the earlier network
* restore, propagate the exception to the caller so it
* can be logged and propagated back to the client.
*/
throw ile;
}
final NetworkRestore networkRestore = new NetworkRestore();
final NetworkRestoreConfig config = new NetworkRestoreConfig();
config.setLogProviders(null);
networkRestore.execute(ile, config);
continue;
} catch (RollbackException rbe) {
final Long time = rbe.getEarliestTransactionCommitTime();
logger.info("Rollback exception retrying: " +
rbe.getMessage() +
((time == null) ?
"" :
" Rolling back to: " + new Date(time)));
continue;
} catch (Throwable t) {
final String msg = "unexpected exception creating environment";
logger.log(Level.WARNING, msg, t);
throw new IllegalStateException(msg, t);
}
}
}
@Override
public String toString() {
return "Admin " + adminId;
}
/**
* A custom Handler for JE log handling. This class provides a unique
* class scope for the purpose of logger creation.
*/
private static class AdminRedirectHandler extends RedirectHandler {
AdminRedirectHandler(AdminServiceParams adminServiceParams) {
super(LoggerUtils.getLogger(AdminRedirectHandler.class,
adminServiceParams));
}
}
/**
* A custom Handler for JE recovery recovery notification. This class
* exists only to provide a unique class scope for the purpose of logger
* creation.
*/
private static class AdminRecoveryListener extends RecoveryListener {
AdminRecoveryListener(AdminServiceParams adminServiceParams) {
super(LoggerUtils.getLogger(AdminRecoveryListener.class,
adminServiceParams));
}
}
/**
* A custom Handler for JE syncup progress notification. This class
* provides a unique class scope for the purpose of logger creation.
*/
private static class AdminSyncupListener extends SyncupListener {
AdminSyncupListener(AdminServiceParams adminServiceParams) {
super(LoggerUtils.getLogger(AdminSyncupListener.class,
adminServiceParams));
}
}
/**
* A custom Handler for JE log rewrite notification. This class provides
* a unique class scope for the purpose of logger creation.
*/
private static class AdminLogRewriteListener extends LogRewriteListener {
AdminLogRewriteListener(File snapshotDir,
AdminServiceParams adminServiceParams) {
super(snapshotDir,
LoggerUtils.getLogger(AdminLogRewriteListener.class,
adminServiceParams));
}
}
/**
* Initialize the Admin database. If it already exists, read its contents
* into the in-memory caches. If not, create the structures that it needs.
*
* TODO: we need to check for the situation where an environment exists,
* and the parameters passed in to the admin do not match those that are
* already stored, which means that the config.xml is not consistent with
* the database. This could happen if there is a failure during:
* - a change to admin params (which is not yet implemented)
* - there is a failure when the admin is deployed.
* The database should be considered the point of authority, which means
* that changing params needs to be carefully orchestrated with changes
* to the database.
* Open may be called multiple times on the same environment. It should be
* idempotent. It is called from the StateChangeListener when the
* environment changes state.
*/
private void open() {
assert Thread.holdsLock(this);
new RunTransaction(environment,
RunTransaction.sync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
final Topology topology = stores.getTopology(txn);
parameters = readParameters(txn);
memo = readMemo(txn);
if (topology == null || parameters == null || memo == null) {
if (!(topology == null &&
parameters == null &&
memo == null)) {
throw new IllegalStateException
("Inconsistency in Admin database: " +
"One of Topology, Parameters, or Memo is missing");
}
/* We are populating the database for the first time. */
logger.info("Initializing Admin database");
final String storeName =
myParams.getGlobalParams().getKVStoreName();
stores.putTopology(txn, new RealizedTopology(storeName));
logger.fine("Creating Parameters");
/* Seed new parameters with info from myParams. */
parameters = new Parameters(storeName);
updateAndNotify(myParams.getGlobalParams());
stores.putParameters(txn, parameters);
logger.fine("Creating Memo");
memo = new Memo(1, new LatestEventTimestamps(0L, 0L, 0L));
stores.putMemo(txn, memo);
logger.info("Admin database initialized");
} else {
logger.info("Using existing Admin database");
// TODO: add verification that the params passed into the
// constructor match what is in the database.
}
return null;
}
}.run();
/*
* Now that the persistent environment is established, we can start
* recording events. Until now, any recordable events that occurred
* would be buffered in the Trackers. Now we can pull them out, record
* them, and allow the Trackers to prune them.
*/
eventRecorder.start(memo.getLatestEventTimestamps());
planner = new Planner(this, myParams, getNextId());
}
/*
* The StopAdmin task needs a way to shut down the whole service, hence
* this entry point.
*/
public void stopAdminService(boolean force) {
if (owner != null) {
/* Eventually calls shutdown(force) */
owner.stop(force);
} else {
/* If there is no service, just treat this as shutdown. */
shutdown(force);
}
}
/**
* The force boolean means don't bother to cleanly shutdown JE.
*/
public synchronized void shutdown(boolean force) {
assert TestHookExecute.doHookIfSet(beginShutdownHook, this);
if (closing) {
return;
}
closing = true;
eventRecorder.shutdown();
if (upgradeMonitor != null) {
upgradeMonitor.shutdownThread(logger);
}
shutdownSysTableMonitor();
/* This will wait for running plans to complete (if force == false) */
shutdownPlanner(force, true);
monitor.shutdown();
shutdownKVMetadataAdminThread();
shutdownVersionUpdaterThread();
if (force) {
try {
stores.close();
} catch (Exception possible) /* CHECKSTYLE:OFF */ {
/*
* Ignore exceptions that come from a forced close of the
* environment.
*/
}/* CHECKSTYLE:ON */
/* Use an internal interface to close without checkpointing */
final EnvironmentImpl envImpl =
DbInternal.getEnvironmentImpl(environment);
if (envImpl != null) {
envImpl.close(false);
}
} else {
stores.close();
environment.close();
}
/* Release all files. */
if ((owner == null) || !owner.getUsingThreads()) {
LoggerUtils.closeHandlers
(myParams.getGlobalParams().getKVStoreName());
}
}
private void shutdownPlanner(boolean force, boolean wait) {
assert Thread.holdsLock(this);
if (planner != null) {
planner.shutdown(force, wait);
planner = null;
}
}
/**
* Notification that there are modified service parameters.
*/
public synchronized void newParameters() {
final ParameterMap oldMap = myParams.getAdminParams().getMap();
ParameterMap newMap = null;
StorageNodeParams snp = myParams.getStorageNodeParams();
GlobalParams gp = myParams.getGlobalParams();
String serviceName = null;
try {
final StorageNodeId snid1 = snp.getStorageNodeId();
serviceName = RegistryUtils.bindingName(
gp.getKVStoreName(), snid1.getFullName(),
RegistryUtils.InterfaceType.MAIN);
StorageNodeAgentAPI snapi =
RegistryUtils.getStorageNodeAgent(snp.getHostname(),
snp.getRegistryPort(),
serviceName,
getLoginManager());
LoadParameters lp = snapi.getParams();
newMap = lp.getMap(oldMap.getName(), oldMap.getType());
} catch (NotBoundException | RemoteException e) {
/*
* Ignore and read directly from file.
*/
}
if (newMap == null) {
newMap =
ConfigUtils.getAdminMap(adminId,
myParams.getStorageNodeParams(),
myParams.getGlobalParams(),
logger);
}
/* Do nothing if maps are the same */
if (oldMap.equals(newMap)) {
return;
}
parameterTracker.notifyListeners(oldMap, newMap);
myParams.setAdminParams(new AdminParams(newMap));
}
/**
* Notification that there are modified global parameters
*/
synchronized public void newGlobalParameters() {
ParameterMap oldMap = myParams.getGlobalParams().getMap();
ParameterMap newMap =
ConfigUtils.getGlobalMap(myParams.getStorageNodeParams(),
myParams.getGlobalParams(),
logger);
/* Do nothing if maps are the same */
if (oldMap.equals(newMap)) {
logger.info(
"newGlobalParameters are identical to old global parameters");
return;
}
logger.info("newGlobalParameters: refreshing global parameters");
myParams.setGlobalParams(new GlobalParams(newMap));
globalParameterTracker.notifyListeners(oldMap, newMap);
}
/**
* Notification that there is new security metadata change.
*/
public void newSecurityMDChange() {
final SecurityMetadata secMd = getMetadata(SecurityMetadata.class,
MetadataType.SECURITY);
if (secMd == null) {
throw new IllegalStateException(
"newSecurityMDChange: no security metadata");
}
securityMDTracker.notifyListeners(secMd.getLatestChange());
logger.info("newSecurityMDChange: update with the " +
"latest security metadata change");
}
public void addParameterListener(ParameterListener pl) {
parameterTracker.addListener(pl);
}
public void removeParameterListener(ParameterListener pl) {
parameterTracker.removeListener(pl);
}
private void addGlobalParameterListener(ParameterListener pl) {
globalParameterTracker.addListener(pl);
}
private void addSecurityMDListener(SecurityMDListener secMdListener) {
securityMDTracker.addListener(secMdListener);
}
private void addParameterChangeListener(SNParameterChangeListener cl) {
parameterChangeTracker.addListener(cl);
}
public ReplicatedEnvironment getEnv() {
return environment;
}
/**
* Returns a clone of the current Topology.
*
* The concurrency protocol for Topology is that we always give out copies.
* Updates to the One True Topology happen in the course of Plan execution
* via savePlanResults, in which a new Topology replaces the old one. The
* Planner bears the responsibility for serializing such updates.
*/
public Topology getCurrentTopology() {
return new RunTransaction
(environment, RunTransaction.readOnly, logger) {
@Override
protected Topology doTransaction(Transaction txn) {
return stores.getTopology(txn);
}
}.run();
}
/**
* The concurrency protocol for Parameters is that we give out read-only
* copies. Updates happen only through methods on the Admin instance.
*
* Updates to parameters objects that occur in the course of plan execution
* happen in the various flavors of the savePlanResults method. These
* updates are serialized with respect to each other by the Planner.
*
* Parameters updates that are not controlled by the Planner include
* changes to StorageNodePools, which occur directly through the public
* Admin interface, in response to operator actions. The Planner requires
* that the contents of a StorageNodePool remain unchanged during the
* course of plan execution that involves the pool. This constraint is
* enforced by a read/write lock on each StorageNodePool object.
*/
public Parameters getCurrentParameters() {
return new RunTransaction
(environment, RunTransaction.readOnly, logger) {
@Override
protected Parameters doTransaction(Transaction txn) {
return readParameters(txn);
}
}.run();
}
public AdminServiceParams getParams() {
return myParams;
}
/**
* Saves this value for use as the next plan id. This value is retrieved
* from the database and given to the Planner at construction.
*/
public void saveNextId(int nextId) {
memo.setPlanId(nextId);
logger.log(Level.FINE, "Storing Memo, planId = {0}", nextId);
new RunTransaction(environment,
RunTransaction.sync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
stores.putMemo(txn, memo);
return null;
}
}.run();
}
public int getNextId() {
if (memo == null) {
return 0;
}
return memo.getPlanId();
}
/**
* A convenience for the methods that all save the plan using CAUSE_CREATE
* and return the id. This is public to allow Planner to use it.
*/
public int saveCreatePlan(final Plan plan) {
savePlan(plan, CAUSE_CREATE);
return plan.getId();
}
/**
* Persists the plan. It may or may not be finished. Plans can be saved
* concurrently by concurrently executing tasks, so this method requires
* synchronization.
*/
public void savePlan(final Plan plan, String cause) {
logger.log(Level.FINE, "Saving {0} {1}", new Object[]{plan, cause});
/*
* Assert that this plan was either terminal, or in the Planner's
* cache, before storing it.
*/
if (getPlanner().getCachedPlan(plan.getId()) == null &&
!plan.getState().isTerminal()) {
throw new IllegalStateException
("Attempting to save plan that is not in the Planner's " +
"cache " + plan);
}
new RunTransaction(environment, RunTransaction.sync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
synchronized(plan) {
stores.putPlan(txn, plan);
}
return null;
}
}.run();
}
/**
* Notes on topology saving methods.
* 1. all new topologies must have version numbers greater than the current
* topology.
* 2. all updates to Topology must be transactional and atomic. The means
* that updating methods must be synchronized and database updates are
* performed in a single transaction.
*/
/**
* Verifies that the version of the new Topology is higher than that of
* the current Topology. The topology version should ascend with every save.
* If it does not, something unexpected is happening.
*/
private void checkTopoVersion(Topology newTopo) {
final Topology current = getCurrentTopology();
if (newTopo.getSequenceNumber() <= current.getSequenceNumber()) {
throw new IllegalStateException
("Only save newer topologies. Current version=" +
current.getSequenceNumber() + " new version=" +
newTopo.getSequenceNumber());
}
}
/**
* Persists a new topology after an elasticity plan runs.
* See note above regarding synchronization.
*/
public synchronized void saveTopo(Topology topology,
DeploymentInfo info,
Plan plan) {
checkTopoVersion(topology);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and ArbNode params after deploying a single ARB.
* See note above regarding synchronization.
*/
public synchronized
void saveTopoAndARBParam(Topology topology,
DeploymentInfo info,
ArbNodeParams anp,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
parameters.add(anp);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and RepNode params after deploying a single RN.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndRNParam(Topology topology,
DeploymentInfo info,
RepNodeParams rnp,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
parameters.add(rnp);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology after a DeployDatacenterPlan runs.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndParams(Topology topology,
DeploymentInfo info,
DatacenterParams params,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
parameters.add(params);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and params after a DeploySNPlan runs.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndParams(Topology topology,
DeploymentInfo info,
StorageNodeParams snp,
GlobalParams gp,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
parameters.add(snp);
if (gp != null) {
updateAndNotify(gp);
}
/* Add the new SN to the pool of all storage nodes */
final StorageNodePool pool =
parameters.getStorageNodePool(Parameters.DEFAULT_POOL_NAME);
pool.add(snp.getStorageNodeId());
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and removes an SN from the params and storage
* node pools, as one atomic action. Because our policy is to restrict
* modifying the Parameters instance to the Admin, we can only supply
* a StorageNodeId and ask the Admin to do the work.
*
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndRemoveSN(Topology topology,
DeploymentInfo info,
StorageNodeId target,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
/* Remove the snId from the params and all pools. */
parameters.remove(target);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and removes the specified RepNode from params.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndRemoveRN(Topology topology,
DeploymentInfo info,
RepNodeId targetRN,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
/* Remove the rnId from the params and all pools. */
RepNodeParams rnp = parameters.remove(targetRN);
storeTopoAndParams(topology, info, plan);
parameterChangeTracker.notifyListeners(rnp.getStorageNodeId());
}
/**
* Persists a new topology and removes the specified ArbNode from params.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndRemoveAN(Topology topology,
DeploymentInfo info,
ArbNodeId targetAN,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
/* Remove the anId from the params and all pools. */
parameters.remove(targetAN);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and params after RemoveDatacenterPlan runs.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndRemoveDatacenter(Topology topology,
DeploymentInfo info,
DatacenterId targetId,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
/* Remove the targetId from the params. */
parameters.remove(targetId);
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new topology and params after a MigrateStorageNode plan runs.
* See note above regarding synchronization.
*/
public synchronized void saveTopoAndParams(Topology topology,
DeploymentInfo info,
Set repNodeParams,
Set adminParams,
Set arbNodeParams,
Plan plan) {
checkIfReadonlyAdmin();
checkTopoVersion(topology);
/*
* Use parameters.update rather than add, because we may be
* re-executing this upon plan retry.
*/
for (RepNodeParams rnParams : repNodeParams) {
updateAndNotify(rnParams);
}
for (AdminParams ap : adminParams) {
parameters.update(ap);
}
for (ArbNodeParams arbParams : arbNodeParams) {
updateAndNotify(arbParams);
}
storeTopoAndParams(topology, info, plan);
}
/**
* Persists a new set of params atomically after a MigrateStorageNode plan
* runs.
*/
public synchronized void saveParams(Set repNodeParams,
Set adminParams,
Set arbNodeParams) {
checkIfReadonlyAdmin();
/*
* Use parameters.update rather than add, because we may be
* re-executing this upon plan retry.
*/
for (RepNodeParams rnParams : repNodeParams) {
updateAndNotify(rnParams);
}
for (AdminParams ap : adminParams) {
parameters.update(ap);
}
for (ArbNodeParams arbParams : arbNodeParams) {
updateAndNotify(arbParams);
}
storeParameters();
}
/**
* Updates the current topology stored within the AdminDB to reflect
* the migration of the specified partition to a new shard. Done atomically
* within a single transaction because partition migration can be executed
* in parallel.
*
* If the topology is updated true is returned, otherwise false is returned.
*/
public synchronized boolean updatePartition(final PartitionId partitionId,
final RepGroupId targetRGId,
final DeploymentInfo info,
final Plan plan) {
return new RunTransaction
(environment, RunTransaction.sync, logger) {
@Override
protected Boolean doTransaction(Transaction txn) {
/*
* Be sure to take the plan mutex before any JE locks are
* acquired in this transaction, per the synchronization
* hierarchy rules.
*/
Topology t = null;
synchronized (plan) {
t = stores.getTopology(txn);
/*
* If the partition is already at the target, there is no
* need to update the topology.
*/
if (t.get(partitionId).getRepGroupId().equals(targetRGId)){
return false;
}
t.updatePartition(partitionId, targetRGId);
/*
* Now that the topo has been modified, inform the plan that
* the topology is being updated. Save the plan if needed.
*/
if (plan.updatingMetadata(t)) {
stores.putPlan(txn, plan);
}
}
stores.putTopology(txn, new RealizedTopology(t, info));
return true;
}
}.run();
}
/**
* Persists new RepNodeParams following a ChangeParamsPlan.
*/
public synchronized void updateParams(RepNodeParams rnp) {
checkIfReadonlyAdmin();
updateAndNotify(rnp);
storeParameters();
}
/**
* Persists new ArbNodeParams following a ChangeParamsPlan.
*/
public synchronized void updateParams(ArbNodeParams anp) {
checkIfReadonlyAdmin();
updateAndNotify(anp);
storeParameters();
}
/**
* Persists new params following successful deployment of a Storage Node.
*/
public synchronized void updateParams(StorageNodeParams snp,
GlobalParams gp) {
checkIfReadonlyAdmin();
updateAndNotify(snp);
if (gp != null) {
updateAndNotify(gp);
}
storeParameters();
}
/**
* Persists updated AdminParams after a successful deployment.
*/
public synchronized void updateParams(AdminParams ap) {
checkIfReadonlyAdmin();
parameters.update(ap);
storeParameters();
}
/**
* Persists new GlobalParams following a ChangeGlobalSecurityPlan.
*/
public synchronized void updateParams(GlobalParams gp) {
checkIfReadonlyAdmin();
updateAndNotify(gp);
storeParameters();
}
/**
* Persists new AdminParams following successful deployment of a new
* Admin. The admin must not yet exist in the params.
*/
public synchronized void addAdminParams(AdminParams ap) {
checkIfReadonlyAdmin();
final int id = ap.getAdminId().getAdminInstanceId();
logger.log(Level.FINE, "Saving new AdminParams[{0}]", id);
final int nAdmins = getAdminCount();
if (id <= nAdmins) {
throw new NonfatalAssertionException
("Attempting to add an AdminParams " +
"for an existing Admin. Id=" + id + " nAdmins=" + nAdmins);
}
parameters.add(ap);
storeParameters();
}
/**
* Removes the parameters associated with the specified AdminId.
*/
public synchronized void removeAdminParams(AdminId aid) {
checkIfReadonlyAdmin();
logger.log(Level.FINE, "Removing AdminParams[{0}]", aid);
if (parameters.get(aid) == null) {
throw new MemberNotFoundException
("Removing nonexistent params for admin " + aid);
}
final int nAdmins = getAdminCount();
if (nAdmins == 1) {
throw new NonfatalAssertionException
("Attempting to remove the sole Admin instance" + aid);
}
parameters.remove(aid);
storeParameters();
}
/**
* Removes a RepNode and save changed topology.Done atomically within a
* single transaction because removing RepNode can be executed in parallel.
*
* If the topology is updated true is returned, otherwise false is returned.
*/
public synchronized boolean removeRepNodeAndSaveTopo(
final RepNodeId target,
final DeploymentInfo info,
final Plan plan) {
checkIfReadonlyAdmin();
return new RunTransaction
(environment, RunTransaction.sync, logger) {
@Override
protected Boolean doTransaction(Transaction txn) {
/*
* Be sure to take the plan mutex before any JE locks are
* acquired in this transaction, per the synchronization
* hierarchy rules.
*/
Topology topo = null;
synchronized (plan) {
topo = stores.getTopology(txn);
try {
/* Remove RepNode from topology. */
topo.remove(target);
} catch (IllegalArgumentException mnfe) {
/*
* This would happen if the plan was interrupted and
* re-executed.
*/
logger.fine("The RN " + target +
" was not found in the repgroup.");
}
try {
/* Remove relevant parameters */
RepNodeParams rnp = parameters.remove(target);
storeParameters();
parameterChangeTracker.notifyListeners(
rnp.getStorageNodeId());
} catch (NonfatalAssertionException nae) {
/*
* This would happen if the plan was interrupted and
* re-executed.
*/
logger.fine("Attempt to remove a nonexistent " +
"RepNodesParams for RepNode " + target);
}
/*
* Now that the topo has been modified, inform the plan that
* the topology is being updated. Save the plan if needed.
*/
if (plan.updatingMetadata(topo)) {
stores.putPlan(txn, plan);
}
}
stores.putTopology(txn, new RealizedTopology(topo, info));
return true;
}
}.run();
}
/**
* Adds a new storage node pool using the specified name.
*/
public synchronized void addStorageNodePool(String name) {
checkIfReadonlyAdmin();
if (parameters.getStorageNodePool(name) != null) {
return;
}
parameters.addStorageNodePool(name);
logger.info("Created Storage Node Pool: " + name);
storeParameters();
}
/**
* Clone an existing storage node pool as a new storage node pool.
*
* @param name the name of the new storage node pool
* @param source the name of the source storage node pool
*/
synchronized void cloneStorageNodePool(String name, String source) {
checkIfReadonlyAdmin();
if (parameters.getStorageNodePool(name) != null) {
throw new IllegalCommandException
("Target already exists: " + name, ErrorMessage.NOSQL_5200,
CommandResult.NO_CLEANUP_JOBS);
}
final StorageNodePool pool = parameters.addStorageNodePool(name);
final StorageNodePool sourcePool =
parameters.getStorageNodePool(source);
if (sourcePool == null) {
throw new IllegalCommandException("No such Storage Node Pool: " +
source, ErrorMessage.NOSQL_5200,
CommandResult.NO_CLEANUP_JOBS);
}
for (StorageNodeId snId : sourcePool) {
pool.add(snId);
}
logger.info("Cloned Storage Node Pool " + source + " as: " + name);
storeParameters();
}
/**
* Removes the named storage node pool. It must exist.
*/
public synchronized void removeStorageNodePool(String name) {
checkIfReadonlyAdmin();
if (parameters.getStorageNodePool(name) == null) {
throw new IllegalCommandException
("Attempt to remove a nonexistent StorageNodePool.");
}
parameters.removeStorageNodePool(name);
logger.info("Removed Storage Node Pool: " + name);
storeParameters();
}
/**
* Adds the specified storage node to the named pool. Both must exist.
*/
public synchronized void addStorageNodeToPool(String name,
StorageNodeId snId) {
checkIfReadonlyAdmin();
final StorageNodePool pool = parameters.getStorageNodePool(name);
if (pool == null) {
throw new IllegalCommandException("No such Storage Node Pool: " +
name, ErrorMessage.NOSQL_5200,
CommandResult.NO_CLEANUP_JOBS);
}
/* Verify that the storage node exists. */
final Topology current = getCurrentTopology();
final StorageNode sn = current.get(snId);
if (sn == null) {
throw new IllegalCommandException
("Attempt to add nonexistent StorageNode " + snId +
" to a pool.",
ErrorMessage.NOSQL_5200, CommandResult.NO_CLEANUP_JOBS);
}
logger.info("Added Storage Node " + snId.toString() + " to pool: " +
name);
pool.add(snId);
storeParameters();
}
/**
* Removes the specified storage node from the named pool. Both must exist.
*/
synchronized void removeStorageNodeFromPool(String name,
StorageNodeId snId) {
checkIfReadonlyAdmin();
final StorageNodePool pool = parameters.getStorageNodePool(name);
if (pool == null) {
throw new IllegalCommandException("No such Storage Node Pool: " +
name, ErrorMessage.NOSQL_5200,
CommandResult.NO_CLEANUP_JOBS);
}
/* Verify that the storage node exists. */
final Topology current = getCurrentTopology();
final StorageNode sn = current.get(snId);
if (sn == null) {
throw new IllegalCommandException
("Attempt to remove nonexistent StorageNode " + snId +
" to a pool.",
ErrorMessage.NOSQL_5200, CommandResult.NO_CLEANUP_JOBS);
}
logger.info("Removed Storage Node " + snId.toString() + " to pool: " +
name);
pool.remove(snId);
storeParameters();
}
/**
* Replaces all of the storage nodes in the named pool with the storage
* nodes specified in the list of StorageNodeIds. The pool, and all
* referenced storage nodes, must exist.
*/
public synchronized void replaceStorageNodePool(String name,
List ids) {
checkIfReadonlyAdmin();
final StorageNodePool pool = parameters.getStorageNodePool(name);
if (pool == null) {
throw new IllegalCommandException("No such Storage Node Pool: " +
name);
}
final Topology current = getCurrentTopology();
for (StorageNodeId id : ids) {
/* Verify that the storage node exists. */
final StorageNode sn = current.get(id);
if (sn == null) {
throw new IllegalCommandException
("Attempt to add nonexistent StorageNode to a pool.");
}
}
pool.clear();
for (StorageNodeId id : ids) {
pool.add(id);
}
storeParameters();
}
/**
* Create a storage node pool that contains all of the SNs in the current
* topology.
*
* @param topology the current topology
* @param targetSNPool the target storage node pool
*/
synchronized StorageNodePool createTargetStorageNodePool(
final Topology topology,
final String targetSNPool) {
checkIfReadonlyAdmin();
StorageNodePool pool = parameters.addStorageNodePool(targetSNPool);
List snIds = topology.getStorageNodeIds();
for (StorageNodeId snId : snIds) {
pool.add(snId);
}
logger.info("Created Target Storage Node Pool");
storeParameters();
return pool;
}
/**
* Adds a named topology candidate that has been created by a copy
* operation. Relies on the transaction to avoid duplicate inserts.
*
* @param candidateName the name of the new candidate
* @param newTopo the topology to copy
* @throws IllegalCommandException if an existing topology candidate is
* found with the specified name
* @throws DatabaseException if there is a problem modifying the database
*/
public void addTopoCandidate(final String candidateName,
final Topology newTopo) {
new RunTransaction(environment, RunTransaction.writeNoSync,
logger) {
@Override
protected Void doTransaction(Transaction txn) {
if (stores.getTopologyStore().candidateExists(txn,
candidateName)) {
throw new IllegalCommandException
(candidateName +
" already exists and can't be added as a new" +
" topology candidate.");
}
stores.getTopologyStore().
putCandidate(txn,
new TopologyCandidate(candidateName, newTopo));
return null;
}
}.run();
}
/**
* Adds a named topology candidate that has been created by a copy of
* an existing candidate. Relies on the transaction to avoid duplicate
* inserts.
*/
void addTopoCandidate(final String candidateName,
final String sourceCandidateName) {
final Topology source =
getCandidate(sourceCandidateName).getTopology();
addTopoCandidate(candidateName, source);
}
/**
* Deletes a topology candidate.
*
* @param candidateName the name of the candidate to delete
* @throws IllegalCommandException if @param name is not associated with
* a topology.
* @throws DatabaseException if there is a problem with the deletion or
* if a foreign key constraint is violated because the candidate is
* referenced by a deployed topology. TODO: how to capture that case, and
* present the error differently?
*/
public void deleteTopoCandidate(final String candidateName) {
new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
final TopologyStore topoStore = stores.getTopologyStore();
if (!topoStore.candidateExists(txn, candidateName)) {
throw new IllegalCommandException
(candidateName + " doesn't exist");
}
topoStore.deleteCandidate(txn, candidateName);
return null;
}
}.run();
}
/**
* Create a new, initial layout, based on the SNs available in the SN pool.
* @return a String with a status message suitable for the CLI.
*/
public String createTopoCandidate(final String topologyName,
final String snPoolName,
final int numPartitions,
final boolean json,
final short jsonVersion) {
checkIfReadonlyAdmin();
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final StorageNodePool pool =
parameters.getStorageNodePool(snPoolName);
if (pool == null) {
throw new IllegalCommandException
("Storage Node Pool " + snPoolName + " not found.");
}
Parameters currentParams = getCurrentParameters();
TopologyCandidate candidate = buildCandidate(txn,
topologyName,
pool,
numPartitions,
currentParams);
if (json) {
if (jsonVersion ==
SerialVersion.ADMIN_CLI_JSON_V1_VERSION) {
return candidate.displayAsJson(currentParams);
}
return TopologyPrinter.printTopologyJson(
candidate.getTopology(), currentParams,
TopologyPrinter.all, false).toString();
}
return "Created: " + candidate.getName();
}
}.run();
}
private TopologyCandidate buildCandidate(Transaction txn,
String topologyName,
StorageNodePool pool,
int numPartitions,
Parameters currentParams) {
final TopologyBuilder tb =
new TopologyBuilder(getCurrentTopology(),
topologyName,
pool,
numPartitions,
currentParams,
myParams);
final TopologyCandidate candidate = tb.build();
final TopologyStore topoStore = stores.getTopologyStore();
if (topoStore.candidateExists(txn, topologyName)) {
final TopologyCandidate preexistingCandidate =
getCandidate(txn, topologyName);
if (!preexistingCandidate.getTopology().
layoutEquals(candidate.getTopology())) {
throw new IllegalCommandException(
"Topology candidate " + topologyName + " exists " +
"but with a different layout");
}
if (!preexistingCandidate.getDirectoryAssignments().equals(
candidate.getDirectoryAssignments())) {
throw new IllegalCommandException(
"Topology candidate " + topologyName + " exists " +
"but with different mount point assignment");
}
} else {
topoStore.putCandidate(txn, candidate);
}
return candidate;
}
/**
* Retrieves the named topology candidate from the topo store.
* @throws IllegalCommandException if the candidate does not exist.
*/
public TopologyCandidate getCandidate(final String candidateName) {
return new RunTransaction(environment,
RunTransaction.readOnly,
logger) {
@Override
protected TopologyCandidate doTransaction(Transaction txn) {
return getCandidate(txn, candidateName);
}
}.run();
}
/**
* Check that the specified topology exists, throw IllegalCommand if
* it doesn't.
*/
private TopologyCandidate getCandidate(Transaction txn,
String candidateName) {
final TopologyCandidate candidate =
stores.getTopologyStore().getCandidate(txn, candidateName);
if (candidate == null) {
throw new IllegalCommandException
("Topology " + candidateName + " does not exist. Use " +
" topology list to see all available candidate");
}
return candidate;
}
/**
* Present a list of candidate names.
*/
public List listTopoCandidates() {
return new RunTransaction>(environment,
RunTransaction.readOnly, logger) {
@Override
protected List doTransaction(Transaction txn) {
return stores.getTopologyStore().getCandidateNames(txn);
}
}.run();
}
/**
* Execute the redistribution algorithms on a given topo candidate,
* updating its layout to use as much of the available store SNs as
* possible.
* @param candidateName the name of the target candidate.
* @param snPoolName represents the resources to be used by in the new
* layout.
* @return a message indicating success or failure, to be reported by the
* CLI.
*/
public String redistributeTopology(final String candidateName,
final String snPoolName) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
/* check that the target candidate exists.*/
final TopologyCandidate candidate =
getCandidate(txn, candidateName);
final StorageNodePool pool = freezeSNPool(snPoolName);
try {
/* Create and save a new layout against the specified SNs */
final TopologyBuilder tb =
new TopologyBuilder(candidate, pool,
getCurrentParameters(),
myParams);
final TopologyCandidate newCandidate = tb.build();
stores.getTopologyStore().putCandidate(txn, newCandidate);
return "Redistributed: " + newCandidate.getName();
} finally {
pool.thaw();
}
}
}.run();
}
public String createFailoverTopology(final String candidateName,
final String snPoolName,
Set newPrimaryZones,
Set offlineZones) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final StorageNodePool pool = freezeSNPool(snPoolName);
try {
return doFailoverProcessing(candidateName,
pool,
newPrimaryZones,
offlineZones,
txn);
} finally {
pool.thaw();
}
}
}.run();
}
/**
* Execute the contraction algorithms on a given topo candidate, updating
* its layout to use as much of the available store SNs as possible.
* @param candidateName the name of the target candidate.
* @param snPoolName represents the resources to be used by in the new
* layout.
* @return a message indicating success or failure, to be reported by the
* CLI.
*/
String contractTopology(final String candidateName,
final String snPoolName) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
/* check that the target candidate exists.*/
final TopologyCandidate candidate =
getCandidate(txn, candidateName);
final StorageNodePool pool = freezeSNPool(snPoolName);
try {
/* Create and save a new layout against the specified SNs */
final TopologyBuilder tb =
new TopologyBuilder(candidate,
pool,
getCurrentParameters(),
myParams);
final TopologyCandidate newCandidate = tb.contract();
/* Save topology candidate */
stores.getTopologyStore().putCandidate(txn, newCandidate);
return "Contracted: " + newCandidate.getName();
} finally {
pool.thaw();
}
}
}.run();
}
/**
* Execute the remove shard on source topology with mentioned failed
* shard argument and generate new topology layout to remove RN's and
* SNs if any associated with the failed shard.
* @param failedShard the Id of failed shard
* @param candidateName the name of target topology
* @return a message indicating success or failure, to be reported by the
* CLI.
*/
String removeFailedShard(final RepGroupId failedShard,
final String candidateName) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
/* Generate a cloned target candidate topology */
final Topology topology = getCurrentTopology();
addTopoCandidate(candidateName,topology);
/* Check that the target candidate exists.*/
final TopologyCandidate candidate =
getCandidate(txn, candidateName);
/* Generated new snPool from topology. We need to make sure
* that targetSNPool name is unique from already existing
* pools if customer runs new remove-shard commands, but use the
* same pool if the same plan is restarted. Each plan has a
* unique topology candidate name, so that suffix will give each
* plan a unique pool.
*
*/
String poolname = PoolCommand.INTERNAL_NAME_PREFIX +
"remove-shard-" + candidateName;
StorageNodePool targetSNpool = createTargetStorageNodePool(
topology, poolname);
/* Removing SN's from StorageNodePool associated with failed
* shard if there are no other running RNs and capacity = 1
*/
List removeSNIds = new ArrayList();
List snIds = topology.getStorageNodeIds();
for (StorageNodeId snId : snIds) {
Set snRNIds = topology.getHostedRepNodeIds(snId);
/* If snRNId belong to failed shard and snId capacity = 1
* then add snId to remove list from storage node pool.
*
*/
if (snRNIds.size() > 1) {
/* SN has capacity greater than 1 hence not
* adding it to remove SN pool list even
* if SN has RN from failed shard.
*/
continue;
}
for (RepNodeId snRNId : snRNIds) {
if (snRNId.getGroupId() == failedShard.getGroupId()) {
removeSNIds.add(snId);
}
}
}
for (StorageNodeId snId : removeSNIds) {
removeStorageNodeFromPool(poolname, snId);
}
targetSNpool = freezeSNPool(poolname);
try {
/* Create and save a new layout against the specified SNs */
final TopologyBuilder tb =
new TopologyBuilder(candidate,
targetSNpool,
getCurrentParameters(),
myParams);
final TopologyCandidate newCandidate =
tb.removeFailedShard(failedShard);
/* Save topology candidate */
stores.getTopologyStore().putCandidate(txn, newCandidate);
return "Removed failed shard " + failedShard + " in " +
newCandidate.getName();
} finally {
targetSNpool.thaw();
}
}
}.run();
}
/**
* Make the specified topology compliant with all the topology rules.
*/
public String rebalanceTopology(final String candidateName,
final String snPoolName,
final DatacenterId dcId) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final TopologyCandidate candidate =
getCandidate(txn, candidateName);
final StorageNodePool pool = freezeSNPool(snPoolName);
try {
/* Create and save a new layout against the specified SNs */
final TopologyBuilder tb =
new TopologyBuilder(candidate, pool,
getCurrentParameters(), myParams);
final TopologyCandidate newCandidate = tb.rebalance(dcId);
stores.getTopologyStore().putCandidate(txn, newCandidate);
return "Rebalanced: " + newCandidate.getName();
} finally {
pool.thaw();
}
}
}.run();
}
/**
* Check that the specified snPool exists, and freeze it so it is stable
* while the topology transformation is going on.
*/
private StorageNodePool freezeSNPool(String snPoolName) {
checkIfReadonlyAdmin();
final StorageNodePool pool = parameters.getStorageNodePool(snPoolName);
if (pool == null) {
throw new IllegalCommandException
("Storage Node Pool " + snPoolName + " not found.");
}
pool.freeze();
return pool;
}
/**
* Get a new copy of the Parameters from the database.
*/
private Parameters readParameters(Transaction txn) {
final Parameters params = stores.getParameters(txn);
if (params == null) {
logger.fine("Parameters not found");
} else {
logger.fine("Parameters fetched");
}
return params;
}
/**
* Get a new copy of the Memo from the database.
*/
private Memo readMemo(Transaction txn) {
final Memo m = stores.getMemo(txn);
if (m == null) {
logger.fine("Memo not found");
} else {
logger.fine("Memo fetched");
}
return m;
}
/**
* Save a new bunch of event records, and the latest timestamp, in the same
* transaction.
*/
public void storeEvents(final List events,
final EventRecorder.LatestEventTimestamps let) {
/*
* Use a lightweight commit for critical-event-storing transactions.
*/
RunTransaction transaction =
new RunTransaction(environment, RunTransaction.noSync, logger) {
/* Supply handlers to propagate the the exceptions. */
@Override
void handler(UnknownMasterException ume) {
throw ume;
}
@Override
void handler(ReplicaWriteException rwe) {
throw rwe;
}
@Override
protected Void doTransaction(Transaction txn) {
final EventStore eventStore = stores.getEventStore();
for (CriticalEvent pe : events) {
eventStore.putEvent(txn, pe);
/*
* Age the store periodically.
*/
if (eventStoreCounter++ % eventStoreAgingFrequency == 0) {
eventStore.ageStore(txn, getEventExpiryAge());
}
}
/*
* The memo is being modified without synchronization, which is
* ok because the method is guaranteed to be called by a single
* eventRecorder thread.
*/
memo.setLatestEventTimestamps(let);
stores.putMemo(txn, memo);
return null;
}
};
try {
transaction.run();
} catch (AdminNotReadyException anre) {
/*
* ANRE is thrown during upgrade from DPL to non-DPL stores while
* the Admin is in read-only mode
*/
logger.info(anre.getMessage());
} catch (ReplicaWriteException | UnknownMasterException e) {
logger.log(Level.FINE,
"Ignoring exception resulting from write operation " +
"when not the master: {0}",
e.getMessage());
} catch (ThreadInterruptedException | IllegalStateException e) {
/**
* The thread can be interrupted while closing, or an
* IllegalStateException wrapping, say an
* InsufficientReplicasException, could be thrown by
* RunTransaction.
*
* Future: may be better to handle InsufficientReplicasException
* (and perhaps other exceptions) on shutdown in RunTransaction
* itself.
*/
if (!closing) {
throw e;
}
logger.log(Level.FINE,
"Ignoring interrupt exception storing events " +
"while shutting down: {0}",
e.getMessage());
}
}
/**
* Return a list of event records satisfying the given criteria.
* For endTime, zero means the current time.
* For type, null means all types.
*/
public List getEvents(final long startTime,
final long endTime,
final CriticalEvent.EventType type) {
return new RunTransaction>
(environment, RunTransaction.readOnly, logger) {
@Override
protected List doTransaction(Transaction txn) {
return stores.getEventStore().getEvents(txn, startTime, endTime,
type);
}
}.run();
}
/**
* Return a single event that has the given database key.
*/
public CriticalEvent getOneEvent(final String eventId) {
return new RunTransaction
(environment, RunTransaction.readOnly, logger) {
@Override
protected CriticalEvent doTransaction(Transaction txn) {
return stores.getEvent(txn, eventId);
}
}.run();
}
/**
* Approve the given plan for execution.
*/
public synchronized void approvePlan(int id) {
final Plan p = getAndCheckPlan(id);
planner.approvePlan(id);
savePlan(p, CAUSE_APPROVE);
logger.info(p + " approved");
}
/**
* Cancel the given plan.
*/
public synchronized void cancelPlan(int id) {
final Plan p = getAndCheckPlan(id);
planner.cancelPlan(id);
savePlan(p, CAUSE_CANCEL);
logger.info(p + " canceled");
}
/**
* Interrupt the given plan. Must be synchronized because
* planner.interruptPlan() will take the plan mutex, and then may
* attempt to save the plan, which requires the Admin mutex. Since the
* lock hierarchy is Admin→plan, we need to take the Admin mutex now.
* [#22161]
*/
public synchronized void interruptPlan(int id) {
final Plan p = getAndCheckPlan(id);
planner.interruptPlan(id);
savePlan(p, CAUSE_INTERRUPT);
logger.info(p + " interrupted");
}
/**
* Start the execution of a plan. The plan will proceed asynchronously.
*
* To check the status and final result of the plan, the caller should
* examine the state and execution history. AwaitPlan() can be used to
* synchronously wait for the end of the plan, and to learn the plan status.
* AssertSuccess() can be used to generate an exception if the plan
* fails, which will throw a wrapped version of the original exception
* @param force TODO
* @throws IllegalCommandException if there is a problem with executing the
* plan.
*/
public void executePlan(int id, boolean force) {
int MAX_RETRY_COUNT = 60;
long RETRY_WAIT_MS = 1000;
int retryCount = 0;
boolean done = false;
while (!done) {
try {
executePlanInternal(id, force);
done = true;
} catch (PlanLocksHeldException e) {
retryCount++;
if (retryCount > MAX_RETRY_COUNT) {
throw new IllegalCommandException(e.getMessage(),
ErrorMessage.NOSQL_5400,
CommandResult.PLAN_CANCEL);
}
try {
Thread.sleep(RETRY_WAIT_MS);
} catch (InterruptedException d) {
}
}
}
}
private synchronized void executePlanInternal(int id, boolean force)
throws PlanLocksHeldException {
try {
final Plan p = getAndCheckPlan(id);
planner.executePlan(p, force);
} catch (PlanLocksHeldException e) {
planner.clearLocks(id);
throw e;
}
}
/**
* Start the execution of a plan. The focus is on getting a set of actions
* to occur, so if there s a conflicting plan which already owns the
* plan catalog locks needed by this plan, take extra steps to handle
* the lock conflict.
*
* If there is a lock conflict, see if that conflicting plan is the logical
* equivalent of the original plan. If it is, return the id of the
* conflicting plan, which serves as an adequate proxy for the desired
* actions.
*
* @return the id of whichever plan is going to carry out the desired
* action, be that the original plan, or the conflicting plan.
* @throws PlanLocksHeldException the plan cannot obtain its catalog locks.
*/
public synchronized int executePlanOrFindMatch(int id)
throws PlanLocksHeldException {
final Plan p = getAndCheckPlan(id);
try {
planner.executePlan(p, false /* force */);
return id;
} catch (PlanLocksHeldException e) {
int runningPlanId = e.getOwningPlanId();
Plan runningPlan = getPlanById(runningPlanId);
if (runningPlan.logicalCompare(p)) {
return runningPlanId;
}
throw e;
}
}
/**
* Waits for the specified plan to finish. If the timeout period is
* specified and exceeded or the wait is interrupted, the call will end.
*
* @return the current plan state.
* @throws AdminNotReadyException if the Admin cannot service the request
*/
public Plan.State awaitPlan(int id, int timeout, TimeUnit timeoutUnit) {
final Plan p = getAndCheckPlan(id);
final PlanWaiter waiter = new PlanWaiter();
p.addWaiter(waiter);
logger.log(Level.FINE, "Waiting for {0}, timeout={1} {2}",
new Object[]{p, timeout, timeoutUnit});
try {
if (timeout == 0) {
/* Wait until the plan finishes or the planner is shutdown */
while (!waiter.waitForPlanEnd(10, TimeUnit.SECONDS)) {
/*
* Get the planner from the plan, since the Admin's planner
* may have changed.
*/
if (p.getPlanner().isShutdown()) {
throw new AdminNotReadyException("Cannot service " +
"request, admin " +
"is shutdown");
}
}
/*
* If the plan has ended in the RUNNING state, then the plan
* needed to restart this Admin. Throw an exception and let the
* caller retry at the new master. The plan should resume
* execution there.
*/
if (p.getState() == Plan.State.RUNNING) {
throw new AdminNotReadyException("Cannot service " +
"request, admin " +
"is shutting down");
}
} else {
if (!waiter.waitForPlanEnd(timeout, timeoutUnit)) {
logger.log(Level.INFO, "Timed out (timeout {0} ms)" +
" waiting for {1} to finish, state={2} ",
new Object[] {timeoutUnit.toMillis(timeout),
p, p.getState()});
}
}
} catch (InterruptedException e) {
logger.log
(Level.INFO,
"Interrupted while waiting for {0} to finish, end state={1}",
new Object[] {p, p.getState()});
} finally {
p.removeWaiter(waiter);
final Level level =
(p.getState() == Plan.State.SUCCEEDED) ? Level.FINE :
Level.INFO;
logger.log(level, "Waiting for {0} ended, state={1}",
new Object[] {p, p.getState()});
}
return p.getState();
}
/**
* Throw an operation fault exception if the plan did not succeed.
*/
public void assertSuccess(int planId) {
final Plan p = getAndCheckPlan(planId);
final Plan.State status = p.getState();
if (status == Plan.State.SUCCEEDED) {
return;
}
final ExceptionTransfer transfer = p.getLatestRunExceptionTransfer();
final String msg = p + " ended with " + status;
if (transfer != null) {
if (transfer.getFailure() != null) {
final Throwable failure = transfer.getFailure();
final OperationFaultException newEx =
new OperationFaultException(msg + ": " +
transfer.getDescription(),
failure);
newEx.setStackTrace(failure.getStackTrace());
throw newEx;
} else if (transfer.getDescription() != null) {
/* No exception saved, but there is some error information */
throw new OperationFaultException(msg + ": " +
transfer.getDescription());
}
}
throw new OperationFaultException(msg);
}
/**
* The concurrency protocol for getPlanbyId is that we give out references
* to the cached Plan of record, if the Plan is in a non-terminal, or
* active, state. If the plan is active, then it will be found in the
* Planner's catalog, which is consulted first in this method.
*
* If the plan is not active, we read it straight from the database. For
* such plans, it's possible for this method to produce multiple instances
* of the same plan. We consider inactive plans to be read-only, so this
* should not create any synchronization problems. The read-only-ness of
* inactive plans is enforced in Admin.savePlan, which is the only means by
* which a plan is updated in the database. The terminal-state-ness of
* uncached plans is asserted below, also.
*
* public for unit test support
*/
public Plan getPlanById(final int id) {
final Plan p = getPlanner().getCachedPlan(id);
if (p != null) {
return p;
}
return new RunTransaction(environment,
RunTransaction.readOnly,
logger) {
@Override
protected Plan doTransaction(Transaction txn) {
final Plan uncachedPlan = stores.getPlanStore().getPlanById
(id, txn, getPlanner(), myParams);
if (uncachedPlan == null) {
return null;
}
if (!uncachedPlan.getState().isTerminal()) {
throw new IllegalStateException
("Found non-terminal plan that is not cached. " +
uncachedPlan);
}
return uncachedPlan;
}
}.run();
}
/**
* Get the plan, throw an exception if it doesn't exist.
*/
Plan getAndCheckPlan(int id) {
final Plan p = getPlanById(id);
if (p == null) {
throw new IllegalCommandException("Plan id " + id +
" doesn't exist");
}
return p;
}
/**
* Return copies of the most recent 100 plans. The result should be used
* only for display purposes, or, in tests for determining the existence of
* plans. The plan instances will only be valid for display, and will not
* be executable, because they will be stripped of memory heavy objects.
* @deprecated in favor of getPlanRange.
*/
@Deprecated
private static int nRecentPlans = 100;
@Deprecated
public synchronized Map getRecentPlansCopy() {
if (planner == null) {
return null;
}
return new RunTransaction>
(environment, RunTransaction.readOnly, logger) {
@Override
protected Map doTransaction(Transaction txn) {
return stores.getPlanStore().getRecentPlansForDisplay
(nRecentPlans, txn, planner, myParams);
}
}.run();
}
/**
* Retrieve the beginning and ending plan ids that satisfy the request.
*
* Returns an array of two integers indicating a range of plan id
* numbers. [0] is the first id in the range, and [1] is the last,
* inclusive.
*
* Operates in three modes:
*
* mode A requests n plans ids following startTime
* mode B requests n plans ids preceding endTime
* mode C requests a range of plan ids from startTime to endTime.
*
* mode A is signified by endTime == 0
* mode B is signified by startTime == 0
* mode C is signified by neither startTime nor endTime being == 0.
* N is ignored in mode C.
*/
public int[] getPlanIdRange(final long startTime,
final long endTime,
final int n,
final String planOwnerId) {
return new RunTransaction
(environment, RunTransaction.readOnly, logger) {
@Override
protected int[] doTransaction(Transaction txn) {
return stores.getPlanStore().getPlanIdRange(txn,
startTime, endTime, n,
planOwnerId);
}
}.run();
}
/**
* Returns a map of plans starting at firstPlanId. The number of plans in
* the map is the lesser of howMany, MAXPLANS, or the number of extant
* plans with id numbers following firstPlanId. The range is not
* necessarily fully populated; while plan ids are mostly sequential, it is
* possible for values to be skipped.
*
* The plan instances will only be valid for display, and will not
* be executable, because they will be stripped of memory heavy objects.
*/
public synchronized Map getPlanRange(final int firstPlanId,
final int howMany,
final String planOwnerId) {
if (planner == null) {
return Collections.emptyMap();
}
return new RunTransaction>
(environment, RunTransaction.readOnly, logger) {
@Override
protected Map doTransaction(Transaction txn) {
return stores.getPlanStore().getPlanRange(txn,
planner,
myParams,
firstPlanId, howMany,
planOwnerId);
}
}.run();
}
public boolean isClosing() {
return closing;
}
/**
* Store the current parameters.
*/
private void storeParameters() {
logger.fine("Storing Parameters");
new RunTransaction(environment,
RunTransaction.sync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
stores.putParameters(txn, parameters);
return null;
}
}.run();
}
/**
* Convenience method to store the current topology and parameters in the
* same transaction
*/
private void storeTopoAndParams(final Topology topology,
final DeploymentInfo info,
final Plan plan) {
assert Thread.holdsLock(this);
logger.log(Level.FINE,
"Storing parameters and topology with sequence #: {0}",
topology.getSequenceNumber());
new RunTransaction(environment,
RunTransaction.sync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
/*
* Inform the plan that the topology is being updated. Save
* the plan if needed. Be sure to take the plan mutex before
* any JE locks are acquired in this transaction, per the
* synchronization hierarchy.
*/
synchronized (plan) {
if (plan.updatingMetadata(topology)) {
stores.putPlan(txn, plan);
}
}
stores.putTopology(txn, new RealizedTopology(topology, info));
stores.putParameters(txn, parameters);
stores.putMemo(txn, memo);
return null;
}
}.run();
}
/**
* Returns the params for the specified storage node.
*/
public StorageNodeParams getStorageNodeParams(StorageNodeId targetSNId) {
checkIfReadonlyAdmin();
return parameters.get(targetSNId);
}
/**
* Creates a copy of the policy map so that it can be updated.
*/
public ParameterMap copyPolicy() {
checkIfReadonlyAdmin();
return parameters.copyPolicies();
}
/**
* Replaces the current policy map with a new one. The pattern for updating
* the policy map is:
* 1. make a copy using copyPolicy()
* 2. update the copy
* 3. replace the policy map using setPolicy().
*/
public void setPolicy(ParameterMap policyParams) {
checkIfReadonlyAdmin();
parameters.setPolicies(policyParams);
storeParameters();
}
public long getEventExpiryAge() {
return myParams.getAdminParams().getEventExpiryAge();
}
/**
* This method is used only for testing.
*/
public void setEventStoreAgingFrequency(int f) {
eventStoreAgingFrequency = f;
}
/**
* Generates a new AdminId for deploying a new Admin to the system.
*/
public AdminId generateAdminId() {
checkIfReadonlyAdmin();
return parameters.getNextAdminId();
/*
* The new value of parameters.nextAdminId will be persisted when the
* DeployAdminPlan completes and calls
* savePlanResults(List).
*/
}
/**
* Returns the number of Admin instances in the system.
*/
public int getAdminCount() {
checkIfReadonlyAdmin();
return parameters.getAdminCount();
}
/**
* Returns the params for the specified RepNode.
*/
public RepNodeParams getRepNodeParams(RepNodeId targetRepNodeId) {
checkIfReadonlyAdmin();
return parameters.get(targetRepNodeId);
}
/**
* Returns the params for the specified ArbNode.
*/
public ArbNodeParams getArbNodeParams(ArbNodeId targetArbNodeId) {
checkIfReadonlyAdmin();
return parameters.get(targetArbNodeId);
}
/**
* Returns GlobalParams for the system.
*/
public GlobalParams getGlobalParams() {
return parameters.getGlobalParams();
}
/*
* Initializes the Admin stores, checking the schema version and
* performing updates as needed.
*/
private void initStores(final boolean isMaster) {
assert Thread.holdsLock(this);
final AdminSchemaVersion schemaVersion =
new AdminSchemaVersion(Admin.this, logger);
new RunTransaction(environment, RunTransaction.sync, logger) {
@Override
protected Void doTransaction(Transaction txn) {
/*
* Check that this release is compatible with the Admin db
* schema version, should it already exist.
*/
schemaVersion.checkAndUpdateVersion(txn, isMaster, stores);
return null;
}
}.run();
stores.init(schemaVersion.openAndReadSchemaVersion(), false /*force */);
}
/*
* Enter a new replication mode. Called in the ReplicationStateChange
* thread.
*/
private synchronized void enterMode(final StateChangeEvent sce) {
/* If we are shutting down, we don't need to do anything. */
if (closing) {
return;
}
final State state = sce.getState();
switch (state) {
case MASTER:
enterMasterMode();
break;
case REPLICA:
enterReplicaMode
(new AdminId(Integer.parseInt(sce.getMasterNodeName())));
break;
case DETACHED:
if (environment.isClosed()) {
logger.info
("Admin replica is detached; envImpl is closed");
} else {
EnvironmentFailureException efe =
environment.getInvalidatingException();
if (efe != null) {
throw efe;
}
logger.info("Admin replica is detached; env is valid.");
}
/*$FALL-THROUGH$*/
case UNKNOWN:
enterDetachedMode();
break;
}
}
/*
* Transition to master mode. This happens when the master starts up from
* scratch, and when a new master is elected. Called only from the
* synchronized method enterMode, above.
*/
private void enterMasterMode() {
assert Thread.holdsLock(this);
try {
shutdownSysTableMonitor();
startupStatus.setUnready(Admin.this);
masterId = adminId;
initStores(true /* isMaster */);
eventRecorder.shutdown();
monitor.shutdown();
removeParameterListener(monitor);
monitor = new Monitor(myParams, Admin.this, getLoginManager());
eventRecorder = new EventRecorder(Admin.this);
open(); // Sets planner
monitor.setupExistingAgents(getCurrentTopology());
addParameterListener(monitor);
final Set restartSet = readAndRecoverPlans();
startupStatus.setReady(Admin.this);
restartPlans(restartSet);
startSysTableMonitor();
AdminParams ap = myParams.getAdminParams();
if (isAdminThreadEnabled(ap.isMetadataAdminThreadEnabled())) {
startMetadataAdminThread(
ap.getKVMetadataAdminCheckInterval().toMillis(),
ap.getKVMetadataAdminMaxPlanWait().toMillis());
}
if (isAdminThreadEnabled(ap.isVersionThreadEnabled())) {
startVersionUpdaterThread(
ap.getVersionCheckInterval().toMillis());
}
} catch (RuntimeException e) {
logger.info("RuntimeException while entering master mode, " +
e.getMessage());
startupStatus.setError(Admin.this, e);
}
}
/*
* Transition to replica mode. This happens on startup of a replica.
* Called only from the synchronized method enterMode, above.
*/
private void enterReplicaMode(final AdminId newMaster) {
assert Thread.holdsLock(this);
try {
shutdownSysTableMonitor();
startupStatus.setUnready(Admin.this);
masterId = newMaster;
initStores(false /* isMaster */);
eventRecorder.shutdown();
/*
* This sets the planner to null. There is no point in waiting since
* the node is no longer the master, so plans can't finish.
*/
shutdownPlanner(false, false);
monitor.shutdown();
removeParameterListener(monitor);
shutdownKVMetadataAdminThread();
shutdownVersionUpdaterThread();
startupStatus.setReady(Admin.this);
} catch (RuntimeException e) {
logger.info("RuntimeException while entering replica mode, " +
e.getMessage());
startupStatus.setError(Admin.this, e);
}
}
/*
* Transition to a mode in which we don't really know what's going on, and
* hope that it's a temporary situation.
*/
private void enterDetachedMode() {
enterReplicaMode(null);
}
public synchronized ReplicatedEnvironment.State getReplicationMode() {
return environment.getState();
}
public synchronized AdminStatus getAdminStatus() {
return new AdminStatus(
startupStatus.getStatus(this),
environment.getState(),
getIsAuthoritativeMaster(),
environment.getRepStats(StatsConfig.DEFAULT));
}
/**
* Returns whether the underlying JE environment is known to be the
* authoritative master.
*/
private synchronized boolean getIsAuthoritativeMaster() {
final RepImpl repImpl = RepInternal.getRepImpl(environment);
if (repImpl == null) {
return false;
}
final com.sleepycat.je.rep.impl.node.RepNode repNode =
repImpl.getRepNode();
if (repNode == null) {
return false;
}
return repNode.isAuthoritativeMaster();
}
public synchronized URI getMasterRmiAddress() {
final AdminId currentMaster = masterId;
if (currentMaster == null) {
return null;
}
final Parameters p = getCurrentParameters();
final AdminParams ap = p.get(currentMaster);
final StorageNodeParams snp = p.get(ap.getStorageNodeId());
try {
return
new URI("rmi", null, snp.getHostname(), snp.getRegistryPort(),
null, null, null);
} catch (URISyntaxException e) {
throw new NonfatalAssertionException
("Unexpected bad URL: " + e.getMessage(), e);
}
}
/*
* Try to build the admin master's web service URI. If fail return null.
*/
public synchronized URI getMasterWebServiceAddress() {
final AdminId currentMaster = masterId;
if (currentMaster == null) {
return null;
}
final Parameters p = getCurrentParameters();
final AdminParams ap = p.get(currentMaster);
final StorageNodeParams snp = p.get(ap.getStorageNodeId());
/* Return null if there is not web service on admin master */
final int adminWebPort = snp.getAdminWebPort();
if (adminWebPort <= 0) {
return null;
}
try {
/* Check that whether we should use https */
String schema = "http";
if (myParams != null &&
myParams.getSecurityParams() != null &&
myParams.getSecurityParams().isSecure() &&
!myParams.getSecurityParams().allTransportSSLDisabled()) {
schema = "https";
}
return new URI(schema, null, snp.getHostname(),
snp.getAdminWebPort(),
null, null, null);
} catch (URISyntaxException e) {
throw new NonfatalAssertionException
("Unexpected bad URL: " + e.getMessage(), e);
}
}
/**
* Dump the deployed transaction history.
*/
public List displayRealizedTopologies(final boolean concise) {
return new RunTransaction>
(environment, RunTransaction.readOnly, logger) {
@Override
protected List doTransaction(Transaction txn) {
return stores.getTopologyStore().displayHistory(txn, concise);
}
}.run();
}
/**
* Ensures that the proposed start time for a new plan which creates a
* RealizedTopology is a value that is > the deployment time of the
* current topology.
*
* Since the start time is the primary key for the historical collection,
* the start time for any new realized topology must be greater than
* the start time recorded for the current topology. Due to clock skew in
* the HA rep group, conceivably the start time could fail to advance if
* there is admin rep node failover.
*/
public
long validateStartTime(long proposedStartTime) {
return stores.getTopologyStore().validateStartTime(proposedStartTime);
}
/**
* Provide information about the plan's current or
* last execution run. Meant for display purposes.
*/
String getPlanStatus(int planId, long options, boolean json) {
final Plan p = getAndCheckPlan(planId);
final StatusReport report = new StatusReport(p, options);
if (json) {
ObjectNode jsonTop = report.displayAsJson();
try {
CommandJsonUtils.updateNodeWithResult(jsonTop, p.getOperation(),
p.getCommandResult());
return CommandJsonUtils.toJsonString(jsonTop);
} catch(IOException e) {
throw new CommandFaultException(e.getMessage(), e,
ErrorMessage.NOSQL_5500,
CommandResult.NO_CLEANUP_JOBS);
}
}
return report.display();
}
/**
* Admin.Memo is a persistent class for storing singleton information that
* Admin needs to keep track of.
*/
@Entity
public static class Memo implements Serializable {
private static final long serialVersionUID = 1;
static final String MEMO_KEY = "Memo";
@PrimaryKey
private final String memoKey = MEMO_KEY;
private int planId;
private EventRecorder.LatestEventTimestamps latestEventTimestamps;
/**
* @deprecated as of R2, use Datacenter repfactor
*/
@SuppressWarnings("unused")
@Deprecated
private int repFactor;
public Memo(int firstPlanId,
EventRecorder.LatestEventTimestamps let) {
planId = firstPlanId;
latestEventTimestamps = let;
}
@SuppressWarnings("unused")
private Memo() {
}
private int getPlanId() {
return planId;
}
private void setPlanId(int nextId) {
planId = nextId;
}
private LatestEventTimestamps getLatestEventTimestamps() {
return latestEventTimestamps;
}
private void setLatestEventTimestamps
(EventRecorder.LatestEventTimestamps let) {
latestEventTimestamps = let;
}
}
public LoadParameters getAllParams() {
final LoadParameters ret = new LoadParameters();
ret.addMap(myParams.getGlobalParams().getMap());
ret.addMap(myParams.getStorageNodeParams().getMap());
ret.addMap(myParams.getAdminParams().getMap());
return ret;
}
/**
* Gets the planner. If the admin is not the master or the Admin is not
* yet initialized an AdminNotReadyException is thrown.
*
* public for unit test support
*
* @return the planner
* @throws AdminNotReadyException if the Admain is not initialized or not
* a master
*/
public Planner getPlanner() {
final Planner p = planner;
if (p == null) {
throw new AdminNotReadyException("Cannot service request, admin " +
"is not the master");
}
return p;
}
public void syncEventRecorder() {
eventRecorder.sync();
}
/* -- From StateChangeListener -- */
/**
* Takes action based upon the state change. The actions
* must be simple and fast since they are performed in JE's thread of
* control.
*/
@Override
public void stateChange(final StateChangeEvent sce) {
/* If we are shutting down, we don't need to do anything. */
if (closing) {
return;
}
final State state = sce.getState();
logger.info("State change event: " + new Date(sce.getEventTime()) +
", State: " + state +
", Type: " + myParams.getAdminParams().getType() +
", Master: " +
((state.isMaster() || state.isReplica()) ?
sce.getMasterNodeName() : "none"));
stateTracker.noteStateChange(sce);
}
/**
* Thread to manage replicated environment state changes.
*/
private class AdminStateTracker extends StateTracker {
AdminStateTracker(Logger logger) {
super(AdminStateTracker.class.getSimpleName(), adminId, logger,
exceptionHandler);
}
@Override
protected void doNotify(StateChangeEvent sce) {
enterMode(sce);
}
}
/**
* Uncaught exception handler for Admin threads.
*/
private class AdminExceptionHandler implements UncaughtExceptionHandler {
@Override
public void uncaughtException(Thread t, Throwable e) {
logger.log(Level.SEVERE,
"Admin shutting down for fault in thread " + t.getName(),
e);
if (owner != null) {
ProcessExitCode exitCode = ProcessExitCode.RESTART;
if (e instanceof EnvironmentFailureException &&
((EnvironmentFailureException) e).isCorrupted()) {
/* Do not restart if environment is corrupted */
exitCode = ProcessExitCode.NO_RESTART;
}
owner.getFaultHandler().queueShutdown(e, exitCode);
} else {
/* This would be true only in a test environment. */
new StoppableThread("StopAdminForForeignThreadFault") {
@Override
public void run() {
shutdown(true);
}
@Override
protected Logger getLogger() {
return Admin.this.getLogger();
}
}.start();
}
}
}
/**
* A class to wrap the invocation of a database transaction and to handle
* all the exceptions that might arise. Derived from the je.rep.quote
* example in the BDB JE distribution.
*/
public abstract static class RunTransaction {
private static final int TRANSACTION_RETRY_MAX = 10;
private static final int RETRY_WAIT = 3 * 1000;
/* For reads only */
public static final TransactionConfig readOnly =
new TransactionConfig().setReadOnly(true);
/* For critical writes requiring a high level of durability */
public static final TransactionConfig sync =
new TransactionConfig().setDurability
(new Durability(SyncPolicy.SYNC, SyncPolicy.SYNC,
ReplicaAckPolicy.SIMPLE_MAJORITY));
/*
* For writes which need to be lightweight and their loss would not
* be critical, such as event logging writes.
*/
public static final TransactionConfig noSync =
new TransactionConfig().setDurability
(new Durability(SyncPolicy.NO_SYNC,
SyncPolicy.NO_SYNC,
ReplicaAckPolicy.NONE));
/* Non-synced durable writes */
public static final TransactionConfig writeNoSync =
new TransactionConfig().setDurability
(new Durability(SyncPolicy.WRITE_NO_SYNC,
SyncPolicy.NO_SYNC,
ReplicaAckPolicy.SIMPLE_MAJORITY));
private final ReplicatedEnvironment env;
private final Logger logger;
private final TransactionConfig config;
public RunTransaction(ReplicatedEnvironment env,
TransactionConfig config,
Logger logger) {
this.env = env;
this.logger = logger;
/*
* For unit tests, use local WRITE_NO_SYNC rather than SYNC, for
* improved performance.
*/
if (TestStatus.isWriteNoSyncAllowed() &&
!config.getReadOnly() &&
config.getDurability().getLocalSync() == SyncPolicy.SYNC) {
final Durability newDurability =
new Durability(SyncPolicy.WRITE_NO_SYNC,
config.getDurability().getReplicaSync(),
config.getDurability().getReplicaAck());
this.config = config.clone().setDurability(newDurability);
} else {
this.config = config;
}
}
public T run() {
long sleepMillis = 0;
T retVal = null;
RuntimeException retryException = null;
for (int i = 0; i < TRANSACTION_RETRY_MAX; i++) {
/* Sleep before retrying. */
if (sleepMillis != 0) {
try {
Thread.sleep(sleepMillis);
} catch (InterruptedException ignored) {
}
sleepMillis = 0;
}
Transaction txn = null;
try {
txn = env.beginTransaction(null, config);
retVal = doTransaction(txn);
try {
txn.commit();
} catch (InsufficientAcksException iae) {
logger.log(Level.WARNING, "Insufficient Acks", iae);
/*
* The write, if there was one, succeeded locally;
* this should sort itself out eventually.
*/
}
return retVal;
} catch (InsufficientReplicasException ire) {
retryException = ire;
logger.log
(Level.INFO, "Retrying transaction after " +
"InsufficientReplicasException", ire);
sleepMillis = RETRY_WAIT;
/* Loop and retry. */
} catch (ReplicaWriteException rwe) {
retryException = rwe;
handler(rwe);
} catch (UnknownMasterException ume) {
retryException = ume;
handler(ume);
} catch (LockConflictException lce) {
retryException = lce;
/*
* This should not ever happen, because Admin is the sole
* user of this database. Nonetheless, we'll code to retry
* this operation.
*/
logger.log
(Level.SEVERE,
"Retrying transaction after LockConflictException",
lce);
sleepMillis = RETRY_WAIT;
/* Loop and retry. */
} catch (ReplicaConsistencyException rce) {
retryException = rce;
/*
* Retry the transaction to see if the replica becomes
* consistent.
*/
logger.log(Level.WARNING,
"Retrying transaction after " +
"ReplicaConsistencyException", rce);
sleepMillis = RETRY_WAIT;
/* Loop and retry. */
} catch (DatabaseException rre) {
/*
* If this happens, we will let the ProcessFaultHandler
* exit the Admin process so that the StorageNodeAgent can
* restart it.
*/
throw(rre);
} catch (DatabaseNotReadyException dnre) {
retryException = dnre;
/*
* Retry the transaction to see if the entity database is
* reopened.
*/
logger.log(Level.INFO,
"Retrying transaction after " +
"DatabaseNotReadyException", dnre);
sleepMillis = RETRY_WAIT;
/* Loop and retry. */
} finally {
TxnUtil.abort(txn);
}
}
throw new DBOperationFailedException(
"Transaction retry limit exceeded", retryException);
}
/**
* The following handlers allow for customized handling of
* UnknownMasterException and ReplicaWriteException.
*/
void handler(UnknownMasterException ume)
throws AdminNotReadyException {
/*
* Attempted a modification while in the Replica state.
*/
logger.log(Level.FINE,
"Write operation when not the master: {0}",
ume.getMessage());
throw new AdminNotReadyException("Unable to complete " +
"operation, admin is " +
"not the master");
}
void handler(ReplicaWriteException rwe)
throws AdminNotReadyException {
/*
* Attempted a modification while in the Replica state.
*/
logger.log(Level.FINE,
"Write operation when not the master: {0}",
rwe.getMessage());
throw new AdminNotReadyException("Unable to complete " +
"operation, admin is " +
"not the master");
}
/**
* Must be implemented to perform operations using the given
* Transaction.
*/
protected abstract T doTransaction(Transaction txn);
}
/**
* Signals that retry limit is exceeded.
*/
public static class DBOperationFailedException extends
IllegalStateException {
private static final long serialVersionUID = 1L;
public DBOperationFailedException(String msg) {
super(msg);
}
public DBOperationFailedException(String msg, Throwable cause) {
super(msg, cause);
}
}
/* For use in error and usage messages. */
public String getStorewideLogName() {
return myParams.getStorageNodeParams().getHostname() + ":" +
monitor.getStorewideLogName();
}
/**
* Tells the Admin whether startup processing, done in another thread, has
* finished. If any exceptions occur, saves the exception to transport
* across thread boundaries.
*/
private static class StartupStatus {
private enum Status { INITIALIZING, READY, ERROR }
private Status status;
private RuntimeException problem;
StartupStatus() {
status = Status.INITIALIZING;
}
void setReady(Admin admin) {
synchronized (admin) {
status = Status.READY;
admin.notifyAll();
admin.updateAdminStatus(ServiceStatus.RUNNING);
}
}
void setUnready(Admin admin) {
synchronized (admin) {
status = Status.INITIALIZING;
}
}
boolean isReady(Admin admin) {
synchronized (admin) {
return status == Status.READY;
}
}
void setError(Admin admin, RuntimeException e) {
synchronized (admin) {
status = Status.ERROR;
problem = e;
admin.notifyAll();
admin.updateAdminStatus(ServiceStatus.ERROR_RESTARTING);
}
}
void waitForIsReady(Admin admin) {
synchronized (admin) {
while (status == Status.INITIALIZING) {
try {
admin.wait();
} catch (InterruptedException ie) {
throw new IllegalStateException
("Interrupted while waiting for Admin " +
"initialization", ie);
}
}
}
if (status == Status.READY) {
return;
}
if (status == Status.ERROR) {
throw problem;
}
}
ServiceStatus getStatus(Admin admin) {
synchronized (admin) {
switch (status) {
case INITIALIZING:
return ServiceStatus.STARTING;
case READY:
return ServiceStatus.RUNNING;
case ERROR:
return ServiceStatus.ERROR_RESTARTING;
default:
throw new AssertionError();
}
}
}
}
/**
* Opens a new KVStore handle. The caller is responsible for closing it.
*
* In the future we may decide to maintain an open KVStore rather than
* opening one whenever it is needed, if the overhead for opening is
* unacceptable. However, because the KVStore is associated with a user
* login in a secure installation, we cannot blindly share KVStore handles.
*/
public KVStore openKVStore() {
final Topology topo = getCurrentTopology();
final StorageNodeMap snMap = topo.getStorageNodeMap();
final int nHelpers = Math.min(100, snMap.size());
final String[] helperArray = new String[nHelpers];
int i = 0;
for (StorageNode sn : snMap.getAll()) {
if (i >= nHelpers) {
break;
}
helperArray[i] = sn.getHostname() + ':' + sn.getRegistryPort();
i += 1;
}
final KVStoreConfig config = new KVStoreConfig(topo.getKVStoreName(),
helperArray);
final Properties props = new Properties();
props.setProperty(KVSecurityConstants.TRANSPORT_PROPERTY, "internal");
config.setSecurityProperties(props);
/*
* Disable async calls from the admin.
*
* TODO: Use async if the store has been upgraded to support it.
*/
config.setUseAsync(false);
final KVStoreUserPrincipal currentUser =
KVStoreUserPrincipal.getCurrentUser();
final LoginCredentials creds;
if (currentUser == null) {
creds = null;
} else {
LoginManager loginMgr = getLoginManager();
creds = new ClientProxyCredentials(currentUser, loginMgr);
}
return KVStoreFactory.getStore(config, creds,
null /* ReauthenticateHandler */);
}
private TopologyDiff generateTopoDiff(String targetTopoName,
String startTopoName) {
checkIfReadonlyAdmin();
Topology startTopo;
if (startTopoName == null) {
startTopo = getCurrentTopology();
} else {
final TopologyCandidate startCand = getCandidate(startTopoName);
startTopo = startCand.getTopology();
}
final TopologyCandidate target = getCandidate(targetTopoName);
final TopologyDiff diff = new TopologyDiff(startTopo, startTopoName,
target, parameters);
return diff;
}
public String previewTopology(String targetTopoName, String startTopoName,
boolean verbose, short jsonVersion) {
if (jsonVersion == SerialVersion.ADMIN_CLI_JSON_V1_VERSION) {
return generateTopoDiff(
targetTopoName, startTopoName).display(verbose);
}
return generateTopoDiff(
targetTopoName, startTopoName).displayJson(verbose).toString();
}
String changeRepFactor(final String candidateName,
final String snPoolName,
final DatacenterId dcId,
final int repFactor) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final TopologyCandidate candidate =
getCandidate(txn, candidateName);
final StorageNodePool pool = freezeSNPool(snPoolName);
try {
/* Create and save a new layout against the specified SNs */
final TopologyBuilder tb =
new TopologyBuilder(candidate, pool,
getCurrentParameters(), myParams);
final TopologyCandidate newCandidate =
tb.changeRepfactor(repFactor, dcId);
stores.getTopologyStore().putCandidate(txn, newCandidate);
return "Changed replication factor in " +
newCandidate.getName();
} finally {
pool.thaw();
}
}
}.run();
}
/**
* Change the type of a zone in a topology candidate. Does nothing if the
* zone already has the requested type.
*
* @param candidateName the name of the topology candidate to modify
* @param dcId the ID of the zone to modify
* @param type the desired type of the zone
* @throws DatabaseException if there is a problem modifying the database
*/
public String changeZoneType(final String candidateName,
final DatacenterId dcId,
final DatacenterType type) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final TopologyCandidate candidate = getCandidate(txn,
candidateName);
final Topology topo = candidate.getTopology();
final Datacenter dc = topo.get(dcId);
if (dc.getDatacenterType().equals(type)) {
return dcId + " is already of type " + type;
}
topo.update(dc.getResourceId(),
Datacenter.newInstance(dc.getName(),
dc.getRepFactor(),
type,
dc.getAllowArbiters(),
dc.getMasterAffinity()));
stores.getTopologyStore().putCandidate(txn, candidate);
return "Changed zone type of " + dcId + " to " + type +
" in " + candidate.getName();
}
}.run();
}
public String changeZoneMasterAffinity(final String candidateName,
final DatacenterId dcId,
final boolean masterAffinity) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final TopologyCandidate candidate = getCandidate(txn,
candidateName);
final Topology topo = candidate.getTopology();
final Datacenter dc = topo.get(dcId);
if (dc.getMasterAffinity() == masterAffinity) {
if (masterAffinity) {
return dcId + " already has master affinity";
}
return dcId + " already has no master affinity";
}
KVVersion minALVersion =
DeployDatacenterPlan.AFFINITY_DC_VERSION;
if (!checkStoreVersion(DeployDatacenterPlan.
AFFINITY_DC_VERSION)) {
throw new IllegalCommandException(
"Cannot change " + candidateName +
" zone master affinity when" +
" not all nodes in the store support zone" +
" master affinity. The highest" +
" version supported by all nodes is " +
storeVersion.getNumericVersionString() +
", but zone master affinity" +
" requires version " +
minALVersion.getNumericVersionString() +
" or later.", ErrorMessage.NOSQL_5200,
new String[] {});
}
topo.update(dc.getResourceId(),
Datacenter.newInstance(dc.getName(),
dc.getRepFactor(),
dc.getDatacenterType(),
dc.getAllowArbiters(),
masterAffinity));
stores.getTopologyStore().putCandidate(txn, candidate);
return "Changed zone master affinity " + dcId + " to " +
masterAffinity + " in " + candidate.getName();
}
}.run();
}
/**
* Alter the arbiter attribute of a zone in a topology candidate.
* Does nothing if the zone already has the requested attribute value.
*
* @param candidateName the name of the topology candidate to modify
* @param dcId the ID of the zone to modify
* @param allowArbiters value of the attribute
* @throws DatabaseException if there is a problem modifying the database
*/
public String changeZoneArbiters(final String candidateName,
final DatacenterId dcId,
final boolean allowArbiters) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final TopologyCandidate candidate = getCandidate(txn,
candidateName);
final Topology topo = candidate.getTopology();
final Datacenter dc = topo.get(dcId);
if (dc.getAllowArbiters() == allowArbiters) {
String retval;
if (allowArbiters) {
retval = dcId + " already allow Arbiters ";
} else {
retval = dcId + " already does not allow Arbiters ";
}
return retval;
}
topo.update(dc.getResourceId(),
Datacenter.newInstance(dc.getName(),
dc.getRepFactor(),
dc.getDatacenterType(),
allowArbiters,
dc.getMasterAffinity()));
stores.getTopologyStore().putCandidate(txn, candidate);
return "Changed allow Arbiters " + dcId + " to " +
allowArbiters + " in " + candidate.getName();
}
}.run();
}
public String validateTopology(String candidateName, short jsonVersion) {
Results results = null;
String prefix = null;
if (candidateName == null) {
results = Rules.validate(getCurrentTopology(),
getCurrentParameters(),
true);
prefix = "the current deployed topology";
} else {
final TopologyCandidate candidate = getCandidate(candidateName);
results = Rules.validate(candidate.getTopology(),
getCurrentParameters(),
false);
prefix = "topology candidate \"" + candidateName + "\"";
}
if (jsonVersion == SerialVersion.ADMIN_CLI_JSON_V1_VERSION) {
return "Validation for " + prefix + ":\n" + results;
}
final ObjectNode top = JsonUtils.createObjectNode();
top.put("candicateName", candidateName);
final ArrayNode problemArray = top.putArray("problems");
for (RulesProblem problem : results.getProblems()) {
final ObjectNode on = JsonUtils.createObjectNode();
on.put("resourceId", problem.getResourceId().toString());
on.put("description", problem.toString());
problemArray.add(on);
}
final ArrayNode warningArray = top.putArray("warnings");
for (RulesProblem warning : results.getWarnings()) {
final ObjectNode on = JsonUtils.createObjectNode();
on.put("resourceId", warning.getResourceId().toString());
on.put("description", warning.toString());
warningArray.add(on);
}
final ArrayNode violationArray = top.putArray("violations");
for (RulesProblem violation : results.getViolations()) {
final ObjectNode on = JsonUtils.createObjectNode();
on.put("resourceId", violation.getResourceId().toString());
on.put("description", violation.toString());
violationArray.add(on);
}
return top.toString();
}
public String moveRN(final String candidateName,
final RepNodeId rnId,
final StorageNodeId snId) {
return new RunTransaction(environment,
RunTransaction.writeNoSync, logger) {
@Override
protected String doTransaction(Transaction txn) {
final TopologyCandidate candidate =
getCandidate(txn, candidateName);
final StorageNodePool pool =
freezeSNPool(Parameters.DEFAULT_POOL_NAME);
try {
/*
* Create and save a new layout against the specified SNs
*/
final TopologyBuilder tb =
new TopologyBuilder(candidate, pool,
getCurrentParameters(), myParams);
final RepNode oldRN = candidate.getTopology().get(rnId);
if (oldRN == null) {
throw new IllegalCommandException(
rnId + " doesn't exist, and can't be moved.",
ErrorMessage.NOSQL_5200,
new String[] {});
}
final TopologyCandidate newCandidate =
tb.relocateRN(rnId, snId);
final RepNode newRN = newCandidate.getTopology().get(rnId);
if (newRN == null) {
throw new IllegalStateException
(rnId +
" is missing from the new topology candidate: " +
TopologyPrinter.printTopology
(newCandidate.getTopology()));
}
if (!(newRN.getStorageNodeId().equals
(oldRN.getStorageNodeId()))) {
stores.getTopologyStore().putCandidate(txn,
newCandidate);
return "Moved " + rnId + " from " +
oldRN.getStorageNodeId() + " to " +
newRN.getStorageNodeId();
}
throw new IllegalCommandException(
"Couldn't find an eligible SN to house " + rnId,
ErrorMessage.NOSQL_5200,
new String[] {});
} finally {
pool.thaw();
}
}
}.run();
}
/*
* These next few with @Override are for the MonitorKeeper interface.
*/
@Override
public Monitor getMonitor() {
return monitor;
}
/**
* Return the latency ceiling associated with the given RepNode.
*/
@Override
public int getLatencyCeiling(ResourceId rnid) {
if (rnid instanceof RepNodeId) {
checkIfReadonlyAdmin();
final RepNodeParams rnp = parameters.get((RepNodeId) rnid);
return rnp == null ? 0 : rnp.getLatencyCeiling();
}
return 0;
}
/**
* Return the throughput floor associated with the given RepNode.
*/
@Override
public int getThroughputFloor(ResourceId rnid) {
if (rnid instanceof RepNodeId) {
checkIfReadonlyAdmin();
final RepNodeParams rnp = parameters.get((RepNodeId) rnid);
return rnp == null ? 0 : rnp.getThroughputFloor();
}
return 0;
}
/**
* Return the threshold to apply to the average commit lag computed
* from the total commit lag and the number of commit log records
* replayed by the given RepNode, as reported by the JE backend.
*/
@Override
public long getCommitLagThreshold(ResourceId rnid) {
if (rnid instanceof RepNodeId) {
checkIfReadonlyAdmin();
final RepNodeParams rnp = parameters.get((RepNodeId) rnid);
return rnp == null ? 0 : rnp.getCommitLagThreshold();
}
return 0L;
}
private void updateAdminStatus(ServiceStatus newStatus) {
if (owner == null) {
return;
}
owner.updateAdminStatus(this, newStatus);
}
/**
* Gets the JE replication group admin for the specified Admin.
*/
public ReplicationGroupAdmin getReplicationGroupAdmin(AdminId targetId) {
/*
* Use the local service parameters if targetId matches this service's
* admin ID and there are no parameters because this admin was a
* replica
*/
final AdminParams targetAP;
if (parameters != null) {
targetAP = parameters.get(targetId);
} else if (targetId.equals(adminId)) {
targetAP = myParams.getAdminParams();
} else {
targetAP = null;
}
if (targetAP == null) {
return null;
}
String allNodes = targetAP.getNodeHostPort();
final String helpers = targetAP.getHelperHosts();
if (!"".equals(helpers) && (helpers != null)) {
allNodes += ParameterUtils.HELPER_HOST_SEPARATOR + helpers;
}
final GlobalParams globalParams = (parameters != null) ?
parameters.getGlobalParams() :
myParams.getGlobalParams();
final String storeName = globalParams.getKVStoreName();
final String groupName = getAdminRepGroupName(storeName);
return getReplicationGroupAdmin(groupName, allNodes);
}
/**
* Gets the JE replication group admin for the specified RN.
*/
public ReplicationGroupAdmin getReplicationGroupAdmin(RepNodeId targetId) {
checkIfReadonlyAdmin();
final RepNodeParams rnp = parameters.get(targetId);
if (rnp == null) {
return null;
}
String allNodes = rnp.getJENodeHostPort();
final String helpers = rnp.getJEHelperHosts();
if (!"".equals(helpers) && (helpers != null)) {
allNodes += ParameterUtils.HELPER_HOST_SEPARATOR + helpers;
}
return getReplicationGroupAdmin(targetId.getGroupName(), allNodes);
}
/**
* Gets the JE replication group for the group with the specified name by
* contacting the specified helpers.
*/
public ReplicationGroupAdmin
getReplicationGroupAdmin(String groupName,
String targetHelperHosts) {
final Set helperSockets = new HashSet<>();
final StringTokenizer tokenizer =
new StringTokenizer(targetHelperHosts,
ParameterUtils.HELPER_HOST_SEPARATOR);
while (tokenizer.hasMoreTokens()) {
final String helper = tokenizer.nextToken();
helperSockets.add(HostPortPair.getSocket(helper));
}
return new ReplicationGroupAdmin(groupName, helperSockets,
repConfig.getRepNetConfig());
}
/**
* Returns the JE replication group obtained from the group admin.
*
* @param rga the group admin
* @return the replication group
* @throws IllegalStateException if the group master is not found before
* the timeout or if the call fails for another reason
*/
public static ReplicationGroup getReplicationGroup(
ReplicationGroupAdmin rga)
throws IllegalStateException {
/* TODO: Add KV parameters for the check or timeout values? */
final long checkMillis = 1000;
/*
* Use a long enough timeout to account for the fact that JE HA master
* election retries have a power-of-two backoff that can wait as long
* as 32 seconds between attempts
*/
final long timeoutMillis = 90000;
final long stop = System.currentTimeMillis() + timeoutMillis;
try {
while (true) {
try {
return rga.getGroup();
} catch (UnknownMasterException e) {
final long now = System.currentTimeMillis();
if (now > stop) {
throw e;
}
Thread.sleep(checkMillis);
}
}
} catch (Exception e) {
throw new IllegalStateException(
"Problem getting replication group membership: " +
e.getMessage(),
e);
}
}
/**
* Removes the specified admin replica from its replication group. It must
* exist and not be the only admin the store.
*/
public void removeAdminFromRepGroup(AdminId victim) {
logger.info("Removing Admin replica " + victim +
" from the replication group.");
final ReplicationGroupAdmin rga = getReplicationGroupAdmin(victim);
if (rga == null) {
throw new MemberNotFoundException
("The admin " + victim + " is not in the rep group.");
}
/* This call may also throw MemberNotFoundException. */
final String victimNodeName = getAdminRepNodeName(victim);
final int nAdmins = getAdminCount();
if (nAdmins == 1) {
throw new NonfatalAssertionException
("Attempting to remove the sole Admin instance" + victim);
}
rga.removeMember(victimNodeName);
}
/**
* Causes the mastership of the admin db replication group to change to
* another member.
*/
public void transferMaster() {
checkIfReadonlyAdmin();
logger.info("Transferring Admin mastership");
final Set replicas = new HashSet<>();
for (final AdminParams adminParams : parameters.getAdminParams()) {
final AdminId aId = adminParams.getAdminId();
if (adminId.equals(aId)) {
continue;
}
if (!adminParams.getType().isPrimary()) {
continue;
}
replicas.add(getAdminRepNodeName(aId));
}
try {
environment.transferMaster(replicas, 60, TimeUnit.SECONDS);
} catch (Exception e) {
throw new NonfatalAssertionException
("Master transfer failed", e);
}
}
/**
* Checks whether all of the nodes in the store are at or above the minor
* version of {@code requiredVersion}. Returns {@code true} if the
* required version is supported by all nodes, and {@code false} if at
* least one node does not support the required version. Throws {@link
* AdminFaultException} if there is a problem determining the versions of
* the storage nodes or if there is a node does not support the
* prerequisite version. Once {@code true} is returned for a given
* version, future calls to this method will always return {@code true}.
*
* Version dependent features should keep calling this method until the
* required feature is enabled. Of course, features should be careful to
* not constantly call these methods at a high rate.
*
* @param requiredVersion version to check against
* @return {@code true} if all of the nodes in the store meet the required
* version, and {@code false} if a least one node does not
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
public boolean checkStoreVersion(KVVersion requiredVersion) {
return VersionUtil.compareMinorVersion(getStoreVersion(),
requiredVersion) >= 0;
}
/**
* Checks whether all of the storage nodes in the store hosting an admin
* instance are at or above the minor version of {@code requiredVersion}.
*
* @param requiredVersion version to check against
* @return {@code true} if all of the nodes hosting an admin instance meet
* the required version, and {@code false} if a least one node does
* not
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
public boolean checkAdminGroupVersion(KVVersion requiredVersion) {
return checkAdminGroupVersion(requiredVersion,
Collections.emptySet(),
null, null, false);
}
/**
* Checks whether all of the storage nodes in the store hosting an admin
* instance are at or above the minor version of {@code requiredVersion},
* skipping nodes which are down due to failure of specified shard.
*
* @param requiredVersion version to check against
* @param failedShard the RepGroupId of failed shard, or null for no failed
* shard
* @return {@code true} if all of the nodes hosting an admin instance meet
* the required version, and {@code false} if a least one node does
* not
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
public boolean checkAdminGroupVersion(KVVersion requiredVersion,
RepGroupId failedShard) {
return checkAdminGroupVersion(requiredVersion,
Collections.emptySet(),
failedShard, null, false);
}
/**
* Checks whether all of the storage nodes in the store hosting an admin
* instance are at or above the minor version of {@code requiredVersion},
* skipping node which is hosting to be removed admin on failed SN.
*
* @param requiredVersion version to check against
* @param victim the AdminId of to be removed admin
* @param failedSN true if to be removed admin hosted on failed sn,
* else false
* @return {@code true} if all of the nodes hosting an admin instance meet
* the required version, and {@code false} if a least one node does
* not
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
public boolean checkAdminGroupVersion(KVVersion requiredVersion,
AdminId victim,
boolean failedSN) {
return checkAdminGroupVersion(requiredVersion,
Collections.emptySet(),
null, victim, failedSN);
}
/**
* Checks whether all of the storage nodes in the store hosting an admin
* instance are at or above the minor version of {@code requiredVersion},
* skipping nodes in the specified set of offline zones and nodes which
* are down due to failure of specified shard and node hosting to be
* removed admin on failed SN.
*
* @param requiredVersion version to check against
* @param offlineZones the IDs of offline zones
* @param failedShard the RepGroupId of failed shard, or null for no failed
* shard
* @param victim the AdminId of to be removed admin
* @param failedSN true if to be removed admin is hosted on failed sn,
* else false
* @return {@code true} if all of the nodes hosting an admin instance meet
* the required version, and {@code false} if a least one node does
* not
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
private boolean checkAdminGroupVersion(KVVersion requiredVersion,
Set offlineZones,
RepGroupId failedShard,
AdminId victim,
boolean failedSN) {
return VersionUtil.compareMinorVersion(
getAdminGroupVersion(offlineZones, failedShard, victim, failedSN),
requiredVersion) >= 0;
}
/**
* Gets the highest minor version that all of the nodes in the store are
* known to be at and support. Throws {@link AdminFaultException} if there
* is a problem determining the versions of the storage nodes or if there
* is a node does not support the prerequisite version. Note that nodes may
* be at different patch levels and this method returns the lowest patch
* level found.
*
* When using this method to enable version sensitive features, callers
* should compare the returned version to the desired version using {@link
* oracle.kv.impl.util.VersionUtil#compareMinorVersion}.
*
*
Note that it is assumed that new nodes added to the store will be at
* the latest version. This method will not detect new nodes being added
* which have older software versions.
*
* @return the highest minor version that all of the nodes support
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
public KVVersion getStoreVersion() {
if (VersionUtil.compareMinorVersion(storeVersion,
KVVersion.CURRENT_VERSION) >= 0) {
return storeVersion;
}
/*
* The store is not at the current minor version, query the nodes to
* see if they have been upgraded.
*/
final KVVersion v =
getSNsVersion(getCurrentTopology().getStorageNodeIds());
/*
* Since everyone responded, we can set the store version to what
* was found.
*/
synchronized (this) {
if (v.compareTo(storeVersion) > 0) {
storeVersion = v;
}
return storeVersion;
}
}
/**
* Gets the highest minor version that all storage nodes in the store
* hosting an admin instance are known to be at and support.
*
*
When using this method to enable version sensitive features, callers
* should compare the returned version to the desired version using {@link
* VersionUtil#compareMinorVersion}.
*
* @return the highest minor version that the nodes hosting an admin
* instance support
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
KVVersion getAdminGroupVersion() {
return getAdminGroupVersion(Collections.emptySet(),
null, null, false);
}
/**
* Gets the highest minor version that all storage nodes in the store
* hosting an admin instance are known to be at and support, skipping nodes
* in the specified set of offline zones and nodes which are down due to
* failure of specified shard and node hosting to be removed admin on
* failed SN.
*
* When using this method to enable version sensitive features, callers
* should compare the returned version to the desired version using {@link
* VersionUtil#compareMinorVersion}.
*
* @param offlineZones the IDs of offline zones
* @param failedShard the RepGroupId of failed shard, or null for no failed
* shard
* @param victim the AdminId of to be removed admin
* @param failedSN true if to be removed admin hosted on failed sn,
* else false
* @return the highest minor version that all of the nodes hosting admin
* instance support
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
KVVersion getAdminGroupVersion(Set offlineZones,
RepGroupId failedShard,
AdminId victim,
boolean failedSN) {
if (VersionUtil.compareMinorVersion(adminVersion,
KVVersion.CURRENT_VERSION) >= 0) {
return adminVersion;
}
final List snIds = new ArrayList<>();
Parameters params = getCurrentParameters();
Topology topo = getCurrentTopology();
for (AdminId aId : params.getAdminIds()) {
/*
* If to be removed admin is hosted on a failed SN then skip
* adding failed SN to the list of version check
*/
if (victim != null &&
(aId.getAdminInstanceId() == victim.getAdminInstanceId()) &&
failedSN) {
continue;
}
final StorageNodeId snId = params.get(aId).getStorageNodeId();
if (offlineZones.contains(topo.getDatacenterId(snId))) {
continue;
}
/*
* In case of failed shard if capacity = 1 then skip
* adding failed SN belonging to failed shard which is
* hosting an admin. In case of capacity > 1, we assume
* that SNs are up and running, only RNs have failed.
* Hence we do not need to skip SNs in capacity > 1.
*/
if (failedShard != null) {
Set hostedRepNodes =
topo.getHostedRepNodeIds(snId);
if (hostedRepNodes.size() == 1 &&
(hostedRepNodes.iterator().next().getGroupId() ==
failedShard.getGroupId())) {
continue;
}
}
snIds.add(snId);
}
final KVVersion v = getSNsVersion(snIds);
synchronized (this) {
if (v.compareTo(adminVersion) > 0) {
adminVersion = v;
}
return adminVersion;
}
}
/**
* Returns a map of the other Admins in the group with their software
* version. If there is a problem getting the information, null is returned.
*
* @return map of the other Admins in the group or null
*/
Map getOtherAdminVersions() {
final Map map = new HashMap<>();
try {
final RegistryUtils registryUtils =
new RegistryUtils(getCurrentTopology(), getLoginManager());
final Parameters params = getCurrentParameters();
for (AdminId aId : params.getAdminIds()) {
if (aId.equals(adminId)) {
continue;
}
final StorageNodeId snId = params.get(aId).getStorageNodeId();
final KVVersion v =
getSNStatus(snId, registryUtils).getKVVersion();
map.put(aId, v);
}
} catch (Exception e) {
return null;
}
return map;
}
/**
* Check on all the SNs which host admins to return the highest software
* version of all the running admins.
*
* @return the highest software version of all the nodes hosting admin
* @throws AdminFaultException if there was a problem checking versions
*/
KVVersion getAdminHighestVersion() {
final List snIds = new ArrayList<>();
Parameters params = getCurrentParameters();
for (AdminId aId : params.getAdminIds()) {
snIds.add(params.get(aId).getStorageNodeId());
}
KVVersion v = KVVersion.CURRENT_VERSION;
final RegistryUtils registryUtils =
new RegistryUtils(getCurrentTopology(), getLoginManager());
for (StorageNodeId snId : snIds) {
StorageNodeStatus snStatus = getSNStatus(snId, registryUtils);
final KVVersion snVersion = snStatus.getKVVersion();
if (VersionUtil.compareMinorVersion(snVersion, v) > 0) {
v = snVersion;
}
}
return v;
}
/**
* Gets the highest minor version that given list of the nodes in the store
* are known to be at and support. Throws {@link AdminFaultException} if
* there is a problem determining the versions of the storage nodes or if
* there is a node does not support the prerequisite version. Note that
* nodes may be at different patch levels and this method returns the
* lowest patch level found.
*
* Note that it is assumed that new nodes added to the store will be at
* the latest version. This method will not detect new nodes being added
* which have older software versions.
*
* @param snIds given list of storage node Ids
* @return the highest minor version that given nodes support
* @throws AdminFaultException if there was a problem checking versions or
* a node does not support the prerequisite version
*/
private KVVersion getSNsVersion(List snIds) {
final RegistryUtils registryUtils =
new RegistryUtils(getCurrentTopology(), getLoginManager());
/* Start at our highest version, downgrade as needed. */
KVVersion v = KVVersion.CURRENT_VERSION;
for (StorageNodeId snId : snIds) {
KVVersion snVersion = null;
if (parameters != null) {
ParameterMap snParams = parameters.get(snId).getMap();
String version =
snParams.get(ParameterState.SN_SOFTWARE_VERSION).asString();
if (version != null) {
snVersion = KVVersion.parseVersion(version);
}
}
if (snVersion == null) {
StorageNodeStatus snStatus = getSNStatus(snId, registryUtils);
snVersion = snStatus.getKVVersion();
}
if (snVersion.compareTo(KVVersion.PREREQUISITE_VERSION) < 0) {
final String prereq =
KVVersion.PREREQUISITE_VERSION.getNumericVersionString();
throw new AdminFaultException(
new IllegalCommandException(
"Node " + snId + " is at software version " +
snVersion.getNumericVersionString() +
" which does not meet the current prerequisite." +
" It must be upgraded to version " + prereq +
" or greater."));
}
/* If we found someone of lesser minor version, downgrade */
if (VersionUtil.compareMinorVersion(snVersion, v) < 0) {
v = snVersion;
}
}
return v;
}
/**
* Return the node status of a given target SN.
*
* @param snId the target SN's id.
* @param utils the registry utils to get the status of the target SN.
* @return The status of the given SN.
* @throws AdminFaultException if there was a problem getting the SN
* status.
*/
StorageNodeStatus getSNStatus (StorageNodeId snId, RegistryUtils utils) {
try {
return utils.getStorageNodeAgent(snId).ping();
} catch (RemoteException | NotBoundException e) {
throw new AdminFaultException(e);
}
}
/**
* Checks whether the node hosting the current admin master instance is at
* or above the minor version of {@code requiredVersion}. Throws
* {@link AdminFaultException} if there is a problem determining the
* version of the admin master or if it does not support the prerequisite
* version.
*
* @param requiredVersion version to check against
* @return {@code true} if the node in the store hosting admin master
* instance meet the required version, and {@code false} otherwise
* @throws AdminFaultException if there was a problem checking version, or
* the admin is in detached mode, or the admin master does not
* support the prerequisite version
*/
boolean checkAdminMasterVersion(KVVersion requiredVersion) {
final KVVersion masterVersion;
final AdminId currentMaster = masterId;
if (currentMaster == null) {
throw new AdminFaultException(
new NonfatalAssertionException(
"Admin is in DETACHED mode. The master is unknown."));
}
if (currentMaster.equals(adminId)) {
masterVersion = KVVersion.CURRENT_VERSION;
} else {
final Parameters p = getCurrentParameters();
final AdminParams ap = p.get(currentMaster);
masterVersion =
getSNsVersion(Arrays.asList(ap.getStorageNodeId()));
}
return VersionUtil.compareMinorVersion(masterVersion,
requiredVersion) >= 0;
}
/**
* Returns the Metadata objects of the specified type.
*/
@Nullable
public > T
getMetadata(final Class returnType,
final MetadataType metadataType) {
logger.log(Level.FINE, "Getting {0} metadata", metadataType);
return new RunTransaction(environment,
RunTransaction.sync,
logger) {
@Override
protected T doTransaction(Transaction txn) {
return stores.getMetadata(returnType, metadataType, txn);
}
}.run();
}
/**
* Returns the Metadata objects of the specified type in an existing
* transaction.
*/
public > T
getMetadata(final Class returnType,
final MetadataType metadataType,
Transaction txn) {
logger.log(Level.FINE, "Getting {0} metadata", metadataType);
return stores.getMetadata(returnType, metadataType, txn);
}
/**
* Saves the specified Metadata object in the admin's store.
*/
public void saveMetadata(final Metadata> metadata, final Plan plan) {
logger.log(Level.FINE, "Storing {0} ", metadata);
new RunTransaction(environment,
RunTransaction.sync,
logger) {
@Override
protected Void doTransaction(Transaction txn) {
if (plan != null) {
/*
* Inform the plan that the metadata is being updated. Save
* the plan if needed. Be sure to take the plan mutex before
* any JE locks are acquired in this transaction, per the
* synchronization hierarchy.
*/
synchronized (plan) {
if (plan.updatingMetadata(metadata)) {
stores.putPlan(txn, plan);
}
}
}
saveMetadata(metadata, txn);
return null;
}
}.run();
}
/**
* Saves the specified Metadata object in the admin's store in an existing
* transaction.
*
* The metadata is pruned before being written.
*/
public void saveMetadata(final Metadata> metadata, Transaction txn) {
saveMetadata(metadata, txn, false);
}
/**
* Saves the specified Metadata object in the admin's store in an existing
* transaction. If noOverwrite is true and the metadata object is already
* in the store the new value will not be written and true is returned.
*
* The metadata is pruned before being written (successful or not).
*
* @return true if noOverwrite is true and the metadata object already
* exist
*/
public boolean saveMetadata(final Metadata> metadata,
Transaction txn,
boolean noOverwrite) {
metadata.pruneChanges(Integer.MAX_VALUE, MAX_MD_CHANGE_HISTORY);
logger.log(Level.FINE, "Storing {0} ", metadata);
return stores.putMetadata(metadata, txn, noOverwrite);
}
/**
* Creates a transaction context for the instance of UpdateMetadata.
* Returns the updated Metadata object.
*/
public > T updateMetadata(
final UpdateMetadata> updateMetadata) {
logger.log(Level.FINE, "Update Metadata {0} ", updateMetadata);
return new RunTransaction(environment,
RunTransaction.sync,
logger) {
@SuppressWarnings("unchecked")
@Override
protected T doTransaction(Transaction txn) {
return (T)updateMetadata.doUpdateMetadata(txn);
}
}.run();
}
/**
* Returns the LoginManager for the admin.
*/
public LoginManager getLoginManager() {
if (owner == null) {
return null;
}
return owner.getLoginManager();
}
public AdminService getOwner() {
return owner;
}
boolean isReady() {
return startupStatus.isReady(this);
}
void installSecurityUpdater() {
if (owner != null) {
final AdminSecurity aSecurity = owner.getAdminSecurity();
final LoginService loginService = owner.getLoginService();
final LoginUpdater loginUpdater = new LoginUpdater();
final SecurityMDUpdater secMDUpdater = new SecurityMDUpdater();
final IDCSOAuthAuthenticator idcsAuthenticator =
aSecurity.getIDCSOAuthAuthenticator();
loginUpdater.addServiceParamsUpdaters(aSecurity);
loginUpdater.addGlobalParamsUpdaters(aSecurity);
secMDUpdater.addRoleChangeUpdaters(aSecurity);
secMDUpdater.addUserChangeUpdaters(aSecurity);
if (loginService != null) {
loginUpdater.addServiceParamsUpdaters(loginService);
loginUpdater.addGlobalParamsUpdaters(loginService);
addSecurityMDListener(loginService);
}
if (idcsAuthenticator != null) {
loginUpdater.addGlobalParamsUpdaters(idcsAuthenticator);
}
addParameterListener(loginUpdater.new ServiceParamsListener());
addGlobalParameterListener(
loginUpdater.new GlobalParamsListener());
addSecurityMDListener(secMDUpdater.new RoleChangeListener());
addSecurityMDListener(secMDUpdater.new UserChangeListener());
}
}
public RoleResolver getRoleResolver() {
if (owner == null) {
return null;
}
return owner.getRoleResolver();
}
/* -- repairAdminQuorum -- */
/**
* Repairs the admin quorum by updating the JE HA rep group membership to
* match the currently available admins, setting an electable group size
* override on each admin to obtain quorum, and converting requested
* secondary admins to primary admins. If there are no existing primary
* admins, establishes one secondary as the initial master by resetting the
* JE replication group, which updates the group membership and allows the
* other secondary admins to join.
*
* First checks that the admins specified by the zoneIds and adminIds
* parameters match the currently available admin nodes. The nodes
* specified must be currently available, and must include all available
* primary nodes. If nodes are specified by zone, then the zone must be
* present in the topology and all nodes in the specified zone must be
* available.
*
*
Then attempts to modify admin parameters and adjust the JE rep group
* membership. The command fails if any of those operations fail.
*
*
Returns null if the repair should be retried after the current admin
* is restarted.
*
*
Since this method changes the types of admins without changing the
* associated zone types, and also changes the types of offline admins
* which cannot be notified of the change, the caller should call 'plan
* failover' and 'plan repair-topology' commands to complete the repairs.
*
* @param zoneIds include admins in zones with these IDs
* @param adminIds include admins with these IDs
* @return the current set of admins or null
* @throws IllegalCommandException if a requested admin is not found, if a
* primary admin is found that was not requested, or if a requested zone is
* not found
* @throws IllegalStateException if there is a problem repairing the admin
* quorum
*/
public Set repairAdminQuorum(Set zoneIds,
Set adminIds) {
logger.info("Repair admin quorum: zones: " + zoneIds +
", admins: " + adminIds);
final Topology topo = getCurrentTopology();
final RegistryUtils regUtils =
new RegistryUtils(topo, getLoginManager());
final Parameters params = getCurrentParameters();
final Map allAdmins =
findAllAdmins(regUtils, params);
final SortedSet requestedAdmins =
collectRequestedAdmins(zoneIds, adminIds, topo, params, allAdmins);
logger.info("Repair admin quorum: requested admins: " +
requestedAdmins);
int existingPrimaries = 0;
for (final AdminId aId : requestedAdmins) {
if (allAdmins.get(aId).getType().isPrimary()) {
existingPrimaries++;
}
}
/*
* If there are no existing primary nodes, then establish the initial
* master by using the first node to reset the JE replication group.
* This operation resets the group membership, so there is no need to
* delete members explicitly or set the electable group size override.
*/
if (existingPrimaries == 0) {
logger.info("Repair admin quorum: no existing primaries");
boolean nextIsFirst = true;
for (final AdminId aId : requestedAdmins) {
final boolean first = nextIsFirst;
nextIsFirst = false;
final AdminParams adminParams = allAdmins.get(aId);
if (repairAdminParams(regUtils, adminParams, 0, first)) {
/* Restarting the current admin */
return null;
}
}
return requestedAdmins;
}
/*
* Update the electable group size override on existing primary nodes
* to establish quorum. Although we could use reset rep group to do
* this, it is probably safer to do it by modifying the electable group
* size override, since that allows the nodes to perform an election.
*/
for (final AdminId aId : requestedAdmins) {
final AdminParams adminParams = allAdmins.get(aId);
if (adminParams.getType().isPrimary()) {
final boolean restart = repairAdminParams(
regUtils, adminParams, existingPrimaries, false);
assert !restart : "Primary nodes should not need restarting";
}
}
/*
* Update the JE HA group membership information, if needed, to
* remove nodes that are not in the requested set.
*/
final ReplicationGroupAdmin rga = getReplicationGroupAdmin(adminId);
final ReplicationGroup rg = getReplicationGroup(rga);
final Set adminSockets = new HashSet<>();
for (final AdminParams adminParams : allAdmins.values()) {
adminSockets.add(
HostPortPair.getSocket(adminParams.getNodeHostPort()));
}
for (final ReplicationNode jeRN : rg.getElectableNodes()) {
if (!adminSockets.contains(jeRN.getSocketAddress())) {
final String name = jeRN.getName();
logger.info("Repair admin quorum: delete member: " + name);
try {
rga.deleteMember(name);
} catch (Exception e) {
throw new IllegalStateException(
"Problem updating admin group membership for node " +
name + ": " + e.getMessage(),
e);
}
}
}
/* Clear special parameters on the primary nodes */
for (final AdminId aid : requestedAdmins) {
final AdminParams adminParams = allAdmins.get(aid);
if (adminParams.getType().isPrimary()) {
boolean restart =
repairAdminParams(regUtils, adminParams, 0, false);
assert !restart : "Primary nodes should not need restarting";
}
}
/*
* Convert any secondary nodes to primary nodes after quorum has been
* established so that they can join the existing group.
*/
if (existingPrimaries < requestedAdmins.size()) {
for (final AdminId aid : requestedAdmins) {
final AdminParams adminParams = allAdmins.get(aid);
if (!adminParams.getType().isPrimary()) {
if (repairAdminParams(regUtils, adminParams, 0, false)) {
/* Restarting the current admin */
return null;
}
}
}
}
return requestedAdmins;
}
/** Collect admin parameters from all available admins. */
private Map findAllAdmins(RegistryUtils regUtils,
Parameters params) {
/*
* TODO: Maybe scan other SNs, perhaps with a standard utility, to see
* if they have admins not included in the parameters?
*/
final Map allAdmins = new HashMap<>();
final List adminsToCheck =
new ArrayList<>(params.getAdminIds());
for (int i = 0; i < adminsToCheck.size(); i++) {
final AdminId aId = adminsToCheck.get(i);
final StorageNodeId snId = params.get(aId).getStorageNodeId();
try {
final CommandServiceAPI cs = regUtils.getAdmin(snId);
final LoadParameters adminServiceParams = cs.getParams();
allAdmins.put(aId,
new AdminParams(
adminServiceParams.getMapByType(
ParameterState.ADMIN_TYPE)));
} catch (RemoteException | NotBoundException e) {
logger.info("Admin " + aId + " is unreachable");
}
}
return allAdmins;
}
/**
* Returns all of the admins in the specified zone IDs and admin IDs,
* checking that all of the admins specified are available and that they
* include all available primary admins. Returns a sorted set so callers
* that iterate over the set will behave the same way if the same repair is
* performed more than once.
*
* @return the requested admins
* @throws IllegalCommandException if zones are not found or requested
* admins do not match available ones
*/
private SortedSet collectRequestedAdmins(
Set zoneIds,
Set adminIds,
Topology topo,
Parameters params,
Map allAdmins) {
final StringBuilder errorMessage = new StringBuilder();
final Set missingZones = new HashSet<>();
for (final DatacenterId dcId : zoneIds) {
if (topo.get(dcId) == null) {
missingZones.add(dcId);
}
}
if (!missingZones.isEmpty()) {
errorMessage.append("\n Zones not found: ").append(missingZones);
}
final SortedSet requestedAdmins = new TreeSet<>(adminIds);
for (final AdminId aId : params.getAdminIds()) {
final StorageNodeId snId = params.get(aId).getStorageNodeId();
final Datacenter dc = topo.getDatacenter(snId);
if (zoneIds.contains(dc.getResourceId())) {
requestedAdmins.add(aId);
}
}
final Set missingPrimaries = new HashSet<>();
for (final AdminParams adminParams : allAdmins.values()) {
if (adminParams.getType().isPrimary()) {
final AdminId aId = adminParams.getAdminId();
if (!requestedAdmins.contains(aId)) {
missingPrimaries.add(aId);
}
}
}
if (!missingPrimaries.isEmpty()) {
errorMessage.append("\n Available primary admins not specified: ")
.append(missingPrimaries);
}
final Set offlineAdmins = new HashSet<>(requestedAdmins);
offlineAdmins.removeAll(allAdmins.keySet());
if (!offlineAdmins.isEmpty()) {
errorMessage.append("\n Requested admins not found: ")
.append(offlineAdmins);
}
if (errorMessage.length() > 0) {
throw new IllegalCommandException(
"Problems repairing admin quorum" + errorMessage);
}
return requestedAdmins;
}
/**
* Update the node parameters as needed to make it a primary node, and have
* the requested electable group size override and reset rep group
* settings. Returns true if the node requires a restart and it is the
* currently running admin.
*
* @param regUtils registry utilities
* @param adminParams the current admin parameters
* @param groupSizeOverride the requested electable group size override
* @param resetRepGroup the requested value for reset rep group
* @return whether the current node needs to be restarted
* @throws IllegalStateException if the update fails
*/
private boolean repairAdminParams(RegistryUtils regUtils,
AdminParams adminParams,
int groupSizeOverride,
boolean resetRepGroup) {
final AdminId aId = adminParams.getAdminId();
logger.info("Repair admin params: " + aId +
", groupSizeOverride: " + groupSizeOverride +
", resetRepGroup: " + resetRepGroup);
final boolean currentIsPrimary = adminParams.getType().isPrimary();
final int currentGroupSizeOverride =
adminParams.getElectableGroupSizeOverride();
final boolean currentResetRepGroup = adminParams.getResetRepGroup();
assert !resetRepGroup || !currentIsPrimary
: "Only reset replication group for secondary node";
/* Check if node is OK as is */
if (currentIsPrimary &&
(currentGroupSizeOverride == groupSizeOverride) &&
(currentResetRepGroup == resetRepGroup)) {
logger.info("Repair admin params: OK: " + aId);
return false;
}
final AdminParams newAdminParams =
new AdminParams(adminParams.getMap());
newAdminParams.setType(AdminType.PRIMARY);
newAdminParams.setElectableGroupSizeOverride(groupSizeOverride);
newAdminParams.setResetRepGroup(resetRepGroup);
final StorageNodeId snId = adminParams.getStorageNodeId();
try {
final StorageNodeAgentAPI sna = regUtils.getStorageNodeAgent(snId);
sna.newAdminParameters(newAdminParams.getMap());
if (currentIsPrimary) {
logger.info("Repair admin params: no restart: " + aId);
regUtils.getAdmin(snId).newParameters();
return false;
}
if (!adminId.equals(aId)) {
logger.info("Repair admin params: restart: " + aId);
sna.stopAdmin(false /* force */);
sna.startAdmin();
final StorageNode sn = regUtils.getTopology().get(snId);
ServiceUtils.waitForAdmin(sn.getHostname(),
sn.getRegistryPort(),
regUtils.getLoginManager(),
90, /* timeoutSec */
ServiceStatus.RUNNING);
return false;
}
/*
* Stop the current admin, and return true, to request that the
* caller retry the repair after the admin restarts
*/
logger.info("Repair admin params: restart current: " + aId);
stopAdminService(true /* force */);
return true;
} catch (Exception e) {
throw new IllegalStateException(
"Problem attempting to update the admin quorum for admin: " +
aId + ", SN ID: " + snId + ", host and port: " +
adminParams.getNodeHostPort() + ": " + e.getMessage(),
e);
}
}
/* ---- DDL statement execution support ------ */
/**
* Asynchronously execute the specified DDL statement and return
* information about the resulting plan.
* @param namespace optional namespace to use for tables
* @param serialVersion
* @throws IllegalCommandException if the statement is invalid.
*/
public ExecutionInfo executeStatement(String ddlStatement,
String namespace,
short serialVersion) {
return executeStatement(ddlStatement, namespace, null,
null, serialVersion);
}
public ExecutionInfo executeStatement(String ddlStatement,
String namespace,
TableLimits limits,
LogContext lc,
short serialVersion) {
/*
* Parse the statement and fire off a plan to execute it. There are
* three possible outcomes where the handler will not have issued a
* plan:
*
* 1 - the statement is a describe statement, with a string return. In
* that case, the handler will return success & a result string
*
* 2 - the statement had an error, and the handler will return an error
* string. ResultString and planId will be null
*
* 3 - the handler will have issued a plan, and planId will be non-null.
* ErrorString and ResultString will be null.
*/
assert TestHookExecute.doHookIfSet(EXECUTE_HOOK, this);
/*
* The access checker instance in AdminService will be passed to
* DdlHandler so that privilege check can be done for ddl operations.
*/
final AccessChecker accessChecker =
owner == null ?
null :
owner.getAdminSecurity().getAccessChecker();
if (isLoggableWithCtx(logger, Level.FINE, lc)) {
fineWithCtx(logger, "Issuing DDL statement: " + ddlStatement, lc);
}
DdlHandler handler =
new DdlHandler(ddlStatement, this, namespace, limits, accessChecker);
int planId = handler.getPlanId();
/*
* If the handler isn't successful, it meant that the statement
* didn't parse correctly, or was semantically incorrect.
*/
if (!handler.getSuccess()) {
/*
* There should have been no plan executed, and there should be
* an error string.
*/
assert (planId == 0);
assert (handler.getErrorMessage() != null);
if (handler.canRetry()) {
/* This is a transient problem, retries may succeed */
throw new OperationFaultException
("Error from " + ddlStatement + ": "
+ handler.getErrorMessage());
}
/* This is a permanent problem, retries will not help */
if (handler.getException() instanceof QueryException) {
String msg = "User error in query: " +
handler.getException().toString();
fineWithCtx(logger, msg, lc);
throw ((QueryException)handler.getException()).
getWrappedIllegalArgument();
}
if (handler.getException() instanceof QueryStateException) {
warningWithCtx(logger, handler.getException().toString(), lc);
((QueryStateException)handler.getException()).
throwClientException();
}
/*
* Not all errors throw exceptions. Wrap these in a generic
* QueryException. Location information is not available.
*/
if (handler.getException() == null) {
String msg = "User error in query: " +
handler.getErrorMessage();
fineWithCtx(logger, msg, lc);
throw new QueryException(msg).getWrappedIllegalArgument();
}
/* Unknown exception from the query */
throw new IllegalCommandException
("Error from " + ddlStatement + ": " +
handler.getErrorMessage());
}
/* A successful outcome */
if (handler.hasPlan()) {
/* a plan was issued */
return getExecutionStatus(planId, serialVersion);
}
/*
* A statement that didn't require an administrative plan was issued,
* i.e. an "if exists" or "if not exists", or a show or describe
* statement.
*/
DdlResultsReport report = new DdlResultsReport(handler, serialVersion);
return new ExecutionInfoImpl
(0, // planId,
true, // isTerminated
report.getStatus(),
report.getStatusAsJson(),
true, // isSuccess
false, // isCancelled
null, // errorMessage
false, // needsCancel
report.getResult()); // result
}
/**
* This is not a DDL statement but is a DDL-like plan to change table
* limits. If anything fails, cancel the plan.
*/
ExecutionInfo setTableLimits(final String namespace,
final String tableName,
final TableLimits limits,
short serialVersion) {
int planId = getPlanner().createTableLimitPlan("SetTableLimits",
namespace,
tableName,
limits);
try {
approvePlan(planId);
executePlanOrFindMatch(planId);
} catch (Exception e) {
cancelPlan(planId);
throw new OperationFaultException
("Error trying to set table limits on table " + tableName);
}
return getExecutionStatus(planId, serialVersion);
}
/**
* Check on the specified plan and return status. Differs from
* {@link #getPlanStatus} in that getPlanStatus returns
* information as a string, for display.
*
* TODO: perhaps deprecate getPlanStatus, which is used by the Admin CLI,
* in the future, in favor of using getExecutionStatus. We could then have
* the CLI and the DDL API share a common way of displaying plan status.
*
* @throws IllegalArgumentException if the plan Id is null. Otherwise, any
* errors are handled and expressed by the ExecutionInfo, which will
* have isSuccess == false.
*/
public ExecutionInfo getExecutionStatus(int planId, short serialVersion) {
assert TestHookExecute.doHookIfSet(EXECUTE_HOOK, this);
/* Find the plan instance. */
final Plan p = getPlanById(planId);
if (p == null) {
throw new IllegalCommandException
("Attempt to get status for plan " + planId +
" but it doesn't exist");
}
/*
* The planRun has information about the last execution of this plan.
* Hang onto this planRun in case another run starts. This value may
* be null if the plan is just getting started. [#27012]
*/
PlanRun planRun = p.getExecutionState().getLatestPlanRun();
DdlResultsReport ddlReport = new DdlResultsReport(p, serialVersion);
/*
* TODO: the planRun's failure description may need to be translated
* into something more user friendly.
*/
return new ExecutionInfoImpl(
planId,
(planRun != null) && planRun.isTerminated(),
ddlReport.getStatus(),
ddlReport.getStatusAsJson(),
(planRun != null) && planRun.isSuccess(),
(planRun != null) && planRun.isCancelled(),
(planRun != null) ? planRun.getFailureDescription(false) : null,
(planRun != null) && planRun.getState().equals(Plan.State.ERROR),
ddlReport.getResult()); // result
}
/**
* Return the state of the latest plan run.
*/
public Plan.State getCurrentPlanState(int planId) {
final Plan p = getPlanById(planId);
if (p == null) {
throw new IllegalCommandException
("Attempt to get status for plan " + planId +
" but it doesn't exist");
}
/*
* The planRun has information about the last execution of this plan.
* Hang onto this planRun in case another run starts.
*/
return p.getExecutionState().getLatestPlanRun().getState();
}
@Override
public void newParameters(ParameterMap oldMap, ParameterMap newMap) {
assert Thread.holdsLock(this);
final AdminParams oldParams = new AdminParams(oldMap);
final AdminParams newParams = new AdminParams(newMap);
/* Update electable group size override */
final int newElectableGroupSizeOverride =
newParams.getElectableGroupSizeOverride();
if (oldParams.getElectableGroupSizeOverride() !=
newElectableGroupSizeOverride) {
repConfig.setElectableGroupSizeOverride(
newElectableGroupSizeOverride);
environment.setRepMutableConfig(repConfig);
}
long newPollTime =
newParams.getKVMetadataAdminCheckInterval().toMillis();
if (oldParams.isMetadataAdminThreadEnabled() !=
newParams.isMetadataAdminThreadEnabled()) {
if (!newParams.isMetadataAdminThreadEnabled()) {
shutdownKVMetadataAdminThread();
} else if (isAdminThreadEnabled(true)) {
startMetadataAdminThread(
newPollTime,
newParams.getKVMetadataAdminMaxPlanWait().toMillis());
}
} else if (isAdminThreadEnabled(
newParams.isMetadataAdminThreadEnabled())) {
/* Thread was and still is enabled check */
long oldPollTime =
newParams.getKVMetadataAdminCheckInterval().toMillis();
if (oldPollTime != newPollTime) {
startMetadataAdminThread(
newPollTime,
newParams.getKVMetadataAdminMaxPlanWait().toMillis());
}
} else if (!newParams.getKVMetadataAdminMaxPlanWait().equals(
oldParams.getKVMetadataAdminMaxPlanWait()) &&
metadataAdminChecker != null) {
metadataAdminChecker.setMaxPlanWait(
(int) newParams.getKVMetadataAdminMaxPlanWait().toMillis());
}
newPollTime = newParams.getVersionCheckInterval().toMillis();
if (oldParams.isVersionThreadEnabled() !=
newParams.isVersionThreadEnabled()) {
if (!newParams.isVersionThreadEnabled()) {
shutdownVersionUpdaterThread();
} else if (isAdminThreadEnabled(true)) {
startVersionUpdaterThread(newPollTime);
}
} else if (isAdminThreadEnabled(
newParams.isVersionThreadEnabled())) {
/* Thread was and still is enabled check */
long oldPollTime = newParams.getVersionCheckInterval().toMillis();
if (oldPollTime != newPollTime) {
startVersionUpdaterThread(newPollTime);
}
}
/*
* Check whether the new parameters specify a new node type that is not
* reflected by the environment. This could happen if, because of a
* bug, we failed to restart the environment after updating the
* parameter.
*/
final NodeType newNodeType = getNodeType(newParams.getType());
final NodeType envNodeType = environment.getRepConfig().getNodeType();
if (!envNodeType.equals(newNodeType)) {
throw new IllegalStateException(
"Environment for " + adminId + " has wrong node type:" +
" expected " + newNodeType +
", found " + envNodeType);
}
}
/**
* Starts a thread which monitors Admin upgrade. This should be called if
* the Admin is being upgraded and the master must wait until all Admins
* have been upgraded before the store schema can be updated.
*/
void monitorUpgrade() {
if ((upgradeMonitor != null) && upgradeMonitor.isAlive()) {
return;
}
/*
* The monitor thread will check every 10 seconds to see if the stores
* can be upgraded.
*/
upgradeMonitor = new StoppableThread("MonitorUpgradeThread") {
@Override
public void run() {
logger.log(Level.FINE, "{0} started", this);
try {
Thread.sleep(10000);
while (true) {
/*
* Quit the thread if it is shutdown, the admin is
* closing or no longer the master, or the stores have
* been updated.
*/
if (isShutdown() ||
isClosing() ||
!environment.getState().isMaster() ||
!stores.isReadOnly()) {
logger.log(Level.FINE, "{0} exiting", this);
return;
}
logger.log(Level.FINE, "{0} checking for upgrade",this);
synchronized (Admin.this) {
initStores(true /* isMaster */);
}
Thread.sleep(10000);
}
} catch (InterruptedException ex) {
logger.log(Level.WARNING, "{0} interrupted, exited", this);
}
}
@Override
protected Logger getLogger() {
return logger;
}
};
upgradeMonitor.start();
}
private void startSysTableMonitor() {
assert Thread.holdsLock(this);
if (sysTableMonitor != null) {
return;
}
sysTableMonitor = new SysTableMonitor(this);
sysTableMonitor.start();
}
private void shutdownSysTableMonitor() {
assert Thread.holdsLock(this);
if (sysTableMonitor == null) {
return;
}
sysTableMonitor.shutdownThread(logger);
sysTableMonitor = null;
}
/*
* For testing, wait for create/upgrade system tables completed.
*/
public void joinSysTableMonitorThread() throws InterruptedException {
/* Set to local t variable, to avoid sysTableMonitor is set to null
* in another thread after checking it is not null.
*/
Thread t = sysTableMonitor;
if (t != null) {
t.join();
}
}
private String
doFailoverProcessing(String candidateName,
StorageNodePool snPool,
Set newPrimaryZones,
Set offlineZones,
Transaction txn) {
Topology current = getCurrentTopology();
TopologyCandidate candidate =
new TopologyCandidate(candidateName, current);
final Topology topo = candidate.getTopology();
for (final DatacenterId dcId : topo.getDatacenterMap().getAllIds()) {
Datacenter dc = topo.get(dcId);
if (newPrimaryZones.contains(dcId)) {
if (dc.getDatacenterType().equals(DatacenterType.PRIMARY)) {
continue;
}
topo.update(dc.getResourceId(),
Datacenter.newInstance(dc.getName(),
dc.getRepFactor(),
DatacenterType.PRIMARY,
dc.getAllowArbiters(),
dc.getMasterAffinity()));
} else if (offlineZones.contains(dcId)) {
if (dc.getDatacenterType().equals(DatacenterType.SECONDARY)) {
continue;
}
topo.update(dc.getResourceId(),
Datacenter.newInstance(dc.getName(),
dc.getRepFactor(),
DatacenterType.SECONDARY,
dc.getAllowArbiters(),
dc.getMasterAffinity()));
}
}
TopologyBuilder tb =
new TopologyBuilder(candidate,
snPool,
getCurrentParameters(),
getParams());
candidate = tb.fixANProblems(candidate, offlineZones);
final TopologyStore topoStore = stores.getTopologyStore();
topoStore.putCandidate(txn,
new TopologyCandidate(candidateName,
candidate.getTopology()));
return candidateName;
}
/**
* Verify data for this admin.
*
* @param verifyBtree verifies the btree of databases
* @param verifyLog verifies log files of databases
* @param verifyIndex verifies the index
* @param verifyRecord verifies data records in disk
* @param btreeDelay delay between batches for btree verification
* @param logDelay delay between log file reads
* @throws IOException
*/
public void verifyData(boolean verifyBtree,
boolean verifyLog,
boolean verifyIndex,
boolean verifyRecord,
long btreeDelay,
long logDelay)
throws IOException {
final ReplicatedEnvironment env = getEnv();
if (env == null) {
throw new OperationFaultException("Environment unavailable");
}
if (VerifyDataPlan.VERIFY_HOOK != null && getOwner() != null) {
((AdminServiceFaultHandler)(getOwner().getFaultHandler())).
setSuppressPrinting();
}
DatabaseUtils.verifyData(env, adminId, verifyBtree, verifyLog,
verifyIndex, verifyRecord, btreeDelay,
logDelay, logger);
}
private void updateAndNotify(RepNodeParams rnp) {
parameters.update(rnp);
parameterChangeTracker.notifyListeners(rnp.getStorageNodeId());
}
private void updateAndNotify(StorageNodeParams snp) {
parameters.update(snp);
parameterChangeTracker.notifyListeners(snp.getStorageNodeId());
}
private void updateAndNotify(GlobalParams gp) {
parameters.update(gp);
parameterChangeTracker.notifyListeners(null);
}
private void updateAndNotify(ArbNodeParams anp) {
parameters.update(anp);
parameterChangeTracker.notifyListeners(anp.getStorageNodeId());
}
private void shutdownKVMetadataAdminThread() {
if (metadataAdminChecker != null) {
metadataAdminChecker.shutdown();
metadataAdminChecker = null;
}
}
private void shutdownVersionUpdaterThread() {
if (versionUpdater != null) {
versionUpdater.shutdown();
versionUpdater = null;
}
}
private void startMetadataAdminThread(long pollTimeMS, long maxPlanWait) {
shutdownKVMetadataAdminThread();
metadataAdminChecker =
new ParamConsistencyChecker(this,
pollTimeMS,
(int) maxPlanWait,
logger);
}
private void startVersionUpdaterThread(long pollTimeMS) {
shutdownVersionUpdaterThread();
versionUpdater =
new SoftwareVersionUpdater(this,
pollTimeMS,
logger);
}
private boolean isAdminThreadEnabled(boolean configValue) {
if (configValue == true) {
return true;
}
try {
return !checkAdminGroupVersion(MIN_UTIL_THREAD_VERSION);
} catch (Exception e) {
return false;
}
}
class KVMetadataAdminThreadParameterListenter
implements ParameterListener {
@Override
public void newParameters(ParameterMap oldMap, ParameterMap newMap) {
if (metadataAdminChecker != null) {
metadataAdminChecker.newParameters(oldMap, newMap);
}
if (versionUpdater != null) {
versionUpdater.newParameters(oldMap, newMap);
}
}
}
/**
* Simple class used to register listeners for parameter changes
* that effect an SN's configuration.
*/
class SNParameterChangeTracker {
private final Set listeners;
public SNParameterChangeTracker() {
listeners =
Collections.synchronizedSet(
new HashSet());
}
public void addListener(SNParameterChangeListener listener) {
listeners.add(listener);
}
public void removeListener(SNParameterChangeListener listener) {
listeners.remove(listener);
}
public void notifyListeners(StorageNodeId snId) {
for (SNParameterChangeListener listener : listeners) {
listener.changeParameters(snId);
}
}
}
private interface SNParameterChangeListener {
/**
* Notify listener of new parameters.
* @param snId Storage node of resource
*/
public void changeParameters(StorageNodeId snId);
}
class KVAdminParameterListener implements SNParameterChangeListener {
@Override
public void changeParameters(StorageNodeId snId) {
if (metadataAdminChecker != null) {
metadataAdminChecker.changeParameters(snId);
}
}
}
}