org.apache.cassandra.service.CassandraDaemon Maven / Gradle / Ivy
Show all versions of cassandra-all Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.service;
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryPoolMXBean;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.rmi.AccessException;
import java.rmi.AlreadyBoundException;
import java.rmi.NotBoundException;
import java.rmi.Remote;
import java.rmi.RemoteException;
import java.rmi.registry.Registry;
import java.rmi.server.RMIClientSocketFactory;
import java.rmi.server.RMIServerSocketFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import javax.management.ObjectName;
import javax.management.StandardMBean;
import javax.management.remote.JMXConnectorServer;
import javax.management.remote.JMXServiceURL;
import javax.management.remote.rmi.RMIConnectorServer;
import javax.management.remote.rmi.RMIJRMPServerImpl;
import com.addthis.metrics3.reporter.config.ReporterConfig;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistryListener;
import com.codahale.metrics.SharedMetricRegistries;
import com.codahale.metrics.jvm.BufferPoolMetricSet;
import com.codahale.metrics.jvm.FileDescriptorRatioGauge;
import com.codahale.metrics.jvm.GarbageCollectorMetricSet;
import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Uninterruptibles;
import com.palantir.cassandra.concurrent.LocalReadRunnableTimeoutWatcher;
import com.palantir.cassandra.db.BootstrappingSafetyException;
import com.palantir.cassandra.settings.DisableClientInterfaceSetting;
import com.palantir.logsafe.Preconditions;
import com.palantir.logsafe.Safe;
import com.palantir.logsafe.SafeArg;
import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.config.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.concurrent.*;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.config.Schema;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.commitlog.CommitLog;
import org.apache.cassandra.db.marshal.CounterColumnType;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.exceptions.StartupException;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.metrics.CassandraMetricsRegistry;
import org.apache.cassandra.metrics.DefaultNameFactory;
import org.apache.cassandra.metrics.HiccupMeter;
import org.apache.cassandra.service.StorageServiceMBean.NonTransientError;
import org.apache.cassandra.thrift.ThriftServer;
import org.apache.cassandra.utils.*;
/**
* The {@code CassandraDaemon} is an abstraction for a Cassandra daemon
* service, which defines not only a way to activate and deactivate it, but also
* hooks into its lifecycle methods (see {@link #setup()}, {@link #start()},
* {@link #stop()} and {@link #destroy()}).
*/
public class CassandraDaemon
{
// JMX object name under which activate() registers the NativeAccess MBean.
public static final String MBEAN_NAME = "org.apache.cassandra.db:type=NativeAccess";
// Local JMX connector server; assigned in maybeInitJmx() and stopped in stop(). Remains null otherwise.
private static JMXConnectorServer jmxServer = null;
// Assigned in the static initializer, after the "logback-metrics" registry listener has been installed.
private static final Logger logger;
/**
* Exposes the static daemon instance of this class to tests.
*
* @return the shared {@code instance} field
*/
@VisibleForTesting
public static CassandraDaemon getInstanceForTesting()
{
return instance;
}
// Installs a listener on the "logback-metrics" registry and only then creates this class's logger.
static {
// Need to register metrics before instrumented appender is created(first access to LoggerFactory).
SharedMetricRegistries.getOrCreate("logback-metrics").addListener(new MetricRegistryListener.Base()
{
@Override
public void onMeterAdded(String metricName, Meter meter)
{
// Given metricName consists of appender name in logback.xml + "." + metric name.
// We first separate appender name
// NOTE(review): if metricName contains no '.', lastIndexOf returns -1 and substring(0, separator)
// below throws StringIndexOutOfBoundsException - confirm names always contain a dot.
int separator = metricName.lastIndexOf('.');
String appenderName = metricName.substring(0, separator);
String metric = metricName.substring(separator + 1); // remove "."
// Expose the meter as an MBean through the Cassandra metrics registry.
ObjectName name = DefaultNameFactory.createMetricName(appenderName, metric, null).getMBeanName();
CassandraMetricsRegistry.Metrics.registerMBean(meter, name);
}
});
// The logger is deliberately created last, after the listener above is in place.
logger = LoggerFactory.getLogger(CassandraDaemon.class);
}
/**
 * Starts a JMX connector server on the port given by {@code cassandra.jmx.local.port}, advertising the
 * loopback address as the RMI server hostname.
 *
 * Does nothing when {@code com.sun.management.jmxremote.port} is set or when no local port was requested.
 * An {@link IOException} while starting the server is routed through {@code exitOrFail} with exit code 1.
 */
private void maybeInitJmx()
{
    if (System.getProperty("com.sun.management.jmxremote.port") != null)
        return;

    String jmxPort = System.getProperty("cassandra.jmx.local.port");
    if (jmxPort == null)
        return;

    System.setProperty("java.rmi.server.hostname", InetAddress.getLoopbackAddress().getHostAddress());
    RMIServerSocketFactory serverFactory = new RMIServerSocketFactoryImpl();
    Map<String, Object> env = new HashMap<>();
    env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE, serverFactory);
    env.put("jmx.remote.rmi.server.credential.types",
            new String[] { String[].class.getName(), String.class.getName() });
    try
    {
        // Parse the port once; a malformed value still surfaces as NumberFormatException, as before.
        int port = Integer.parseInt(jmxPort);
        JmxRegistry registry = new JmxRegistry(port, null, serverFactory, "jmxrmi");
        JMXServiceURL url = new JMXServiceURL(String.format("service:jmx:rmi://localhost/jndi/rmi://localhost:%s/jmxrmi", jmxPort));
        @SuppressWarnings("resource")
        RMIJRMPServerImpl server = new RMIJRMPServerImpl(port, null, serverFactory, env);
        jmxServer = new RMIConnectorServer(url, env, server, ManagementFactory.getPlatformMBeanServer());
        jmxServer.start();
        // The registry serves exactly one name; hand it the stub only once the server is running.
        registry.setRemoteServerStub(server.toStub());
    }
    catch (IOException e)
    {
        // Pass the exception itself so its stack trace is kept (e.getCause() is frequently null).
        exitOrFail(1, e.getMessage(), e);
    }
}
// Shared daemon instance used by main(), stop(String[]) and the NativeAccess MBean operations.
private static final CassandraDaemon instance = new CassandraDaemon();
// Thrift and native-protocol servers. Created by initializeNativeTransport(); null before that and again
// after destroyNativeTransport().
public Server thriftServer;
public Server nativeServer;
// When true, exitOrFail() throws a RuntimeException instead of calling System.exit, and deactivate()
// leaves the JVM running.
private final boolean runManaged;
// Checks executed by setup() through startupChecks.verify().
protected final StartupChecks startupChecks;
// Set to true by completeSetup(); exposed through setupCompleted().
private boolean setupCompleted;
/**
* Creates a daemon with {@code runManaged} set to false.
*/
public CassandraDaemon() {
this(false);
}
/**
* Creates a daemon with the default startup checks and setup not yet completed.
*
* @param runManaged when true, fatal errors are thrown as RuntimeExceptions by {@code exitOrFail} instead of
*                   terminating the JVM, and {@link #deactivate()} does not call {@code System.exit}
*/
public CassandraDaemon(boolean runManaged) {
this.runManaged = runManaged;
this.startupChecks = new StartupChecks().withDefaultTests();
this.setupCompleted = false;
}
/**
* This is a hook for concrete daemons to initialize themselves suitably.
*
* Subclasses should override this to finish the job (listening on ports, etc.).
*
* If cassandra was initialized successfully {@link #setupCompleted()} returns true. If cassandra encountered a
* commitlog error and {@code commit_failure_policy} configuration is set to
* {@link Config.CommitFailurePolicy#stop_on_startup} no exception will be thrown and {@link #setupCompleted()}
* returns false.
*/
protected void setup()
{
// Install the default file-system error handler as the very first step.
FileUtils.setFSErrorHandler(new DefaultFSErrorHandler());
// Delete any failed snapshot deletions on Windows - see CASSANDRA-9658
if (FBUtilities.isWindows())
WindowsFailedSnapshotTracker.deleteOldSnapshots();
logSystemInfo();
CLibrary.tryMlockall();
FileUtils.setDefaultUncaughtExceptionHandler();
// A failed startup check ends startup with the return code carried by the exception.
try
{
startupChecks.verify();
}
catch (StartupException e)
{
exitOrFail(e.returnCode, e.getMessage(), e.getCause());
}
// An IOException while snapshotting on version change ends startup with code 3.
try
{
SystemKeyspace.snapshotOnVersionChange();
}
catch (IOException e)
{
exitOrFail(3, e.getMessage(), e.getCause());
}
// We need to persist this as soon as possible after startup checks.
// This should be the first write to SystemKeyspace (CASSANDRA-11742)
SystemKeyspace.persistLocalMetadata();
maybeInitJmx();
Directories.scheduleVerifyingDiskDoesNotExceedThresholdChecks();
doNotStartupClientInterfacesIfDisabled();
// Loads the schema, opens keyspaces, replays the commit log and - on success - marks setup as completed.
completeSetupMayThrowSstableException();
logger.debug("Completed CassandraDaemon setup.");
}
/* This functionality should only be used in a migration mode for a brand new cluster, and ensures that client
interfaces are not enabled across restarts. It acts only when both the 'is_new_cluster' and the
'persist_disable_client_interfaces' system properties are true. */
private void doNotStartupClientInterfacesIfDisabled()
{
    // only run this for new clusters - after that, we rely on the behavior set via the JMX probe
    if (!Boolean.getBoolean("palantir_cassandra.is_new_cluster"))
        return;

    if (!Boolean.getBoolean("palantir_cassandra.persist_disable_client_interfaces"))
        return;

    logger.warn("Disabling client interfaces persistently, because this is a new cluster with " +
                "'persist_disable_client_interfaces' set to true");
    try
    {
        DisableClientInterfaceSetting.instance.setTrue();
    }
    catch (IOException ioe)
    {
        throw new RuntimeException("Caught IOException when attempting to mark persistent setting", ioe);
    }
}
/* This part of setup may throw a CorruptSSTableException. */
private void completeSetupMayThrowSstableException() {
// load schema from disk
Schema.instance.loadFromDisk();
for (String keyspaceName : Schema.instance.getKeyspaces())
{
for (CFMetaData cfm : Schema.instance.getKeyspaceMetaData(keyspaceName).values())
{
for (ColumnDefinition def : cfm.allColumns())
{
Preconditions.checkState(
!(def.type instanceof CounterColumnType),
"Palantir Cassandra does not support counter columns",
SafeArg.of("keyspace", keyspaceName),
SafeArg.of("columnFamily", cfm.cfName),
SafeArg.of("columnName", def.name));
}
}
}
Map, Map> unfinishedCompactions = SystemKeyspace.getUnfinishedCompactions();
for (String keyspaceName : Schema.instance.getKeyspaces())
{
for (CFMetaData cfm : Schema.instance.getKeyspaceMetaData(keyspaceName).values())
{
ColumnFamilyStore.removeUnusedSstables(cfm, unfinishedCompactions.getOrDefault(cfm.ksAndCFName, ImmutableMap.of()));
}
if (keyspaceName.equals(SystemKeyspace.NAME))
continue;
for (CFMetaData cfm : Schema.instance.getKeyspaceMetaData(keyspaceName).values())
{
ColumnFamilyStore.scrubDataDirectories(cfm);
}
}
SystemKeyspace.discardCompactionsInProgress();
Keyspace.setInitialized();
// initialize keyspaces
for (String keyspaceName : Schema.instance.getKeyspaces())
{
if (logger.isDebugEnabled())
logger.debug("opening keyspace {}", keyspaceName);
// disable auto compaction until commit log replay ends
for (ColumnFamilyStore cfs : Keyspace.open(keyspaceName).getColumnFamilyStores())
{
for (ColumnFamilyStore store : cfs.concatWithIndexes())
{
store.disableAutoCompaction();
}
}
}
try
{
loadRowAndKeyCacheAsync().get();
}
catch (Throwable t)
{
JVMStabilityInspector.inspectThrowable(t);
logger.warn("Error loading key or row cache", t);
}
try
{
GCInspector.register();
}
catch (Throwable t)
{
JVMStabilityInspector.inspectThrowable(t);
logger.warn("Unable to start GCInspector (currently only supported on the Sun JVM)");
}
recoverCommitlogAndCompleteSetup();
}
/**
* Replays the commit log and, if that succeeds, finishes daemon setup: re-enables auto compaction, initializes
* StorageService, optionally loads metrics reporters, waits for gossip, schedules background tasks, creates the
* client transport servers and finally marks setup as completed.
*
* Returns early, leaving {@link #setupCompleted()} false, when commit log recovery fails with an IOException under
* the {@code stop_on_startup} commit failure policy with a recorded COMMIT_LOG_CORRUPTION non-transient error, or
* when {@code initServer()} throws a BootstrappingSafetyException.
*/
private void recoverCommitlogAndCompleteSetup() {
// replay the log if necessary
try
{
CommitLog.instance.recover();
}
catch (IOException e)
{
// Under stop_on_startup with a recorded commit log corruption, stay up without completing setup.
if (DatabaseDescriptor.getCommitFailurePolicy() == Config.CommitFailurePolicy.stop_on_startup
&& StorageService.instance.hasNonTransientError(StorageServiceMBean.NonTransientError.COMMIT_LOG_CORRUPTION))
{
logger.error("Failed to recover from commitlog corruption due to some non transient errors: {}",
StorageService.instance.getNonTransientErrors());
return;
}
throw new RuntimeException(e);
}
enableAutoCompaction();
SystemKeyspace.finishStartup();
// start server internals
StorageService.instance.registerDaemon(this);
try
{
StorageService.instance.initServer();
}
catch (BootstrappingSafetyException e)
{
logger.error("Non-fatal bootstrap error. Server will continue but is disabled and without metrics.", e);
return;
}
catch (ConfigurationException e)
{
logger.error("Fatal configuration error; unable to start server. See log for stacktrace.", e);
exitOrFail(1, "Fatal configuration error", e);
}
Mx4jTool.maybeLoad();
// Metrics
String metricsReporterConfigFile = System.getProperty("cassandra.metricsReporterConfigFile");
if (metricsReporterConfigFile != null)
{
logger.info("Trying to load metrics-reporter-config from file: {}", metricsReporterConfigFile);
try
{
// enable metrics provided by metrics-jvm.jar
CassandraMetricsRegistry.Metrics.register("jvm.buffers", new BufferPoolMetricSet(ManagementFactory.getPlatformMBeanServer()));
CassandraMetricsRegistry.Metrics.register("jvm.gc", new GarbageCollectorMetricSet());
CassandraMetricsRegistry.Metrics.register("jvm.memory", new MemoryUsageGaugeSet());
CassandraMetricsRegistry.Metrics.register("jvm.fd.usage", new FileDescriptorRatioGauge());
// initialize metrics-reporter-config from yaml file
// The file is looked up as a classpath resource; a missing resource makes getResource return null and
// the resulting exception is handled by the catch below.
String reportFileLocation = CassandraDaemon.class.getClassLoader().getResource(metricsReporterConfigFile).getFile();
ReporterConfig.loadFromFile(reportFileLocation).enableAll(CassandraMetricsRegistry.Metrics);
}
catch (Exception e)
{
// Reporter configuration is optional: log and keep starting up.
logger.warn("Failed to load metrics-reporter-config, metric sinks will not be activated", e);
}
}
new HiccupMeter().start();
// The gossip wait is skipped when the broadcast address is the loopback address.
if (!FBUtilities.getBroadcastAddress().equals(InetAddress.getLoopbackAddress()))
waitForGossipToSettle();
// schedule periodic background compaction task submission. this is simply a backstop against compactions stalling
// due to scheduling errors or race conditions
ScheduledExecutors.optionalTasks.scheduleWithFixedDelay(ColumnFamilyStore.getBackgroundCompactionTaskSubmitter(), 5, 1, TimeUnit.MINUTES);
// schedule a check for local read threads to see if they hit the timeout threshold
ScheduledExecutors.scheduledTasks.scheduleAtFixedRate(LocalReadRunnableTimeoutWatcher.INSTANCE, 0, 1, TimeUnit.SECONDS);
// schedule periodic dumps of table size estimates into SystemKeyspace.SIZE_ESTIMATES_CF
// set cassandra.size_recorder_interval to 0 to disable
int sizeRecorderInterval = Integer.getInteger("cassandra.size_recorder_interval", 5 * 60);
if (sizeRecorderInterval > 0)
ScheduledExecutors.optionalTasks.scheduleWithFixedDelay(SizeEstimatesRecorder.instance, 30, sizeRecorderInterval, TimeUnit.SECONDS);
// Create (but do not start) the client transport servers, then flag setup as done.
initializeNativeTransport();
completeSetup();
}
/**
 * Creates the Thrift and native-protocol server objects from the configured RPC address and ports.
 * Neither server is started here.
 */
public void initializeNativeTransport()
{
    // Thrift
    thriftServer = new ThriftServer(DatabaseDescriptor.getRpcAddress(),
                                    DatabaseDescriptor.getRpcPort(),
                                    DatabaseDescriptor.getRpcListenBacklog());

    // Native transport
    nativeServer = new org.apache.cassandra.transport.Server(DatabaseDescriptor.getRpcAddress(),
                                                             DatabaseDescriptor.getNativeTransportPort());
}
/**
 * Starts the native transport server and then the Thrift server, unless client interfaces have been
 * persistently disabled.
 *
 * @throws IllegalStateException if the transports may not start yet (see validateTransportsCanStart()) or a
 *                               server has not been created
 */
public void startNativeTransport()
{
    boolean clientInterfacesDisabled = DisableClientInterfaceSetting.instance.isTrue();
    if (clientInterfacesDisabled)
    {
        logger.warn("Not enabling client interface servers (thrift and native transport) because persistent settings" +
                    " have marked client interfaces as disabled");
        return;
    }

    validateTransportsCanStart();

    if (nativeServer == null)
    {
        throw new IllegalStateException("native transport should be set up before it can be started");
    }
    nativeServer.start();
    InetSocketAddress nativeEndpoint =
        new InetSocketAddress(DatabaseDescriptor.getRpcAddress(), DatabaseDescriptor.getNativeTransportPort());
    logger.info("Native server running on {}", nativeEndpoint);

    if (thriftServer == null)
    {
        throw new IllegalStateException("thrift transport should be set up before it can be started");
    }
    thriftServer.start();
    InetSocketAddress thriftEndpoint =
        new InetSocketAddress(DatabaseDescriptor.getRpcAddress(), DatabaseDescriptor.getRpcPort());
    logger.info("Thrift server running on {}", thriftEndpoint);
}
/**
 * Verifies that the client transports are allowed to start.
 *
 * @throws IllegalStateException if the node has joined the ring and is either in survey mode while
 *                               bootstrapping or requiring authentication, or not in survey mode with an
 *                               incomplete bootstrap
 */
private void validateTransportsCanStart()
{
    // We only start transports if bootstrap has completed and we're not in survey mode, OR if we are in
    // survey mode and streaming has completed but we're not using auth.
    // OR if we have not joined the ring yet.
    if (!StorageService.instance.hasJoined())
        return;

    if (StorageService.instance.isSurveyMode())
    {
        boolean blocked = StorageService.instance.isBootstrapMode()
                          || DatabaseDescriptor.getAuthenticator().requireAuthentication();
        if (blocked)
        {
            throw new IllegalStateException("Not starting client transports in write_survey mode as it's bootstrapping or " +
                                            "auth is enabled");
        }
        return;
    }

    if (!SystemKeyspace.bootstrapComplete())
    {
        throw new IllegalStateException("Node is not yet bootstrapped completely. Use nodetool to check bootstrap" +
                                        " state and resume. For more, see `nodetool help bootstrap`");
    }
}
/*
* Asynchronously load the row and key cache in one off threads and return a compound future of the result.
* Error handling is pushed into the cache load since cache loads are allowed to fail and are handled by logging.
*/
private ListenableFuture> loadRowAndKeyCacheAsync()
{
final ListenableFuture keyCacheLoad = CacheService.instance.keyCache.loadSavedAsync();
final ListenableFuture rowCacheLoad = CacheService.instance.rowCache.loadSavedAsync();
@SuppressWarnings("unchecked")
ListenableFuture> retval = Futures.successfulAsList(keyCacheLoad, rowCacheLoad);
return retval;
}
/**
 * Enables auto compaction on every column family store (index stores included) of every keyspace whose
 * compaction strategy reports that it should be enabled.
 */
public static void enableAutoCompaction()
{
    for (Keyspace ks : Keyspace.all())
    {
        for (ColumnFamilyStore baseStore : ks.getColumnFamilyStores())
        {
            for (final ColumnFamilyStore candidate : baseStore.concatWithIndexes())
            {
                if (!candidate.getCompactionStrategy().shouldBeEnabled())
                    continue;

                candidate.enableAutoCompaction();
            }
        }
    }
}
/**
* Marks daemon setup as completed; afterwards {@link #setupCompleted()} returns true.
*/
@VisibleForTesting
public void completeSetup()
{
setupCompleted = true;
}
/**
* @return true once {@link #completeSetup()} has been called on this daemon
*/
public boolean setupCompleted()
{
return setupCompleted;
}
/**
 * Logs host, JVM, heap, memory pool, classpath and JVM argument details at INFO level.
 * Does nothing when INFO logging is disabled.
 */
private void logSystemInfo()
{
    if (!logger.isInfoEnabled())
        return;

    try
    {
        logger.info("Hostname: {}", InetAddress.getLocalHost().getHostName());
    }
    catch (UnknownHostException unresolved)
    {
        logger.info("Could not resolve local host");
    }

    logger.info("JVM vendor/version: {}/{}", System.getProperty("java.vm.name"), System.getProperty("java.version"));

    Runtime runtime = Runtime.getRuntime();
    logger.info("Heap size: {}/{}", runtime.totalMemory(), runtime.maxMemory());

    for (MemoryPoolMXBean memoryPool : ManagementFactory.getMemoryPoolMXBeans())
    {
        logger.info("{} {}: {}", memoryPool.getName(), memoryPool.getType(), memoryPool.getPeakUsage());
    }

    logger.info("Classpath: {}", System.getProperty("java.class.path"));
    logger.info("JVM Arguments: {}", ManagementFactory.getRuntimeMXBean().getInputArguments());
}
/**
* Initialize the Cassandra Daemon based on the given Commons
* Daemon-specific arguments. To clarify, this is a hook for JSVC.
*
* This implementation ignores the arguments and simply delegates to {@link #setup()}.
*
* @param arguments
* the arguments passed in from JSVC
* @throws IOException declared on the hook signature; {@link #setup()} as declared in this class has no
* checked exceptions
*/
public void init(String[] arguments) throws IOException
{
setup();
}
/**
 * Start the Cassandra Daemon, assuming that it has already been
 * initialized via {@link #init(String[])}
 *
 * Hook for JSVC
 *
 * Nothing is started when client interfaces are persistently disabled or when the transports are not yet
 * allowed to start; both cases are only logged. Otherwise each transport is started according to its system
 * property ({@code cassandra.start_native_transport}, {@code cassandra.start_rpc}) or, when the property is
 * unset, the configuration.
 *
 * @throws IllegalStateException if a transport is requested but its server was never created, for example
 *                               because setup returned without completing
 */
public void start()
{
    if (DisableClientInterfaceSetting.instance.isTrue())
    {
        logger.warn("Not enabling client interface servers (thrift and native transport) because persistent settings" +
                    " have marked client interfaces as disabled");
        return;
    }

    try
    {
        validateTransportsCanStart();
    }
    catch (IllegalStateException isx)
    {
        // If there are any errors, we just log and return in this case
        logger.info(isx.getMessage());
        return;
    }

    // An explicit system property wins; the configuration is only consulted when the property is absent.
    String nativeFlag = System.getProperty("cassandra.start_native_transport");
    boolean startNative = nativeFlag != null ? Boolean.parseBoolean(nativeFlag) : DatabaseDescriptor.startNativeTransport();
    if (startNative)
    {
        // Fail with the same descriptive error as startNativeTransport() rather than a bare NPE.
        if (nativeServer == null)
            throw new IllegalStateException("native transport should be set up before it can be started");
        nativeServer.start();
    }
    else
    {
        logger.info("Not starting native transport as requested. Use JMX (StorageService->startNativeTransport()) or nodetool (enablebinary) to start it");
    }

    String rpcFlag = System.getProperty("cassandra.start_rpc");
    boolean startRpc = rpcFlag != null ? Boolean.parseBoolean(rpcFlag) : DatabaseDescriptor.startRpc();
    if (startRpc)
    {
        if (thriftServer == null)
            throw new IllegalStateException("thrift transport should be set up before it can be started");
        thriftServer.start();
    }
    else
    {
        logger.info("Not starting RPC server as requested. Use JMX (StorageService->startRPCServer()) or nodetool (enablethrift) to start it");
    }
}
/**
 * Stop the daemon, ideally in an idempotent manner.
 *
 * Hook for JSVC / Procrun
 *
 * Safe to call when the transport servers were never created or have already been destroyed; in that case
 * only the remaining steps run.
 */
public void stop()
{
    // On linux, this doesn't entirely shut down Cassandra, just the RPC server.
    // jsvc takes care of taking the rest down
    logger.info("Cassandra shutting down...");

    // Either server is null if setup did not get as far as initializeNativeTransport(), or after
    // destroyNativeTransport(); skip it instead of failing with a NullPointerException.
    if (thriftServer != null)
        thriftServer.stop();
    if (nativeServer != null)
        nativeServer.stop();

    // On windows, we need to stop the entire system as prunsrv doesn't have the jsvc hooks
    // We rely on the shutdown hook to drain the node
    if (FBUtilities.isWindows())
        System.exit(0);

    if (jmxServer != null)
    {
        try
        {
            jmxServer.stop();
        }
        catch (IOException e)
        {
            logger.error("Error shutting down local JMX server: ", e);
        }
    }
}
/**
 * Stops the native and Thrift servers, waiting for each to terminate, and clears both fields.
 * A server that is already null is skipped.
 */
@VisibleForTesting
public void destroyNativeTransport() throws InterruptedException
{
    // In 2.2, just stopping the server works. Future versions require `destroy` to be called
    // so we maintain the name for consistency
    if (nativeServer != null)
    {
        nativeServer.stopAndAwaitTermination();
        nativeServer = null;
    }

    if (thriftServer != null)
    {
        thriftServer.stopAndAwaitTermination();
        thriftServer = null;
    }
}
/**
* Clean up all resources obtained during the lifetime of the daemon. This
* is a hook for JSVC.
*
* This implementation is intentionally empty.
*/
public void destroy()
{}
/**
 * A convenience method to initialize and start the daemon in one shot.
 *
 * Any Throwable raised on the way is reported on stdout/stderr (and through the logger when running managed)
 * and then handed to {@code exitOrFail} with code 3.
 */
public void activate()
{
    // Do not put any references to DatabaseDescriptor above the forceStaticInitialization call.
    try
    {
        try
        {
            DatabaseDescriptor.forceStaticInitialization();
            DatabaseDescriptor.setDaemonInitialized();
        }
        catch (ExceptionInInitializerError initError)
        {
            // Surface the real failure rather than the static-initializer wrapper.
            throw initError.getCause();
        }

        StandardMBean nativeAccessBean = new StandardMBean(new NativeAccess(), NativeAccessMBean.class);
        MBeanWrapper.instance.registerMBean(nativeAccessBean, MBEAN_NAME, MBeanWrapper.OnException.LOG);

        if (FBUtilities.isWindows())
        {
            // We need to adjust the system timer on windows from the default 15ms down to the minimum of 1ms as this
            // impacts timer intervals, thread scheduling, driver interrupts, etc.
            WindowsTimer.startTimerPeriod(DatabaseDescriptor.getWindowsTimerInterval());
        }

        setup();

        String pidFile = System.getProperty("cassandra-pidfile");
        if (pidFile != null)
        {
            new File(pidFile).deleteOnExit();
        }

        // Detach from the console unless explicitly asked to stay in the foreground.
        if (System.getProperty("cassandra-foreground") == null)
        {
            System.out.close();
            System.err.close();
        }

        if (setupCompleted())
        {
            start();
        }
    }
    catch (Throwable e)
    {
        // Only a ConfigurationException can opt out of having its stack trace reported.
        boolean logStackTrace = !(e instanceof ConfigurationException) || ((ConfigurationException) e).logStackTrace;

        System.out.println("Exception (" + e.getClass().getName() + ") encountered during startup: " + e.getMessage());

        if (!logStackTrace)
        {
            if (runManaged)
            {
                logger.error("Exception encountered during startup: {}", e.getMessage());
            }
            // try to warn user on stdout too, if we haven't already detached
            System.err.println(e.getMessage());
            exitOrFail(3, "Exception encountered during startup: " + e.getMessage());
        }
        else
        {
            if (runManaged)
            {
                logger.error("Exception encountered during startup", e);
            }
            // try to warn user on stdout too, if we haven't already detached
            e.printStackTrace();
            exitOrFail(3, "Exception encountered during startup", e);
        }
    }
}
/**
 * A convenience method to stop and destroy the daemon in one shot.
 * Unless the daemon runs managed, the JVM is terminated afterwards.
 */
public void deactivate()
{
    stop();
    destroy();

    if (runManaged)
        return;

    // completely shut down cassandra
    System.exit(0);
}
/**
 * Blocks until the gossip stage reports no active and no pending tasks on three consecutive polls taken one
 * second apart, after an initial five second wait.
 *
 * The {@code cassandra.skip_wait_for_gossip_to_settle} system property tunes this: 0 skips the wait entirely,
 * a positive value stops polling once more than that many polls have been made, and a negative value (the
 * default, -1) waits without limit.
 */
@VisibleForTesting
public static void waitForGossipToSettle()
{
    final int forceAfter = Integer.getInteger("cassandra.skip_wait_for_gossip_to_settle", -1);
    if (forceAfter == 0)
        return;

    final int minWaitMs = 5000;
    final int pollIntervalMs = 1000;
    final int successesRequired = 3;

    logger.info("Waiting for gossip to settle before accepting client requests...");
    Uninterruptibles.sleepUninterruptibly(minWaitMs, TimeUnit.MILLISECONDS);

    int polls = 0;
    int consecutiveQuietPolls = 0;
    JMXEnabledThreadPoolExecutor gossipStage = (JMXEnabledThreadPoolExecutor) StageManager.getStage(Stage.GOSSIP);
    while (consecutiveQuietPolls < successesRequired)
    {
        Uninterruptibles.sleepUninterruptibly(pollIntervalMs, TimeUnit.MILLISECONDS);
        long completed = gossipStage.metrics.completedTasks.getValue();
        long active = gossipStage.metrics.activeTasks.getValue();
        long pending = gossipStage.metrics.pendingTasks.getValue();
        polls++;

        boolean quiet = active == 0 && pending == 0;
        if (quiet)
        {
            logger.debug("Gossip looks settled. CompletedTasks: {}", completed);
            consecutiveQuietPolls++;
        }
        else
        {
            // Any activity resets the streak of quiet polls.
            logger.info("Gossip not settled after {} polls. Gossip Stage active/pending/completed: {}/{}/{}", polls, active, pending, completed);
            consecutiveQuietPolls = 0;
        }

        if (forceAfter > 0 && polls > forceAfter)
        {
            logger.warn("Gossip not settled but startup forced by cassandra.skip_wait_for_gossip_to_settle. Gossip Stage total/active/pending/completed: {}/{}/{}/{}",
                        polls, active, pending, completed);
            break;
        }
    }

    if (polls > successesRequired)
    {
        logger.info("Gossip settled after {} extra polls; proceeding", polls - successesRequired);
    }
    else
    {
        logger.info("No gossip backlog; proceeding");
    }
}
/**
* Static entry point that deactivates the shared daemon instance.
*
* @param args ignored
*/
public static void stop(String[] args) throws InterruptedException
{
instance.deactivate();
}
/**
* Program entry point: activates the shared daemon instance.
*
* @param args ignored
*/
public static void main(String[] args)
{
instance.activate();
}
/**
* Same as {@link #exitOrFail(int, String, Throwable)} with no cause.
*/
private void exitOrFail(int code, String message) {
exitOrFail(code, message, null);
}
/**
 * Reports a fatal startup problem.
 *
 * When running managed, throws a RuntimeException carrying {@code message} (and {@code cause}, if any).
 * Otherwise logs the error and terminates the JVM with {@code code}.
 *
 * @param code    process exit code used when not running managed
 * @param message description of the failure
 * @param cause   underlying problem, may be null
 */
private void exitOrFail(int code, String message, Throwable cause)
{
    if (!runManaged)
    {
        logger.error(message, cause);
        System.exit(code);
    }
    else
    {
        if (cause == null)
            throw new RuntimeException(message);
        throw new RuntimeException(message, cause);
    }
}
static class NativeAccess implements NativeAccessMBean
{
public boolean isAvailable()
{
return CLibrary.jnaAvailable();
}
public boolean isMemoryLockable()
{
return CLibrary.jnaMemoryLockable();
}
public void reinitializeFromCommitlogCorruption() throws IllegalNonTransientErrorStateException
{
checkInNonTransientErrorMode();
checkHasExpectedNte(NonTransientError.COMMIT_LOG_CORRUPTION);
StorageService.instance.clearNonTransientErrors();
CassandraDaemon.instance.recoverCommitlogAndCompleteSetup();
if (CassandraDaemon.instance.setupCompleted())
{
CassandraDaemon.instance.start();
}
}
public void reinitializeFromSstableCorruption() throws IllegalNonTransientErrorStateException
{
checkInNonTransientErrorMode();
checkHasExpectedNte(NonTransientError.SSTABLE_CORRUPTION);
StorageService.instance.clearNonTransientErrors();
if (!CassandraDaemon.instance.setupCompleted())
{
// if setup wasn't completed, then an FS error occurred early; we should re-attempt
CassandraDaemon.instance.completeSetupMayThrowSstableException();
}
else
{
StorageService.instance.startGossiping();
StorageService.instance.setOperationModeNormal();
CassandraDaemon.instance.start();
enableAutoCompaction();
}
}
private void checkInNonTransientErrorMode() throws IllegalNonTransientErrorStateException
{
if (!StorageService.instance.inNonTransientErrorMode()) {
logger.error("Attempted to reinitializeFromSstableCorruption when not in NonTransientError mode; "
+ "current mode: " + StorageService.instance.getOperationMode());
throw new NativeAccessMBean.IllegalNonTransientErrorStateException("Can only reinitializeFromSstableCorruption when in NonTransientError mode");
}
}
private void checkHasExpectedNte(NonTransientError error)
{
boolean hasExpectedNte = false;
boolean onlyExpectedNte = true;
for (Map nte : StorageService.instance.getNonTransientErrors()) {
boolean isExpectedNte = error.name().equals(nte.get(StorageServiceMBean.NON_TRANSIENT_ERROR_TYPE_KEY));
hasExpectedNte |= isExpectedNte;
onlyExpectedNte &= isExpectedNte;
}
if (!hasExpectedNte || !onlyExpectedNte) {
logger.error(String.format("Attempted to reinitialize from corruption when there is no known corruption of expected type, "
+ "or there are other corruption NonTransientErrors not of expected type; expected {}", error.name()));
throw new IllegalArgumentException("Can only reinitialize from corruption when there are NonTransientErrors "
+ "only of expected type.");
}
}
}
/**
 * Minimal lifecycle contract shared by the client-facing servers managed by the daemon.
 */
public interface Server
{
    /**
     * Start the server.
     * This method should be able to restart a server stopped through stop().
     * Should throw a RuntimeException if the server cannot be started
     */
    void start();

    /**
     * Stop the server.
     * This method should be able to stop server started through start().
     * Should throw a RuntimeException if the server cannot be stopped
     */
    void stop();

    /**
     * Stop the server and wait for it to terminate.
     */
    @VisibleForTesting
    void stopAndAwaitTermination();

    /**
     * Returns whether the server is currently running.
     */
    boolean isRunning();
}
/**
 * RMI registry that serves a single, fixed name. Lookups of that name return the stub set through
 * {@link #setRemoteServerStub(Remote)}; bind, unbind and rebind are no-ops.
 */
@SuppressWarnings("restriction")
private static class JmxRegistry extends sun.rmi.registry.RegistryImpl
{
    private final String lookupName;
    private Remote remoteServerStub;

    JmxRegistry(int port, RMIClientSocketFactory csf, RMIServerSocketFactory ssf, String lookupName) throws RemoteException
    {
        super(port, csf, ssf);
        this.lookupName = lookupName;
    }

    /** Returns the registered stub for the fixed lookup name and null for any other name. */
    @Override
    public Remote lookup(String s) throws RemoteException, NotBoundException
    {
        if (lookupName.equals(s))
            return remoteServerStub;
        return null;
    }

    /** Ignored: the set of names served by this registry is fixed. */
    @Override
    public void bind(String s, Remote remote) throws RemoteException, AlreadyBoundException, AccessException
    {
    }

    /** Ignored: the set of names served by this registry is fixed. */
    @Override
    public void unbind(String s) throws RemoteException, NotBoundException, AccessException
    {
    }

    /** Ignored: the set of names served by this registry is fixed. */
    @Override
    public void rebind(String s, Remote remote) throws RemoteException, AccessException
    {
    }

    /** Lists the single name this registry answers to. */
    @Override
    public String[] list() throws RemoteException
    {
        return new String[] { lookupName };
    }

    public void setRemoteServerStub(Remote remoteServerStub)
    {
        this.remoteServerStub = remoteServerStub;
    }
}
}