All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cassandra.service.StartupChecks Maven / Gradle / Ivy

There is a newer version: 2.2.18-2.2.18-1.170.1-rc1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.service;

import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.config.Schema;
import org.apache.cassandra.db.*;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.exceptions.StartupException;
import org.apache.cassandra.io.sstable.Descriptor;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.utils.*;

/**
 * Verifies that the system and environment is in a fit state to be started.
 * Used in CassandraDaemon#setup() to check various settings and invariants.
 *
 * Each individual test is modelled as an implementation of StartupCheck, these are run
 * at the start of CassandraDaemon#setup() before any local state is mutated. The default
 * checks are a mix of informational tests (inspectJvmOptions), initialization
 * (initSigarLibrary, checkCacheServiceInitialization) and invariant checking
 * (checkValidLaunchDate, checkSystemKeyspaceState, checkSSTablesFormat).
 *
 * In addition, if checkSystemKeyspaceState determines that the release version has
 * changed since last startup (i.e. the node has been upgraded) it snapshots the system
 * keyspace to make it easier to back out if necessary.
 *
 * If any check reports a failure, then the setup method exits with an error (after
 * logging any output from the tests). If all tests report success, setup can continue.
 * We should be careful in future to ensure anything which mutates local state (such as
 * writing new sstables etc) only happens after we've verified the initial setup.
 */
public class StartupChecks
{
    private static final Logger logger = LoggerFactory.getLogger(StartupChecks.class);

    // List of checks to run before starting up. If any test reports failure, startup will be halted.
    private final Map preFlightChecks = new LinkedHashMap<>();

    // The default set of pre-flight checks to run. Order is somewhat significant in that we probably
    // always want the system keyspace check run last, as this actually loads the schema for that
    // keyspace. All other checks should not require any schema initialization.
    private final List> DEFAULT_TESTS = ImmutableList.of(
        new AbstractMap.SimpleEntry<>("checkJemalloc", checkJemalloc),
        new AbstractMap.SimpleEntry<>("checkValidLaunchDate", checkValidLaunchDate),
        new AbstractMap.SimpleEntry<>("checkJMXPorts", checkJMXPorts),
        new AbstractMap.SimpleEntry<>("inspectJvmOptions", inspectJvmOptions),
        new AbstractMap.SimpleEntry<>("checkJnaInitialization", checkJnaInitialization),
        new AbstractMap.SimpleEntry<>("initSigarLibrary", initSigarLibrary),
        new AbstractMap.SimpleEntry<>("checkDataDirs", checkDataDirs),
        new AbstractMap.SimpleEntry<>("checkSSTablesFormat", checkSSTablesFormat),
        new AbstractMap.SimpleEntry<>("checkSystemKeyspaceState", checkSystemKeyspaceState),
        new AbstractMap.SimpleEntry<>("checkDatacenter", checkDatacenter),
        new AbstractMap.SimpleEntry<>("checkRack", checkRack),
        new AbstractMap.SimpleEntry<>("checkIp", checkIp));

    public StartupChecks withDefaultTests()
    {
        for (Map.Entry test : DEFAULT_TESTS)
        {
            preFlightChecks.put(test.getKey(), test.getValue());
        }
        return this;
    }

    /**
     * Add system test to be run before schema is loaded during startup
     * @param test the system test to include
     */
    public StartupChecks withTest(String name, StartupCheck test)
    {
        preFlightChecks.put(name, test);
        return this;
    }

    /**
     * Run the configured tests and return a report detailing the results.
     * @throws org.apache.cassandra.exceptions.StartupException if any test determines that the
     * system is not in an valid state to startup
     */
    public void verify() throws StartupException
    {
        for (Map.Entry check : preFlightChecks.entrySet())
        {
            logger.debug("Executing preflight check {}", check.getKey());
            check.getValue().execute();
            logger.debug("Preflight check {} completed", check.getKey());
        }
    }

    public static final StartupCheck checkJemalloc = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            if (FBUtilities.isWindows())
                return;
            String jemalloc = System.getProperty("cassandra.libjemalloc");
            if (jemalloc == null)
                logger.warn("jemalloc shared library could not be preloaded to speed up memory allocations");
            else if ("-".equals(jemalloc))
                logger.info("jemalloc preload explicitly disabled");
            else
                logger.info("jemalloc seems to be preloaded from {}", jemalloc);
        }
    };

    public static final StartupCheck checkValidLaunchDate = new StartupCheck()
    {
        /**
         * The earliest legit timestamp a casandra instance could have ever launched.
         * Date roughly taken from http://perspectives.mvdirona.com/2008/07/12/FacebookReleasesCassandraAsOpenSource.aspx
         * We use this to ensure the system clock is at least somewhat correct at startup.
         */
        private static final long EARLIEST_LAUNCH_DATE = 1215820800000L;
        public void execute() throws StartupException
        {
            long now = System.currentTimeMillis();
            if (now < EARLIEST_LAUNCH_DATE)
                throw new StartupException(1, String.format("current machine time is %s, but that is seemingly incorrect. exiting now.",
                                                            new Date(now).toString()));
        }
    };

    public static final StartupCheck checkJMXPorts = new StartupCheck()
    {
        public void execute()
        {
            String jmxPort = System.getProperty("com.sun.management.jmxremote.port");
            if (jmxPort == null)
            {
                logger.warn("JMX is not enabled to receive remote connections. Please see cassandra-env.sh for more info.");
                jmxPort = System.getProperty("cassandra.jmx.local.port");
                if (jmxPort == null)
                    logger.error("cassandra.jmx.local.port missing from cassandra-env.sh, unable to start local JMX service.");
            }
            else
            {
                logger.info("JMX is enabled to receive remote connections on port: " + jmxPort);
            }
        }
    };

    public static final StartupCheck inspectJvmOptions = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            // log warnings for different kinds of sub-optimal JVMs.  tldr use 64-bit Oracle >= 1.6u32
            if (!DatabaseDescriptor.hasLargeAddressSpace())
                logger.warn("32bit JVM detected.  It is recommended to run Cassandra on a 64bit JVM for better performance.");

            String javaVmName = System.getProperty("java.vm.name");
            if (javaVmName.contains("OpenJDK"))
            {
                // There is essentially no QA done on OpenJDK builds, and
                // clusters running OpenJDK have seen many heap and load issues.
                logger.warn("OpenJDK is not recommended. Please upgrade to the newest Oracle Java release");
                checkOutOfMemoryHandling();
            }
            else if (!javaVmName.contains("HotSpot"))
            {
                logger.warn("Non-Oracle JVM detected.  Some features, such as immediate unmap of compacted SSTables, may not work as intended");
            }
            else
            {
                checkOutOfMemoryHandling();
            }
        }

        /**
         * Checks that the JVM is configured to handle OutOfMemoryError
         */
        private void checkOutOfMemoryHandling() throws StartupException
        {
            if (JavaUtils.supportExitOnOutOfMemory(System.getProperty("java.version")))
            {
                if (!jvmOptionsContainsOneOf("-XX:OnOutOfMemoryError=", "-XX:+ExitOnOutOfMemoryError", "-XX:+CrashOnOutOfMemoryError"))
                    throw new StartupException(1, "The JVM is not configured to stop on OutOfMemoryError which can cause data corruption."
                                + " Use one of the following JVM options to configure the behavior on OutOfMemoryError: "
                                + " -XX:+ExitOnOutOfMemoryError, -XX:+CrashOnOutOfMemoryError, or -XX:OnOutOfMemoryError=\";\"");
            }
            else
            {
                if (!jvmOptionsContainsOneOf("-XX:OnOutOfMemoryError="))
                    throw new StartupException(1, "The JVM is not configured to stop on OutOfMemoryError which can cause data corruption."
                            + " Either upgrade your JRE to a version greater or equal to 8u92 and use -XX:+ExitOnOutOfMemoryError/-XX:+CrashOnOutOfMemoryError"
                            + " or use -XX:OnOutOfMemoryError=\";\" on your current JRE.");
            }
        }

        /**
         * Checks if one of the specified options is being used.
         * @param optionNames The name of the options to check
         * @return {@code true} if one of the specified options is being used, {@code false} otherwise.
         */
        private boolean jvmOptionsContainsOneOf(String... optionNames)
        {
            RuntimeMXBean runtimeMxBean = ManagementFactory.getRuntimeMXBean();
            List inputArguments = runtimeMxBean.getInputArguments();
            for (String argument : inputArguments)
            {
                for (String optionName : optionNames)
                    if (argument.startsWith(optionName))
                        return true;
            }
            return false;
        }
    };

    public static final StartupCheck checkJnaInitialization = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            // Fail-fast if JNA is not available or failing to initialize properly
            if (!CLibrary.jnaAvailable())
                throw new StartupException(3, "JNA failing to initialize properly. ");
        }
    };

    public static final StartupCheck initSigarLibrary = new StartupCheck()
    {
        public void execute()
        {
            SigarLibrary.instance.warnIfRunningInDegradedMode();
        }
    };

    public static final StartupCheck checkDataDirs = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            // check all directories(data, commitlog, saved cache) for existence and permission
            Iterable dirs = Iterables.concat(Arrays.asList(DatabaseDescriptor.getAllDataFileLocations()),
                                                     Arrays.asList(DatabaseDescriptor.getCommitLogLocation(),
                                                                   DatabaseDescriptor.getSavedCachesLocation()));
            for (String dataDir : dirs)
            {
                logger.debug("Checking directory {}", dataDir);
                File dir = new File(dataDir);

                // check that directories exist.
                if (!dir.exists())
                {
                    logger.warn("Directory {} doesn't exist", dataDir);
                    // if they don't, failing their creation, stop cassandra.
                    if (!dir.mkdirs())
                        throw new StartupException(3, "Has no permission to create directory "+ dataDir);
                }

                // if directories exist verify their permissions
                if (!Directories.verifyFullPermissions(dir, dataDir))
                    throw new StartupException(3, "Insufficient permissions on directory " + dataDir);

            }
        }
    };

    public static final StartupCheck checkSSTablesFormat = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            final Set invalid = new HashSet<>();
            final Set nonSSTablePaths = new HashSet<>();
            nonSSTablePaths.add(FileUtils.getCanonicalPath(DatabaseDescriptor.getCommitLogLocation()));
            nonSSTablePaths.add(FileUtils.getCanonicalPath(DatabaseDescriptor.getSavedCachesLocation()));

            FileVisitor sstableVisitor = new SimpleFileVisitor()
            {
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException
                {
                    logger.trace("Checking SSTable file {}", file.toString());

                    if (!file.toString().endsWith(".db"))
                        return FileVisitResult.CONTINUE;

                    try
                    {
                        if (!Descriptor.fromFilename(file.toString()).isCompatible())
                            invalid.add(file.toString());
                    }
                    catch (Exception e)
                    {
                        invalid.add(file.toString());
                    }
                    return FileVisitResult.CONTINUE;
                }

                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException
                {
                    String name = dir.getFileName().toString();
                    return (name.equals("snapshots")
                            || name.equals("backups")
                            || nonSSTablePaths.contains(dir.toFile().getCanonicalPath()))
                           ? FileVisitResult.SKIP_SUBTREE
                           : FileVisitResult.CONTINUE;
                }
            };

            for (String dataDir : DatabaseDescriptor.getAllDataFileLocations())
            {
                try
                {
                    Files.walkFileTree(Paths.get(dataDir), sstableVisitor);
                }
                catch (IOException e)
                {
                    throw new StartupException(3, "Unable to verify sstable files on disk", e);
                }
            }

            if (!invalid.isEmpty())
                throw new StartupException(3, String.format("Detected unreadable sstables %s, please check " +
                                                            "NEWS.txt and ensure that you have upgraded through " +
                                                            "all required intermediate versions, running " +
                                                            "upgradesstables",
                                                            Joiner.on(",").join(invalid)));

        }
    };

    public static final StartupCheck checkSystemKeyspaceState = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            // check the system keyspace to keep user from shooting self in foot by changing partitioner, cluster name, etc.
            // we do a one-off scrub of the system keyspace first; we can't load the list of the rest of the keyspaces,
            // until system keyspace is opened.

            for (CFMetaData cfm : Schema.instance.getKeyspaceMetaData(SystemKeyspace.NAME).values())
                ColumnFamilyStore.scrubDataDirectories(cfm);

            try
            {
                SystemKeyspace.checkHealth();
            }
            catch (ConfigurationException e)
            {
                throw new StartupException(100, "Fatal exception during initialization", e);
            }
        }
    };

    public static final StartupCheck checkDatacenter = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            if (!Boolean.getBoolean("cassandra.ignore_dc"))
            {
                String storedDc = SystemKeyspace.getDatacenter();
                if (storedDc != null)
                {
                    String currentDc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(FBUtilities.getBroadcastAddress());
                    if (!storedDc.equals(currentDc))
                    {
                        String formatMessage = "Cannot start node if snitch's data center (%s) differs from previous data center (%s). " +
                                               "Please fix the snitch configuration, decommission and rebootstrap this node or use the flag -Dcassandra.ignore_dc=true.";

                        throw new StartupException(100, String.format(formatMessage, currentDc, storedDc));
                    }
                }
            }
        }
    };

    public static final StartupCheck checkRack = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            if (!Boolean.getBoolean("cassandra.ignore_rack"))
            {
                String storedRack = SystemKeyspace.getRack();
                if (storedRack != null)
                {
                    String currentRack = DatabaseDescriptor.getEndpointSnitch().getRack(FBUtilities.getBroadcastAddress());
                    if (!storedRack.equals(currentRack))
                    {
                        String formatMessage = "Cannot start node if snitch's rack (%s) differs from previous rack (%s). " +
                                               "Please fix the snitch configuration, decommission and rebootstrap this node or use the flag -Dcassandra.ignore_rack=true.";

                        throw new StartupException(100, String.format(formatMessage, currentRack, storedRack));
                    }
                }
            }
        }
    };

    public static final StartupCheck checkIp = new StartupCheck()
    {
        public void execute() throws StartupException
        {
            String restrictedIp = System.getProperty("palantir_cassandra.restricted_ip", null);
            if (restrictedIp != null)
            {
                String currentIp = FBUtilities.getLocalAddress().getHostAddress();
                if (currentIp.equals(restrictedIp))
                {
                    {
                        String message = "Cannot start as current IP {} matches palantir_cassandra.restricted_ip. " +
                                               "If running in Kubernetes, delete the pod to give it a different IP";

                        throw new StartupException(100, String.format(message, restrictedIp));
                    }
                }
            }
        }
    };
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy