com.datastax.driver.core.Metrics Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of cassandra-driver-core Show documentation
A driver for Apache Cassandra 1.2+ that works exclusively with the Cassandra Query Language version 3 (CQL3) and Cassandra's binary protocol.
There is a newer version: 4.0.0
Show newest version
/*
 *      Copyright (C) 2012-2015 DataStax Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package com.datastax.driver.core;

import com.codahale.metrics.*;
import com.datastax.driver.core.policies.SpeculativeExecutionPolicy;

import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;

/**
 * Metrics exposed by the driver.
 * <p>
 * The metrics exposed by this class use the Metrics library
 * ({@code com.codahale.metrics}) and you should refer to its documentation
 * for details on how to handle the exposed metric objects.
 * <p>
 * By default, metrics are exposed through JMX, which is very useful for
 * development and browsing, but for production environments you may want to
 * have a look at the reporters provided by the Metrics library, which could
 * be more efficient or better adapted.
 */
public class Metrics {

    // Owning cluster manager; the gauges declared in this class read their values through it.
    private final Cluster.Manager manager;
    // Registry holding every metric created by this class (returned by getRegistry()).
    private final MetricRegistry registry = new MetricRegistry();
    // JMX reporter for the registry; null when JMX reporting is disabled in the metrics options.
    private final JmxReporter jmxReporter;
    // Must stay declared after `registry`: the Errors field initializers call registry.counter(...).
    private final Errors errors = new Errors();

    // Timer registered under the name "requests"; returned by getRequestsTimer().
    private final Timer requests = registry.timer("requests");

    // Gauge "known-hosts": size of the host collection held by the cluster metadata.
    private final Gauge<Integer> knownHosts = registry.register("known-hosts", new Gauge<Integer>() {
        @Override
        public Integer getValue() {
            return manager.metadata.allHosts().size();
        }
    });
    // Gauge "connected-to": number of distinct keys found in the pool maps of all sessions.
    private final Gauge<Integer> connectedTo = registry.register("connected-to", new Gauge<Integer>() {
        @Override
        public Integer getValue() {
            // A set is used so that a key present in the pools of several sessions is counted once.
            // Set<Object> accepts whatever key type session.pools declares (presumably hosts).
            Set<Object> distinctKeys = new HashSet<Object>();
            for (SessionManager session : manager.sessions)
                distinctKeys.addAll(session.pools.keySet());
            return distinctKeys.size();
        }
    });
    // Gauge "open-connections": sum of opened() over every pool of every session,
    // plus one when the control connection is open.
    private final Gauge<Integer> openConnections = registry.register("open-connections", new Gauge<Integer>() {
        @Override
        public Integer getValue() {
            int value = manager.controlConnection.isOpen() ? 1 : 0;
            for (SessionManager session : manager.sessions)
                for (HostConnectionPool pool : session.pools.values())
                    value += pool.opened();
            return value;
        }
    });
    // Gauge "trashed-connections": sum of trashed() over every pool of every session.
    private final Gauge<Integer> trashedConnections = registry.register("trashed-connections", new Gauge<Integer>() {
        @Override
        public Integer getValue() {
            int value = 0;
            for (SessionManager session : manager.sessions)
                for (HostConnectionPool pool : session.pools.values())
                    value += pool.trashed();
            return value;
        }
    });

    // Queue-size gauges; each one is created and registered in the constructor
    // from the corresponding queue exposed by the cluster manager.
    private final Gauge<Integer> executorQueueDepth;
    private final Gauge<Integer> blockingExecutorQueueDepth;
    private final Gauge<Integer> reconnectionSchedulerQueueSize;
    private final Gauge<Integer> taskSchedulerQueueSize;

    Metrics(Cluster.Manager manager) {
        this.manager = manager;
        this.executorQueueDepth = registry.register(
                "executor-queue-depth",
                buildQueueSizeGauge(manager.executorQueue));
        this.blockingExecutorQueueDepth = registry.register(
                "blocking-executor-queue-depth",
                buildQueueSizeGauge(manager.blockingExecutorQueue));
        this.reconnectionSchedulerQueueSize = registry.register(
                "reconnection-scheduler-task-count",
                buildQueueSizeGauge(manager.reconnectionExecutorQueue));
        this.taskSchedulerQueueSize = registry.register(
                "task-scheduler-task-count",
                buildQueueSizeGauge(manager.scheduledTasksExecutorQueue));
        if (manager.configuration.getMetricsOptions().isJMXReportingEnabled()) {
            this.jmxReporter = JmxReporter.forRegistry(registry).inDomain(manager.clusterName + "-metrics").build();
            this.jmxReporter.start();
        } else {
            this.jmxReporter = null;
        }
    }

    /**
     * Returns the registry containing all metrics.
     * <p>
     * Through the registry you can plug in any of the reporters that ship with
     * the Metrics library, or one you wrote yourself.
     * <p>
     * For instance, if {@code metrics} is {@code this} object, the metrics can be
     * exported to csv files with:
     * <pre>
     *     com.codahale.metrics.CsvReporter.forRegistry(metrics.getRegistry()).build(new File("measurements/")).start(1, TimeUnit.SECONDS);
     * </pre>
     * <p>
     * If your application already has a {@code MetricRegistry} and you want the
     * driver's metrics added to it, the recommended approach is a listener:
     * <pre>
     *     // Your existing registry:
     *     final com.codahale.metrics.MetricRegistry myRegistry = ...
     *
     *     cluster.getMetrics().getRegistry().addListener(new com.codahale.metrics.MetricRegistryListener() {
     *         &#64;Override
     *         public void onGaugeAdded(String name, Gauge&lt;?&gt; gauge) {
     *             if (myRegistry.getNames().contains(name)) {
     *                 // name is already taken, maybe prefix with a namespace
     *                 ...
     *             } else {
     *                 myRegistry.register(name, gauge);
     *             }
     *         }
     *
     *         ... // Implement other methods in a similar fashion
     *     });
     * </pre>
     * Since reporting is then handled by your own registry, you will probably also
     * want to disable JMX reporting with {@link Cluster.Builder#withoutJMXReporting()}.
     *
     * @return the registry containing all metrics.
     */
    public MetricRegistry getRegistry() {
        return this.registry;
    }

    /**
     * Returns metrics on the user requests performed on the Cluster.
     * <p>
     * This metric exposes:
     * <ul>
     * <li>the total number of requests;</li>
     * <li>the request rate (in requests per second), including 1, 5 and 15 minute rates;</li>
     * <li>the mean, min and max latencies, as well as the latency at a given percentile.</li>
     * </ul>
     *
     * @return a {@code Timer} metric object exposing the rate and latency for
     * user requests.
     */
    public Timer getRequestsTimer() {
        return this.requests;
    }

    /**
     * Returns the object that groups the metrics related to the errors encountered.
     *
     * @return the {@link Errors} instance grouping the error-related metrics.
     */
    public Errors getErrorMetrics() {
        return this.errors;
    }

    /**
     * Returns the number of Cassandra hosts currently known by the driver (that is,
     * whether they are currently considered up or down).
     *
     * @return a gauge whose {@code Integer} value is the number of Cassandra hosts
     * currently known by the driver.
     */
    public Gauge<Integer> getKnownHosts() {
        return knownHosts;
    }

    /**
     * Returns the number of Cassandra hosts the driver is currently connected to
     * (that is, hosts it has at least one connection opened to).
     *
     * @return a gauge whose {@code Integer} value is the number of Cassandra hosts
     * the driver is currently connected to.
     */
    public Gauge<Integer> getConnectedToHosts() {
        return connectedTo;
    }

    /**
     * Returns the total number of currently opened connections to Cassandra hosts.
     *
     * @return a gauge whose {@code Integer} value is the total number of currently
     * opened connections to Cassandra hosts.
     */
    public Gauge<Integer> getOpenConnections() {
        return openConnections;
    }

    /**
     * Returns the total number of currently "trashed" connections to Cassandra hosts.
     * <p>
     * When the load to a host decreases, the driver will reclaim some connections in order to save
     * resources. No requests are sent to these connections anymore, but they are kept open for an
     * additional amount of time ({@link PoolingOptions#getIdleTimeoutSeconds()}), in case the load
     * goes up again. This metric counts connections in that state.
     *
     * @return a gauge whose {@code Integer} value is the total number of currently
     * trashed connections to Cassandra hosts.
     */
    public Gauge<Integer> getTrashedConnections() {
        return trashedConnections;
    }

    /**
     * Returns the number of queued up tasks in the {@link ThreadingOptions#createExecutor(String) main internal executor}.
     * <p>
     * If the executor's task queue is not accessible – which happens when the executor
     * is not an instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
     *
     * @return a gauge whose {@code Integer} value is the number of queued up tasks
     * in the main internal executor, or -1 if that number is unknown.
     */
    public Gauge<Integer> getExecutorQueueDepth() {
        return executorQueueDepth;
    }

    /**
     * Returns the number of queued up tasks in the {@link ThreadingOptions#createBlockingExecutor(String) blocking executor}.
     * <p>
     * If the executor's task queue is not accessible – which happens when the executor
     * is not an instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
     *
     * @return a gauge whose {@code Integer} value is the number of queued up tasks
     * in the blocking executor, or -1 if that number is unknown.
     */
    public Gauge<Integer> getBlockingExecutorQueueDepth() {
        return blockingExecutorQueueDepth;
    }

    /**
     * Returns the number of queued up tasks in the {@link ThreadingOptions#createReconnectionExecutor(String) reconnection executor}.
     * <p>
     * A queue size &gt; 0 does not necessarily indicate a backlog, as some tasks
     * may not have been scheduled to execute yet.
     * <p>
     * If the executor's task queue is not accessible – which happens when the executor
     * is not an instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
     *
     * @return a gauge whose {@code Integer} value is the size of the work queue
     * for the reconnection executor, or -1 if that number is unknown.
     */
    public Gauge<Integer> getReconnectionSchedulerQueueSize() {
        return reconnectionSchedulerQueueSize;
    }

    /**
     * Returns the number of queued up tasks in the {@link ThreadingOptions#createScheduledTasksExecutor(String) scheduled tasks executor}.
     * <p>
     * A queue size &gt; 0 does not necessarily indicate a backlog, as some tasks
     * may not have been scheduled to execute yet.
     * <p>
     * If the executor's task queue is not accessible – which happens when the executor
     * is not an instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
     *
     * @return a gauge whose {@code Integer} value is the size of the work queue
     * for the scheduled tasks executor, or -1 if that number is unknown.
     */
    public Gauge<Integer> getTaskSchedulerQueueSize() {
        return taskSchedulerQueueSize;
    }

    /**
     * Stops the JMX reporter, if one was created by the constructor.
     */
    void shutdown() {
        if (jmxReporter == null) {
            // JMX reporting was disabled: nothing to stop.
            return;
        }
        jmxReporter.stop();
    }

    /**
     * Builds a gauge reporting the current size of the given queue.
     *
     * @param queue the queue to measure; may be {@code null}.
     * @return a gauge returning {@code queue.size()}, or a gauge that always
     * returns -1 when {@code queue} is {@code null}.
     */
    private static Gauge<Integer> buildQueueSizeGauge(final BlockingQueue<?> queue) {
        if (queue == null) {
            // No queue to inspect: report -1 as the "unknown" marker.
            return new Gauge<Integer>() {
                @Override
                public Integer getValue() {
                    return -1;
                }
            };
        }
        return new Gauge<Integer>() {
            @Override
            public Integer getValue() {
                return queue.size();
            }
        };
    }

    /**
     * Metrics on errors encountered.
     * <p>
     * Every counter of this class is created through the registry of the
     * enclosing {@link Metrics} instance.
     */
    public class Errors {

        // Connection-level errors.
        private final Counter connectionErrors = registry.counter("connection-errors");
        private final Counter authenticationErrors = registry.counter("authentication-errors");

        // Errors on requests.
        private final Counter writeTimeouts = registry.counter("write-timeouts");
        private final Counter readTimeouts = registry.counter("read-timeouts");
        private final Counter unavailables = registry.counter("unavailables");
        private final Counter clientTimeouts = registry.counter("client-timeouts");

        private final Counter otherErrors = registry.counter("other-errors");

        // Retries: overall counter followed by one counter per cause.
        private final Counter retries = registry.counter("retries");
        private final Counter retriesOnWriteTimeout = registry.counter("retries-on-write-timeout");
        private final Counter retriesOnReadTimeout = registry.counter("retries-on-read-timeout");
        private final Counter retriesOnUnavailable = registry.counter("retries-on-unavailable");
        private final Counter retriesOnClientTimeout = registry.counter("retries-on-client-timeout");
        private final Counter retriesOnConnectionError = registry.counter("retries-on-connection-error");
        private final Counter retriesOnOtherErrors = registry.counter("retries-on-other-errors");

        // Ignores: overall counter followed by one counter per cause.
        private final Counter ignores = registry.counter("ignores");
        private final Counter ignoresOnWriteTimeout = registry.counter("ignores-on-write-timeout");
        private final Counter ignoresOnReadTimeout = registry.counter("ignores-on-read-timeout");
        private final Counter ignoresOnUnavailable = registry.counter("ignores-on-unavailable");
        private final Counter ignoresOnClientTimeout = registry.counter("ignores-on-client-timeout");
        private final Counter ignoresOnConnectionError = registry.counter("ignores-on-connection-error");
        private final Counter ignoresOnOtherErrors = registry.counter("ignores-on-other-errors");

        private final Counter speculativeExecutions = registry.counter("speculative-executions");

        /**
         * Returns the number of errors while connecting to Cassandra nodes.
         * <p>
         * This is the number of times a request to a Cassandra node failed because
         * of a connection problem, and therefore also how often the driver had to
         * pick a fallback host for a request.
         * <p>
         * A few connection errors are to be expected when a Cassandra node fails
         * (or is stopped), but if that number grows continuously you likely have
         * a problem.
         *
         * @return the number of errors while connecting to Cassandra nodes.
         */
        public Counter getConnectionErrors() {
            return this.connectionErrors;
        }

        /**
         * Returns the number of authentication errors while connecting to Cassandra nodes.
         *
         * @return the number of authentication errors.
         */
        public Counter getAuthenticationErrors() {
            return this.authenticationErrors;
        }

        /**
         * Returns the number of write requests that returned a timeout (independently
         * of the final decision taken by the {@link com.datastax.driver.core.policies.RetryPolicy}).
         *
         * @return the number of write timeouts.
         */
        public Counter getWriteTimeouts() {
            return this.writeTimeouts;
        }

        /**
         * Returns the number of read requests that returned a timeout (independently
         * of the final decision taken by the {@link com.datastax.driver.core.policies.RetryPolicy}).
         *
         * @return the number of read timeouts.
         */
        public Counter getReadTimeouts() {
            return this.readTimeouts;
        }

        /**
         * Returns the number of requests that returned an unavailable exception
         * (independently of the final decision taken by the
         * {@link com.datastax.driver.core.policies.RetryPolicy}).
         *
         * @return the number of unavailable exceptions.
         */
        public Counter getUnavailables() {
            return this.unavailables;
        }

        /**
         * Returns the number of requests that timed out before the driver
         * received a response.
         *
         * @return the number of client timeouts.
         */
        public Counter getClientTimeouts() {
            return this.clientTimeouts;
        }

        /**
         * Returns the number of requests that returned errors not accounted for by
         * another metric. This includes all types of invalid requests.
         *
         * @return the number of request errors not accounted for by another
         * metric.
         */
        public Counter getOthers() {
            return this.otherErrors;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}.
         *
         * @return the number of retries decided by the retry policy.
         */
        public Counter getRetries() {
            return this.retries;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * read timed out.
         *
         * @return the number of retries decided by the retry policy after a
         * read timeout.
         */
        public Counter getRetriesOnReadTimeout() {
            return this.retriesOnReadTimeout;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * write timed out.
         *
         * @return the number of retries decided by the retry policy after a
         * write timeout.
         */
        public Counter getRetriesOnWriteTimeout() {
            return this.retriesOnWriteTimeout;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after an
         * unavailable exception.
         *
         * @return the number of retries decided by the retry policy after an
         * unavailable exception.
         */
        public Counter getRetriesOnUnavailable() {
            return this.retriesOnUnavailable;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * client timeout.
         *
         * @return the number of retries decided by the retry policy after a
         * client timeout.
         */
        public Counter getRetriesOnClientTimeout() {
            return this.retriesOnClientTimeout;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * connection error.
         *
         * @return the number of retries decided by the retry policy after a
         * connection error.
         */
        public Counter getRetriesOnConnectionError() {
            return this.retriesOnConnectionError;
        }

        /**
         * Returns the number of times a request was retried due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after an
         * unexpected error.
         *
         * @return the number of retries decided by the retry policy after an
         * unexpected error.
         */
        public Counter getRetriesOnOtherErrors() {
            return this.retriesOnOtherErrors;
        }

        /**
         * Returns the number of times a request was ignored
         * due to the {@link com.datastax.driver.core.policies.RetryPolicy}, for
         * example due to timeouts or unavailability.
         *
         * @return the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}.
         */
        public Counter getIgnores() {
            return this.ignores;
        }

        /**
         * Returns the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * read timed out.
         *
         * @return the number of requests ignored by the retry policy after a
         * read timeout.
         */
        public Counter getIgnoresOnReadTimeout() {
            return this.ignoresOnReadTimeout;
        }

        /**
         * Returns the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * write timed out.
         *
         * @return the number of requests ignored by the retry policy after a
         * write timeout.
         */
        public Counter getIgnoresOnWriteTimeout() {
            return this.ignoresOnWriteTimeout;
        }

        /**
         * Returns the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after an
         * unavailable exception.
         *
         * @return the number of requests ignored by the retry policy after an
         * unavailable exception.
         */
        public Counter getIgnoresOnUnavailable() {
            return this.ignoresOnUnavailable;
        }

        /**
         * Returns the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * client timeout.
         *
         * @return the number of requests ignored by the retry policy after a
         * client timeout.
         */
        public Counter getIgnoresOnClientTimeout() {
            return this.ignoresOnClientTimeout;
        }

        /**
         * Returns the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after a
         * connection error.
         *
         * @return the number of requests ignored by the retry policy after a
         * connection error.
         */
        public Counter getIgnoresOnConnectionError() {
            return this.ignoresOnConnectionError;
        }

        /**
         * Returns the number of times a request was ignored due to the
         * {@link com.datastax.driver.core.policies.RetryPolicy}, after an
         * unexpected error.
         *
         * @return the number of requests ignored by the retry policy after an
         * unexpected error.
         */
        public Counter getIgnoresOnOtherErrors() {
            return this.ignoresOnOtherErrors;
        }

        /**
         * Returns the number of times a speculative execution was started
         * because a previous execution did not complete within the delay
         * specified by {@link SpeculativeExecutionPolicy}.
         *
         * @return the number of speculative executions.
         */
        public Counter getSpeculativeExecutions() {
            return this.speculativeExecutions;
        }
    }
}