
com.datastax.driver.core.Metrics Maven / Gradle / Ivy

/*
 * Copyright DataStax, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.datastax.driver.core;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.JmxReporter;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.datastax.driver.core.policies.SpeculativeExecutionPolicy;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;

/**
 * Metrics exposed by the driver.
 *
 * <p>The metrics exposed by this class use the Metrics library and you should refer to its
 * documentation for details on how to handle the exposed metric objects.
 *
 * <p>By default, metrics are exposed through JMX, which is very useful for development and
 * browsing, but for production environments you may want to have a look at the reporters provided
 * by the Metrics library, which could be more efficient/adapted.
 */
public class Metrics {

  private final Cluster.Manager manager;

  private final MetricRegistry registry = new MetricRegistry();
  private final JmxReporter jmxReporter;

  private final Errors errors = new Errors();

  private final Timer requests = registry.timer("requests");

  private final Meter bytesSent = registry.meter("bytes-sent");
  private final Meter bytesReceived = registry.meter("bytes-received");

  private final Gauge<Integer> knownHosts =
      registry.register(
          "known-hosts",
          new Gauge<Integer>() {
            @Override
            public Integer getValue() {
              return manager.metadata.allHosts().size();
            }
          });

  private final Gauge<Integer> connectedTo =
      registry.register(
          "connected-to",
          new Gauge<Integer>() {
            @Override
            public Integer getValue() {
              Set<Host> s = new HashSet<Host>();
              for (SessionManager session : manager.sessions) s.addAll(session.pools.keySet());
              return s.size();
            }
          });

  private final Gauge<Integer> openConnections =
      registry.register(
          "open-connections",
          new Gauge<Integer>() {
            @Override
            public Integer getValue() {
              int value = manager.controlConnection.isOpen() ? 1 : 0;
              for (SessionManager session : manager.sessions)
                for (HostConnectionPool pool : session.pools.values()) value += pool.opened();
              return value;
            }
          });

  private final Gauge<Integer> trashedConnections =
      registry.register(
          "trashed-connections",
          new Gauge<Integer>() {
            @Override
            public Integer getValue() {
              int value = 0;
              for (SessionManager session : manager.sessions)
                for (HostConnectionPool pool : session.pools.values()) value += pool.trashed();
              return value;
            }
          });

  private final Gauge<Integer> inFlightRequests =
      registry.register(
          "inflight-requests",
          new Gauge<Integer>() {
            @Override
            public Integer getValue() {
              int value = 0;
              for (SessionManager session : manager.sessions)
                for (HostConnectionPool pool : session.pools.values())
                  value += pool.totalInFlight.get();
              return value;
            }
          });

  private final Gauge<Integer> requestQueueDepth =
      registry.register(
          "request-queue-depth",
          new Gauge<Integer>() {
            @Override
            public Integer getValue() {
              int value = 0;
              for (SessionManager session : manager.sessions)
                for (HostConnectionPool pool : session.pools.values())
                  value += pool.pendingBorrowCount.get();
              return value;
            }
          });

  private final Gauge<Integer> executorQueueDepth;
  private final Gauge<Integer> blockingExecutorQueueDepth;
  private final Gauge<Integer> reconnectionSchedulerQueueSize;
  private final Gauge<Integer> taskSchedulerQueueSize;

  Metrics(Cluster.Manager manager) {
    this.manager = manager;
    this.executorQueueDepth =
        registry.register("executor-queue-depth", buildQueueSizeGauge(manager.executorQueue));
    this.blockingExecutorQueueDepth =
        registry.register(
            "blocking-executor-queue-depth", buildQueueSizeGauge(manager.blockingExecutorQueue));
    this.reconnectionSchedulerQueueSize =
        registry.register(
            "reconnection-scheduler-task-count",
            buildQueueSizeGauge(manager.reconnectionExecutorQueue));
    this.taskSchedulerQueueSize =
        registry.register(
            "task-scheduler-task-count", buildQueueSizeGauge(manager.scheduledTasksExecutorQueue));
    if (manager.configuration.getMetricsOptions().isJMXReportingEnabled()) {
      this.jmxReporter =
          JmxReporter.forRegistry(registry).inDomain(manager.clusterName + "-metrics").build();
      this.jmxReporter.start();
    } else {
      this.jmxReporter = null;
    }
  }

  /**
   * Returns the registry containing all metrics.
   *
   * <p>The metrics registry allows you to easily use the reporters that ship with Metrics or a
   * custom-written one.
   *
   * <p>For instance, if {@code metrics} is {@code this} object, you could export the metrics to
   * CSV files using:
   *
   * <pre>
   *     com.codahale.metrics.CsvReporter.forRegistry(metrics.getRegistry()).build(new File("measurements/")).start(1, TimeUnit.SECONDS);
   * </pre>
   *
   * <p>If you already have a {@code MetricRegistry} in your application and wish to add the
   * driver's metrics to it, the recommended approach is to use a listener:
   *
   * <pre>
   *     // Your existing registry:
   *     final com.codahale.metrics.MetricRegistry myRegistry = ...
   *
   *     cluster.getMetrics().getRegistry().addListener(new com.codahale.metrics.MetricRegistryListener() {
   *         @Override
   *         public void onGaugeAdded(String name, Gauge<?> gauge) {
   *             if (myRegistry.getNames().contains(name)) {
   *                 // name is already taken, maybe prefix with a namespace
   *                 ...
   *             } else {
   *                 myRegistry.register(name, gauge);
   *             }
   *         }
   *
   *         ... // Implement other methods in a similar fashion
   *     });
   * </pre>
   *
   * <p>Since reporting is handled by your registry, you'll probably also want to disable JMX
   * reporting with {@link Cluster.Builder#withoutJMXReporting()}.
   *
   * @return the registry containing all metrics.
   */
  public MetricRegistry getRegistry() {
    return registry;
  }

  /**
   * Returns metrics on the user requests performed on the Cluster.
   *
   * <p>This metric exposes:
   *
   * <ul>
   *   <li>the total number of requests.
   *   <li>the request rate (in requests per second), including 1, 5 and 15 minute rates.
   *   <li>the mean, min and max latencies, as well as latency at a given percentile.
   * </ul>
   *
   * @return a {@code Timer} metric object exposing the rate and latency for user requests.
   */
  public Timer getRequestsTimer() {
    return requests;
  }

  /**
   * Returns an object grouping metrics related to the errors encountered.
   *
   * @return an object grouping metrics related to the errors encountered.
   */
  public Errors getErrorMetrics() {
    return errors;
  }

  /**
   * Returns the number of Cassandra hosts currently known by the driver (that is, whether they
   * are currently considered up or down).
   *
   * @return the number of Cassandra hosts currently known by the driver.
   */
  public Gauge<Integer> getKnownHosts() {
    return knownHosts;
  }

  /**
   * Returns the number of Cassandra hosts the driver is currently connected to (that is, hosts
   * with at least one connection opened).
   *
   * @return the number of Cassandra hosts the driver is currently connected to.
   */
  public Gauge<Integer> getConnectedToHosts() {
    return connectedTo;
  }

  /**
   * Returns the total number of currently opened connections to Cassandra hosts.
   *
   * @return The total number of currently opened connections to Cassandra hosts.
   */
  public Gauge<Integer> getOpenConnections() {
    return openConnections;
  }

  /**
   * Returns the total number of currently "trashed" connections to Cassandra hosts.
   *
   * <p>When the load to a host decreases, the driver will reclaim some connections in order to
   * save resources. No requests are sent to these connections anymore, but they are kept open for
   * an additional amount of time ({@link PoolingOptions#getIdleTimeoutSeconds()}), in case the
   * load goes up again. This metric counts connections in that state.
   *
   * @return The total number of currently trashed connections to Cassandra hosts.
   */
  public Gauge<Integer> getTrashedConnections() {
    return trashedConnections;
  }

  /**
   * Returns the total number of in flight requests to Cassandra hosts.
   *
   * @return The total number of in flight requests to Cassandra hosts.
   */
  public Gauge<Integer> getInFlightRequests() {
    return inFlightRequests;
  }

  /**
   * Returns the total number of enqueued requests on all Cassandra hosts.
   *
   * @see Session.State#getRequestQueueDepth(Host)
   * @return The total number of enqueued requests on all Cassandra hosts.
   */
  public Gauge<Integer> getRequestQueueDepth() {
    return requestQueueDepth;
  }

  /**
   * Returns the number of queued up tasks in the {@link ThreadingOptions#createExecutor(String)
   * main internal executor}.
   *
   * <p>If the executor's task queue is not accessible – which happens when the executor is not an
   * instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
   *
   * @return The number of queued up tasks in the main internal executor, or -1, if that number is
   *     unknown.
   */
  public Gauge<Integer> getExecutorQueueDepth() {
    return executorQueueDepth;
  }

  /**
   * Returns the number of queued up tasks in the {@link
   * ThreadingOptions#createBlockingExecutor(String) blocking executor}.
   *
   * <p>If the executor's task queue is not accessible – which happens when the executor is not an
   * instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
   *
   * @return The number of queued up tasks in the blocking executor, or -1, if that number is
   *     unknown.
   */
  public Gauge<Integer> getBlockingExecutorQueueDepth() {
    return blockingExecutorQueueDepth;
  }

  /**
   * Returns the number of queued up tasks in the {@link
   * ThreadingOptions#createReconnectionExecutor(String) reconnection executor}.
   *
   * <p>A queue size > 0 does not necessarily indicate a backlog as some tasks may not have been
   * scheduled to execute yet.
   *
   * <p>If the executor's task queue is not accessible – which happens when the executor is not an
   * instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
   *
   * @return The size of the work queue for the reconnection executor, or -1, if that number is
   *     unknown.
   */
  public Gauge<Integer> getReconnectionSchedulerQueueSize() {
    return reconnectionSchedulerQueueSize;
  }

  /**
   * Returns the number of queued up tasks in the {@link
   * ThreadingOptions#createScheduledTasksExecutor(String) scheduled tasks executor}.
   *
   * <p>A queue size > 0 does not necessarily indicate a backlog as some tasks may not have been
   * scheduled to execute yet.
   *
   * <p>If the executor's task queue is not accessible – which happens when the executor is not an
   * instance of {@link ThreadPoolExecutor} – then this gauge returns -1.
   *
   * @return The size of the work queue for the scheduled tasks executor, or -1, if that number is
   *     unknown.
   */
  public Gauge<Integer> getTaskSchedulerQueueSize() {
    return taskSchedulerQueueSize;
  }

  /**
   * Returns the number of bytes sent so far.
   *
   * <p>Note that this measures unencrypted traffic, even if SSL is enabled (the probe is inserted
   * before SSL handlers in the Netty pipeline). In practice, SSL overhead should be negligible
   * after the initial handshake.
   *
   * @return the number of bytes sent so far.
   */
  public Meter getBytesSent() {
    return bytesSent;
  }

  /**
   * Returns the number of bytes received so far.
   *
   * <p>Note that this measures unencrypted traffic, even if SSL is enabled (the probe is inserted
   * before SSL handlers in the Netty pipeline). In practice, SSL overhead should be negligible
   * after the initial handshake.
   *
   * @return the number of bytes received so far.
   */
  public Meter getBytesReceived() {
    return bytesReceived;
  }

  void shutdown() {
    if (jmxReporter != null) jmxReporter.stop();
  }

  private static Gauge<Integer> buildQueueSizeGauge(final BlockingQueue<Runnable> queue) {
    if (queue != null) {
      return new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.size();
        }
      };
    } else {
      return new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return -1;
        }
      };
    }
  }

  /** Metrics on errors encountered. */
  public class Errors {

    private final Counter connectionErrors = registry.counter("connection-errors");
    private final Counter authenticationErrors = registry.counter("authentication-errors");

    private final Counter writeTimeouts = registry.counter("write-timeouts");
    private final Counter readTimeouts = registry.counter("read-timeouts");
    private final Counter unavailables = registry.counter("unavailables");
    private final Counter clientTimeouts = registry.counter("client-timeouts");
    private final Counter otherErrors = registry.counter("other-errors");

    private final Counter retries = registry.counter("retries");
    private final Counter retriesOnWriteTimeout = registry.counter("retries-on-write-timeout");
    private final Counter retriesOnReadTimeout = registry.counter("retries-on-read-timeout");
    private final Counter retriesOnUnavailable = registry.counter("retries-on-unavailable");
    private final Counter retriesOnClientTimeout = registry.counter("retries-on-client-timeout");
    private final Counter retriesOnConnectionError =
        registry.counter("retries-on-connection-error");
    private final Counter retriesOnOtherErrors = registry.counter("retries-on-other-errors");

    private final Counter ignores = registry.counter("ignores");
    private final Counter ignoresOnWriteTimeout = registry.counter("ignores-on-write-timeout");
    private final Counter ignoresOnReadTimeout = registry.counter("ignores-on-read-timeout");
    private final Counter ignoresOnUnavailable = registry.counter("ignores-on-unavailable");
    private final Counter ignoresOnClientTimeout = registry.counter("ignores-on-client-timeout");
    private final Counter ignoresOnConnectionError =
        registry.counter("ignores-on-connection-error");
    private final Counter ignoresOnOtherErrors = registry.counter("ignores-on-other-errors");

    private final Counter speculativeExecutions = registry.counter("speculative-executions");

    /**
     * Returns the number of errors while connecting to Cassandra nodes.
     *
     * <p>This represents the number of times that a request to a Cassandra node has failed due to
     * a connection problem. This thus also corresponds to how often the driver had to pick a
     * fallback host for a request.
     *
     * <p>You can expect a few connection errors when a Cassandra node fails (or is stopped), but
     * if that number grows continuously you likely have a problem.
     *
     * @return the number of errors while connecting to Cassandra nodes.
     */
    public Counter getConnectionErrors() {
      return connectionErrors;
    }

    /**
     * Returns the number of authentication errors while connecting to Cassandra nodes.
     *
     * @return the number of errors.
     */
    public Counter getAuthenticationErrors() {
      return authenticationErrors;
    }

    /**
     * Returns the number of write requests that returned a timeout (independently of the final
     * decision taken by the {@link com.datastax.driver.core.policies.RetryPolicy}).
     *
     * @return the number of write timeouts.
     */
    public Counter getWriteTimeouts() {
      return writeTimeouts;
    }

    /**
     * Returns the number of read requests that returned a timeout (independently of the final
     * decision taken by the {@link com.datastax.driver.core.policies.RetryPolicy}).
     *
     * @return the number of read timeouts.
     */
    public Counter getReadTimeouts() {
      return readTimeouts;
    }

    /**
     * Returns the number of requests that returned an unavailable exception (independently of the
     * final decision taken by the {@link com.datastax.driver.core.policies.RetryPolicy}).
     *
     * @return the number of unavailable exceptions.
     */
    public Counter getUnavailables() {
      return unavailables;
    }

    /**
     * Returns the number of requests that timed out before the driver received a response.
     *
     * @return the number of client timeouts.
     */
    public Counter getClientTimeouts() {
      return clientTimeouts;
    }

    /**
     * Returns the number of requests that returned errors not accounted for by another metric.
     * This includes all types of invalid requests.
     *
     * @return the number of request errors not accounted for by another metric.
     */
    public Counter getOthers() {
      return otherErrors;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}.
     */
    public Counter getRetries() {
      return retries;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a read timed out.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a read timed out.
     */
    public Counter getRetriesOnReadTimeout() {
      return retriesOnReadTimeout;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a write timed out.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a write timed out.
     */
    public Counter getRetriesOnWriteTimeout() {
      return retriesOnWriteTimeout;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after an unavailable exception.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after an unavailable exception.
     */
    public Counter getRetriesOnUnavailable() {
      return retriesOnUnavailable;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a client timeout.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a client timeout.
     */
    public Counter getRetriesOnClientTimeout() {
      return retriesOnClientTimeout;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a connection error.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a connection error.
     */
    public Counter getRetriesOnConnectionError() {
      return retriesOnConnectionError;
    }

    /**
     * Returns the number of times a request was retried due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after an unexpected error.
     *
     * @return the number of times a request was retried due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after an unexpected error.
     */
    public Counter getRetriesOnOtherErrors() {
      return retriesOnOtherErrors;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, for example due to timeouts or
     * unavailability.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}.
     */
    public Counter getIgnores() {
      return ignores;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a read timed out.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a read timed out.
     */
    public Counter getIgnoresOnReadTimeout() {
      return ignoresOnReadTimeout;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a write timed out.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a write timed out.
     */
    public Counter getIgnoresOnWriteTimeout() {
      return ignoresOnWriteTimeout;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after an unavailable exception.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after an unavailable exception.
     */
    public Counter getIgnoresOnUnavailable() {
      return ignoresOnUnavailable;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a client timeout.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a client timeout.
     */
    public Counter getIgnoresOnClientTimeout() {
      return ignoresOnClientTimeout;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after a connection error.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after a connection error.
     */
    public Counter getIgnoresOnConnectionError() {
      return ignoresOnConnectionError;
    }

    /**
     * Returns the number of times a request was ignored due to the {@link
     * com.datastax.driver.core.policies.RetryPolicy}, after an unexpected error.
     *
     * @return the number of times a request was ignored due to the {@link
     *     com.datastax.driver.core.policies.RetryPolicy}, after an unexpected error.
     */
    public Counter getIgnoresOnOtherErrors() {
      return ignoresOnOtherErrors;
    }

    /**
     * Returns the number of times a speculative execution was started because a previous
     * execution did not complete within the delay specified by {@link SpeculativeExecutionPolicy}.
     *
     * @return the number of speculative executions.
     */
    public Counter getSpeculativeExecutions() {
      return speculativeExecutions;
    }
  }
}
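For reference, here is a minimal usage sketch (not part of the driver source) showing how these metrics might be read and reported from application code. It assumes a reachable Cassandra node at 127.0.0.1; the CSV output directory, the reporter interval, and the class name MetricsUsageSketch are placeholder choices for illustration.

import com.codahale.metrics.CsvReporter;
import com.codahale.metrics.Timer;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Metrics;
import java.io.File;
import java.util.concurrent.TimeUnit;

public class MetricsUsageSketch {
  public static void main(String[] args) {
    // Hypothetical contact point; replace with your own cluster address.
    Cluster cluster = Cluster.builder().addContactPoint("127.0.0.1").build();
    cluster.connect();

    Metrics metrics = cluster.getMetrics();

    // Read a few driver-level gauges and the request timer snapshot.
    System.out.println("known hosts:      " + metrics.getKnownHosts().getValue());
    System.out.println("open connections: " + metrics.getOpenConnections().getValue());
    Timer requests = metrics.getRequestsTimer();
    System.out.println("requests (count): " + requests.getCount());
    System.out.println("p99 latency (ns): " + requests.getSnapshot().get99thPercentile());

    // Optionally attach an additional reporter to the driver's registry
    // (JMX reporting stays enabled unless withoutJMXReporting() was used).
    CsvReporter reporter =
        CsvReporter.forRegistry(metrics.getRegistry()).build(new File("measurements/"));
    reporter.start(1, TimeUnit.SECONDS);

    // ... run your workload ...

    reporter.stop();
    cluster.close();
  }
}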




