com.couchbase.client.core.cnc.tracing.ThresholdLoggingTracer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of core-io Show documentation
Show all versions of core-io Show documentation
The official Couchbase JVM Core IO Library
/*
* Copyright (c) 2019 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.couchbase.client.core.cnc.tracing;
import com.couchbase.client.core.cnc.EventBus;
import com.couchbase.client.core.cnc.RequestSpan;
import com.couchbase.client.core.cnc.RequestTracer;
import com.couchbase.client.core.cnc.TracingIdentifiers;
import com.couchbase.client.core.cnc.events.tracing.OverThresholdRequestsRecordedEvent;
import com.couchbase.client.core.deps.org.jctools.queues.MpscArrayQueue;
import com.couchbase.client.core.env.ThresholdLoggingTracerConfig;
import com.couchbase.client.core.error.TracerException;
import com.couchbase.client.core.msg.Request;
import com.couchbase.client.core.service.ServiceType;
import com.couchbase.client.core.transaction.components.CoreTransactionRequest;
import com.couchbase.client.core.util.HostAndPort;
import com.couchbase.client.core.util.NanoTimestamp;
import reactor.core.publisher.Mono;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import static com.couchbase.client.core.logging.RedactableArgument.redactSystem;
/**
* The default tracing implementation, which tracks the top N slowest requests per service and dumps them at
* configurable intervals.
*/
public class ThresholdLoggingTracer implements RequestTracer {
private static final AtomicInteger REQUEST_TRACER_ID = new AtomicInteger();
private static final String KEY_TOTAL_MICROS = "total_duration_us";
private static final String KEY_DISPATCH_MICROS = "last_dispatch_duration_us";
private static final String KEY_TOTAL_DISPATCH_MICROS = "total_dispatch_duration_us";
private static final String KEY_ENCODE_MICROS = "encode_duration_us";
private static final String KEY_SERVER_MICROS = "last_server_duration_us";
private static final String KEY_TOTAL_SERVER_MICROS = "total_server_duration_us";
private static final String KEY_OPERATION_ID = "operation_id";
private static final String KEY_OPERATION_NAME = "operation_name";
private static final String KEY_LAST_LOCAL_SOCKET = "last_local_socket";
private static final String KEY_LAST_REMOTE_SOCKET = "last_remote_socket";
private static final String KEY_LAST_LOCAL_ID = "last_local_id";
private static final String KEY_TIMEOUT = "timeout_ms";
private final AtomicBoolean running = new AtomicBoolean(false);
private final Queue> overThresholdQueue;
private final EventBus eventBus;
private final Thread worker;
private final ThresholdLoggingTracerConfig config;
private final long kvThreshold;
private final long queryThreshold;
private final long viewThreshold;
private final long searchThreshold;
private final long analyticsThreshold;
private final long transactionsThreshold;
private final Duration emitInterval;
private final int sampleSize;
/**
* Creates a builder to customize this tracer.
*
* @param eventBus the event bus where the final events will be emitted into.
* @return the builder to customize.
* @deprecated please use {@link #create(EventBus, ThresholdLoggingTracerConfig)} instead.
*/
public static Builder builder(final EventBus eventBus) {
return new Builder(eventBus);
}
/**
* Short-hand to create the tracer with the event bus that needs to be used.
*
* @param eventBus the event bus where the final events will be emitted into.
* @return the created tracer ready to be used.
*/
public static ThresholdLoggingTracer create(final EventBus eventBus) {
return create(eventBus, ThresholdLoggingTracerConfig.create());
}
/**
* Creates a tracer with config and a reference to the event bus.
*
* @param eventBus the event bus where the final events will be emitted into.
* @param config the config that should be used.
* @return the created tracer ready to be used.
*/
public static ThresholdLoggingTracer create(final EventBus eventBus, ThresholdLoggingTracerConfig config) {
return new ThresholdLoggingTracer(eventBus, config);
}
/**
* Internal constructor to build the tracer based on the config provided.
*
* @param eventBus the event bus that should be used.
* @param config the tracer config from where to extract the values.
*/
private ThresholdLoggingTracer(final EventBus eventBus, ThresholdLoggingTracerConfig config) {
this.eventBus = eventBus;
this.overThresholdQueue = new MpscArrayQueue<>(config.queueLength());
kvThreshold = config.kvThreshold().toNanos();
analyticsThreshold = config.analyticsThreshold().toNanos();
searchThreshold = config.searchThreshold().toNanos();
viewThreshold = config.viewThreshold().toNanos();
queryThreshold = config.queryThreshold().toNanos();
transactionsThreshold = config.transactionsThreshold().toNanos();
sampleSize = config.sampleSize();
emitInterval = config.emitInterval();
this.config = config;
worker = new Thread(new Worker());
worker.setDaemon(true);
}
/**
* Returns the current configuration.
*/
public ThresholdLoggingTracerConfig config() {
return config;
}
@Override
public RequestSpan requestSpan(final String name, final RequestSpan parent) {
try {
return new ThresholdRequestSpan(this);
} catch (Exception ex) {
throw new TracerException("Failed to create ThresholdRequestSpan", ex);
}
}
/**
* Finishes the span (sends it off into the queue when over threshold).
*
* @param span the finished internal span from the toplevel request.
*/
void finish(final ThresholdRequestSpan span) {
try {
if (span.requestContext() != null) {
final Request request = span.requestContext().request();
if (isOverThreshold(request)) {
if (!overThresholdQueue.offer(request)) {
// TODO: what to do if dropped because queue full? raise event?
}
}
}
} catch (Exception ex) {
throw new TracerException("Failed to finish ThresholdRequestSpan", ex);
}
}
/**
* Helper method to calculate if the given request is over the configured threshold for this service.
*
* @param request the request to check.
* @return true if over threshold, false otherwise.
*/
private boolean isOverThreshold(final Request request) {
final long tookNanos = request.context().logicalRequestLatency();
final ServiceType serviceType = request.serviceType();
if (serviceType == null) {
// Virtual service
if (request instanceof CoreTransactionRequest) {
return tookNanos >= transactionsThreshold;
}
return false;
}
else if (serviceType == ServiceType.KV && tookNanos >= kvThreshold) {
return true;
} else if (serviceType == ServiceType.QUERY && tookNanos >= queryThreshold) {
return true;
} else if (serviceType == ServiceType.ANALYTICS && tookNanos >= analyticsThreshold) {
return true;
} else if (serviceType == ServiceType.SEARCH && tookNanos >= searchThreshold) {
return true;
} else {
return serviceType == ServiceType.VIEWS && tookNanos >= viewThreshold;
}
}
@Override
public Mono start() {
return Mono.defer(() -> {
if (running.compareAndSet(false, true)) {
worker.start();
}
return Mono.empty();
});
}
@Override
public Mono stop(Duration timeout) {
return Mono.defer(() -> {
if (running.compareAndSet(true, false)) {
worker.interrupt();
}
return Mono.empty();
});
}
/**
* The worker picks up requests from the queue and stores it in the per-service queues so that they can be dumped
* when configured.
*/
private class Worker implements Runnable {
/**
* Time this worker spends between check cycles. 100ms should be granular enough
* but making it configurable, who knows...
*/
private final long workerSleepMs = Long.parseLong(
System.getProperty("com.couchbase.thresholdRequestTracerSleep", "100")
);
private final boolean newOutputFormat = Boolean.parseBoolean(
System.getProperty("com.couchbase.thresholdRequestTracerNewOutputFormat", "true")
);
/**
* Compares request by their logical request latency for the priority threshold queues.
*/
private final Comparator> THRESHOLD_COMPARATOR = Comparator.comparingLong(
o -> o.context().logicalRequestLatency()
);
private final Queue> kvThresholds = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> n1qlThresholds = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> viewThresholds = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> ftsThresholds = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> analyticsThresholds = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> transactionsThresholds = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private long kvThresholdCount = 0;
private long n1qlThresholdCount = 0;
private long viewThresholdCount = 0;
private long ftsThresholdCount = 0;
private long analyticsThresholdCount = 0;
private long transactionsThresholdCount = 0;
private NanoTimestamp lastThresholdLog = NanoTimestamp.never();
private boolean hasThresholdWritten;
@Override
public void run() {
Thread.currentThread().setName("cb-tracing-" + REQUEST_TRACER_ID.incrementAndGet());
while (running.get()) {
try {
handleOverThresholdQueue();
Thread.sleep(workerSleepMs);
} catch (final InterruptedException ex) {
if (!running.get()) {
return;
} else {
Thread.currentThread().interrupt();
}
} catch (final Exception ex) {
// TODO: raise event
// LOGGER.warn("Got exception on slow operation reporter, ignoring.", ex);
}
}
}
/**
* Helper method which drains the queue, handles the lists and logs if needed.
*/
private void handleOverThresholdQueue() {
if (lastThresholdLog.hasElapsed(emitInterval)) {
if (newOutputFormat) {
prepareAndlogOverThresholdNew();
} else {
prepareAndlogOverThresholdOld();
}
lastThresholdLog = NanoTimestamp.now();
}
while (true) {
Request request = overThresholdQueue.poll();
if (request == null) {
return;
}
final ServiceType serviceType = request.serviceType();
if (serviceType == null) {
// Virtual service
if (request instanceof CoreTransactionRequest) {
updateThreshold(transactionsThresholds, request);
transactionsThresholdCount += 1;
}
}
else if (serviceType == ServiceType.KV) {
updateThreshold(kvThresholds, request);
kvThresholdCount += 1;
} else if (serviceType == ServiceType.QUERY) {
updateThreshold(n1qlThresholds, request);
n1qlThresholdCount += 1;
} else if (serviceType == ServiceType.VIEWS) {
updateThreshold(viewThresholds, request);
viewThresholdCount += 1;
} else if (serviceType == ServiceType.SEARCH) {
updateThreshold(ftsThresholds, request);
ftsThresholdCount += 1;
} else if (serviceType == ServiceType.ANALYTICS) {
updateThreshold(analyticsThresholds, request);
analyticsThresholdCount += 1;
} else {
// TODO: log error
// LOGGER.warn("Unknown service in span {}", service);
}
}
}
/**
* Logs the over threshold data and resets the sets.
*/
private void prepareAndlogOverThresholdNew() {
if (!hasThresholdWritten) {
return;
}
hasThresholdWritten = false;
Map output = new HashMap<>();
if (!kvThresholds.isEmpty()) {
output.put(
TracingIdentifiers.SERVICE_KV,
convertThresholdMetadataNew(kvThresholds, kvThresholdCount)
);
kvThresholds.clear();
kvThresholdCount = 0;
}
if (!n1qlThresholds.isEmpty()) {
output.put(
TracingIdentifiers.SERVICE_QUERY,
convertThresholdMetadataNew(n1qlThresholds, n1qlThresholdCount)
);
n1qlThresholds.clear();
n1qlThresholdCount = 0;
}
if (!viewThresholds.isEmpty()) {
output.put(
TracingIdentifiers.SERVICE_VIEWS,
convertThresholdMetadataNew(viewThresholds, viewThresholdCount)
);
viewThresholds.clear();
viewThresholdCount = 0;
}
if (!ftsThresholds.isEmpty()) {
output.put(
TracingIdentifiers.SERVICE_SEARCH,
convertThresholdMetadataNew(ftsThresholds, ftsThresholdCount)
);
ftsThresholds.clear();
ftsThresholdCount = 0;
}
if (!analyticsThresholds.isEmpty()) {
output.put(
TracingIdentifiers.SERVICE_ANALYTICS,
convertThresholdMetadataNew(analyticsThresholds, analyticsThresholdCount)
);
analyticsThresholds.clear();
analyticsThresholdCount = 0;
}
if (!transactionsThresholds.isEmpty()) {
output.put(
TracingIdentifiers.SERVICE_TRANSACTIONS,
convertThresholdMetadataNew(transactionsThresholds, transactionsThresholdCount)
);
transactionsThresholds.clear();
transactionsThresholdCount = 0;
}
logOverThreshold(output, null);
}
private void prepareAndlogOverThresholdOld() {
if (!hasThresholdWritten) {
return;
}
hasThresholdWritten = false;
List