// com.couchbase.client.core.cnc.OrphanReporter (Maven / Gradle / Ivy)
// Show all versions of core-io. Show documentation.
/*
* Copyright (c) 2019 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.couchbase.client.core.cnc;
import com.couchbase.client.core.annotation.Stability;
import com.couchbase.client.core.cnc.events.tracing.OrphanRecordDroppedEvent;
import com.couchbase.client.core.cnc.events.tracing.OrphanReporterFailureDetectedEvent;
import com.couchbase.client.core.cnc.events.tracing.OrphansRecordedEvent;
import com.couchbase.client.core.deps.org.jctools.queues.MpscArrayQueue;
import com.couchbase.client.core.env.OrphanReporterConfig;
import com.couchbase.client.core.msg.Request;
import com.couchbase.client.core.msg.UnmonitoredRequest;
import com.couchbase.client.core.msg.kv.KeyValueRequest;
import com.couchbase.client.core.msg.view.ViewRequest;
import com.couchbase.client.core.service.ServiceType;
import com.couchbase.client.core.util.HostAndPort;
import com.couchbase.client.core.util.NanoTimestamp;
import reactor.core.publisher.Mono;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import static com.couchbase.client.core.logging.RedactableArgument.redactSystem;
@Stability.Internal
public class OrphanReporter {
public static final String ORPHAN_TREAD_PREFIX = "cb-orphan-";
private static final AtomicInteger ORPHAN_REPORTER_ID = new AtomicInteger();
private static final String KEY_TOTAL_MICROS = "total_duration_us";
private static final String KEY_DISPATCH_MICROS = "last_dispatch_duration_us";
private static final String KEY_TOTAL_DISPATCH_MICROS = "total_dispatch_duration_us";
private static final String KEY_ENCODE_MICROS = "encode_duration_us";
private static final String KEY_SERVER_MICROS = "last_server_duration_us";
private static final String KEY_TOTAL_SERVER_MICROS = "total_server_duration_us";
private static final String KEY_OPERATION_ID = "operation_id";
private static final String KEY_OPERATION_NAME = "operation_name";
private static final String KEY_LAST_LOCAL_SOCKET = "last_local_socket";
private static final String KEY_LAST_REMOTE_SOCKET = "last_remote_socket";
private static final String KEY_LAST_LOCAL_ID = "last_local_id";
private static final String KEY_TIMEOUT = "timeout_ms";
private final AtomicBoolean running = new AtomicBoolean(false);
volatile Thread worker = null; // visible for testing
private final Queue> orphanQueue;
private final Duration emitInterval;
private final int sampleSize;
private final EventBus eventBus;
private final boolean enabled;
private final OrphanReporterConfig config;
/**
* Creates a new {@link OrphanReporter}.
*
* Please do not instantiate this class directly, but rather let it be handled through the environment and
* customize it through the {@link OrphanReporterConfig} which can also be provided on the environment.
*
* @param eventBus the event bus where the messages should be emitted on.
* @param config the configuration for this reporter.
*/
@Stability.Internal
public OrphanReporter(final EventBus eventBus, final OrphanReporterConfig config) {
this.eventBus = eventBus;
this.orphanQueue = new MpscArrayQueue<>(config.queueLength());
this.emitInterval = config.emitInterval();
this.sampleSize = config.sampleSize();
this.enabled = config.enabled();
this.config = config;
// Spawn a thread only if the reporter is enabled.
if (enabled) {
worker = new Thread(new Worker());
worker.setDaemon(true);
worker.setName(ORPHAN_TREAD_PREFIX + ORPHAN_REPORTER_ID.incrementAndGet());
}
}
/**
 * Gives access to the configuration this reporter was created with.
 *
 * @return the {@link OrphanReporterConfig} passed to the constructor.
 */
public OrphanReporterConfig config() {
  return this.config;
}
/**
 * Starts the orphan reporter.
 *
 * Nothing happens until the returned {@link Mono} is subscribed to. On subscription the worker thread is
 * started if the reporter is enabled and not already running; in every other case the call is a no-op.
 *
 * NOTE(review): in the code visible here the worker thread is only created in the constructor. If that
 * holds, subscribing to start() again after stop() would call Thread#start() a second time and fail with an
 * IllegalThreadStateException - confirm whether a restart is meant to be supported.
 *
 * @return a Mono which completes, without emitting a value, once the reporter has been started.
 */
public Mono<Void> start() {
  return Mono.defer(() -> {
    // The CAS guarantees that only one subscriber actually starts the thread.
    final boolean shouldStart = enabled && running.compareAndSet(false, true);
    if (shouldStart) {
      worker.start();
    }
    return Mono.empty();
  });
}
/**
 * Stops the orphan reporter.
 *
 * Nothing happens until the returned {@link Mono} is subscribed to. On subscription, if the reporter is
 * enabled and currently running, the running flag is cleared and the worker thread is interrupted so that it
 * leaves its sleep and exits. The Mono completes right after the interrupt has been issued; it does not wait
 * for the worker thread to terminate.
 *
 * @return a Mono which completes, without emitting a value, once the stop has been signalled.
 */
public Mono<Void> stop() {
  return Mono.defer(() -> {
    // The CAS guarantees that only one subscriber interrupts the worker.
    final boolean shouldStop = enabled && running.compareAndSet(true, false);
    if (shouldStop) {
      worker.interrupt();
    }
    return Mono.empty();
  });
}
/**
 * Reports an orphaned {@link Request}.
 *
 * The request is handed to the worker thread through a bounded queue. If the queue is full the request is
 * not recorded and an {@link OrphanRecordDroppedEvent} is raised on the event bus instead.
 *
 * The call does nothing when the reporter is disabled. To exclude certain requests from orphan reporting,
 * make them implement the {@link UnmonitoredRequest} marker interface.
 *
 * @param request the request to report as orphan.
 */
public void report(final Request<?> request) {
  if (!enabled || request instanceof UnmonitoredRequest) {
    return;
  }

  final boolean accepted = orphanQueue.offer(request);
  if (!accepted) {
    eventBus.publish(new OrphanRecordDroppedEvent(request.getClass()));
  }
}
/**
* Worker thread which runs in a separate thread and consumes the orphan queue, aggregates the infos
* and puts them onto the event bus at regular intervals.
*/
private class Worker implements Runnable {
/**
* Time this worker spends between check cycles. 100ms should be granular enough
* but making it configurable, who knows...
*/
private final long workerSleepMs = Long.parseLong(
System.getProperty("com.couchbase.orphanReporterSleep", "100")
);
private final boolean newOutputFormat = Boolean.parseBoolean(
System.getProperty("com.couchbase.orphanReporterNewOutputFormat", "true")
);
/**
* Compares request by their logical request latency for the priority threshold queues.
*/
private final Comparator> THRESHOLD_COMPARATOR = Comparator.comparingLong(
o -> o.context().logicalRequestLatency()
);
private NanoTimestamp lastThresholdLog = NanoTimestamp.never();
private boolean hasThresholdWritten;
private final Queue> kvOrphans = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> queryOrphans = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> viewOrphans = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> searchOrphans = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private final Queue> analyticsOrphans = new PriorityQueue<>(THRESHOLD_COMPARATOR);
private long kvOrphanCount = 0;
private long queryOrphanCount = 0;
private long viewOrphanCount = 0;
private long searchOrphanCount = 0;
private long analyticsOrphanCount = 0;
/**
 * Worker loop: processes the orphan queue, sleeps for the configured pause and repeats for as long as the
 * reporter is marked as running.
 *
 * An interrupt ends the loop only if the reporter has been stopped in the meantime; otherwise the loop
 * carries on. Any other exception is published as an {@link OrphanReporterFailureDetectedEvent} and the
 * loop carries on as well.
 */
@Override
public void run() {
  while (running.get()) {
    try {
      handleOrphanQueue();
      Thread.sleep(workerSleepMs);
    } catch (final InterruptedException interrupted) {
      if (running.get()) {
        // Not asked to stop, so go on with the next cycle.
        continue;
      }
      break;
    } catch (final Exception failure) {
      eventBus.publish(new OrphanReporterFailureDetectedEvent(failure));
    }
  }
}
/**
 * Runs one worker cycle.
 *
 * First, if the emit interval has elapsed, a report is prepared in the selected output format and the
 * interval timer is restarted. Then the orphan queue is drained completely: each request is sampled into
 * the queue of its service and counted. Requests of any other service type are drained but not recorded.
 */
private void handleOrphanQueue() {
  if (lastThresholdLog.hasElapsed(emitInterval)) {
    if (newOutputFormat) {
      prepareAndLogOrphansNew();
    } else {
      prepareAndLogOrphansOld();
    }
    lastThresholdLog = NanoTimestamp.now();
  }

  // poll() returns null once the queue is empty, which ends the drain.
  for (Request<?> request = orphanQueue.poll(); request != null; request = orphanQueue.poll()) {
    final ServiceType serviceType = request.serviceType();
    if (serviceType == ServiceType.KV) {
      updateSet(kvOrphans, request);
      kvOrphanCount += 1;
    } else if (serviceType == ServiceType.QUERY) {
      updateSet(queryOrphans, request);
      queryOrphanCount += 1;
    } else if (serviceType == ServiceType.VIEWS) {
      updateSet(viewOrphans, request);
      viewOrphanCount += 1;
    } else if (serviceType == ServiceType.SEARCH) {
      updateSet(searchOrphans, request);
      searchOrphanCount += 1;
    } else if (serviceType == ServiceType.ANALYTICS) {
      updateSet(analyticsOrphans, request);
      analyticsOrphanCount += 1;
    }
  }
}
/**
* Helper method which updates the list with the span and ensures that the sample
* size is respected.
*/
private void updateSet(final Queue> set, final Request request) {
set.add(request);
// Remove the element with the lowest duration, so we only keep the highest ones consistently
while(set.size() > sampleSize) {
set.remove();
}
hasThresholdWritten = true;
}
/**
 * Builds the report in the new output format and passes it to {@code logOrphans}.
 *
 * Returns immediately when no orphan has been sampled since the previous report. Otherwise every service
 * with at least one sampled orphan contributes one entry, keyed by its tracing service identifier, and
 * that service's sample queue and counter are reset for the next interval.
 */
private void prepareAndLogOrphansNew() {
  if (!hasThresholdWritten) {
    return;
  }
  hasThresholdWritten = false;

  final Map<String, Object> output = new HashMap<>();

  if (!kvOrphans.isEmpty()) {
    output.put(TracingIdentifiers.SERVICE_KV, convertOrphanMetadataNew(kvOrphans, kvOrphanCount));
    kvOrphans.clear();
    kvOrphanCount = 0;
  }
  if (!queryOrphans.isEmpty()) {
    output.put(TracingIdentifiers.SERVICE_QUERY, convertOrphanMetadataNew(queryOrphans, queryOrphanCount));
    queryOrphans.clear();
    queryOrphanCount = 0;
  }
  if (!viewOrphans.isEmpty()) {
    output.put(TracingIdentifiers.SERVICE_VIEWS, convertOrphanMetadataNew(viewOrphans, viewOrphanCount));
    viewOrphans.clear();
    viewOrphanCount = 0;
  }
  if (!searchOrphans.isEmpty()) {
    output.put(TracingIdentifiers.SERVICE_SEARCH, convertOrphanMetadataNew(searchOrphans, searchOrphanCount));
    searchOrphans.clear();
    searchOrphanCount = 0;
  }
  if (!analyticsOrphans.isEmpty()) {
    output.put(TracingIdentifiers.SERVICE_ANALYTICS, convertOrphanMetadataNew(analyticsOrphans, analyticsOrphanCount));
    analyticsOrphans.clear();
    analyticsOrphanCount = 0;
  }

  // NOTE(review): the second argument is presumably the old-format payload, unused here - confirm
  // against logOrphans.
  logOrphans(output, null);
}
private void prepareAndLogOrphansOld() {
if (!hasThresholdWritten) {
return;
}
hasThresholdWritten = false;
List