// org.glowroot.agent.central.CentralConnection Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of glowroot-agent-it-harness Show documentation
// Glowroot Agent Integration Test Harness
/*
* Copyright 2015-2018 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.glowroot.agent.central;
import java.io.File;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.javax.annotation.concurrent.GuardedBy;
import javax.net.ssl.SSLException;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.com.google.common.base.Splitter;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.com.google.common.base.Stopwatch;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.com.google.common.collect.Lists;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.grpc.ManagedChannel;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.grpc.netty.GrpcSslContexts;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.grpc.netty.NegotiationType;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.grpc.netty.NettyChannelBuilder;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.grpc.stub.StreamObserver;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.grpc.util.RoundRobinLoadBalancerFactory;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.netty.channel.EventLoopGroup;
import org.glowroot.agent.shaded.org.glowroot.agent.it.harness.shaded.io.netty.handler.ssl.SslContextBuilder;
import org.glowroot.agent.shaded.org.checkerframework.checker.nullness.qual.Nullable;
import org.immutables.value.Value;
import org.glowroot.agent.shaded.org.glowroot.agent.shaded.org.slf4j.Logger;
import org.glowroot.agent.shaded.org.glowroot.agent.shaded.org.slf4j.LoggerFactory;
import org.glowroot.agent.util.RateLimitedLogger;
import org.glowroot.agent.util.ThreadFactories;
import org.glowroot.agent.shaded.org.glowroot.common.util.OnlyUsedByTests;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
class CentralConnection {
// logger used for diagnostics emitted directly by this class
private static final Logger logger = LoggerFactory.getLogger(CentralConnection.class);
// back pressure on connection to the central collector
// (upper bound for pendingRequestCount: once reached, further non-init calls are dropped with a
// warning instead of being sent)
private static final int PENDING_LIMIT = 100;
// Per-thread flag, false by default, that is true only while the current thread is executing
// inside suppressLogCollector(Runnable); it is read via suppressLogCollector().
//
// The Boolean type argument matters: the readers in this class assign get() straight to a
// primitive boolean, which only compiles when get() returns Boolean (a raw ThreadLocal yields
// Object).
@SuppressWarnings("nullness:type.argument.type.incompatible")
private final ThreadLocal<Boolean> suppressLogCollector = new ThreadLocal<Boolean>() {
    @Override
    protected Boolean initialValue() {
        return false;
    }
};
// netty event loop group created in the constructor and handed to the channel builder; shut
// down in awaitClose()
private final EventLoopGroup eventLoopGroup;
// single-thread executor handed to the channel builder; shut down in awaitClose()
private final ExecutorService channelExecutor;
// the gRPC channel to the central collector, exposed via getChannel()
private final ManagedChannel channel;
// single-thread scheduler used for delayed first attempts and for retries after a failure
private final ScheduledExecutorService retryExecutor;
// supplied by the creator of this connection; while it is true, new (non-init) calls are skipped
// and failed calls are not retried
private final AtomicBoolean inConnectionFailure;
// source of the +/- 50% randomization applied to retry delays
private final Random random = new Random();
// warns when data is dropped because PENDING_LIMIT was reached; this object is also the monitor
// that guards pendingRequestCount
private final RateLimitedLogger discardingDataLogger =
new RateLimitedLogger(CentralConnection.class);
// count does not include init call
@GuardedBy("discardingDataLogger")
private int pendingRequestCount;
// warns when the init call fails (the init call keeps being retried)
private final RateLimitedLogger initConnectionErrorLogger =
new RateLimitedLogger(CentralConnection.class, true);
// warns when a non-init call is given up on after its retry window has elapsed
private final RateLimitedLogger connectionErrorLogger =
new RateLimitedLogger(CentralConnection.class);
// the collector address exactly as passed to the constructor; only used in log messages
private final String collectorAddress;
// set to true once an init call has completed; gates the "unable to send data" warning
private volatile boolean initCallSucceeded;
// set by close(); once true, new calls are ignored and failed calls are not retried
private volatile boolean closed;
// Builds the gRPC channel to the central collector together with its supporting thread pools.
//
// collectorAddress is parsed by parseCollectorAddress(): a single target is connected to
// directly (optionally overriding the authority with collectorAuthority), multiple targets go
// through MultipleAddressNameResolverFactory. For https addresses the first
// grpc-trusted-root-certs.pem found in confDirs (if any) is used as the trust store.
//
// Throws IllegalStateException if the address is invalid or if several https targets are given
// without a collectorAuthority, and SSLException if the SSL context cannot be built. In either
// failure case the event loop group and channel executor created here are shut down again, so a
// failed construction does not leave them behind.
CentralConnection(String collectorAddress, @Nullable String collectorAuthority,
        List<File> confDirs, AtomicBoolean inConnectionFailure) throws SSLException {
    ParsedCollectorAddress parsedCollectorAddress = parseCollectorAddress(collectorAddress);
    EventLoopGroup newEventLoopGroup = EventLoopGroups.create("Glowroot-GRPC-Worker-ELG");
    ExecutorService newChannelExecutor =
            Executors.newSingleThreadExecutor(ThreadFactories.create("Glowroot-GRPC-Executor"));
    ManagedChannel newChannel;
    boolean channelBuilt = false;
    try {
        NettyChannelBuilder builder;
        if (parsedCollectorAddress.targets().size() == 1) {
            CollectorTarget target = parsedCollectorAddress.targets().get(0);
            builder = NettyChannelBuilder.forAddress(target.host(), target.port());
            if (collectorAuthority != null) {
                builder.overrideAuthority(collectorAuthority);
            }
        } else {
            // this connection mechanism may be deprecated in the future in favor of resolving a
            // single address to multiple collectors via DNS (above)
            String authority;
            if (collectorAuthority != null) {
                authority = collectorAuthority;
            } else if (!parsedCollectorAddress.https()) {
                authority = "dummy-service-authority";
            } else {
                throw new IllegalStateException("collector.authority is required when connecting"
                        + " over HTTPS to a comma-separated list of glowroot central collectors");
            }
            builder = NettyChannelBuilder.forTarget("dummy-target")
                    .nameResolverFactory(new MultipleAddressNameResolverFactory(
                            parsedCollectorAddress.targets(), authority));
        }
        // single address may resolve to multiple collectors above via DNS, so need to specify
        // round robin here even if only single address (first part of conditional above)
        builder.loadBalancerFactory(RoundRobinLoadBalancerFactory.getInstance())
                .eventLoopGroup(newEventLoopGroup)
                .executor(newChannelExecutor)
                // aggressive keep alive, shouldn't even be used since gauge data is sent every
                // 5 seconds and keep alive will only kick in after 30 seconds of not hearing
                // back from the server
                .keepAliveTime(30, SECONDS);
        if (parsedCollectorAddress.https()) {
            SslContextBuilder sslContext = GrpcSslContexts.forClient();
            File trustCertCollectionFile = getTrustCertCollectionFile(confDirs);
            if (trustCertCollectionFile != null) {
                sslContext.trustManager(trustCertCollectionFile);
            }
            newChannel = builder.sslContext(sslContext.build())
                    .negotiationType(NegotiationType.TLS)
                    .build();
        } else {
            newChannel = builder.negotiationType(NegotiationType.PLAINTEXT)
                    .build();
        }
        channelBuilt = true;
    } finally {
        if (!channelBuilt) {
            // construction is failing with an exception: release what was created above, since
            // nobody will get a reference to this instance to close it
            newChannelExecutor.shutdown();
            newEventLoopGroup.shutdownGracefully(0, 0, SECONDS);
        }
    }
    eventLoopGroup = newEventLoopGroup;
    channelExecutor = newChannelExecutor;
    channel = newChannel;
    retryExecutor = Executors.newSingleThreadScheduledExecutor(
            ThreadFactories.create("Glowroot-Collector-Retry"));
    this.inConnectionFailure = inConnectionFailure;
    this.collectorAddress = collectorAddress;
}
// Reports whether the calling thread is currently running inside suppressLogCollector(Runnable).
boolean suppressLogCollector() {
    final boolean suppressedOnThisThread = suppressLogCollector.get();
    return suppressedOnThisThread;
}
// Exposes the gRPC channel that was built in the constructor.
ManagedChannel getChannel() {
    return this.channel;
}
// Sends the call immediately and never retries it: with a retry window of -1 seconds the
// elapsed-time check performed on failure is always exceeded.
void callOnce(GrpcCall call) {
    final int noInitialDelayMillis = 0;
    final int noRetryWindowInSeconds = -1;
    callWithAFewRetries(noInitialDelayMillis, noRetryWindowInSeconds, call);
}
// important that these calls are idempotent
//
// convenience overload: same as callWithAFewRetries(0, call), i.e. no initial delay
void callWithAFewRetries(GrpcCall call) {
    final int noInitialDelayMillis = 0;
    callWithAFewRetries(noInitialDelayMillis, call);
}
// important that these calls are idempotent
//
// sends the call after initialDelayMillis (immediately if not positive), using a 60 second
// retry window
void callWithAFewRetries(int initialDelayMillis, GrpcCall call) {
    final int retryWindowInSeconds = 60;
    callWithAFewRetries(initialDelayMillis, retryWindowInSeconds, call);
}
// important that these calls are idempotent
//
// Sends the call unless this connection is closed, the connection is flagged as failing, or
// PENDING_LIMIT calls are already pending (in which case a warning is logged and the call is
// dropped). maxTotalInSeconds is used both as the overall retry window and as the cap for a
// single retry delay.
private void callWithAFewRetries(int initialDelayMillis,
        final int maxTotalInSeconds, final GrpcCall call) {
    if (closed || inConnectionFailure.get()) {
        return;
    }
    boolean slotReserved;
    synchronized (discardingDataLogger) {
        slotReserved = pendingRequestCount < PENDING_LIMIT;
        if (slotReserved) {
            pendingRequestCount++;
        }
    }
    if (!slotReserved) {
        // it is important not to perform logging under the above synchronized lock in order to
        // eliminate possibility of deadlock
        suppressLogCollector(new Runnable() {
            @Override
            public void run() {
                discardingDataLogger.warn("not sending data to the central collector"
                        + " because pending request limit ({}) exceeded", PENDING_LIMIT);
            }
        });
        return;
    }
    // TODO revisit retry/backoff after next grpc version
    // 60 seconds should be enough time to restart central collector instance without losing
    // data (though better to use central collector cluster)
    //
    // this cannot retry over too long a period since it retains memory of rpc message for
    // that duration
    if (initialDelayMillis <= 0) {
        call.call(new RetryingStreamObserver(call, maxTotalInSeconds, maxTotalInSeconds,
                false));
        return;
    }
    // the observer (and with it its stopwatch) is only created once the delay has passed, so
    // the retry window is measured from the first real attempt
    Runnable delayedFirstAttempt = new Runnable() {
        @Override
        public void run() {
            try {
                call.call(new RetryingStreamObserver(call, maxTotalInSeconds,
                        maxTotalInSeconds, false));
            } catch (Throwable t) {
                // NOTE(review): nothing here releases the pending slot reserved above --
                // confirm whether the observer is still notified when call() throws
                logger.error(t.getMessage(), t);
            }
        }
    };
    retryExecutor.schedule(delayedFirstAttempt, initialDelayMillis, MILLISECONDS);
}
// important that these calls are idempotent
//
// Sends the init call. It is not counted in pendingRequestCount, its single retry delay is
// capped at 15 seconds, and it is retried until it succeeds or this connection is closed.
void callInit(GrpcCall call) {
    // important here not to check inConnectionFailure, since need this to succeed if/when
    // connection is re-established
    if (!closed) {
        call.call(new RetryingStreamObserver(call, 15, -1, true));
    }
}
// Runs the given task with the calling thread's suppression flag set to true, then restores
// whatever value the flag had before (also when the task throws), so nested calls are safe.
void suppressLogCollector(Runnable runnable) {
    final boolean wasSuppressed = suppressLogCollector.get();
    suppressLogCollector.set(true);
    try {
        runnable.run();
    } finally {
        suppressLogCollector.set(wasSuppressed);
    }
}
// Marks this connection as closed (so new calls and retries become no-ops) and starts an orderly
// shutdown of the retry scheduler and the channel; use awaitClose() to wait for completion.
@OnlyUsedByTests
void close() {
    this.closed = true;
    this.retryExecutor.shutdown();
    this.channel.shutdown();
}
// Waits (up to 10 seconds per step) for the retry scheduler and the channel to terminate, then
// shuts down and awaits the channel executor and finally the event loop group. Throws
// IllegalStateException as soon as one of the steps does not finish in time.
@OnlyUsedByTests
void awaitClose() throws InterruptedException {
    boolean retryExecutorTerminated = retryExecutor.awaitTermination(10, SECONDS);
    if (!retryExecutorTerminated) {
        throw new IllegalStateException("Could not terminate executor");
    }
    boolean channelTerminated = channel.awaitTermination(10, SECONDS);
    if (!channelTerminated) {
        throw new IllegalStateException("Could not terminate channel");
    }
    // the channel executor is only shut down once the channel itself has terminated
    channelExecutor.shutdown();
    boolean channelExecutorTerminated = channelExecutor.awaitTermination(10, SECONDS);
    if (!channelExecutorTerminated) {
        throw new IllegalStateException("Could not terminate executor");
    }
    boolean eventLoopGroupTerminated =
            eventLoopGroup.shutdownGracefully(0, 0, SECONDS).await(10, SECONDS);
    if (!eventLoopGroupTerminated) {
        throw new IllegalStateException("Could not terminate event loop group");
    }
}
// Parses a comma-separated list of collector addresses of the form [http://|https://]host:port.
//
// Entries are trimmed and empty entries are skipped. Either all entries use https:// or none
// does; the result's https() flag reflects which. Throws IllegalStateException if schemes are
// mixed, if an entry has no ':' separator, or if the text after the first ':' is not an
// integer (in which case the NumberFormatException is attached as the cause).
private static ParsedCollectorAddress parseCollectorAddress(String collectorAddress) {
    boolean https = false;
    List<CollectorTarget> targets = Lists.newArrayList();
    for (String addr : Splitter.on(',').trimResults().omitEmptyStrings()
            .split(collectorAddress)) {
        if (addr.startsWith("https://")) {
            // an earlier entry without https:// has already been accepted
            if (!targets.isEmpty() && !https) {
                throw new IllegalStateException("Cannot mix http and https addresses when using"
                        + " client side load balancing: " + collectorAddress);
            }
            addr = addr.substring("https://".length());
            https = true;
        } else {
            // an earlier entry with https:// has already been accepted
            if (https) {
                throw new IllegalStateException("Cannot mix http and https addresses when using"
                        + " client side load balancing: " + collectorAddress);
            }
            if (addr.startsWith("http://")) {
                addr = addr.substring("http://".length());
            }
        }
        int index = addr.indexOf(':');
        if (index == -1) {
            throw new IllegalStateException(
                    "Invalid collector.address (missing port): " + addr);
        }
        String host = addr.substring(0, index);
        int port;
        try {
            port = Integer.parseInt(addr.substring(index + 1));
        } catch (NumberFormatException e) {
            logger.debug(e.getMessage(), e);
            // keep the parse failure as the cause so it is not lost when debug logging is off
            throw new IllegalStateException(
                    "Invalid collector.address (invalid port): " + addr, e);
        }
        targets.add(ImmutableCollectorTarget.builder()
                .host(host)
                .port(port)
                .build());
    }
    return ImmutableParsedCollectorAddress.builder()
            .https(https)
            .addAllTargets(targets)
            .build();
}
// Returns the first existing grpc-trusted-root-certs.pem found by checking the given
// directories in order, or null if none of them contains that file.
//
// The element type File is required for the for-each loop below to compile; callers that still
// hold a raw List remain source compatible.
private static @Nullable File getTrustCertCollectionFile(List<File> confDirs) {
    for (File confDir : confDirs) {
        File confFile = new File(confDir, "grpc-trusted-root-certs.pem");
        if (confFile.exists()) {
            return confFile;
        }
    }
    return null;
}
// Follows the cause chain of the given throwable to its end and returns toString() of the last
// throwable in the chain.
private static String getRootCauseMessage(Throwable t) {
    final Throwable next = t.getCause();
    // using toString() instead of getMessage() in order to capture exception class name
    return next == null ? t.toString() : getRootCauseMessage(next);
}
@Value.Immutable
interface ParsedCollectorAddress {
boolean https();
List targets();
}
// Host and port of a single central collector endpoint, as parsed from one entry of the
// collector address. The implementation is referenced in this file as ImmutableCollectorTarget.
@Value.Immutable
interface CollectorTarget {
// the part of the address entry before its first ':' (after any http:// or https:// prefix)
String host();
// the integer parsed from the part of the address entry after its first ':'
int port();
}
abstract static class GrpcCall {
abstract void call(StreamObserver responseObserver);
void doWithResponse(@SuppressWarnings("unused") T response) {}
}
private class RetryingStreamObserver
implements StreamObserver {
private final GrpcCall grpcCall;
private final int maxSingleDelayInSeconds;
private final int maxTotalInSeconds;
private final boolean init;
private final Stopwatch stopwatch = Stopwatch.createStarted();
private volatile long nextDelayInSeconds = 4;
private RetryingStreamObserver(GrpcCall grpcCall, int maxSingleDelayInSeconds,
int maxTotalInSeconds, boolean init) {
this.grpcCall = grpcCall;
this.maxSingleDelayInSeconds = maxSingleDelayInSeconds;
this.maxTotalInSeconds = maxTotalInSeconds;
this.init = init;
}
@Override
public void onNext(T value) {
try {
grpcCall.doWithResponse(value);
} catch (RuntimeException t) {
logger.error(t.getMessage(), t);
throw t;
} catch (Throwable t) {
logger.error(t.getMessage(), t);
throw new RuntimeException(t);
}
}
@Override
public void onCompleted() {
if (init) {
initCallSucceeded = true;
}
decrementPendingRequestCount();
}
@Override
public void onError(Throwable t) {
try {
onErrorInternal(t);
} catch (RuntimeException u) {
logger.error(u.getMessage(), u);
throw u;
} catch (Throwable u) {
logger.error(u.getMessage(), u);
throw new RuntimeException(u);
}
}
private void onErrorInternal(final Throwable t) {
if (closed) {
decrementPendingRequestCount();
return;
}
if (init) {
suppressLogCollector(new Runnable() {
@Override
public void run() {
initConnectionErrorLogger.warn("unable to establish connection with the"
+ " central collector {} (will keep trying...): {}",
collectorAddress, getRootCauseMessage(t));
logger.debug(t.getMessage(), t);
}
});
}
if (inConnectionFailure.get()) {
decrementPendingRequestCount();
return;
}
suppressLogCollector(new Runnable() {
@Override
public void run() {
logger.debug(t.getMessage(), t);
}
});
if (!init && stopwatch.elapsed(SECONDS) > maxTotalInSeconds) {
if (initCallSucceeded) {
suppressLogCollector(new Runnable() {
@Override
public void run() {
connectionErrorLogger.warn(
"unable to send data to the central collector: {}",
getRootCauseMessage(t));
logger.debug(t.getMessage(), t);
}
});
}
decrementPendingRequestCount();
return;
}
// retry delay doubles on average each time, randomized +/- 50%
double randomizedDoubling = 0.5 + random.nextDouble();
long currDelay = (long) (nextDelayInSeconds * randomizedDoubling);
nextDelayInSeconds = Math.min(nextDelayInSeconds * 2, maxSingleDelayInSeconds);
// TODO revisit retry/backoff after next grpc version
retryExecutor.schedule(new Runnable() {
@Override
public void run() {
try {
grpcCall.call(RetryingStreamObserver.this);
} catch (final Throwable t) {
// intentionally capturing InterruptedException here as well to ensure
// reconnect is attempted no matter what
suppressLogCollector(new Runnable() {
@Override
public void run() {
logger.error(t.getMessage(), t);
}
});
}
}
}, currDelay, SECONDS);
}
private void decrementPendingRequestCount() {
if (!init) {
synchronized (discardingDataLogger) {
pendingRequestCount--;
}
}
}
}
}