/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
 */
package org.apache.pinot.broker.failuredetector;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.pinot.common.metrics.BrokerGauge;
import org.apache.pinot.common.metrics.BrokerMetrics;
import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.utils.CommonConstants.Broker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The {@code BaseExponentialBackoffRetryFailureDetector} is a base failure detector implementation that retries
 * unhealthy servers with exponentially increasing delays.
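 *
 * <p>For example (illustrative numbers, not the configured defaults): with a retry initial delay of 100ms and a
 * delay factor of 2.0, an unhealthy server is retried roughly 100ms, 200ms, 400ms, 800ms, ... after being marked
 * unhealthy, until it is marked healthy again or the max retries limit is reached.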
*/
@ThreadSafe
public abstract class BaseExponentialBackoffRetryFailureDetector implements FailureDetector {
  private static final Logger LOGGER = LoggerFactory.getLogger(BaseExponentialBackoffRetryFailureDetector.class);

  protected final String _name = getClass().getSimpleName();
  protected final List<Listener> _listeners = new ArrayList<>();
  // Currently unhealthy servers, keyed by instance id; the value is the pending RetryInfo for that server
  protected final ConcurrentHashMap<String, RetryInfo> _unhealthyServerRetryInfoMap = new ConcurrentHashMap<>();
  // Hands RetryInfo entries to the retry thread once their backoff delay has expired
  protected final DelayQueue<RetryInfo> _retryInfoDelayQueue = new DelayQueue<>();
protected BrokerMetrics _brokerMetrics;
protected long _retryInitialDelayNs;
protected double _retryDelayFactor;
protected int _maxRetries;
protected Thread _retryThread;
  protected volatile boolean _running;

  @Override
public void init(PinotConfiguration config, BrokerMetrics brokerMetrics) {
_brokerMetrics = brokerMetrics;
long retryInitialDelayMs = config.getProperty(Broker.FailureDetector.CONFIG_OF_RETRY_INITIAL_DELAY_MS,
Broker.FailureDetector.DEFAULT_RETRY_INITIAL_DELAY_MS);
_retryInitialDelayNs = TimeUnit.MILLISECONDS.toNanos(retryInitialDelayMs);
_retryDelayFactor = config.getProperty(Broker.FailureDetector.CONFIG_OF_RETRY_DELAY_FACTOR,
Broker.FailureDetector.DEFAULT_RETRY_DELAY_FACTOR);
_maxRetries =
config.getProperty(Broker.FailureDetector.CONFIG_OF_MAX_RETRIES, Broker.FailureDetector.DEFAULT_MAX_RETIRES);
LOGGER.info("Initialized {} with retry initial delay: {}ms, exponential backoff factor: {}, max retries: {}", _name,
retryInitialDelayMs, _retryDelayFactor, _maxRetries);
  }

  @Override
public void register(Listener listener) {
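    // NOTE: Listeners are expected to be registered before start(); this list is not synchronized for
    // concurrent modification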
_listeners.add(listener);
  }

  @Override
public void start() {
LOGGER.info("Starting {}", _name);
_running = true;
_retryThread = new Thread(() -> {
while (_running) {
try {
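          // Block until the earliest-scheduled entry's backoff delay has expired (DelayQueue only releases
          // elements whose getDelay() is non-positive)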
RetryInfo retryInfo = _retryInfoDelayQueue.take();
String instanceId = retryInfo._instanceId;
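          // If the map no longer holds this exact RetryInfo instance, the server was marked healthy (and possibly
          // unhealthy again) after this entry was queued, so the entry is stale and should be dropped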
if (_unhealthyServerRetryInfoMap.get(instanceId) != retryInfo) {
LOGGER.info("Server: {} has been marked healthy, skipping the retry", instanceId);
continue;
}
          if (retryInfo._numRetries >= _maxRetries) {
            LOGGER.warn("Unhealthy server: {} has reached the max retries: {}; not retrying again and treating it as "
                + "healthy so that the listeners do not lose track of the server", instanceId, _maxRetries);
markServerHealthy(instanceId);
continue;
}
LOGGER.info("Retry unhealthy server: {}", instanceId);
for (Listener listener : _listeners) {
listener.retryUnhealthyServer(instanceId, this);
}
// Update the retry info and add it back to the delay queue
retryInfo._retryDelayNs = (long) (retryInfo._retryDelayNs * _retryDelayFactor);
retryInfo._retryTimeNs = System.nanoTime() + retryInfo._retryDelayNs;
          retryInfo._numRetries++;
_retryInfoDelayQueue.offer(retryInfo);
} catch (Exception e) {
if (_running) {
LOGGER.error("Caught exception in the retry thread, continuing with errors", e);
}
}
}
});
_retryThread.setName("failure-detector-retry");
_retryThread.setDaemon(true);
_retryThread.start();
  }

  @Override
public void markServerHealthy(String instanceId) {
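    // computeIfPresent runs atomically per key; returning null removes the mapping, and listeners are only
    // notified if the server was actually tracked as unhealthy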
_unhealthyServerRetryInfoMap.computeIfPresent(instanceId, (id, retryInfo) -> {
LOGGER.info("Mark server: {} as healthy", instanceId);
_brokerMetrics.setValueOfGlobalGauge(BrokerGauge.UNHEALTHY_SERVERS, _unhealthyServerRetryInfoMap.size() - 1);
for (Listener listener : _listeners) {
listener.notifyHealthyServer(instanceId, this);
}
return null;
});
  }

  @Override
public void markServerUnhealthy(String instanceId) {
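    // computeIfAbsent guarantees that a single RetryInfo is created and queued per unhealthy server, even when
    // multiple threads report the same server concurrently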
_unhealthyServerRetryInfoMap.computeIfAbsent(instanceId, id -> {
LOGGER.warn("Mark server: {} as unhealthy", instanceId);
_brokerMetrics.setValueOfGlobalGauge(BrokerGauge.UNHEALTHY_SERVERS, _unhealthyServerRetryInfoMap.size() + 1);
for (Listener listener : _listeners) {
listener.notifyUnhealthyServer(instanceId, this);
}
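      // Schedule the first retry after the initial backoff delay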
RetryInfo retryInfo = new RetryInfo(id);
_retryInfoDelayQueue.offer(retryInfo);
return retryInfo;
});
  }

  @Override
  public Set<String> getUnhealthyServers() {
return _unhealthyServerRetryInfoMap.keySet();
  }

  @Override
public void stop() {
LOGGER.info("Stopping {}", _name);
_running = false;
try {
_retryThread.interrupt();
_retryThread.join();
    } catch (InterruptedException e) {
      // Restore the interrupt status before propagating
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted while waiting for the retry thread to finish", e);
    }
  }

  /**
   * Encapsulates the retry-related information for an unhealthy server.
   */
  protected class RetryInfo implements Delayed {
final String _instanceId;
long _retryTimeNs;
long _retryDelayNs;
    int _numRetries;

    RetryInfo(String instanceId) {
_instanceId = instanceId;
_retryTimeNs = System.nanoTime() + _retryInitialDelayNs;
_retryDelayNs = _retryInitialDelayNs;
      _numRetries = 0;
}
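
    // DelayQueue uses getDelay() and compareTo() to decide when to release an entry; System.nanoTime() keeps the
    // schedule monotonic even if the wall clock is adjusted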
@Override
public long getDelay(TimeUnit unit) {
return unit.convert(_retryTimeNs - System.nanoTime(), TimeUnit.NANOSECONDS);
    }

    @Override
public int compareTo(Delayed o) {
RetryInfo that = (RetryInfo) o;
return Long.compare(_retryTimeNs, that._retryTimeNs);
}
}
}