
com.github.loki4j.client.pipeline.AsyncBufferPipeline Maven / Gradle / Ivy
package com.github.loki4j.client.pipeline;
import java.net.ConnectException;
import java.util.Comparator;
import java.util.Optional;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import java.util.function.Supplier;
import com.github.loki4j.client.batch.Batcher;
import com.github.loki4j.client.batch.BinaryBatch;
import com.github.loki4j.client.batch.ByteBufferQueue;
import com.github.loki4j.client.batch.LogRecord;
import com.github.loki4j.client.batch.LogRecordBatch;
import com.github.loki4j.client.batch.LogRecordStream;
import com.github.loki4j.client.http.Loki4jHttpClient;
import com.github.loki4j.client.http.LokiResponse;
import com.github.loki4j.client.util.ByteBufferFactory;
import com.github.loki4j.client.util.Loki4jLogger;
import com.github.loki4j.client.util.Loki4jThreadFactory;
import com.github.loki4j.client.writer.Writer;
import static com.github.loki4j.client.util.StringUtils.bytesAsBase64String;
import static com.github.loki4j.client.util.StringUtils.bytesAsUtf8String;
public final class AsyncBufferPipeline {
private static final int TOO_MANY_REQUEST_HTTP_STATUS = 429;
private static final Comparator compareByTime = (e1, e2) -> {
var tsCmp = Long.compare(e1.timestampMs, e2.timestampMs);
return tsCmp == 0 ? Integer.compare(e1.nanos, e2.nanos) : tsCmp;
};
private static final Comparator compareByStream = (e1, e2) ->
Long.compare(e1.stream.hash, e2.stream.hash);
private final ConcurrentLinkedQueue buffer = new ConcurrentLinkedQueue<>();
private final long parkTimeoutNs;
private final ByteBufferQueue sendQueue;
private final Batcher batcher;
private final Optional> recordComparator;
private final Writer writer;
/**
* A HTTP client to use for pushing logs to Loki
*/
private final Loki4jHttpClient httpClient;
/**
* A tracker for the performance metrics (if enabled)
*/
private final Loki4jMetrics metrics;
private final Loki4jLogger log;
private final boolean drainOnStop;
private final int maxRetries;
private final long retryTimeoutMs;
/**
* Disables retries of batches that Loki responds to with a 429 status code (TooManyRequests).
* This reduces impacts on batches from other tenants, which could end up being delayed or dropped
* due to backoff.
*/
private final boolean dropRateLimitedBatches;
private volatile boolean started = false;
private AtomicBoolean acceptNewEvents = new AtomicBoolean(true);
private AtomicBoolean drainRequested = new AtomicBoolean(false);
private AtomicLong lastSendTimeMs = new AtomicLong(System.currentTimeMillis());
private AtomicLong unsentEvents = new AtomicLong(0L);
private ScheduledExecutorService scheduler;
private ExecutorService encoderThreadPool;
private ExecutorService senderThreadPool;
private ScheduledFuture> drainScheduledFuture;
public AsyncBufferPipeline(PipelineConfig conf) {
Optional> logRecordComparator = Optional.empty();
if (conf.staticLabels) {
if (conf.sortByTime)
logRecordComparator = Optional.of(compareByTime);
} else {
logRecordComparator = Optional.of(
conf.sortByTime ? compareByStream.thenComparing(compareByTime) : compareByStream);
}
ByteBufferFactory bufferFactory = new ByteBufferFactory(conf.useDirectBuffers);
batcher = new Batcher(conf.batchMaxItems, conf.batchMaxBytes, conf.batchTimeoutMs);
recordComparator = logRecordComparator;
writer = conf.writerFactory.factory.apply(conf.batchMaxBytes, bufferFactory);
sendQueue = new ByteBufferQueue(conf.sendQueueMaxBytes, bufferFactory);
httpClient = conf.httpClientFactory.apply(conf.httpConfig);
drainOnStop = conf.drainOnStop;
maxRetries = conf.maxRetries;
retryTimeoutMs = conf.retryTimeoutMs;
dropRateLimitedBatches = conf.dropRateLimitedBatches;
parkTimeoutNs = TimeUnit.MILLISECONDS.toNanos(conf.internalQueuesCheckTimeoutMs);
this.log = conf.internalLoggingFactory.apply(this);
this.metrics = conf.metricsEnabled ? new Loki4jMetrics(conf.name, () -> unsentEvents.get()) : null;
}
public void start() {
log.info("Pipeline is starting...");
started = true;
senderThreadPool = Executors.newFixedThreadPool(1, new Loki4jThreadFactory("loki4j-sender"));
senderThreadPool.execute(() -> runSendLoop());
encoderThreadPool = Executors.newFixedThreadPool(1, new Loki4jThreadFactory("loki4j-encoder"));
encoderThreadPool.execute(() -> runEncodeLoop());
scheduler = Executors.newScheduledThreadPool(1, new Loki4jThreadFactory("loki4j-scheduler"));
drainScheduledFuture = scheduler.scheduleAtFixedRate(
() -> drain(),
100,
100,
TimeUnit.MILLISECONDS);
log.trace("Pipeline started");
}
public void stop() {
log.trace("Pipeline is stopping...");
drainScheduledFuture.cancel(false);
if (drainOnStop) {
log.info("Pipeline is draining...");
waitSendQueueLessThan(batcher.getCapacity(), Long.MAX_VALUE);
lastSendTimeMs.set(0);
drain();
waitSendQueueIsEmpty(Long.MAX_VALUE);
log.info("Drain completed");
}
started = false;
scheduler.shutdown();
encoderThreadPool.shutdown();
senderThreadPool.shutdown();
try {
httpClient.close();
} catch (Exception e) {
log.error(e, "Error while closing HttpClient");
}
log.trace("Pipeline stopped");
}
public void waitSendQueueIsEmpty(long timeoutMs) {
waitSendQueueLessThan(1, timeoutMs);
}
public boolean append(long timestamp, int nanos, Supplier stream, Supplier message) {
var startedNs = System.nanoTime();
boolean accepted = false;
if (acceptNewEvents.get()) {
var record = LogRecord.create(timestamp, nanos, stream.get(), message.get());
if (batcher.validateLogRecordSize(record)) {
buffer.offer(record);
unsentEvents.incrementAndGet();
accepted = true;
log.trace("Log record was accepted for sending: %s", record);
} else {
log.warn("Dropping the record that exceeds max batch size: %s", record);
}
}
if (metrics != null)
metrics.eventAppended(startedNs, !accepted);
return accepted;
}
private void drain() {
drainRequested.set(true);
log.trace("Drain planned");
}
private void runEncodeLoop() {
var batch = new LogRecordBatch(batcher.getCapacity());
while (started) {
try {
encodeStep(batch);
} catch (InterruptedException e) {
stop();
}
}
}
private void runSendLoop() {
while (started) {
try {
sendStep();
} catch (InterruptedException e) {
stop();
}
}
}
private void encodeStep(LogRecordBatch batch) throws InterruptedException {
while (started && buffer.isEmpty() && !drainRequested.get()) {
LockSupport.parkNanos(this, parkTimeoutNs);
}
if (!started) return;
log.trace("Checking encode actions...");
LogRecord record = buffer.peek();
while(record != null && batch.isEmpty()) {
batcher.checkSizeBeforeAdd(record, batch);
if (batch.isEmpty()) batcher.add(buffer.remove(), batch);
if (batch.isEmpty()) record = buffer.peek();
}
if (batch.isEmpty() && drainRequested.get()) {
batcher.drain(lastSendTimeMs.get(), batch);
log.trace("Draining %s remained log records for encode", batch.size());
}
drainRequested.set(false);
if (batch.isEmpty()) return;
writeBatch(batch, writer);
if (writer.isEmpty()) return;
while(started &&
!sendQueue.offer(
batch.batchId(),
batch.size(),
writer.size(),
b -> writer.toByteBuffer(b))) {
acceptNewEvents.set(false);
LockSupport.parkNanos(this, parkTimeoutNs);
}
batch.clear();
acceptNewEvents.set(true);
}
private void writeBatch(LogRecordBatch batch, Writer writer) {
var startedNs = System.nanoTime();
recordComparator.ifPresent(cmp -> batch.sort(cmp));
try {
writer.serializeBatch(batch);
log.info(
">>> Batch %s converted to %,d bytes",
batch, writer.size());
if (metrics != null)
metrics.batchEncoded(startedNs, writer.size());
} catch (Exception e) {
log.error(e, "Error occurred while serializing batch %s", batch);
unsentEvents.addAndGet(-batch.size());
if (metrics != null) metrics.batchEncodeFailed(() -> e.getClass().getSimpleName());
writer.reset();
batch.clear();
}
}
private void sendStep() throws InterruptedException {
BinaryBatch batch = sendQueue.borrowBuffer();
while(started && batch == null) {
LockSupport.parkNanos(this, parkTimeoutNs);
batch = sendQueue.borrowBuffer();
}
if (!started) return;
try {
sendBatch(batch);
lastSendTimeMs.set(System.currentTimeMillis());
log.trace("Batch %s was successfully sent to Loki", batch);
} finally {
unsentEvents.addAndGet(-batch.sizeItems);
sendQueue.returnBuffer(batch);
}
}
private LokiResponse sendBatch(BinaryBatch batch) {
var startedNs = System.nanoTime();
LokiResponse r = null;
Exception e = null;
int retry = 0;
do {
batch.data.rewind();
// print out the batch before send if tracing is enabled
if (log.isTraceEnabled(this)) {
var payload = new byte[batch.data.limit()];
batch.data.get(payload);
batch.data.rewind();
log.trace("Sending batch %s with %spayload:\n%s",
batch,
writer.isBinary() ? "binary " : "",
writer.isBinary() ? bytesAsBase64String(payload) : bytesAsUtf8String(payload));
}
// try to send the batch
try {
r = httpClient.send(batch.data);
// exit if send is successful
if (r.status >= 200 && r.status < 300) {
log.info("<<< %sBatch %s: Loki responded with status %s",
retry > 0 ? "Retry #" + retry + ". " : "", batch, r.status);
if (metrics != null) metrics.batchSent(startedNs, batch.sizeBytes);
return r;
}
} catch (Exception re) {
e = re;
}
reportSendError(batch, e, r, retry);
} while (
++retry <= maxRetries
&& checkIfEligibleForRetry(e, r)
&& reportRetryFailed(e, r)
&& sleep(retryTimeoutMs));
if (metrics != null) metrics.batchSendFailed(sendErrorReasonProvider(e, r));
return null;
}
private void reportSendError(BinaryBatch batch, Exception e, LokiResponse r, int retry) {
// whether exception occurred or error status received
var exceptionOccurred = e != null;
var isRetry = retry > 0;
if (exceptionOccurred) {
log.error(e,
"%sError while sending Batch %s to Loki (%s)",
isRetry ? "Retry #" + retry + ". " : "", batch, httpClient.getConfig().pushUrl);
} else {
log.error(
"%sLoki responded with non-success status %s on batch %s. Error: %s",
isRetry ? "Retry #" + retry + ". " : "", r.status, batch, r.body);
}
}
private boolean reportRetryFailed(Exception e, LokiResponse r) {
if (metrics != null) metrics.sendRetryFailed(sendErrorReasonProvider(e, r));
return true;
}
private Supplier sendErrorReasonProvider(Exception e, LokiResponse r) {
return () ->
e != null
? "exception:" + e.getClass().getSimpleName()
: "status:" + r.status;
}
private boolean checkIfEligibleForRetry(Exception e, LokiResponse r) {
return e instanceof ConnectException || (r != null && (r.status == 503 || shouldRetryRateLimitedBatches(r.status)));
}
private boolean shouldRetryRateLimitedBatches(int status) {
return status == TOO_MANY_REQUEST_HTTP_STATUS && !dropRateLimitedBatches;
}
private boolean sleep(long timeoutMs) {
try {
Thread.sleep(timeoutMs);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
return true;
}
void waitSendQueueLessThan(int size, long timeoutMs) {
var timeoutNs = TimeUnit.MILLISECONDS.toNanos(timeoutMs);
var elapsedNs = 0L;
while(started && unsentEvents.get() >= size && elapsedNs < timeoutNs) {
LockSupport.parkNanos(parkTimeoutNs);
elapsedNs += parkTimeoutNs;
}
log.trace("Wait send queue: started=%s, buffer(%s)>=%s, %s ms %s elapsed",
started, unsentEvents.get(), size, timeoutMs, elapsedNs < timeoutNs ? "not" : "");
if (elapsedNs >= timeoutNs)
throw new RuntimeException("Not completed within timeout " + timeoutMs + " ms");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy