com.increff.commons.es.ESManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-gelf-springboot Show documentation
Show all versions of commons-gelf-springboot Show documentation
This library is useful for logging HTTP requests and responses using the GELF protocol to Graylog and to ELK for monitoring.
The newest version!
package com.increff.commons.es;/*
* Copyright (c) 2021. Increff
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
import java.util.Arrays;
import java.util.concurrent.LinkedBlockingDeque;
import lombok.extern.log4j.Log4j2;
import org.springframework.web.client.HttpStatusCodeException;
/**
* This class is designed to create a fast and fault tolerant ES client.
* All new messages are queued, and a background thread tries to push to ELK.
* This way calling methods are not slowed down
* If there is no space in queue, then oldest message is dropped(queue.pollFirst())
* Then new message is put in queue.
* If a message delivery fails because of 402, 403, 404, 502 etc, then message is added to top of queue,
* so that it is retried quickly. However, if queue is full, then that too will be dropped
* If, there was no message, or status was 404 or 502 (given by load balancers) then the thread
* sleeps for RETRY_SLEEP_TIME milliseconds to avoid unnecessary loops
* All ESManager methods are synchronized to ensure thread safety.
*/
@Log4j2
public class ESManager implements Runnable {
private static int RETRY_MAX_COUNT = 10;
private static int RETRY_WAIT_TIME = 60_000; // 60 seconds
private static int MAX_QUEUE_SIZE = 1000;
private static int EMPTY_WAIT_TIME = 1_000; // 1 second
private ESMetrics m;
private ESClient c;
private LinkedBlockingDeque q;
private boolean running;
private int retryCount;
private IESLogProvider logProvider;
/**
* Constructs a new ESManager with the specified base URL, port, user, and password.
* Initializes the queue with a maximum size, creates a new ESMetrics object, and a new ESClient with the provided parameters.
*
* @param baseUrl The base URL for the ESClient.
* @param port The port for the ESClient.
* @param user The user for the ESClient.
* @param password The password for the ESClient.
*/
public ESManager(String baseUrl, int port, String user, String password) {
this.q = new LinkedBlockingDeque<>(MAX_QUEUE_SIZE);
this.m = new ESMetrics();
this.c = new ESClient(baseUrl, port, user, password);
}
// FOR STARTING AND STOPPING
public void setLogProvider(IESLogProvider logProvider) {
this.logProvider = logProvider;
}
/**
* Starts the ESManager if it is not already running.
* It creates a new thread and starts it, setting the running flag to true.
*/
public synchronized void start() {
if (!running) {
Thread t = new Thread(this);
t.start();
running = true;
}
}
/**
* Stops the ESManager if it is running.
* It sets the running flag to false and logs all pending messages in the queue.
*/
public synchronized void stop() {
running = false;
ESRequest msg = null;
while (!q.isEmpty()) { // log all pending messages
msg = getFirst();
dropRequest(msg);
}
}
/**
* Checks if the ESManager is currently running.
* @return true if the ESManager is running, false otherwise.
*/
public synchronized boolean isRunning() {
return running;
}
/**
* Retrieves the current size of the queue.
* @return The size of the queue.
*/
public synchronized int getQueueSize() {
return q.size();
}
/**
* Retrieves the metrics of the ESManager.
* @return The ESMetrics object containing the metrics of the ESManager.
*/
public synchronized ESMetrics getMetrics() {
return m;
}
/**
* Adds a new ESRequest to the queue.
* If the remaining capacity of the queue is less than 10, the oldest request is removed from the queue.
* The method also logs an error message indicating the dropping of an ELK request and the current queue capacity.
* After ensuring there is space in the queue, the new request is added to the queue
* and the number of received requests is incremented by 1.
*
* @param req The ESRequest to be added to the queue.
*/
public synchronized void add(ESRequest req) {
// we want to keep the latest request, so remove first message if queue is full
if (q.remainingCapacity() < 10) {
ESRequest dropReq = getFirst();
dropRequest(dropReq);
log.error("Dropping ELK request: queue capacity: " + q.remainingCapacity());
}
q.offer(req);
m.addNumRecieved(1);
}
private synchronized ESRequest getFirst() {
// Retrieves and removes the first element of this deque, or returns null if
// this deque is empty.
ESRequest r = q.pollFirst();
if (r != null) {
m.addNumProcessed(+1);
}
return r;
}
private synchronized void retry(ESRequest req) {
// Inserts the specified element at the front of this deque if it is possible to
// do so immediately without violating capacity restrictions,returning true upon
// success and false if no space is currently available.
boolean result = q.offerFirst(req);
if (!result) {
dropRequest(req);
} else {
m.addNumProcessed(-1);
}
}
private void dropRequest(ESRequest req) {
if (req == null) {
return;
}
m.addNumDropped(1);
if (logProvider == null) {
return;
}
try {
String json = ESEncoder.getJson(req);
logProvider.log(json);
} catch (Exception e) {
// return, cannot do much here really
}
}
public void run() {
// Note: We do not reset retryCount because if some message has failed even
// after trying for RETRY_MAX_COUNT, then it is likely that the next message
// will also fail. This can make the queue too large. Thus after
// RETRY_MAX_COUNT, it is best to keep on trying to send messages
// and set it to 0 only when a message has been successfully delivered
ESRequest req = null;
int errStatus = 0, waitTimeMs = 0;
while (isRunning()) {
waitTimeMs = 0;
errStatus = 0;
try {
req = getFirst();
if (req != null) {
c.send(req);
errStatus = 200;
retryCount = 0;
m.addNumSuccess(1);
}
} catch (HttpStatusCodeException e) {
errStatus = e.getRawStatusCode();
retryCount++;
log.error("error in sending log to elk: request_name: " + req.getRequestName() + " error: " + e.getMessage() + Arrays.toString(e.getStackTrace()));
} catch (Exception e) {
errStatus = 9999; // Some uknown issue has happened
retryCount++;
log.error("error in sending log to elk: request_name: " + req.getRequestName() + " error: " + e.getMessage() + Arrays.toString(e.getStackTrace()));
}
if (errStatus == 200) {
// request sent successfully, do nothing!
waitTimeMs = 0;
} else if (errStatus == 0) {
waitTimeMs = EMPTY_WAIT_TIME; // no request, sleep for 1 seconds
} else if (retryCount < RETRY_MAX_COUNT) {
// error in sending, requeue & sleep for RETRY_SLEEP_TIME seconds
// So total maximum we will wait for RETRY_MAX_COUNT*RETRY_SLEEP_TIME seconds
// This is 10 minutes for now
retry(req);
waitTimeMs = RETRY_WAIT_TIME;
} else {
dropRequest(req);
}
try {
// Read on Thread.sleep(0) also
// https://stackoverflow.com/questions/3257708/thread-sleep0-what-is-the-normal-behavior
Thread.sleep(waitTimeMs);
} catch (InterruptedException e) {
stop();
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy