package com.datorama.oss.timbermill;
import com.datorama.oss.timbermill.common.Constants;
import com.datorama.oss.timbermill.common.ElasticsearchUtil;
import com.datorama.oss.timbermill.common.KamonConstants;
import com.datorama.oss.timbermill.common.cache.AbstractCacheHandler;
import com.datorama.oss.timbermill.plugins.PluginsConfig;
import com.datorama.oss.timbermill.plugins.TaskLogPlugin;
import com.datorama.oss.timbermill.unit.*;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import kamon.metric.Timer;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.logging.log4j.ThreadContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.swing.tree.DefaultMutableTreeNode;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.stream.Collectors;
import static com.datorama.oss.timbermill.ElasticsearchClient.GSON;
import static com.datorama.oss.timbermill.ParentResolver.populateParentParams;
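
/**
 * Consumes batches of raw Timbermill {@link Event}s, enriches them into {@link Task}
 * documents (resolving parent/child relations and orphans through the cache), and
 * indexes the results into Elasticsearch.
 *
 * <p>Illustrative usage; the argument values below are hypothetical, not defaults:
 * <pre>{@code
 * TaskIndexer indexer = new TaskIndexer(pluginsJson, 7, elasticsearchClient, "timbermill-2", cacheHandler);
 * indexer.retrieveAndIndex(events, "production");
 * indexer.close();
 * }</pre>
 */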
public class TaskIndexer {

    private static final Logger LOG = LoggerFactory.getLogger(TaskIndexer.class);

    private final ElasticsearchClient es;
    private final Collection<TaskLogPlugin> logPlugins;
    private AbstractCacheHandler cacheHandler;
    private long daysRotation;
    private String timbermillVersion;

    public TaskIndexer(String pluginsJson, Integer daysRotation, ElasticsearchClient es, String timbermillVersion, AbstractCacheHandler cacheHandler) {
        this.daysRotation = calculateDaysRotation(daysRotation);
        this.logPlugins = PluginsConfig.initPluginsFromJson(pluginsJson);
        this.es = es;
        this.timbermillVersion = timbermillVersion;
        this.cacheHandler = cacheHandler;
    }
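
    /** Clamps the configured rotation to a minimum of one day. */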
    private static int calculateDaysRotation(int daysRotationParam) {
        return Math.max(daysRotationParam, 1);
    }

    public void close() {
        es.close();
        cacheHandler.close();
    }
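
    /**
     * Entry point for a batch: splits incoming events into heartbeat events (indexed
     * as metadata) and regular Timbermill events (normalized, enriched, and indexed),
     * then reports batch metrics.
     */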
    public void retrieveAndIndex(Collection<Event> events, String env) {
        String flowId = "Task Indexer - " + UUID.randomUUID().toString();
        ThreadContext.put("id", flowId);
        LOG.info("#### Batch Start ####");
        Timer.Started start = KamonConstants.BATCH_DURATION_TIMER.withoutTags().start();
        ZonedDateTime taskIndexerStartTime = ZonedDateTime.now();
        LOG.info("{} events to be handled in current batch", events.size());

        Collection<String> heartbeatEvents = new HashSet<>();
        Collection<Event> timbermillEvents = new LinkedHashSet<>();

        events.forEach(e -> {
            if (e.getName() != null && e.getName().equals(Constants.HEARTBEAT_TASK)) {
                String heartbeatJson = GSON.toJson(new HeartbeatTask(e, daysRotation));
                heartbeatEvents.add(heartbeatJson);
            }
            else {
                if (e.getTaskId() == null) {
                    LOG.warn("Task ID is null for event {}", GSON.toJson(e));
                }
                else {
                    e.fixErrors();
                    e.replaceAllFieldsWithDots();
                    e.trimAllStrings();
                    timbermillEvents.add(e);
                }
            }
        });

        if (!heartbeatEvents.isEmpty()) {
            this.es.indexMetaDataTasks(env, heartbeatEvents);
        }
        if (!timbermillEvents.isEmpty()) {
            int previouslyIndexedParentSize = handleTimbermillEvents(env, timbermillEvents);
            reportBatchMetrics(env, previouslyIndexedParentSize, taskIndexerStartTime, timbermillEvents.size());
        }
        start.stop();
        LOG.info("#### Batch End ####");
    }
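
    /**
     * Applies plugins, builds the start-event trees, fetches missing parents under the
     * cache lock, enriches and caches the resulting tasks, and indexes them.
     * Returns the number of previously indexed parent tasks that were fetched.
     */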
    private int handleTimbermillEvents(String env, Collection<Event> timbermillEvents) {
        applyPlugins(timbermillEvents, env);

        Map<String, DefaultMutableTreeNode> nodesMap = Maps.newHashMap();
        Set<String> startEventsIds = Sets.newHashSet();
        Set<String> parentIds = Sets.newHashSet();
        Map<String, List<Event>> eventsMap = Maps.newHashMap();
        populateCollections(timbermillEvents, nodesMap, startEventsIds, parentIds, eventsMap);
        connectNodesByParentId(nodesMap);

        Set<String> missingParentsIds = parentIds.stream().filter(id -> !startEventsIds.contains(id)).collect(Collectors.toSet());

        Map<String, Task> tasksMap;
        Map<String, Task> previouslyIndexedParentTasks;
        String alias = es.createTimbermillAlias(env);
        String index;
        if (!alias.endsWith(ElasticsearchUtil.getIndexSerial(1))) {
            index = es.rolloverIndex(alias);
        }
        else {
            index = alias;
        }

        cacheHandler.lock();
        try {
            LOG.info("Fetching {} missing parents", missingParentsIds.size());
            previouslyIndexedParentTasks = getMissingParents(missingParentsIds, env);
            LOG.info("Fetched {} missing parents", previouslyIndexedParentTasks.size());

            tasksMap = createEnrichedTasks(nodesMap, eventsMap, previouslyIndexedParentTasks, index);
            resolveOrphansFromCache(tasksMap);

            LOG.info("Caching {} tasks", tasksMap.size());
            cacheTasks(tasksMap);
            LOG.info("Caching orphans out of {} tasks", tasksMap.size());
            cacheOrphans(tasksMap);
        } finally {
            cacheHandler.release();
        }

        LOG.info("{} tasks to be indexed to elasticsearch", tasksMap.size());
        es.index(tasksMap);
        LOG.info("Tasks were indexed to elasticsearch");
        return previouslyIndexedParentTasks.size();
    }
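
    /**
     * Asks the ParentResolver for previously cached orphans whose parents arrived in
     * this batch, and merges the adopted tasks back into the current task map.
     */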
    private void resolveOrphansFromCache(Map<String, Task> tasksMap) {
        Timer.Started start = KamonConstants.ORPHANS_JOB_LATENCY.withoutTags().start();
        ParentResolver resolver = new ParentResolver(tasksMap, cacheHandler);
        Map<String, Task> adoptedTasksMap = resolver.resolveOrphansReceived();

        for (Map.Entry<String, Task> adoptedEntry : adoptedTasksMap.entrySet()) {
            String adoptedId = adoptedEntry.getKey();
            Task adoptedTask = adoptedEntry.getValue();
            if (tasksMap.containsKey(adoptedId)) {
                tasksMap.get(adoptedId).mergeTask(adoptedTask, adoptedId);
            }
            else {
                tasksMap.put(adoptedId, adoptedTask);
            }
        }
        int adopted = adoptedTasksMap.size();
        if (adopted > 0) {
            LOG.info("{} orphans resolved", adopted);
        }
        KamonConstants.ORPHANS_ADOPTED_HISTOGRAM.withoutTags().record(adopted);
        start.stop();
    }
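
    /**
     * Collects tasks from this batch that are still orphans, merges in orphan IDs
     * already cached for the same parents, and pushes the combined mapping
     * (parent ID to orphan task IDs) back to the orphans cache.
     */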
    private void cacheOrphans(Map<String, Task> tasksMap) {
        Map<String, List<String>> parentToOrphansMap = Maps.newHashMap();
        for (Map.Entry<String, Task> entry : tasksMap.entrySet()) {
            Task orphanTask = entry.getValue();
            String orphanId = entry.getKey();
            String parentId = orphanTask.getParentId();
            if (parentId != null) {
                if (orphanTask.isOrphan() != null && orphanTask.isOrphan()) {
                    List<String> tasks = parentToOrphansMap.get(parentId);
                    if (tasks == null) {
                        tasks = Lists.newArrayList(orphanId);
                    } else {
                        tasks.add(orphanId);
                    }
                    parentToOrphansMap.put(parentId, tasks);
                }
            }
        }
        if (!parentToOrphansMap.isEmpty()) {
            Map<String, List<String>> fromOrphansCache = cacheHandler.logPullFromOrphansCache(parentToOrphansMap.keySet(), "cache_orphans");
            for (Map.Entry<String, List<String>> entry : fromOrphansCache.entrySet()) {
                String parentId = entry.getKey();
                List<String> orphansList = parentToOrphansMap.get(parentId);
                List<String> orphanListFromCache = entry.getValue();
                orphansList.addAll(orphanListFromCache);
            }
            cacheHandler.logPushToOrphanCache(parentToOrphansMap, "cache_orphans");
        }
    }
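
    /**
     * Merges each task in the batch with its previously cached version (preserving the
     * cached index) and writes the merged LocalTask entries back to the tasks cache.
     */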
    private void cacheTasks(Map<String, Task> tasksMap) {
        HashMap<String, LocalTask> updatedTasks = Maps.newHashMap();
        Map<String, Task> idToTaskMap = cacheHandler.logGetFromTasksCache(tasksMap.keySet(), "cache_tasks");
        for (Map.Entry<String, Task> entry : tasksMap.entrySet()) {
            Task task = entry.getValue();
            LocalTask localTask = new LocalTask(task);
            String id = entry.getKey();
            Task cachedTask = idToTaskMap.get(id);
            if (cachedTask != null) {
                localTask.mergeTask(cachedTask, id);
                localTask.setIndex(cachedTask.getIndex());
                task.setIndex(cachedTask.getIndex());
            }
            updatedTasks.put(id, localTask);
        }
        cacheHandler.logPushToTasksCache(updatedTasks, "cache_tasks");
    }
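
    /**
     * Resolves parent tasks that did not start in this batch: first from the tasks
     * cache, then from Elasticsearch for any IDs still missing.
     */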
    private Map<String, Task> getMissingParents(Set<String> parentIds, String env) {
        int missingParentAmount = parentIds.size();
        KamonConstants.MISSING_PARENTS_HISTOGRAM.withoutTags().record(missingParentAmount);

        Map<String, Task> previouslyIndexedParentTasks = Maps.newHashMap();
        try {
            if (!parentIds.isEmpty()) {
                Map<String, Task> parentMap = cacheHandler.logGetFromTasksCache(parentIds, "missing_parents");
                parentMap.forEach((parentId, parentTask) -> {
                    if (parentTask != null) {
                        previouslyIndexedParentTasks.put(parentId, parentTask);
                    }
                    if (parentTask != null && parentTask.getPrimaryId() == null && parentTask.getParentsPath() != null && !parentTask.getParentsPath().isEmpty()) {
                        LOG.debug("getMissingParents CACHE missing primary localtask: {}", GSON.toJson(parentTask));
                    }
                });
            }
        } catch (Throwable t) {
            LOG.error("Error fetching previously indexed parent tasks from cache", t);
        }

        parentIds.removeAll(previouslyIndexedParentTasks.keySet());
        if (!parentIds.isEmpty()) {
            Map<String, Task> fromEs = es.getMissingParents(parentIds, env);
            for (Task value : fromEs.values()) {
                if (value.getPrimaryId() == null && value.getParentsPath() != null && !value.getParentsPath().isEmpty()) {
                    LOG.info("getMissingParents ES missing primary task: {}", GSON.toJson(value));
                }
            }
            previouslyIndexedParentTasks.putAll(fromEs);
            if (!fromEs.isEmpty()) {
                LOG.info("Fetched {} missing parents from Elasticsearch", fromEs.size());
            }
        }
        return previouslyIndexedParentTasks;
    }
    private void reportBatchMetrics(String env, int tasksFetchedSize, ZonedDateTime taskIndexerStartTime, int indexedTasksSize) {
        ZonedDateTime taskIndexerEndTime = ZonedDateTime.now();
        long timesDuration = ElasticsearchUtil.getTimesDuration(taskIndexerStartTime, taskIndexerEndTime);
        reportToElasticsearch(env, tasksFetchedSize, taskIndexerStartTime, indexedTasksSize, timesDuration, taskIndexerEndTime);
        reportToKamon(tasksFetchedSize, indexedTasksSize);
    }

    private void reportToKamon(int tasksFetchedSize, int indexedTasksSize) {
        KamonConstants.MISSING_PARENTS_TASKS_FETCHED_HISTOGRAM.withoutTags().record(tasksFetchedSize);
        KamonConstants.TASKS_INDEXED_HISTOGRAM.withoutTags().record(indexedTasksSize);
    }

    private void reportToElasticsearch(String env, int tasksFetchedSize, ZonedDateTime taskIndexerStartTime, int indexedTasksSize, long timesDuration, ZonedDateTime now) {
        IndexEvent indexEvent = new IndexEvent(env, tasksFetchedSize, taskIndexerStartTime, now, indexedTasksSize, daysRotation, timesDuration);
        es.indexMetaDataTasks(env, Lists.newArrayList(GSON.toJson(indexEvent)));
    }
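
    /**
     * Single pass over the batch that records a tree node per start event, collects
     * referenced parent IDs, and groups all events by task ID.
     */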
    private void populateCollections(Collection<Event> timbermillEvents, Map<String, DefaultMutableTreeNode> nodesMap, Set<String> startEventsIds, Set<String> parentIds,
            Map<String, List<Event>> eventsMap) {
        timbermillEvents.forEach(event -> {
            if (event.isStartEvent()) {
                startEventsIds.add(event.getTaskId());
                nodesMap.put(event.getTaskId(), new DefaultMutableTreeNode(event));
            }
            if (event.getParentId() != null) {
                parentIds.add(event.getParentId());
            }
            if (!eventsMap.containsKey(event.getTaskId())) {
                eventsMap.put(event.getTaskId(), Lists.newArrayList(event));
            }
            else {
                List<Event> events = eventsMap.get(event.getTaskId());
                events.add(event);
            }
        });
    }
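
    /**
     * Links each start event's tree node to its parent's node (when the parent also
     * started in this batch), forming the trees traversed during enrichment.
     */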
    private void connectNodesByParentId(Map<String, DefaultMutableTreeNode> nodesMap) {
        for (DefaultMutableTreeNode treeNode : nodesMap.values()) {
            Event startEvent = (Event) treeNode.getUserObject();
            String parentId = startEvent.getParentId();
            if (parentId != null) {
                DefaultMutableTreeNode parentNode = nodesMap.get(parentId);
                if (parentNode != null) {
                    parentNode.add(treeNode);
                }
            }
        }
    }
    private Map<String, Task> createEnrichedTasks(Map<String, DefaultMutableTreeNode> nodesMap, Map<String, List<Event>> eventsMap,
            Map<String, Task> previouslyIndexedParentTasks, String index) {
        enrichStartEventsByOrder(nodesMap.values(), eventsMap, previouslyIndexedParentTasks);
        return getTasksFromEvents(eventsMap, index);
    }

    private Map<String, Task> getTasksFromEvents(Map<String, List<Event>> eventsMap, String index) {
        Map<String, Task> tasksMap = new HashMap<>();
        for (Map.Entry<String, List<Event>> eventEntry : eventsMap.entrySet()) {
            Task task = new Task(eventEntry.getValue(), index, daysRotation, timbermillVersion);
            tasksMap.put(eventEntry.getKey(), task);
        }
        return tasksMap;
    }
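
    /**
     * Traverses each root start event's tree breadth-first, so parents are always
     * enriched before their children.
     */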
    private void enrichStartEventsByOrder(Collection<DefaultMutableTreeNode> nodes, Map<String, List<Event>> eventsMap, Map<String, Task> previouslyIndexedTasks) {
        /*
         * Compute origins and down merge parameters from parent
         */
        for (DefaultMutableTreeNode node : nodes) {
            if (node.isRoot()) {
                Enumeration<?> enumeration = node.breadthFirstEnumeration();
                while (enumeration.hasMoreElements()) {
                    DefaultMutableTreeNode curr = (DefaultMutableTreeNode) enumeration.nextElement();
                    Event startEvent = (Event) curr.getUserObject();
                    enrichStartEvent(eventsMap, previouslyIndexedTasks, startEvent);
                }
            }
        }
    }
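
    /**
     * Sanity check: warns when any task ID has more than one start event.
     */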
    public static void logErrorInEventsMap(Map<String, List<Event>> eventsMap, String where) {
        for (Map.Entry<String, List<Event>> stringListEntry : eventsMap.entrySet()) {
            List<Event> value = stringListEntry.getValue();
            if (value.stream().filter(Event::isStartEvent).count() > 1) {
                LOG.warn("Too many start events in {}. Events: {}", where, GSON.toJson(value));
            }
        }
    }
    private void enrichStartEvent(Map<String, List<Event>> eventsMap, Map<String, Task> previouslyIndexedTasks, Event startEvent) {
        String parentId = startEvent.getParentId();
        if (parentId != null) {
            if (isOrphan(startEvent, previouslyIndexedTasks, eventsMap)) {
                startEvent.setOrphan(true);
                startEvent.setPrimaryId(null);
            }
            else {
                populateParentParams(startEvent, previouslyIndexedTasks.get(parentId), eventsMap.get(parentId));
            }
        }
        else {
            startEvent.setPrimaryId(startEvent.getTaskId());
        }
    }
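
    /**
     * An event is considered an orphan when its parent cannot be found in this batch
     * or among previously indexed tasks, or when the found parent is itself an orphan
     * and not adopted.
     */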
    private boolean isOrphan(Event event, Map<String, Task> previouslyIndexedTasks, Map<String, List<Event>> eventsMap) {
        String parentId = event.getParentId();
        if (parentId == null) {
            return false;
        } else {
            if (previouslyIndexedTasks.containsKey(parentId)) {
                Task parentTask = previouslyIndexedTasks.get(parentId);
                return parentTask.isOrphan() != null && parentTask.isOrphan();
            }
            if (eventsMap.containsKey(parentId)) {
                if (eventsMap.get(parentId).stream().anyMatch(Event::isAdoptedEvent)) {
                    return false;
                }
                if (eventsMap.get(parentId).stream().anyMatch(Event::isStartEvent)) {
                    List<Event> parentEvents = eventsMap.get(parentId).stream().filter(Event::isStartEvent).collect(Collectors.toList());
                    if (parentEvents.size() != 1) {
                        LOG.warn("Too many parents found for parent ID [{}] child task ID [{}] Events: {}", parentId, event.getTaskId(), GSON.toJson(parentEvents));
                    }
                    for (Event e : parentEvents) {
                        if (e.isOrphan() != null && e.isOrphan()) {
                            return true;
                        }
                    }
                    return false;
                }
            }
            return true;
        }
    }
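
    /**
     * Runs every configured TaskLogPlugin over the batch and indexes a
     * PluginApplierTask metadata document recording each plugin's outcome and duration.
     */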
    private void applyPlugins(Collection<Event> events, String env) {
        try {
            for (TaskLogPlugin plugin : logPlugins) {
                ZonedDateTime startTime = ZonedDateTime.now();
                TaskStatus status;
                String exception = null;
                try {
                    plugin.apply(events);
                    status = TaskStatus.SUCCESS;
                } catch (Exception ex) {
                    exception = ExceptionUtils.getStackTrace(ex);
                    status = TaskStatus.ERROR;
                    LOG.error("Error in plugin {}", plugin, ex);
                }
                ZonedDateTime endTime = ZonedDateTime.now();
                long duration = ElasticsearchUtil.getTimesDuration(startTime, endTime);
                PluginApplierTask pluginApplierTask = new PluginApplierTask(env, plugin.getName(), plugin.getClass().getSimpleName(), status, exception, endTime, duration, startTime, daysRotation);
                es.indexMetaDataTasks(env, Lists.newArrayList(GSON.toJson(pluginApplierTask)));
            }
        } catch (Throwable t) {
            LOG.error("Error running plugins", t);
        }
    }
}