package com.datorama.oss.timbermill;

import com.datorama.oss.timbermill.common.Constants;
import com.datorama.oss.timbermill.common.ElasticsearchUtil;
import com.datorama.oss.timbermill.common.KamonConstants;
import com.datorama.oss.timbermill.common.cache.AbstractCacheHandler;
import com.datorama.oss.timbermill.plugins.PluginsConfig;
import com.datorama.oss.timbermill.plugins.TaskLogPlugin;
import com.datorama.oss.timbermill.unit.*;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import kamon.metric.Timer;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.logging.log4j.ThreadContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.swing.tree.DefaultMutableTreeNode;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.stream.Collectors;

import static com.datorama.oss.timbermill.ElasticsearchClient.GSON;
import static com.datorama.oss.timbermill.ParentResolver.populateParentParams;

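/**
 * Transforms a batch of raw Timbermill {@link Event}s into enriched {@link Task} documents and
 * indexes them into Elasticsearch. Parent/child relationships are resolved against start events
 * in the same batch, then against the task cache, and finally against previously indexed tasks
 * in Elasticsearch; events whose parents cannot be found are marked as orphans and cached until
 * a later batch adopts them.
 */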
public class TaskIndexer {

    private static final Logger LOG = LoggerFactory.getLogger(TaskIndexer.class);

    private final ElasticsearchClient es;
    private final Collection<TaskLogPlugin> logPlugins;
    private AbstractCacheHandler cacheHandler;
    private long daysRotation;
    private String timbermillVersion;

    public TaskIndexer(String pluginsJson, Integer daysRotation, ElasticsearchClient es, String timbermillVersion, AbstractCacheHandler cacheHandler) {
        this.daysRotation = calculateDaysRotation(daysRotation);
        this.logPlugins = PluginsConfig.initPluginsFromJson(pluginsJson);
        this.es = es;
        this.timbermillVersion = timbermillVersion;
        this.cacheHandler = cacheHandler;
    }

    private static int calculateDaysRotation(int daysRotationParam) {
        return Math.max(daysRotationParam, 1);
    }

    public void close() {
        es.close();
        cacheHandler.close();
    }

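    /**
     * Entry point for a single batch. Splits incoming events into heartbeat metadata events and
     * regular Timbermill events, sanitizes the latter (fixing errors, replacing dotted field
     * names, trimming strings), indexes heartbeats as metadata tasks, and hands the rest to
     * {@link #handleTimbermillEvents}.
     */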
    public void retrieveAndIndex(Collection<Event> events, String env) {
        String flowId = "Task Indexer - " + UUID.randomUUID().toString();
        ThreadContext.put("id", flowId);
        LOG.info("#### Batch Start ####");
        Timer.Started start = KamonConstants.BATCH_DURATION_TIMER.withoutTags().start();
        ZonedDateTime taskIndexerStartTime = ZonedDateTime.now();
        LOG.info("{} events to be handled in current batch", events.size());

        Collection<String> heartbeatEvents = new HashSet<>();
        Collection<Event> timbermillEvents = new LinkedHashSet<>();

        events.forEach(e -> {
            if (e.getName() != null && e.getName().equals(Constants.HEARTBEAT_TASK)){
                String heartbeatJson = GSON.toJson(new HeartbeatTask(e, daysRotation));
                heartbeatEvents.add(heartbeatJson);
            }
            else{
                if (e.getTaskId() == null){
                    LOG.warn("Task ID is null for event {}", GSON.toJson(e));
                }
                else {
                    e.fixErrors();
                    e.replaceAllFieldsWithDots();
                    e.trimAllStrings();
                    timbermillEvents.add(e);
                }
            }
        });

        if (!heartbeatEvents.isEmpty()) {
            this.es.indexMetaDataTasks(env, heartbeatEvents);
        }

        if (!timbermillEvents.isEmpty()) {
            int previouslyIndexedParentSize = handleTimbermillEvents(env, timbermillEvents);
            reportBatchMetrics(env, previouslyIndexedParentSize, taskIndexerStartTime, timbermillEvents.size());
        }
        start.stop();
        LOG.info("#### Batch End ####");
    }

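    /**
     * Core batch pipeline: applies plugins, builds a tree of start events keyed by task ID,
     * resolves parents missing from the batch (from cache, then Elasticsearch), enriches and
     * merges events into tasks, and indexes the result. The cache lock is held while reading
     * and updating the task and orphan caches.
     */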
    private int handleTimbermillEvents(String env, Collection<Event> timbermillEvents) {
        applyPlugins(timbermillEvents, env);

        Map<String, DefaultMutableTreeNode> nodesMap = Maps.newHashMap();
        Set<String> startEventsIds = Sets.newHashSet();
        Set<String> parentIds = Sets.newHashSet();
        Map<String, List<Event>> eventsMap = Maps.newHashMap();
        populateCollections(timbermillEvents, nodesMap, startEventsIds, parentIds, eventsMap);
        connectNodesByParentId(nodesMap);

        Set<String> missingParentsIds = parentIds.stream().filter(id -> !startEventsIds.contains(id)).collect(Collectors.toSet());

        Map<String, Task> tasksMap;
        Map<String, Task> previouslyIndexedParentTasks;

        String alias = es.createTimbermillAlias(env);

        String index;
        if (!alias.endsWith(ElasticsearchUtil.getIndexSerial(1))){
            index = es.rolloverIndex(alias);
        }
        else{
            index = alias;
        }

        cacheHandler.lock();
        try {
            LOG.info("Fetching {} missing parents", missingParentsIds.size());
            previouslyIndexedParentTasks = getMissingParents(missingParentsIds, env);
            LOG.info("Fetched {} missing parents", previouslyIndexedParentTasks.size());
            tasksMap = createEnrichedTasks(nodesMap, eventsMap, previouslyIndexedParentTasks, index);
            resolveOrphansFromCache(tasksMap);

            LOG.info("Caching {} tasks", tasksMap.size());
            cacheTasks(tasksMap);
            LOG.info("Caching {} orphans", tasksMap.size());
            cacheOrphans(tasksMap);
        } finally {
            cacheHandler.release();
        }
        LOG.info("{} tasks to be indexed to elasticsearch", tasksMap.size());
        es.index(tasksMap);
        LOG.info("Tasks were indexed to elasticsearch");
        return previouslyIndexedParentTasks.size();
    }

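    /**
     * Uses {@link ParentResolver} to adopt previously cached orphans whose parents arrived in
     * this batch, merging each adopted task into the batch's task map, either into an existing
     * task with the same ID or as a new entry.
     */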
    private void resolveOrphansFromCache(Map<String, Task> tasksMap) {
        Timer.Started start = KamonConstants.ORPHANS_JOB_LATENCY.withoutTags().start();

        ParentResolver resolver = new ParentResolver(tasksMap, cacheHandler);
        Map<String, Task> adoptedTasksMap = resolver.resolveOrphansReceived();

        for (Map.Entry<String, Task> adoptedEntry : adoptedTasksMap.entrySet()) {
            String adoptedId = adoptedEntry.getKey();
            Task adoptedTask = adoptedEntry.getValue();
            if (tasksMap.containsKey(adoptedId)){
                tasksMap.get(adoptedId).mergeTask(adoptedTask, adoptedId);
            }
            else{
                tasksMap.put(adoptedId, adoptedTask);
            }
        }
        int adopted = adoptedTasksMap.size();
        if (adopted > 0) {
            LOG.info("{} orphans resolved", adopted);
        }
        KamonConstants.ORPHANS_ADOPTED_HISTOGRAM.withoutTags().record(adopted);
        start.stop();
    }

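    /**
     * Groups the IDs of this batch's orphan tasks by their parent ID, merges in any orphan IDs
     * already cached for those parents, and writes the combined mapping back to the orphans
     * cache so the orphans can be adopted when their parents eventually arrive.
     */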
    private void cacheOrphans(Map<String, Task> tasksMap) {
        Map<String, List<String>> parentToOrphansMap = Maps.newHashMap();

        for (Map.Entry<String, Task> entry : tasksMap.entrySet()) {
            Task orphanTask = entry.getValue();
            String orphanId = entry.getKey();
            String parentId = orphanTask.getParentId();
            if (parentId != null) {
                if (orphanTask.isOrphan() != null && orphanTask.isOrphan()) {
                    List<String> tasks = parentToOrphansMap.get(parentId);
                    if (tasks == null) {
                        tasks = Lists.newArrayList(orphanId);
                    } else {
                        tasks.add(orphanId);
                    }
                    parentToOrphansMap.put(parentId, tasks);
                }
            }
        }

        if (!parentToOrphansMap.isEmpty()) {
            Map<String, List<String>> fromOrphansCache = cacheHandler.logPullFromOrphansCache(parentToOrphansMap.keySet(), "cache_orphans");
            for (Map.Entry<String, List<String>> entry : fromOrphansCache.entrySet()) {
                String parentId = entry.getKey();
                List<String> orphansList = parentToOrphansMap.get(parentId);
                List<String> orphanListFromCache = entry.getValue();
                orphansList.addAll(orphanListFromCache);
            }

            cacheHandler.logPushToOrphanCache(parentToOrphansMap, "cache_orphans");
        }
    }

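    /**
     * Merges each task in the batch with its cached counterpart (if any), preserving the index
     * the cached version was written to, and pushes the merged {@link LocalTask}s back to the
     * tasks cache.
     */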
    private void cacheTasks(Map<String, Task> tasksMap) {
        Map<String, LocalTask> updatedTasks = Maps.newHashMap();
        Map<String, Task> idToTaskMap = cacheHandler.logGetFromTasksCache(tasksMap.keySet(), "cache_tasks");
        for (Map.Entry<String, Task> entry : tasksMap.entrySet()) {
            Task task = entry.getValue();
            LocalTask localTask = new LocalTask(task);
            String id = entry.getKey();
            Task cachedTask = idToTaskMap.get(id);
            if (cachedTask != null) {
                localTask.mergeTask(cachedTask, id);
                localTask.setIndex(cachedTask.getIndex());
                task.setIndex(cachedTask.getIndex());
            }
            updatedTasks.put(id, localTask);
        }
        cacheHandler.logPushToTasksCache(updatedTasks, "cache_tasks");
    }

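    /**
     * Resolves parent tasks that did not start in the current batch: first from the tasks
     * cache, then from Elasticsearch for any IDs the cache could not supply. Note that this
     * method mutates the given set, removing the IDs that were found in the cache.
     */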
    private Map<String, Task> getMissingParents(Set<String> parentIds, String env) {
        int missingParentAmount = parentIds.size();
        KamonConstants.MISSING_PARENTS_HISTOGRAM.withoutTags().record(missingParentAmount);

        Map<String, Task> previouslyIndexedParentTasks = Maps.newHashMap();
        try {
            if (!parentIds.isEmpty()) {
                Map<String, Task> parentMap = cacheHandler.logGetFromTasksCache(parentIds, "missing_parents");
                parentMap.forEach((parentId, parentTask) -> {
                    if (parentTask != null) {
                        previouslyIndexedParentTasks.put(parentId, parentTask);
                        if (parentTask.getPrimaryId() == null && parentTask.getParentsPath() != null && !parentTask.getParentsPath().isEmpty()){
                            LOG.debug("getMissingParents CACHE missing primary localtask: {}", GSON.toJson(parentTask));
                        }
                    }
                });
            }
        } catch (Throwable t) {
            LOG.error("Error fetching indexed tasks from Elasticsearch", t);
        }

        parentIds.removeAll(previouslyIndexedParentTasks.keySet());
        if (!parentIds.isEmpty()) {
            Map<String, Task> fromEs = es.getMissingParents(parentIds, env);
            for (Task value : fromEs.values()) {
                if (value.getPrimaryId() == null && value.getParentsPath() != null && !value.getParentsPath().isEmpty()){
                    LOG.info("getMissingParents ES missing primary task: {}", GSON.toJson(value));
                }
            }
            previouslyIndexedParentTasks.putAll(fromEs);

            if (!fromEs.isEmpty()) {
                LOG.info("Fetched {} missing parents from Elasticsearch", fromEs.size());
            }
        }

        return previouslyIndexedParentTasks;
    }

    private void reportBatchMetrics(String env, int tasksFetchedSize, ZonedDateTime taskIndexerStartTime, int indexedTasksSize) {
        ZonedDateTime taskIndexerEndTime = ZonedDateTime.now();
        long timesDuration = ElasticsearchUtil.getTimesDuration(taskIndexerStartTime, taskIndexerEndTime);
        reportToElasticsearch(env, tasksFetchedSize, taskIndexerStartTime, indexedTasksSize, timesDuration, taskIndexerEndTime);
        reportToKamon(tasksFetchedSize, indexedTasksSize);
    }

    private void reportToKamon(int tasksFetchedSize, int indexedTasksSize) {
        KamonConstants.MISSING_PARENTS_TASKS_FETCHED_HISTOGRAM.withoutTags().record(tasksFetchedSize);
        KamonConstants.TASKS_INDEXED_HISTOGRAM.withoutTags().record(indexedTasksSize);
    }

    private void reportToElasticsearch(String env, int tasksFetchedSize, ZonedDateTime taskIndexerStartTime, int indexedTasksSize, long timesDuration, ZonedDateTime now) {
        IndexEvent indexEvent = new IndexEvent(env, tasksFetchedSize, taskIndexerStartTime, now, indexedTasksSize, daysRotation, timesDuration);
        es.indexMetaDataTasks(env, Lists.newArrayList(GSON.toJson(indexEvent)));
    }

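    /**
     * Single pass over the batch that builds the lookup structures used downstream: a tree node
     * per start event, the set of start event IDs, the set of referenced parent IDs, and a
     * task-ID to events map.
     */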
    private void populateCollections(Collection<Event> timbermillEvents, Map<String, DefaultMutableTreeNode> nodesMap, Set<String> startEventsIds, Set<String> parentIds,
            Map<String, List<Event>> eventsMap) {
        timbermillEvents.forEach(event -> {
            if (event.isStartEvent()){
                startEventsIds.add(event.getTaskId());

                nodesMap.put(event.getTaskId(), new DefaultMutableTreeNode(event));
            }
            if (event.getParentId() != null){
                parentIds.add(event.getParentId());
            }

            if (!eventsMap.containsKey(event.getTaskId())){
                eventsMap.put(event.getTaskId(), Lists.newArrayList(event));
            }
            else {
                List<Event> events = eventsMap.get(event.getTaskId());
                events.add(event);
            }
        });
    }

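    /**
     * Links each start event's tree node to its parent's node when the parent also started in
     * this batch, producing the forest traversed by {@link #enrichStartEventsByOrder}.
     */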
    private void connectNodesByParentId(Map<String, DefaultMutableTreeNode> nodesMap) {
        for (DefaultMutableTreeNode treeNode : nodesMap.values()) {

            Event startEvent = (Event) treeNode.getUserObject();
            String parentId = startEvent.getParentId();
            if (parentId != null) {
                DefaultMutableTreeNode parentNode = nodesMap.get(parentId);
                if (parentNode != null) {
                    parentNode.add(treeNode);
                }
            }
        }
    }

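    /**
     * Enriches start events breadth-first from the root of each tree (so parents are processed
     * before their children), then folds each task's events into a single {@link Task}.
     */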
    private Map<String, Task> createEnrichedTasks(Map<String, DefaultMutableTreeNode> nodesMap, Map<String, List<Event>> eventsMap,
                                                  Map<String, Task> previouslyIndexedParentTasks, String index) {
        enrichStartEventsByOrder(nodesMap.values(), eventsMap, previouslyIndexedParentTasks);
        return getTasksFromEvents(eventsMap, index);
    }

    private Map<String, Task> getTasksFromEvents(Map<String, List<Event>> eventsMap, String index) {
        Map<String, Task> tasksMap = new HashMap<>();
        for (Map.Entry<String, List<Event>> eventEntry : eventsMap.entrySet()) {
            Task task = new Task(eventEntry.getValue(), index, daysRotation, timbermillVersion);
            tasksMap.put(eventEntry.getKey(), task);
        }
        return tasksMap;
    }

    private void enrichStartEventsByOrder(Collection<DefaultMutableTreeNode> nodes, Map<String, List<Event>> eventsMap, Map<String, Task> previouslyIndexedTasks) {
        /*
         * Compute origins and merge parameters down from parents to children
         */
        for (DefaultMutableTreeNode node : nodes) {
            if (node.isRoot()) {
                Enumeration enumeration = node.breadthFirstEnumeration();
                while (enumeration.hasMoreElements()) {
                    DefaultMutableTreeNode curr = (DefaultMutableTreeNode) enumeration.nextElement();
                    Event startEvent = (Event) curr.getUserObject();
                    enrichStartEvent(eventsMap, previouslyIndexedTasks, startEvent);
                }
            }
        }
    }

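    /**
     * Sanity check: warns when a task ID has more than one start event, which indicates
     * duplicate or conflicting events for the same task.
     */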
    public static void logErrorInEventsMap(Map<String, List<Event>> eventsMap, String where) {
        for (Map.Entry<String, List<Event>> stringListEntry : eventsMap.entrySet()) {
            List<Event> value = stringListEntry.getValue();
            if (value.stream().filter(Event::isStartEvent).count() > 1){
                LOG.warn("Too many start events in {} events: {}", where, GSON.toJson(value));
            }
        }
    }

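    /**
     * Sets the parent-derived state of a single start event: root events become their own
     * primary ID, events whose parents cannot be found are flagged as orphans, and the rest
     * inherit parent parameters via {@link ParentResolver#populateParentParams}.
     */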
    private void enrichStartEvent(Map<String, List<Event>> eventsMap, Map<String, Task> previouslyIndexedTasks, Event startEvent) {
        String parentId = startEvent.getParentId();
        if (parentId != null) {
            if (isOrphan(startEvent, previouslyIndexedTasks, eventsMap)){
                startEvent.setOrphan(true);
                startEvent.setPrimaryId(null);
            }
            else {
                populateParentParams(startEvent, previouslyIndexedTasks.get(parentId), eventsMap.get(parentId));
            }
        }
        else{
            startEvent.setPrimaryId(startEvent.getTaskId());
        }
    }

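    /**
     * An event is an orphan when its parent is neither previously indexed nor starting in this
     * batch, or when the parent is itself an orphan. Events already adopted in this batch are
     * never orphans.
     */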
    private boolean isOrphan(Event event, Map<String, Task> previouslyIndexedTasks, Map<String, List<Event>> eventsMap) {
        String parentId = event.getParentId();
        if (parentId == null) {
            return false;
        } else {
            if (previouslyIndexedTasks.containsKey(parentId)){
                Task parentTask = previouslyIndexedTasks.get(parentId);
                return parentTask.isOrphan() != null && parentTask.isOrphan();
            }
            if (eventsMap.containsKey(parentId)){
                if (eventsMap.get(parentId).stream().anyMatch(Event::isAdoptedEvent)) {
                    return false;
                }
                if (eventsMap.get(parentId).stream().anyMatch(Event::isStartEvent)){
                    List<Event> parentEvents = eventsMap.get(parentId).stream().filter(Event::isStartEvent).collect(Collectors.toList());
                    if (parentEvents.size() != 1){
                        LOG.warn("Too many parents found for parent ID [{}] child task ID [{}] Events: {}", parentId, event.getTaskId(), GSON.toJson(parentEvents));
                    }
                    for (Event e : parentEvents) {
                        if (e.isOrphan() != null && e.isOrphan()){
                            return true;
                        }
                    }
                    return false;
                }
            }
            return true;
        }
    }

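    /**
     * Runs every configured {@link TaskLogPlugin} over the batch and indexes a
     * {@link PluginApplierTask} metadata document per plugin, recording its status, duration,
     * and any exception it threw.
     */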
    private void applyPlugins(Collection<Event> events, String env) {
        try {
            for (TaskLogPlugin plugin : logPlugins) {
                ZonedDateTime startTime = ZonedDateTime.now();
                TaskStatus status;
                String exception = null;
                try {
                    plugin.apply(events);
                    status = TaskStatus.SUCCESS;
                } catch (Exception ex) {
                    exception = ExceptionUtils.getStackTrace(ex);
                    status = TaskStatus.ERROR;
                    LOG.error("error in plugin" + plugin, ex);
                }
                ZonedDateTime endTime = ZonedDateTime.now();
                long duration = ElasticsearchUtil.getTimesDuration(startTime, endTime);
                PluginApplierTask pluginApplierTask = new PluginApplierTask(env, plugin.getName(), plugin.getClass().getSimpleName(), status, exception, endTime, duration, startTime, daysRotation);
                es.indexMetaDataTasks(env, Lists.newArrayList(GSON.toJson(pluginApplierTask)));
            }
        } catch (Throwable t) {
            LOG.error("Error running plugins", t);
        }
    }
}