All downloads are free. Search and download functionality uses the official Maven repository.

com.netflix.genie.web.tasks.leader.UserMetricsTask Maven / Gradle / Ivy

The newest version!
/*
 *
 *  Copyright 2016 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */
package com.netflix.genie.web.tasks.leader;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.AtomicDouble;
import com.netflix.genie.common.dto.UserResourcesSummary;
import com.netflix.genie.common.internal.dtos.JobStatus;
import com.netflix.genie.web.data.services.DataServices;
import com.netflix.genie.web.data.services.PersistenceService;
import com.netflix.genie.web.properties.UserMetricsProperties;
import com.netflix.genie.web.tasks.GenieTaskScheduleType;
import com.netflix.genie.web.util.MetricsConstants;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.MeterRegistry;
import lombok.extern.slf4j.Slf4j;

import java.util.Map;
import java.util.Set;

/**
 * A leader task which publishes per-user resource usage metrics.
 *
 * <p>On every run the task asks the {@link PersistenceService} for a summary of the resources used by each user
 * with active jobs and exposes three gauges through the {@link MeterRegistry}:
 * <ul>
 * <li>{@value #USER_ACTIVE_USERS_METRIC_NAME}: number of users present in the latest summary</li>
 * <li>{@value #USER_ACTIVE_JOBS_METRIC_NAME}: running jobs count, tagged by user</li>
 * <li>{@value #USER_ACTIVE_MEMORY_METRIC_NAME}: used memory, tagged by user</li>
 * </ul>
 * Gauges report {@link Double#NaN} whenever no current value is known: before the first run, after
 * {@link #cleanup()}, and for users that no longer appear in the latest summary.
 *
 * @author mprimi
 * @since 4.0.0
 */
@Slf4j
public class UserMetricsTask extends LeaderTask {

    private static final String USER_ACTIVE_JOBS_METRIC_NAME = "genie.user.active-jobs.gauge";
    private static final String USER_ACTIVE_MEMORY_METRIC_NAME = "genie.user.active-memory.gauge";
    private static final String USER_ACTIVE_USERS_METRIC_NAME = "genie.user.active-users.gauge";

    // Never updated, so both of its values stay NaN. Returned to gauges whose user has no current record.
    private static final UserResourcesRecord USER_RECORD_PLACEHOLDER = new UserResourcesRecord("nobody");

    private final MeterRegistry registry;
    private final PersistenceService persistenceService;
    private final UserMetricsProperties userMetricsProperties;

    // Latest known usage, keyed by user name. Read by the per-user gauge suppliers and rewritten by run().
    // NOTE(review): this is a plain HashMap; if the registry polls gauges from a thread other than the one
    // executing run()/cleanup(), a concurrent map may be required -- confirm against the registry in use.
    private final Map<String, UserResourcesRecord> userResourcesRecordMap = Maps.newHashMap();
    private final AtomicDouble activeUsersCount;

    /**
     * Constructor.
     *
     * <p>Registers the gauge reporting the number of active users; it reads {@link Double#NaN} until the first
     * {@link #run()} completes.
     *
     * @param registry              the metrics registry
     * @param dataServices          The {@link DataServices} instance to use
     * @param userMetricsProperties the properties that configure this task
     */
    public UserMetricsTask(
        final MeterRegistry registry,
        final DataServices dataServices,
        final UserMetricsProperties userMetricsProperties
    ) {
        this.registry = registry;
        this.persistenceService = dataServices.getPersistenceService();
        this.userMetricsProperties = userMetricsProperties;
        this.activeUsersCount = new AtomicDouble(Double.NaN);

        // Register gauge for count of distinct users with active jobs.
        Gauge.builder(USER_ACTIVE_USERS_METRIC_NAME, this::getUsersCount)
            .register(registry);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public GenieTaskScheduleType getScheduleType() {
        return GenieTaskScheduleType.FIXED_RATE;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public long getFixedRate() {
        return this.userMetricsProperties.getRefreshInterval();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
        log.debug("Publishing user metrics");

        // For now just report the API jobs as they're using resources on Genie web nodes
        // Get us unblocked for now on agent migration but in future we may want to change this to further dice or
        // combine reports by CLI vs. API
        // (presumably the 'true' flag is what restricts the summaries to API jobs -- verify against the service)
        final Map<String, UserResourcesSummary> summaries = this.persistenceService.getUserResourcesSummaries(
            JobStatus.getActiveStatuses(),
            true
        );

        // Update number of active users
        log.debug("Number of users with active jobs: {}", summaries.size());
        this.activeUsersCount.set(summaries.size());

        // Track users who previously had jobs but no longer do.
        // Work on a copy of the key set so the map can be modified while iterating.
        final Set<String> usersToReset = Sets.newHashSet(this.userResourcesRecordMap.keySet());
        usersToReset.removeAll(summaries.keySet());

        for (final String user : usersToReset) {
            // Remove user. If gauge is polled, it'll return NaN
            this.userResourcesRecordMap.remove(user);
        }

        // Create or update the record of every user in the latest summary
        for (final UserResourcesSummary userResourcesSummary : summaries.values()) {
            final String user = userResourcesSummary.getUser();
            final long jobs = userResourcesSummary.getRunningJobsCount();
            final long memory = userResourcesSummary.getUsedMemory();

            log.debug("User {}: {} jobs running, using {}MB", user, jobs, memory);

            this.userResourcesRecordMap.computeIfAbsent(
                user,
                userName -> {
                    // Register gauges for this user.
                    // Gauge creation is idempotent so it doesn't matter if the user is new or seen before.
                    // Registry holds a reference to the gauge so no need to save it.
                    Gauge.builder(
                            USER_ACTIVE_JOBS_METRIC_NAME,
                            () -> this.getUserJobCount(userName)
                        )
                        .tags(MetricsConstants.TagKeys.USER, userName)
                        .register(this.registry);
                    Gauge.builder(
                            USER_ACTIVE_MEMORY_METRIC_NAME,
                            () -> this.getUserMemoryAmount(userName)
                        )
                        .tags(MetricsConstants.TagKeys.USER, userName)
                        .register(this.registry);

                    return new UserResourcesRecord(userName);
                }

            ).update(jobs, memory);
        }

        log.debug("Done publishing user metrics");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void cleanup() {

        log.debug("Cleaning up user metrics publishing");

        // Reset all users: with no record left, per-user gauges fall back to the NaN placeholder
        this.userResourcesRecordMap.clear();

        // Reset active users count
        this.activeUsersCount.set(Double.NaN);
    }

    /**
     * Gauge supplier for the number of running jobs of a user.
     *
     * @param userName the user the gauge is tagged with
     * @return the last recorded job count, or NaN if there is no current record for the user
     */
    private Number getUserJobCount(final String userName) {
        final UserResourcesRecord record = this.userResourcesRecordMap.getOrDefault(userName, USER_RECORD_PLACEHOLDER);
        final double jobCount = record.jobCount.get();
        log.debug("Current jobs count for user '{}' is {}", userName, (long) jobCount);
        return jobCount;
    }

    /**
     * Gauge supplier for the amount of memory used by a user.
     *
     * @param userName the user the gauge is tagged with
     * @return the last recorded memory amount, or NaN if there is no current record for the user
     */
    private Number getUserMemoryAmount(final String userName) {
        final UserResourcesRecord record = this.userResourcesRecordMap.getOrDefault(userName, USER_RECORD_PLACEHOLDER);
        final double memoryAmount = record.memoryAmount.get();
        log.debug("Current memory amount for user '{}' is {}MB", userName, (long) memoryAmount);
        return memoryAmount;
    }

    /**
     * Gauge supplier for the number of users with active jobs.
     *
     * @return the size of the latest summary, or NaN if none has been recorded since construction or cleanup
     */
    private Number getUsersCount() {
        return this.activeUsersCount.get();
    }

    /**
     * Holder of the latest job count and memory usage recorded for a single user.
     * Both values start as NaN and only change through {@link #update(long, long)}.
     */
    private static class UserResourcesRecord {
        private final String userName;
        private final AtomicDouble jobCount = new AtomicDouble(Double.NaN);
        private final AtomicDouble memoryAmount = new AtomicDouble(Double.NaN);

        UserResourcesRecord(
            final String userName
        ) {
            this.userName = userName;
        }

        /**
         * Overwrite the stored values with the ones from the latest summary.
         *
         * @param runningJobsCount the number of running jobs reported for the user
         * @param usedMemory       the amount of memory reported for the user
         */
        void update(final long runningJobsCount, final long usedMemory) {
            log.debug(
                "Updating usage of user '{}': {} jobs totalling {}MB",
                this.userName,
                runningJobsCount,
                usedMemory
            );
            this.jobCount.set(runningJobsCount);
            this.memoryAmount.set(usedMemory);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy