All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netflix.genie.web.tasks.node.DiskCleanupTask Maven / Gradle / Ivy

The newest version!
/*
 *
 *  Copyright 2016 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */
package com.netflix.genie.web.tasks.node;

import com.netflix.genie.common.dto.Job;
import com.netflix.genie.common.exceptions.GenieException;
import com.netflix.genie.common.internal.jobs.JobConstants;
import com.netflix.genie.web.data.services.DataServices;
import com.netflix.genie.web.data.services.PersistenceService;
import com.netflix.genie.web.properties.DiskCleanupProperties;
import com.netflix.genie.web.properties.JobsProperties;
import com.netflix.genie.web.tasks.TaskUtils;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.Executor;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.SystemUtils;
import org.springframework.core.io.Resource;
import org.springframework.scheduling.TaskScheduler;
import org.springframework.scheduling.support.CronTrigger;

import javax.validation.constraints.NotNull;
import java.io.File;
import java.io.IOException;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;

// TODO: This task is now only relevant to {@link com.netflix.genie.web.agent.launchers.impl.LocalAgentLauncherImpl}.
// It should be refactored to be specific to that launcher rather than remaining the generic disk cleaner inherited
// from V3.

/**
 * This task runs on every Genie node and is responsible for cleaning up the local disk so that space can be
 * recaptured.
 * <p>
 * On each run the task lists the entries of the jobs directory, treats every sub-directory name as a job id,
 * and deletes the directory when the corresponding job is no longer active and finished before the retention
 * threshold (midnight UTC of the current day minus the configured retention in days).
 *
 * @author tgianos
 * @since 3.0.0
 */
@Slf4j
public class DiskCleanupTask implements Runnable {

    private final DiskCleanupProperties properties;
    private final File jobsDir;
    private final PersistenceService persistenceService;
    private final boolean runAsUser;
    private final Executor processExecutor;

    // Gauges reflect the totals of the most recent run; counters accumulate across runs.
    private final AtomicLong numberOfDeletedJobDirs;
    private final AtomicLong numberOfDirsUnableToDelete;
    private final Counter unableToGetJobCounter;
    private final Counter unableToDeleteJobDirCounter;

    /**
     * Constructor. Schedules this task to be run by the task scheduler.
     *
     * @param properties      The disk cleanup properties to use.
     * @param scheduler       The scheduler to use to schedule the cron trigger.
     * @param jobsDir         The resource representing the location of the job directory
     * @param dataServices    The {@link DataServices} instance to use
     * @param jobsProperties  The jobs properties to use
     * @param processExecutor The process executor to use to delete directories
     * @param registry        The metrics registry
     * @throws IOException When the jobs directory doesn't exist or it is unable to open a file reference to it
     */
    public DiskCleanupTask(
        @NotNull final DiskCleanupProperties properties,
        @NotNull final TaskScheduler scheduler,
        @NotNull final Resource jobsDir,
        @NotNull final DataServices dataServices,
        @NotNull final JobsProperties jobsProperties,
        @NotNull final Executor processExecutor,
        @NotNull final MeterRegistry registry
    ) throws IOException {
        // Job Directory is guaranteed to exist by the MvcConfig bean creation but just in case someone overrides
        if (!jobsDir.exists()) {
            throw new IOException("Jobs dir " + jobsDir + " doesn't exist. Unable to create task to cleanup.");
        }

        this.properties = properties;
        this.jobsDir = jobsDir.getFile();
        this.persistenceService = dataServices.getPersistenceService();
        this.runAsUser = jobsProperties.getUsers().isRunAsUserEnabled();
        this.processExecutor = processExecutor;

        this.numberOfDeletedJobDirs = registry.gauge(
            "genie.tasks.diskCleanup.numberDeletedJobDirs.gauge",
            new AtomicLong()
        );
        this.numberOfDirsUnableToDelete = registry.gauge(
            "genie.tasks.diskCleanup.numberDirsUnableToDelete.gauge",
            new AtomicLong()
        );
        this.unableToGetJobCounter = registry.counter("genie.tasks.diskCleanup.unableToGetJobs.rate");
        this.unableToDeleteJobDirCounter = registry.counter("genie.tasks.diskCleanup.unableToDeleteJobsDir.rate");

        // Deleting as another user relies on "sudo rm", so the task is only left unscheduled when run-as-user
        // is enabled on a non-Unix system. In every other case schedule it using the configured cron expression.
        if (this.runAsUser && !SystemUtils.IS_OS_UNIX) {
            log.error("System is not UNIX like. Unable to schedule disk cleanup due to needing Unix commands");
        } else {
            final CronTrigger trigger = new CronTrigger(properties.getExpression(), JobConstants.UTC);
            scheduler.schedule(this, trigger);
        }
    }

    /**
     * Checks the disk for jobs on this host. Deletes any job directories that are older than the desired
     * retention and are complete.
     * <p>
     * Entries that are not directories, jobs that are still active and jobs without a finish time are skipped.
     * Failures to look up a job or to delete its directory are logged and counted, and processing continues
     * with the next directory. The two gauges are updated with the totals of this run once it completes.
     */
    @Override
    public void run() {
        log.info("Running disk cleanup task...");
        final File[] jobDirs = this.jobsDir.listFiles();
        if (jobDirs == null) {
            log.warn("No job dirs found. Returning.");
            this.numberOfDeletedJobDirs.set(0);
            this.numberOfDirsUnableToDelete.set(0);
            return;
        }

        // Delete anything with a finish time before today @12 AM UTC - retention.
        // The threshold doesn't depend on the individual job so compute it once per run; this also guarantees
        // every directory examined in this run is compared against the same cut-off.
        final Instant retentionThreshold = TaskUtils
            .getMidnightUTC()
            .minus(this.properties.getRetention(), ChronoUnit.DAYS);

        // For each of the directories figure out if we need to delete the files or not
        long deletedCount = 0;
        long unableToDeleteCount = 0;
        for (final File dir : jobDirs) {
            if (!dir.isDirectory()) {
                log.info("File {} isn't a directory. Skipping.", dir.getName());
                continue;
            }

            // The directory name is used as the id of the job to look up
            final String id = dir.getName();
            try {
                final Job job = this.persistenceService.getJob(id);
                if (job.getStatus().isActive()) {
                    // Don't want to delete anything still going
                    continue;
                }

                final Optional<Instant> finished = job.getFinished();
                if (finished.isPresent() && finished.get().isBefore(retentionThreshold)) {
                    log.info("Attempting to delete job directory for job {}", id);
                    this.deleteJobDirectory(dir);
                    deletedCount++;
                    log.info("Successfully deleted job directory for job {}", id);
                }
            } catch (final GenieException ge) {
                // A failed lookup is also tallied in the "unable to delete" total since the directory stays on disk
                log.error("Unable to get job {}. Continuing.", id, ge);
                this.unableToGetJobCounter.increment();
                unableToDeleteCount++;
            } catch (final IOException ioe) {
                log.error("Unable to delete job directory for job with id: {}", id, ioe);
                this.unableToDeleteJobDirCounter.increment();
                unableToDeleteCount++;
            }
        }
        this.numberOfDeletedJobDirs.set(deletedCount);
        this.numberOfDirsUnableToDelete.set(unableToDeleteCount);
    }

    /**
     * Recursively delete a single job directory, via {@code sudo rm -rf} when run-as-user is enabled and
     * in-process otherwise.
     *
     * @param dir The job directory to delete
     * @throws IOException If the external command or the in-process deletion fails
     */
    private void deleteJobDirectory(final File dir) throws IOException {
        if (this.runAsUser) {
            final CommandLine commandLine = new CommandLine("sudo");
            commandLine.addArgument("rm");
            commandLine.addArgument("-rf");
            commandLine.addArgument(dir.getAbsolutePath());
            this.processExecutor.execute(commandLine);
        } else {
            // Save forking a process ourselves if we don't have to
            FileUtils.deleteDirectory(dir);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy