All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.powsybl.computation.slurm.JobArraySlurmTask Maven / Gradle / Ivy

/**
 * Copyright (c) 2020, RTE (http://www.rte-france.com)
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package com.powsybl.computation.slurm;

import com.powsybl.commons.io.WorkingDirectory;
import com.powsybl.computation.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;

/**
 *
 * A {@link SlurmTask} which submits commands as job arrays when possible,
 * in particular for commands with execution count > 1.
 *
 * @author Yichen TANG 
 */
class JobArraySlurmTask extends AbstractTask {

    private static final Logger LOGGER = LoggerFactory.getLogger(JobArraySlurmTask.class);

    private Long firstJobId = null;
    private List ids = new ArrayList<>();

    JobArraySlurmTask(SlurmComputationManager scm, WorkingDirectory directory,
                      List executions, ComputationParameters parameters, ExecutionEnvironment environment) {
        super(scm, directory, executions, parameters, environment);
    }

    @Override
    public void submit() throws IOException {
        commandByJobId = new HashMap<>();
        Long prejobId = null;
        for (int executionIdx = 0; executionIdx < executions.size(); executionIdx++) {
            if (!canSubmit()) {
                break;
            }
            CommandExecution commandExecution = executions.get(executionIdx);
            Command command = commandExecution.getCommand();
            SbatchCmd cmd;
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Executing {} command {} in working directory {}", command.getType(), command, workingDir);
            }

            // a master job to copy NonExecutionDependent and PreProcess needed input files
            if (command.getInputFiles().stream()
                    .anyMatch(inputFile -> !inputFile.dependsOnExecutionNumber() && inputFile.getPreProcessor() != null)) {
                if (!canSubmit()) {
                    break;
                }
                SbatchScriptGenerator sbatchScriptGenerator = new SbatchScriptGenerator(flagDir);
                List shell = sbatchScriptGenerator.unzipCommonInputFiles(command);
                String batchName = UNZIP_INPUTS_COMMAND_ID + "_" + executionIdx;
                copyShellToRemoteWorkingDir(shell, UNZIP_INPUTS_COMMAND_ID + "_" + executionIdx);
                cmd = buildSbatchCmd(UNZIP_INPUTS_COMMAND_ID, batchName, prejobId, parameters);
                prejobId = launchSbatch(cmd);
                if (firstJobId == null) {
                    firstJobId = prejobId;
                    LOGGER.debug("First jobId : {}", firstJobId);
                }
                ids.add(prejobId);
                jobs.add(new CompletableMonitoredJob(prejobId, false));
            }
            boolean isLast = executionIdx == executions.size() - 1;
            String batchName = prepareBatch(commandExecution, isLast);
            cmd = buildSbatchCmd(commandExecution.getExecutionCount(), command.getId(), batchName, prejobId, parameters);
            prejobId = launchSbatch(cmd);
            if (firstJobId == null) {
                firstJobId = prejobId;
                LOGGER.debug("First jobId : {}", firstJobId);
            }
            ids.add(prejobId);
            jobs.add(new CompletableMonitoredJob(prejobId, isLast));
            commandByJobId.put(prejobId, command);
        }

        aggregateMonitoredJobs();
    }

    @Override
    Collection getAllJobIds() {
        return ids;
    }

    @Override
    ExecutionError convertNonZeroSacctLine2Error(String line) {
        Matcher m = DIGITAL_PATTERN.matcher(line);
        m.find();
        long jobId = Long.parseLong(m.group());
        if (line.contains("_")) {
            m.find();
            int executionIdx = Integer.parseInt(m.group());
            m.find();
            int exitCode = Integer.parseInt(m.group());
            return new ExecutionError(commandByJobId.get(jobId), executionIdx, exitCode);
        } else {
            // not array job
            m.find();
            int exitCode = Integer.parseInt(m.group());
            return new ExecutionError(commandByJobId.get(jobId), 0, exitCode);
        }
    }

    private String prepareBatch(CommandExecution commandExecution, boolean isLastCommandExecution) throws IOException {
        Map executionVariables = CommandExecution.getExecutionVariables(environment.getVariables(), commandExecution);
        SbatchScriptGenerator scriptGenerator = new SbatchScriptGenerator(flagDir);
        List shell = scriptGenerator.parser(commandExecution, workingDir, executionVariables, isLastCommandExecution);
        String batchName = commandExecution.getCommand().getId();
        copyShellToRemoteWorkingDir(shell, batchName);
        return batchName;
    }

    private SbatchCmd buildSbatchCmd(String commandId, String batchName, Long prejobId, ComputationParameters parameters) {
        return buildSbatchCmd(1, commandId, batchName, prejobId, parameters);
    }

    private SbatchCmd buildSbatchCmd(int arrayCount, String commandId, String batchName, Long prejobId, ComputationParameters parameters) {
        SbatchCmdBuilder builder = new SbatchCmdBuilder()
                .jobName(commandId)
                .workDir(workingDir)
                // TODO check
                .nodes(1)
                .ntasks(1)
                .oversubscribe();
        if (prejobId != null) {
            builder.aftercorr(Collections.singletonList(prejobId));
        }
        builder.jobName(commandId)
                .script(batchName + SlurmConstants.BATCH_EXT);
        if (arrayCount > 1) {
            builder.array(arrayCount)
                    .output(batchName + "_%a" + SlurmConstants.OUT_EXT)
                    .error(batchName + "_%a" + SlurmConstants.ERR_EXT);
        } else {
            builder.output(batchName + "_0" + SlurmConstants.OUT_EXT)
                    .error(batchName + "_0" + SlurmConstants.ERR_EXT);
        }
        addParameters(builder, commandId);
        return builder.build();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy