All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.powsybl.computation.slurm.SlurmTaskImpl Maven / Gradle / Ivy

/**
 * Copyright (c) 2019, RTE (http://www.rte-france.com)
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 * SPDX-License-Identifier: MPL-2.0
 */
package com.powsybl.computation.slurm;

import com.powsybl.commons.io.WorkingDirectory;
import com.powsybl.computation.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.powsybl.computation.slurm.SlurmConstants.BATCH_EXT;

/**
 * A {@link SlurmTask} which submits each command as an individual slurm job.
 *
 * 

* This class contains those job ids relationship in Slurm platform for one task. * It has a correspondent working directory and the CompletableFuture as return value. * * @author Yichen TANG {@literal } */ public class SlurmTaskImpl extends AbstractTask { private static final Logger LOGGER = LoggerFactory.getLogger(SlurmTaskImpl.class); private Long firstJobId; private List masters; private Map subTaskMap; private Long currentMaster; SlurmTaskImpl(SlurmComputationManager scm, WorkingDirectory directory, List executions, ComputationParameters parameters, ExecutionEnvironment environment) { super(scm, directory, executions, parameters, environment); } @Override public void submit() throws IOException { if (cannotSubmit()) { return; } commandByJobId = new HashMap<>(); for (int commandIdx = 0; commandIdx < executions.size(); commandIdx++) { submitCommand(commandIdx); } aggregateMonitoredJobs(); } private void submitCommand(int commandIdx) throws IOException { CommandExecution commandExecution = executions.get(commandIdx); Command command = commandExecution.getCommand(); SbatchCmd cmd; if (LOGGER.isDebugEnabled()) { LOGGER.debug("Executing {} command {} in working directory {}", command.getType(), command, workingDir); } // a master job to copy NonExecutionDependent and PreProcess needed input files if (command.getInputFiles().stream() .anyMatch(inputFile -> !inputFile.dependsOnExecutionNumber() && inputFile.getPreProcessor() != null)) { if (cannotSubmit()) { return; } SbatchScriptGenerator sbatchScriptGenerator = new SbatchScriptGenerator(flagDir); List shell = sbatchScriptGenerator.unzipCommonInputFiles(command); copyShellToRemoteWorkingDir(shell, UNZIP_INPUTS_COMMAND_ID + "_" + commandIdx); cmd = buildSbatchCmd(UNZIP_INPUTS_COMMAND_ID, commandIdx, getPreJobIds(), parameters); long jobId = launchSbatch(cmd); newCommonUnzipJob(jobId); } // no job array --> commandId_index.batch for (int executionIndex = 0; executionIndex < commandExecution.getExecutionCount(); executionIndex++) { if (cannotSubmit()) { return; } prepareBatch(command, executionIndex, commandExecution); cmd = buildSbatchCmd(command.getId(), executionIndex, getPreJobIds(), parameters); long jobId = launchSbatch(cmd); newBatch(jobId); } commandByJobId.put(currentMaster, command); // finish binding batches setCurrentMasterNull(); } private SbatchCmd buildSbatchCmd(String commandId, int executionIndex, List preJobIds, ComputationParameters baseParams) { // prepare sbatch cmd String baseFileName = workingDir.resolve(commandId).toAbsolutePath().toString(); SbatchCmdBuilder builder = new SbatchCmdBuilder() .script(baseFileName + "_" + executionIndex + BATCH_EXT) .jobName(commandId) .workDir(workingDir) .nodes(1) .ntasks(1) .oversubscribe() .output(baseFileName + "_" + executionIndex + SlurmConstants.OUT_EXT) .error(baseFileName + "_" + executionIndex + SlurmConstants.ERR_EXT); if (!preJobIds.isEmpty()) { builder.aftercorr(preJobIds); } addParameters(builder, commandId); return builder.build(); } private void prepareBatch(Command command, int executionIndex, CommandExecution commandExecution) throws IOException { // prepare sbatch script from command Map executionVariables = CommandExecution.getExecutionVariables(environment.getVariables(), commandExecution); SbatchScriptGenerator scriptGenerator = new SbatchScriptGenerator(flagDir); List shell = scriptGenerator.parser(command, executionIndex, workingDir, executionVariables); if (executionIndex == -1) { // array job not used yet copyShellToRemoteWorkingDir(shell, command.getId()); } else { copyShellToRemoteWorkingDir(shell, command.getId() + "_" + executionIndex); } } @Override ExecutionError convertScontrolResult2Error(ScontrolCmd.ScontrolResultBean scontrolResultBean) { long jobId = scontrolResultBean.getJobId(); int exitCode = scontrolResultBean.getExitCode(); long mid = jobId; int executionIdx = 0; if (!commandByJobId.containsKey(jobId)) { mid = getMasterId(jobId); executionIdx = (int) (jobId - mid); } // error message ??? ExecutionError error = new ExecutionError(commandByJobId.get(mid), executionIdx, exitCode); LOGGER.debug("{} error added ", error); return error; } /** * Returns all job ids in slurm for this task. It contains batch ids if array_job is not used. * For array jobs can be cancelled just by calling on master jobId * but currently array_job in slurm is not used, so jobs should be cancelled one by one. */ @Override Set getAllJobIds() { if (masters == null || masters.isEmpty()) { return Collections.emptySet(); } Set set = new HashSet<>(); set.addAll(masters); Set subIds = getMasters().stream().flatMap(mId -> subTaskMap.get(mId).getBatchStream()) .collect(Collectors.toSet()); set.addAll(subIds); return set; } Long getFirstJobId() { return firstJobId; } Long getMasterId(Long batchId) { for (Map.Entry entry : subTaskMap.entrySet()) { boolean contains = entry.getValue().batchIds.contains(batchId); if (contains) { return entry.getKey(); } } return null; } /** * If first job id is null, this masterId would take account as first job id. * @param masterId */ private void newMaster(Long masterId) { setFirstJobIfIsNull(masterId); currentMaster = masterId; } /** * A CommonUnzipJob is always a master job, but never be a current master. * @param masterId */ private void newCommonUnzipJob(Long masterId) { Objects.requireNonNull(masterId); jobs.add(new CompletableMonitoredJob(masterId, false)); LOGGER.debug("tracing common unzip job:{}", masterId); setFirstJobIfIsNull(masterId); currentMaster = null; } // TODO rename to dependent jobs List getPreJobIds() { if (masters == null) { return Collections.emptyList(); } int preMasterIdOffset = currentMaster == null ? 1 : 2; if (masters.size() == 1 && preMasterIdOffset == 2) { return Collections.emptyList(); } Long preMasterId = masters.get(masters.size() - preMasterIdOffset); List preJobIds = new ArrayList<>(); preJobIds.add(preMasterId); preJobIds.addAll(subTaskMap.get(preMasterId).batchIds); return preJobIds; } private void setFirstJobIfIsNull(Long masterId) { Objects.requireNonNull(masterId); if (firstJobId == null) { initCollections(masterId); subTaskMap = new HashMap<>(); } else { Long preMasterId = masters.get(masters.size() - 1); LOGGER.debug("DependencyId: {} -> {}", preMasterId, masterId); } newMasterInCollections(masterId); } private void initCollections(Long masterId) { firstJobId = masterId; masters = new ArrayList<>(); LOGGER.debug("First jobId : {}", firstJobId); } private void newMasterInCollections(Long masterId) { masters.add(masterId); subTaskMap.put(masterId, new SubTask(masterId)); } /** * The batchId could be a batchId if currentMaster is null. * @param batchId */ private void newBatch(Long batchId) { Objects.requireNonNull(batchId); jobs.add(new CompletableMonitoredJob(batchId)); LOGGER.debug("tracing job:{}", batchId); if (masters == null || currentMaster == null) { newMaster(batchId); } else { subTaskMap.get(currentMaster).add(batchId); } } private void setCurrentMasterNull() { // make current master to null, and wait to be set currentMaster = null; } // =============================== // ==== for unit test methods ==== // =============================== List getMasters() { return masters; } List getBatches(Long masterId) { return subTaskMap.get(masterId).batchIds; } int getCommandExecutionSize() { return executions.size(); } CommandExecution getCommandExecution(int i) { return executions.get(i); } private static final class SubTask { private Long masterId; private List batchIds; private SubTask(Long masterId) { this.masterId = Objects.requireNonNull(masterId); batchIds = new ArrayList<>(); } private boolean add(Long batchId) { LOGGER.debug("batchIds: {} -> {}", masterId, batchId); return batchIds.add(batchId); } private Stream getBatchStream() { return batchIds.stream(); } @Override public String toString() { return "SubTask{" + "masterId=" + masterId + ", batchIds=" + batchIds + '}'; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy