![JAR search and dependency download from the Maven repository](/logo.png)
com.powsybl.computation.slurm.JobArraySlurmTask Maven / Gradle / Ivy
/**
* Copyright (c) 2020, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
package com.powsybl.computation.slurm;
import com.powsybl.commons.io.WorkingDirectory;
import com.powsybl.computation.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
/**
*
* A {@link SlurmTask} which submits commands as job arrays when possible,
* in particular for commands with an execution count greater than 1.
*
* @author Yichen TANG
*/
class JobArraySlurmTask extends AbstractTask {

    private static final Logger LOGGER = LoggerFactory.getLogger(JobArraySlurmTask.class);

    /** Id of the first job launched by this task; {@code null} until a job has been launched. */
    private Long firstJobId = null;

    /** Ids of all jobs launched by this task, in launch order. */
    private final List<Long> ids = new ArrayList<>();

    JobArraySlurmTask(SlurmComputationManager scm, WorkingDirectory directory,
                      List<CommandExecution> executions, ComputationParameters parameters, ExecutionEnvironment environment) {
        super(scm, directory, executions, parameters, environment);
    }

    /**
     * Launches one sbatch job per command execution, in order. Each job is made dependent
     * (through {@code aftercorr}) on the job launched just before it. When a command has at
     * least one input file that does not depend on the execution number and has a
     * pre-processor, an additional job running the "unzip common inputs" script is launched
     * before the command's own job.
     *
     * <p>The loop stops as soon as {@code canSubmit()} returns {@code false}; jobs already
     * launched are still aggregated.
     *
     * @throws IOException propagated from script preparation or job launching
     */
    @Override
    public void submit() throws IOException {
        commandByJobId = new HashMap<>();
        Long prejobId = null;
        for (int executionIdx = 0; executionIdx < executions.size(); executionIdx++) {
            if (!canSubmit()) {
                break;
            }
            CommandExecution commandExecution = executions.get(executionIdx);
            Command command = commandExecution.getCommand();
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Executing {} command {} in working directory {}", command.getType(), command, workingDir);
            }

            // a master job to copy NonExecutionDependent and PreProcess needed input files
            if (hasCommonPreProcessedInput(command)) {
                if (!canSubmit()) {
                    break;
                }
                SbatchScriptGenerator sbatchScriptGenerator = new SbatchScriptGenerator(flagDir);
                List<String> shell = sbatchScriptGenerator.unzipCommonInputFiles(command);
                String batchName = UNZIP_INPUTS_COMMAND_ID + "_" + executionIdx;
                copyShellToRemoteWorkingDir(shell, batchName);
                SbatchCmd unzipCmd = buildSbatchCmd(UNZIP_INPUTS_COMMAND_ID, batchName, prejobId, parameters);
                prejobId = launchSbatch(unzipCmd);
                registerJob(prejobId, false);
            }

            boolean isLast = executionIdx == executions.size() - 1;
            String batchName = prepareBatch(commandExecution, isLast);
            SbatchCmd cmd = buildSbatchCmd(commandExecution.getExecutionCount(), command.getId(), batchName, prejobId, parameters);
            prejobId = launchSbatch(cmd);
            registerJob(prejobId, isLast);
            // Only command jobs (not the unzip jobs) are mapped back to a command.
            commandByJobId.put(prejobId, command);
        }
        aggregateMonitoredJobs();
    }

    /**
     * @return the ids of all jobs launched so far by this task (the live internal list)
     */
    @Override
    Collection<Long> getAllJobIds() {
        return ids;
    }

    /**
     * Builds an {@link ExecutionError} from a sacct line by reading the successive numbers
     * found by {@code DIGITAL_PATTERN}: the job id first, then, when the line contains an
     * underscore (treated as an array job), the execution index, and finally the exit code.
     * For a line without underscore the execution index is 0.
     * NOTE(review): assumes DIGITAL_PATTERN matches runs of decimal digits - confirm.
     *
     * @param line the sacct output line to convert
     * @return the error associated with the command registered for the parsed job id
     * @throws IllegalStateException if the line holds fewer numbers than expected
     * @throws NumberFormatException if a matched token does not fit the expected numeric type
     */
    @Override
    ExecutionError convertNonZeroSacctLine2Error(String line) {
        Matcher m = DIGITAL_PATTERN.matcher(line);
        long jobId = Long.parseLong(nextNumber(m, line));
        int executionIdx = 0;
        if (line.contains("_")) {
            // array job: the number following the job id is the execution index
            executionIdx = Integer.parseInt(nextNumber(m, line));
        }
        int exitCode = Integer.parseInt(nextNumber(m, line));
        return new ExecutionError(commandByJobId.get(jobId), executionIdx, exitCode);
    }

    /**
     * Advances the matcher and returns the matched text, failing with a message that names
     * the offending line instead of the bare "No match found" of {@link Matcher#group()}.
     */
    private static String nextNumber(Matcher matcher, String line) {
        if (!matcher.find()) {
            throw new IllegalStateException("Unexpected sacct line, missing numeric field: " + line);
        }
        return matcher.group();
    }

    /** Tells whether the command has an input file that is execution-independent and has a pre-processor. */
    private static boolean hasCommonPreProcessedInput(Command command) {
        return command.getInputFiles().stream()
                .anyMatch(inputFile -> !inputFile.dependsOnExecutionNumber() && inputFile.getPreProcessor() != null);
    }

    /** Records a freshly launched job: first-job tracking, id list and monitored jobs. */
    private void registerJob(Long jobId, boolean isLast) {
        if (firstJobId == null) {
            firstJobId = jobId;
            LOGGER.debug("First jobId : {}", firstJobId);
        }
        ids.add(jobId);
        jobs.add(new CompletableMonitoredJob(jobId, isLast));
    }

    /**
     * Generates the batch script of a command execution and copies it to the remote working
     * directory under the command id.
     *
     * @return the batch name, which is the command id
     */
    private String prepareBatch(CommandExecution commandExecution, boolean isLastCommandExecution) throws IOException {
        Map<String, String> executionVariables = CommandExecution.getExecutionVariables(environment.getVariables(), commandExecution);
        SbatchScriptGenerator scriptGenerator = new SbatchScriptGenerator(flagDir);
        List<String> shell = scriptGenerator.parser(commandExecution, workingDir, executionVariables, isLastCommandExecution);
        String batchName = commandExecution.getCommand().getId();
        copyShellToRemoteWorkingDir(shell, batchName);
        return batchName;
    }

    /** Builds the sbatch command of a single (non-array) job. */
    private SbatchCmd buildSbatchCmd(String commandId, String batchName, Long prejobId, ComputationParameters parameters) {
        return buildSbatchCmd(1, commandId, batchName, prejobId, parameters);
    }

    /**
     * Builds the sbatch command for a batch script.
     *
     * @param arrayCount number of executions; a value above 1 makes the job an array whose
     *                   output and error files are suffixed with {@code _%a}, otherwise the
     *                   files are suffixed with {@code _0}
     * @param commandId  used as job name and passed to {@code addParameters}
     * @param batchName  base name of the script, output and error files
     * @param prejobId   id of the job this one must follow, or {@code null} for no dependency
     * @param parameters not read by this method
     */
    private SbatchCmd buildSbatchCmd(int arrayCount, String commandId, String batchName, Long prejobId, ComputationParameters parameters) {
        SbatchCmdBuilder builder = new SbatchCmdBuilder()
                .jobName(commandId)
                .workDir(workingDir)
                // TODO check
                .nodes(1)
                .ntasks(1)
                .oversubscribe();
        if (prejobId != null) {
            builder.aftercorr(Collections.singletonList(prejobId));
        }
        builder.script(batchName + SlurmConstants.BATCH_EXT);
        if (arrayCount > 1) {
            builder.array(arrayCount)
                    .output(batchName + "_%a" + SlurmConstants.OUT_EXT)
                    .error(batchName + "_%a" + SlurmConstants.ERR_EXT);
        } else {
            builder.output(batchName + "_0" + SlurmConstants.OUT_EXT)
                    .error(batchName + "_0" + SlurmConstants.ERR_EXT);
        }
        addParameters(builder, commandId);
        return builder.build();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy