org.apache.sysml.runtime.matrix.data.MultipleOutputCommitter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.data;
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
/**
 * Output committer for jobs that write to several output directories.
 *
 * <p>Tasks write their files into the task work output directory. On task
 * commit, every regular file found directly in that directory is moved to
 * one of the directories returned by {@code MRJobConfiguration.getOutputs},
 * selected by the integer prefix of the file name (the part before the
 * first {@code '-'}, e.g. {@code 0-r-00000} goes to output 0).
 */
public class MultipleOutputCommitter extends FileOutputCommitter
{
    // final output directories, indexed by output index; (re)loaded from the
    // job configuration at the start of every commitTask call
    private String[] outputs;

    /**
     * Runs the parent job setup and then creates every configured output
     * directory. A failed {@code mkdirs} is logged but does not abort setup.
     *
     * @param context job context providing the job configuration
     * @throws IOException if the parent setup or a file system call fails
     */
    @Override
    public void setupJob(JobContext context)
        throws IOException
    {
        super.setupJob(context);

        // get output file directories and create directories
        JobConf conf = context.getJobConf();
        String[] loutputs = MRJobConfiguration.getOutputs(conf);
        for (String dir : loutputs) {
            Path path = new Path(dir);
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.mkdirs(path)) {
                LOG.error("Mkdirs failed to create " + path.toString());
            }
        }
    }

    /**
     * Recursively deletes the job's configured output path, if one is set
     * and it exists.
     *
     * @param context job context providing the job configuration
     * @throws IOException if a file system call fails
     */
    @Override
    public void cleanupJob(JobContext context)
        throws IOException
    {
        JobConf conf = context.getJobConf();

        // do the clean up of temporary directory
        Path outputPath = FileOutputFormat.getOutputPath(conf);
        if (outputPath != null) {
            FileSystem fs = outputPath.getFileSystem(conf);
            context.getProgressible().progress();
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }
        }
    }

    /**
     * Moves all files of the task work output directory to their final
     * output directories and then deletes the work output directory.
     *
     * @param context task attempt context providing configuration and attempt id
     * @throws IOException if the work output path is not configured or does
     *         not exist, or if moving a file fails
     */
    @Override
    public void commitTask(TaskAttemptContext context)
        throws IOException
    {
        JobConf conf = context.getJobConf();
        TaskAttemptID attemptId = context.getTaskAttemptID();

        // get the mapping between index to output filename
        outputs = MRJobConfiguration.getOutputs(conf);

        // get temp task output path (compatible with hadoop1 and hadoop2)
        Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
        if (taskOutPath == null) {
            // fail with a descriptive error instead of a NullPointerException
            throw new IOException("Task output path is not configured for task: " + attemptId);
        }
        FileSystem fs = taskOutPath.getFileSystem(conf);
        if (!fs.exists(taskOutPath)) {
            throw new IOException("Task output path " + taskOutPath.toString() + " does not exist.");
        }

        // move the task outputs to their final places
        context.getProgressible().progress();
        moveFinalTaskOutputs(context, fs, taskOutPath);

        // delete the temporary task-specific output directory
        if (!fs.delete(taskOutPath, true)) {
            LOG.debug("Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
        }
    }

    /**
     * Moves every regular file located directly in {@code taskOutput} to its
     * destination; sub-directories are skipped. Does nothing if
     * {@code taskOutput} is not a directory.
     */
    private void moveFinalTaskOutputs(TaskAttemptContext context, FileSystem fs, Path taskOutput)
        throws IOException
    {
        context.getProgressible().progress();

        if (!fs.getFileStatus(taskOutput).isDirectory()) {
            return;
        }
        FileStatus[] files = fs.listStatus(taskOutput);
        if (files == null) {
            return;
        }
        for (FileStatus file : files) {
            if (!file.isDirectory()) { // skip directories
                moveFileToDestination(context, fs, file.getPath());
            }
        }
    }

    /**
     * Renames a single task output file into the output directory selected
     * by the integer prefix of its name. If the first rename fails, an
     * existing destination is deleted and the rename is retried once.
     *
     * @throws IOException if the file name does not carry a valid output
     *         index, or if the file cannot be moved
     */
    private void moveFileToDestination(TaskAttemptContext context, FileSystem fs, Path file)
        throws IOException
    {
        TaskAttemptID attemptId = context.getTaskAttemptID();

        // get output index and final destination
        String name = file.getName(); //e.g., 0-r-00000
        int dash = name.indexOf('-');
        if (dash <= 0) {
            throw new IOException("Unexpected task output file name " + file
                + " (missing output index prefix) in task: " + attemptId);
        }
        int index;
        try {
            index = Integer.parseInt(name.substring(0, dash));
        }
        catch (NumberFormatException ex) {
            throw new IOException("Unexpected task output file name " + file
                + " (non-numeric output index) in task: " + attemptId, ex);
        }
        if (outputs == null || index < 0 || index >= outputs.length) {
            throw new IOException("Output index " + index + " of file " + file
                + " is out of range of the configured outputs in task: " + attemptId);
        }
        Path dest = new Path(outputs[index], name); //e.g., outX/0-r-00000

        // move file from 'file' to 'dest'
        if (!fs.rename(file, dest)) {
            // only a pre-existing destination needs to be removed; otherwise the
            // retry below reports the actual rename failure
            if (fs.exists(dest) && !fs.delete(dest, true)) {
                throw new IOException("Failed to delete earlier output " + dest + " for rename of " + file + " in task " + attemptId);
            }
            if (!fs.rename(file, dest)) {
                throw new IOException("Failed to save output " + dest + " for rename of " + file + " in task: " + attemptId);
            }
        }
    }
}