All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.runtime.matrix.CleanupMR Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


package org.apache.sysml.runtime.matrix;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.LocalFileUtils;


/**
 * Utility for running a map-only MR job ("Cleanup-MR") whose map tasks each
 * clean up the SystemML local tmp directory via
 * {@link LocalFileUtils#cleanupRcWorkingDirectory(String)}.
 * 
 * <p>The job input is a small task file with one line per requested map task;
 * it is read with {@link NLineInputFormat} and no output is produced
 * ({@link NullOutputFormat}).
 */
public class CleanupMR 
{
	private static final Log LOG = LogFactory.getLog(CleanupMR.class.getName());
	
	private CleanupMR() {
		//prevent instantiation via private constructor
	}
	
	/**
	 * Configures and synchronously executes the cleanup MR job.
	 * 
	 * <p>Any exception raised while setting up or running the job is logged
	 * and swallowed; in that case the method returns {@code false}.
	 * 
	 * @param conf DML configuration providing the local tmp dir
	 *             ({@link DMLConfig#LOCAL_TMP_DIR}) and the scratch space
	 *             ({@link DMLConfig#SCRATCH_SPACE}) used for the task file
	 * @return {@code true} if the MR job reported success, {@code false} if it
	 *         failed or an exception occurred
	 * @throws Exception declared for backward compatibility; failures inside
	 *         this method are caught and logged rather than propagated
	 */
	public static boolean runJob( DMLConfig conf ) 
		throws Exception
	{
		boolean ret = false;
		
		try
		{
			JobConf job = new JobConf(CleanupMR.class);
			job.setJobName("Cleanup-MR");
			
			//set up SystemML local tmp dir (read back by the mapper in configure)
			String dir = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
			MRJobConfiguration.setSystemMLLocalTmpDir(job, dir); 
			
			//set mappers, reducers (one requested map task per remote node)
			int numNodes = InfrastructureAnalyzer.getRemoteParallelNodes();
			job.setMapperClass(CleanupMapper.class); //map-only
			job.setNumMapTasks(numNodes); //numMappers
			job.setNumReduceTasks( 0 );
			
			//set input/output format, input path
			String inFileName = conf.getTextValue(DMLConfig.SCRATCH_SPACE)+"/cleanup_tasks";
			job.setInputFormat(NLineInputFormat.class);
			job.setOutputFormat(NullOutputFormat.class);
			
			//create the task file (one line per task) that drives the mappers
			Path path = new Path( inFileName );
			FileInputFormat.setInputPaths(job, path);
			writeCleanupTasksToFile(path, numNodes);
			
			//disable automatic tasks timeouts and speculative task exec
			job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
			job.setMapSpeculativeExecution(false);
			
			/////
			// execute the MR job (blocks until the job completes)
			RunningJob runjob = JobClient.runJob(job);
			
			ret = runjob.isSuccessful();
		}
		catch(Exception ex)
		{
			//don't raise an exception, just gracefully log an error message.
			LOG.error("Failed to run cleanup MR job. ",ex);
		}
		
		return ret;
	}

	/**
	 * Writes the cleanup task file: one line {@code "CLEANUP TASK <i>"} for
	 * each {@code i} in {@code 1..numTasks}. An existing file at the given
	 * path is overwritten.
	 * 
	 * @param path     target path of the task file
	 * @param numTasks number of task lines to write
	 * @throws DMLRuntimeException if creating or writing the file fails
	 * @throws IOException if the file system for the path cannot be obtained
	 */
	private static void writeCleanupTasksToFile(Path path, int numTasks)
		throws DMLRuntimeException, IOException
	{
		FileSystem fs = IOUtilFunctions.getFileSystem(path);
		//explicit UTF-8 so the file content does not depend on the platform default charset
		try( BufferedWriter br = new BufferedWriter(
				new OutputStreamWriter(fs.create(path,true), StandardCharsets.UTF_8)) ) {
			for( int i=1; i<=numTasks; i++ )
				br.write( "CLEANUP TASK "+i+"\n" );
		}
		catch(Exception ex) {
			throw new DMLRuntimeException("Error writing cleanup tasks to taskfile "+path.toString(), ex);
		}
	}
	
	/**
	 * Mapper that, for every input record (one cleanup task line), cleans up
	 * the SystemML local tmp directory configured for the job. It does not
	 * emit any output.
	 */
	public static class CleanupMapper implements Mapper<LongWritable, Text, Writable, Writable>
	{
		private static final Log LOG = LogFactory.getLog(CleanupMapper.class.getName());
		
		//file name local tmp dir (set in configure)
		protected String _dir = null; 
		
		public CleanupMapper( ) 
		{
			
		}
		
		/**
		 * Runs a single cleanup task against the configured local tmp dir.
		 * 
		 * @param key      input key (not used)
		 * @param value    task description line, only used for logging
		 * @param out      output collector (not used, the job has no output)
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the cleanup fails for any reason
		 */
		@Override
		public void map(LongWritable key, Text value, OutputCollector<Writable, Writable> out, Reporter reporter) 
			throws IOException
		{
			try 
			{
				String task = value.toString();
				LOG.info("Running cleanup task: "+task+" ("+_dir+") ... ");
				
				int count = LocalFileUtils.cleanupRcWorkingDirectory(_dir);
				LOG.info("Done - deleted "+count+" files.");
			}
			catch(Exception ex)
			{
				//throw IO exception to adhere to API specification
				throw new IOException("Failed to execute cleanup task.",ex);
			}
		}
	
		/**
		 * Reads the SystemML local tmp dir from the job configuration.
		 * 
		 * @param job job configuration of the running task
		 */
		@Override
		public void configure(JobConf job)
		{
			_dir = MRJobConfiguration.getSystemMLLocalTmpDir(job);
		}
		
		/**
		 * Nothing to release; the mapper holds no resources.
		 */
		@Override
		public void close()
		{
			
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy