/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.controlprogram.parfor;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.parfor.util.Cell;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
import org.apache.sysml.runtime.controlprogram.parfor.util.StagingFileUtils;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.IJV;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.data.SparseRowsIterator;
import org.apache.sysml.runtime.util.DataConverter;
import org.apache.sysml.runtime.util.FastStringTokenizer;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;

/**
 * TODO potential extension: parallel merge (create individual staging files concurrently)
 *     
 *      NOTE: file merge is typically used due to memory constraints; a parallel merge
 *      would increase memory consumption again.
 */
public class ResultMergeLocalFile extends ResultMerge
{
	
	//NOTE: if we allow simple copies, this might result in a scattered file and many MR tasks for subsequent jobs
	public static final boolean ALLOW_COPY_CELLFILES = false;	
	
	//internal sequence for unique staging file names
	private IDSequence _seq = null;
	
	public ResultMergeLocalFile( MatrixObject out, MatrixObject[] in, String outputFilename )
	{
		super( out, in, outputFilename );
		
		_seq = new IDSequence();
	}
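	
	/*
	 * Illustrative usage sketch (not part of the original class; variable names are
	 * hypothetical): a parfor driver would construct the merger with the original
	 * output, the worker results, and a target file name, then merge:
	 *
	 *   MatrixObject out = ...;             //original output matrix
	 *   MatrixObject[] ins = ...;           //per-worker result matrices
	 *   ResultMergeLocalFile rm = new ResultMergeLocalFile(out, ins, "scratch/Rmerged");
	 *   MatrixObject merged = rm.executeSerialMerge();
	 */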


	@Override
	public MatrixObject executeSerialMerge() 
		throws DMLRuntimeException 
	{
		MatrixObject moNew = null; //always create new matrix object (required for nested parallelism)

		//Timing time = null;
		LOG.trace("ResultMerge (local, file): Execute serial merge for output "+_output.getVarName()+" (fname="+_output.getFileName()+")");
		//	time = new Timing();
		//	time.start();
		
		try
		{
			//collect all relevant inputs
			ArrayList<MatrixObject> inMO = new ArrayList<MatrixObject>();
			for( MatrixObject in : _inputs )
			{
				//check for empty inputs (no iterations executed)
				if( in !=null && in != _output ) 
				{
					//ensure that input file resides on disk
					in.exportData();
					
					//add to merge list
					inMO.add( in );
				}
			}

			if( !inMO.isEmpty() )
			{
				//ensure that outputfile (for comparison) resides on disk
				_output.exportData();
				
				//actual merge
				merge( _outputFName, _output, inMO );
				
				//create new output matrix (e.g., to prevent potential export<->read file access conflict)
				moNew = createNewMatrixObject( _output, inMO );	
			}
			else
			{
				moNew = _output; //return old matrix, to prevent copy
			}
		}
		catch(Exception ex)
		{
			throw new DMLRuntimeException(ex);
		}

		//LOG.trace("ResultMerge (local, file): Executed serial merge for output "+_output.getVarName()+" (fname="+_output.getFileName()+") in "+time.stop()+"ms");
		
		return moNew;
	}
	
	@Override
	public MatrixObject executeParallelMerge(int par) 
		throws DMLRuntimeException 
	{
		//graceful degradation to serial merge
		return executeSerialMerge();
	}

	/**
	 * Creates the new output matrix object for the merged result, including a deep copy
	 * of the original metadata with an updated number of non-zeros.
	 * 
	 * @param output original output matrix object
	 * @param inMO list of merged input matrix objects
	 * @return new matrix object pointing to the merged output file
	 * @throws DMLRuntimeException
	 */
	private MatrixObject createNewMatrixObject(MatrixObject output, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		String varName = _output.getVarName();
		ValueType vt = _output.getValueType();
		MatrixFormatMetaData metadata = (MatrixFormatMetaData) _output.getMetaData();
		
		MatrixObject moNew = new MatrixObject( vt, _outputFName );
		moNew.setVarName( varName.contains(NAME_SUFFIX) ? varName : varName+NAME_SUFFIX );
		moNew.setDataType( DataType.MATRIX );
		
		//create deep copy of metadata obj
		MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
		OutputInfo oiOld = metadata.getOutputInfo();
		InputInfo iiOld = metadata.getInputInfo();
		MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(),mcOld.getCols(),
				                                             mcOld.getRowsPerBlock(),mcOld.getColsPerBlock());
		mc.setNonZeros( computeNonZeros(output, inMO) );
		MatrixFormatMetaData meta = new MatrixFormatMetaData(mc,oiOld,iiOld);
		moNew.setMetaData( meta );
		
		return moNew;
	}
	
	/**
	 * Dispatches the merge according to the output format (text cell, binary cell,
	 * binary block) and whether a compare against the original output is required.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object (used as compare matrix)
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	private void merge( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		OutputInfo oi = ((MatrixFormatMetaData)outMo.getMetaData()).getOutputInfo();
		boolean withCompare = ( outMo.getNnz() != 0 ); //if nnz exist or unknown (-1)
		
		if( oi == OutputInfo.TextCellOutputInfo )
		{
			if(withCompare)
				mergeTextCellWithComp(fnameNew, outMo, inMO);
			else
				mergeTextCellWithoutComp( fnameNew, outMo, inMO );
		}
		else if( oi == OutputInfo.BinaryCellOutputInfo )
		{
			if(withCompare)
				mergeBinaryCellWithComp(fnameNew, outMo, inMO);
			else
				mergeBinaryCellWithoutComp( fnameNew, outMo, inMO );
		}
		else if( oi == OutputInfo.BinaryBlockOutputInfo )
		{
			if(withCompare)
				mergeBinaryBlockWithComp( fnameNew, outMo, inMO );
			else
				mergeBinaryBlockWithoutComp( fnameNew, outMo, inMO );
		}
	}
	
	/**
	 * Merges text cell results without compare matrix via a simple stream merge
	 * of all input files into the target file.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	private void mergeTextCellWithoutComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		try
		{
			//delete target file if already exists
			MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
			
			if( ALLOW_COPY_CELLFILES )
			{
				copyAllFiles(fnameNew, inMO);
				return; //we're done
			}
			
			//actual merge
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
			FileSystem fs = FileSystem.get(job);
			Path path = new Path( fnameNew );
			BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path,true)));		
			
			String valueStr = null;
			
			try
			{
				for( MatrixObject in : inMO ) //read/write all inputs
				{
					LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+") via stream merge");
					
					JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
					Path tmpPath = new Path(in.getFileName());
					FileInputFormat.addInputPath(tmpJob, tmpPath);
					TextInputFormat informat = new TextInputFormat();
					informat.configure(tmpJob);
					InputSplit[] splits = informat.getSplits(tmpJob, 1);
					
					LongWritable key = new LongWritable();
					Text value = new Text();
		
					for(InputSplit split: splits)
					{
						RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob, Reporter.NULL);
						try
						{
							while(reader.next(key, value))
							{
								valueStr = value.toString().trim();	
								out.write( valueStr+"\n" );
							}
						}
						finally
						{
							if( reader != null )
								reader.close();
						}
					}
				}
			}
			finally
			{
				if( out != null )
					out.close();
			}
		}
		catch(Exception ex)
		{
			throw new DMLRuntimeException("Unable to merge text cell results.", ex);
		}
	}
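	
	/*
	 * For reference (a sketch, not from the original source): text cell data is one
	 * "<row> <col> <value>" triple per line with 1-based indexes, e.g.:
	 *
	 *   1 1 7.0
	 *   3 2 -0.5
	 *
	 * which is why a plain line-by-line copy of all worker outputs constitutes a
	 * valid merge whenever no compare matrix is required.
	 */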
	
	/**
	 * Merges text cell results with compare matrix via local staging files:
	 * stage compare blocks and worker results, then consolidate block-wise.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object (compare matrix)
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	private void mergeTextCellWithComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		String fnameStaging = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		String fnameStagingCompare = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		
		try
		{
			//delete target file if already exists
			MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
			
			//Step 0) write compare blocks to staging area (if necessary)
			LOG.trace("ResultMerge (local, file): Create merge compare matrix for output "+outMo.getVarName()+" (fname="+outMo.getFileName()+")");
			createTextCellStagingFile(fnameStagingCompare, outMo, 0);
			
			//Step 1) read and write blocks to staging area
			for( MatrixObject in : inMO )
			{
				LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+")");
				
				long ID = _seq.getNextID();
				createTextCellStagingFile( fnameStaging, in, ID );
			}
	
			//Step 2) read blocks, consolidate, and write to HDFS
			createTextCellResultFile(fnameStaging, fnameStagingCompare, fnameNew, (MatrixFormatMetaData)outMo.getMetaData(), true);
		}	
		catch(Exception ex)
		{
			throw new DMLRuntimeException("Unable to merge text cell results.", ex);
		}
		
		LocalFileUtils.cleanupWorkingDirectory(fnameStaging);
		LocalFileUtils.cleanupWorkingDirectory(fnameStagingCompare);
	}
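	
	/*
	 * Semantics of the three-step compare merge above (summary; mergeWithComp is
	 * inherited from ResultMerge): the original output serves as compare matrix and
	 * a worker value is copied only where it differs from the original, so in-place
	 * updates win while untouched cells retain their original values.
	 */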
	
	/**
	 * Merges binary cell results without compare matrix via a sequence file
	 * stream merge of all input files.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	@SuppressWarnings("deprecation")
	private void mergeBinaryCellWithoutComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		try
		{	
			//delete target file if already exists
			MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
			
			if( ALLOW_COPY_CELLFILES )
			{
				copyAllFiles(fnameNew, inMO);
				return; //we're done
			}
			
			//actual merge
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
			FileSystem fs = FileSystem.get(job);
			Path path = new Path( fnameNew );					
			SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class); //beware ca 50ms
			
			MatrixIndexes key = new MatrixIndexes();
			MatrixCell value = new MatrixCell();
			
			try
			{
				for( MatrixObject in : inMO ) //read/write all inputs
				{
					LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+") via stream merge");
					
					JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
					Path tmpPath = new Path(in.getFileName());
					
					for(Path lpath : MatrixReader.getSequenceFilePaths(fs, tmpPath) )
					{
						SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,tmpJob);
						try
						{
							while(reader.next(key, value))
							{
								out.append(key, value);
							}
						}
						finally
						{
							if( reader != null )
								reader.close();
						}
					}					
				}	
			}
			finally
			{
				if( out != null )
					out.close();
			}
		}
		catch(Exception ex)
		{
			throw new DMLRuntimeException("Unable to merge binary cell results.", ex);
		}	
	}
	
	/**
	 * Merges binary cell results with compare matrix via local staging files:
	 * stage compare cells and worker results, then consolidate block-wise.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object (compare matrix)
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	private void mergeBinaryCellWithComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		String fnameStaging = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		String fnameStagingCompare = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		
		try
		{
			//delete target file if already exists
			MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
			
			//Step 0) write compare blocks to staging area (if necessary)
			LOG.trace("ResultMerge (local, file): Create merge compare matrix for output "+outMo.getVarName()+" (fname="+outMo.getFileName()+")");
			createBinaryCellStagingFile(fnameStagingCompare, outMo, 0);
			
			//Step 1) read and write blocks to staging area
			for( MatrixObject in : inMO )
			{
				LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+")");
				
				long ID = _seq.getNextID();
				createBinaryCellStagingFile( fnameStaging, in, ID );
			}
	
			//Step 2) read blocks, consolidate, and write to HDFS
			createBinaryCellResultFile(fnameStaging, fnameStagingCompare, fnameNew, (MatrixFormatMetaData)outMo.getMetaData(), true);
		}	
		catch(Exception ex)
		{
			throw new DMLRuntimeException("Unable to merge binary cell results.", ex);
		}
		
		LocalFileUtils.cleanupWorkingDirectory(fnameStaging);
		LocalFileUtils.cleanupWorkingDirectory(fnameStagingCompare);
	}
	
	/**
	 * Merges binary block results without compare matrix via local staging files
	 * and block-wise consolidation.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	private void mergeBinaryBlockWithoutComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		String fnameStaging = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		
		try
		{
			//delete target file if already exists
			MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
			
			//Step 1) read and write blocks to staging area
			for( MatrixObject in : inMO )
			{
				LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+")");				
				
				createBinaryBlockStagingFile( fnameStaging, in );
			}
	
			//Step 2) read blocks, consolidate, and write to HDFS
			createBinaryBlockResultFile(fnameStaging, null, fnameNew, (MatrixFormatMetaData)outMo.getMetaData(), false);
		}	
		catch(Exception ex)
		{
			throw new DMLRuntimeException("Unable to merge binary block results.", ex);
		}	
		
		LocalFileUtils.cleanupWorkingDirectory(fnameStaging);
	}
	
	/**
	 * Merges binary block results with compare matrix via local staging files:
	 * stage compare blocks and worker results, then consolidate block-wise.
	 * 
	 * @param fnameNew target file name of the merged result
	 * @param outMo original output matrix object (compare matrix)
	 * @param inMO list of worker result matrix objects
	 * @throws DMLRuntimeException
	 */
	private void mergeBinaryBlockWithComp( String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO ) 
		throws DMLRuntimeException
	{
		String fnameStaging = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		String fnameStagingCompare = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
		
		try
		{
			//delete target file if already exists
			MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
			
			//Step 0) write compare blocks to staging area (if necessary)
			LOG.trace("ResultMerge (local, file): Create merge compare matrix for output "+outMo.getVarName()+" (fname="+outMo.getFileName()+")");			
			
			createBinaryBlockStagingFile(fnameStagingCompare, outMo);
			
			//Step 1) read and write blocks to staging area
			for( MatrixObject in : inMO )
			{
				LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+")");		
				createBinaryBlockStagingFile( fnameStaging, in );
			}
	
			//Step 2) read blocks, consolidate, and write to HDFS
			createBinaryBlockResultFile(fnameStaging, fnameStagingCompare, fnameNew, (MatrixFormatMetaData)outMo.getMetaData(), true);
		}	
		catch(Exception ex)
		{
			throw new DMLRuntimeException("Unable to merge binary block results.", ex);
		}	
		
		LocalFileUtils.cleanupWorkingDirectory(fnameStaging);
		LocalFileUtils.cleanupWorkingDirectory(fnameStagingCompare);
	}
	
	/**
	 * Reads all blocks of the given matrix and writes each non-empty block to the
	 * local staging area, one directory per block index pair.
	 * 
	 * @param fnameStaging local staging directory
	 * @param mo input matrix object
	 * @throws IOException
	 */
	@SuppressWarnings("deprecation")
	private void createBinaryBlockStagingFile( String fnameStaging, MatrixObject mo ) 
		throws IOException
	{		
		MatrixIndexes key = new MatrixIndexes(); 
		MatrixBlock value = new MatrixBlock();
		
		JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
		FileSystem fs = FileSystem.get(tmpJob);
		Path tmpPath = new Path(mo.getFileName());
		
		for(Path lpath : MatrixReader.getSequenceFilePaths(fs, tmpPath))
		{
			SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,tmpJob);
			try
			{
				while(reader.next(key, value)) //for each block
				{							
					String lname = key.getRowIndex()+"_"+key.getColumnIndex();
					String dir = fnameStaging+"/"+lname;
					if( value.getNonZeros()>0 ) //write only non-empty blocks
					{
						LocalFileUtils.checkAndCreateStagingDir( dir );
						LocalFileUtils.writeMatrixBlockToLocal(dir+"/"+_seq.getNextID(), value);
					}
				}
			}
			finally
			{
				if( reader != null )
					reader.close();
			}
		}
	}
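	
	/*
	 * Resulting staging layout (illustrative; names follow the code above): one
	 * directory per block index pair, containing one serialized block per worker
	 * that produced a non-empty block, named via the ID sequence:
	 *
	 *   <fnameStaging>/1_1/1
	 *   <fnameStaging>/1_1/2
	 *   <fnameStaging>/2_1/3
	 *
	 * createBinaryBlockResultFile later scans exactly these directories per
	 * (brow,bcol) pair and consolidates all blocks it finds there.
	 */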
	
	/**
	 * Parses a text cell input into cells and appends them, buffered and grouped
	 * by block index, to the local staging area.
	 * 
	 * @param fnameStaging local staging directory
	 * @param mo input matrix object
	 * @param ID unique id of this input (used as staging file name)
	 * @throws IOException
	 * @throws DMLRuntimeException
	 */
	private void createTextCellStagingFile( String fnameStaging, MatrixObject mo, long ID ) 
		throws IOException, DMLRuntimeException
	{		
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path(mo.getFileName());
		FileInputFormat.addInputPath(job, path);
		TextInputFormat informat = new TextInputFormat();
		informat.configure(job);
		InputSplit[] splits = informat.getSplits(job, 1);
		
		LinkedList<Cell> buffer = new LinkedList<Cell>();
		LongWritable key = new LongWritable();
		Text value = new Text();

		MatrixCharacteristics mc = mo.getMatrixCharacteristics();
		int brlen = mc.getRowsPerBlock(); 
		int bclen = mc.getColsPerBlock();
		//long row = -1, col = -1; //FIXME needs reconsideration whenever textcell is used actively
		//NOTE MB: Originally, we used long row, col but this led reproducibly to JIT compilation
		// errors during runtime; experienced under WINDOWS, Intel x86-64, IBM JDK 64bit/32bit.
		// It works fine with int row, col but we require long for larger matrices.
		// Since textcell is never used for result merge (hybrid/hadoop: binaryblock, singlenode: binarycell)
		// we just propose to exclude it with -Xjit:exclude={package.method*}(count=0,optLevel=0)
		
		FastStringTokenizer st = new FastStringTokenizer(' ');
		
		for(InputSplit split : splits)
		{
			RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
			try
			{
				while(reader.next(key, value))
				{
					st.reset( value.toString() ); //reset tokenizer
					long row = st.nextLong();
					long col = st.nextLong();
					double lvalue = Double.parseDouble( st.nextToken() );
					
					Cell tmp = new Cell( row, col, lvalue ); 
					
					buffer.addLast( tmp );
					if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE ) //periodic flush
					{
						appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
						buffer.clear();
					}
				}
				
				//final flush
				if( !buffer.isEmpty() )
				{
					appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
					buffer.clear();
				}
			}
			finally
			{
				if( reader != null )
					reader.close();
			}
		}
	}
	
	/**
	 * Reads a binary cell input and appends its cells, buffered and grouped
	 * by block index, to the local staging area.
	 * 
	 * @param fnameStaging local staging directory
	 * @param mo input matrix object
	 * @param ID unique id of this input (used as staging file name)
	 * @throws IOException
	 * @throws DMLRuntimeException
	 */
	@SuppressWarnings("deprecation")
	private void createBinaryCellStagingFile( String fnameStaging, MatrixObject mo, long ID ) 
		throws IOException, DMLRuntimeException
	{		
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path(mo.getFileName());
		FileSystem fs = FileSystem.get(job);
		
		LinkedList<Cell> buffer = new LinkedList<Cell>();
		MatrixIndexes key = new MatrixIndexes();
		MatrixCell value = new MatrixCell();
	
		MatrixCharacteristics mc = mo.getMatrixCharacteristics();
		int brlen = mc.getRowsPerBlock();
		int bclen = mc.getColsPerBlock();
		
		for(Path lpath: MatrixReader.getSequenceFilePaths(fs, path))
		{
			SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job);
			try
			{
				while(reader.next(key, value))
				{
					Cell tmp = new Cell( key.getRowIndex(), key.getColumnIndex(), value.getValue() ); 
	
					buffer.addLast( tmp );
					if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE ) //periodic flush
					{
						appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
						buffer.clear();
					}
				}
				
				//final flush
				if( !buffer.isEmpty() )
				{
					appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
					buffer.clear();
				}
			}
			finally
			{
				if( reader != null )
					reader.close();
			}
		}
	}
	
	/**
	 * Groups the buffered cells by block index, converts their coordinates to
	 * block-local offsets, and appends the resulting cell lists to the staging area.
	 * 
	 * @param fnameStaging local staging directory
	 * @param ID unique id of the input (used as staging file name)
	 * @param buffer list of cells to flush
	 * @param brlen number of rows per block
	 * @param bclen number of columns per block
	 * @throws DMLRuntimeException
	 * @throws IOException
	 */
	private void appendCellBufferToStagingArea( String fnameStaging, long ID, LinkedList<Cell> buffer, int brlen, int bclen ) 
		throws DMLRuntimeException, IOException
	{
		HashMap<Long,HashMap<Long,LinkedList<Cell>>> sortedBuffer = new HashMap<Long,HashMap<Long,LinkedList<Cell>>>();
		long brow, bcol, row_offset, col_offset;
		
		for( Cell c : buffer )
		{
			brow = (c.getRow()-1)/brlen + 1;
			bcol = (c.getCol()-1)/bclen + 1;
			row_offset = (brow-1)*brlen + 1;
			col_offset = (bcol-1)*bclen + 1;
			
			c.setRow( c.getRow() - row_offset);
			c.setCol(c.getCol() - col_offset);
			
			if( !sortedBuffer.containsKey(brow) )
				sortedBuffer.put(brow, new HashMap<Long,LinkedList<Cell>>());
			if( !sortedBuffer.get(brow).containsKey(bcol) )
				sortedBuffer.get(brow).put(bcol, new LinkedList<Cell>());
			sortedBuffer.get(brow).get(bcol).addLast(c);
		}	
		
		//write lists of cells to local files
		for( Entry<Long,HashMap<Long,LinkedList<Cell>>> e : sortedBuffer.entrySet() )
		{
			brow = e.getKey();
			for( Entry<Long,LinkedList<Cell>> e2 : e.getValue().entrySet() )
			{
				bcol = e2.getKey();
				String lname = brow+"_"+bcol;
				String dir = fnameStaging+"/"+lname;
				LocalFileUtils.checkAndCreateStagingDir( dir );
				StagingFileUtils.writeCellListToLocal(dir+"/"+ID, e2.getValue());
			}
		}
	}	
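	
	/*
	 * Worked example for the index arithmetic above (illustrative values): with
	 * brlen=bclen=1000, cell (row=1500, col=3) maps to block brow=(1500-1)/1000+1=2,
	 * bcol=(3-1)/1000+1=1, with row_offset=(2-1)*1000+1=1001 and col_offset=1;
	 * it is stored block-locally as (1500-1001, 3-1) = (499, 2) in staging
	 * directory "2_1".
	 */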
	
	/**
	 * Consolidates staged blocks into the final binary block result file on HDFS,
	 * optionally merging against the compare blocks; missing blocks are written
	 * as empty blocks of the correct dimensions.
	 * 
	 * @param fnameStaging staging dir of worker results
	 * @param fnameStagingCompare staging dir of the compare matrix (null if unused)
	 * @param fnameNew target file name of the merged result
	 * @param metadata matrix metadata (dimensions, block sizes)
	 * @param withCompare indicates if a compare matrix is used
	 * @throws IOException
	 * @throws DMLRuntimeException
	 */
	@SuppressWarnings("deprecation")
	private void createBinaryBlockResultFile( String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare ) 
		throws IOException, DMLRuntimeException
	{
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		FileSystem fs = FileSystem.get(job);
		Path path = new Path( fnameNew );	
		
		MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
		long rlen = mc.getRows();
		long clen = mc.getCols();
		int brlen = mc.getRowsPerBlock();
		int bclen = mc.getColsPerBlock();
		
		SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class); //beware ca 50ms
		try
		{
			MatrixIndexes indexes = new MatrixIndexes();
			for(long brow = 1; brow <= (long)Math.ceil(rlen/(double)brlen); brow++)
				for(long bcol = 1; bcol <= (long)Math.ceil(clen/(double)bclen); bcol++)
				{
					File dir = new File(fnameStaging+"/"+brow+"_"+bcol);
					File dir2 = new File(fnameStagingCompare+"/"+brow+"_"+bcol);
					MatrixBlock mb = null;
					
					if( dir.exists() )
					{
						if( withCompare && dir2.exists() ) //WITH COMPARE BLOCK
						{
							//copy only values that are different from the original
							String[] lnames2 = dir2.list();
							if( lnames2.length != 1 ) //there should be exactly 1 compare block
								throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
							mb = LocalFileUtils.readMatrixBlockFromLocal( dir2+"/"+lnames2[0] );
							boolean appendOnly = mb.isInSparseFormat();
							double[][] compare = DataConverter.convertToDoubleMatrix(mb);
							
							String[] lnames = dir.list();
							for( String lname : lnames )
							{
								MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal( dir+"/"+lname );
								mergeWithComp(mb, tmp, compare);
							}
							
							//sort sparse due to append-only
							if( appendOnly )
								mb.sortSparseRows();
							
							//change sparsity representation if required
							mb.examSparsity(); 
						}
						else //WITHOUT COMPARE BLOCK
						{
							//copy all non-zeros from all workers
							String[] lnames = dir.list();
							boolean appendOnly = false;
							for( String lname : lnames )
							{
								if( mb == null )
								{
									mb = LocalFileUtils.readMatrixBlockFromLocal( dir+"/"+lname );
									appendOnly = mb.isInSparseFormat();
								}
								else
								{
									MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal( dir+"/"+lname );
									mergeWithoutComp(mb, tmp, appendOnly);
								}
							}	
							
							//sort sparse due to append-only
							if( appendOnly )
								mb.sortSparseRows();
							
							//change sparsity representation if required
							mb.examSparsity(); 
						}
					}
					else
					{
						//NOTE: whenever runtime does not need all blocks anymore, this can be removed
						int maxRow = (int)(((brow-1)*brlen + brlen < rlen) ? brlen : rlen - (brow-1)*brlen);
						int maxCol = (int)(((bcol-1)*bclen + bclen < clen) ? bclen : clen - (bcol-1)*bclen);
				
						mb = new MatrixBlock(maxRow, maxCol, true);
					}	
					
					//mb.examSparsity(); //done on write anyway and mb not reused
					indexes.setIndexes(brow, bcol);
					writer.append(indexes, mb);
				}	
		}
		finally
		{
			if( writer != null )
				writer.close();
		}
	}
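	
	/*
	 * Note on the boundary-block sizes above (worked example, illustrative): for
	 * rlen=2500 and brlen=1000, blocks brow=1,2 get maxRow=1000 while the boundary
	 * block brow=3 gets maxRow=2500-(3-1)*1000=500; empty blocks of exactly these
	 * dimensions are emitted so consumers see a complete block-aligned matrix.
	 */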
	
	/**
	 * Consolidates staged cell lists into the final text cell result file on HDFS,
	 * optionally merging against the compare blocks.
	 * 
	 * @param fnameStaging staging dir of worker results
	 * @param fnameStagingCompare staging dir of the compare matrix
	 * @param fnameNew target file name of the merged result
	 * @param metadata matrix metadata (dimensions, block sizes)
	 * @param withCompare indicates if a compare matrix is used
	 * @throws IOException
	 * @throws DMLRuntimeException
	 */
	private void createTextCellResultFile( String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare ) 
		throws IOException, DMLRuntimeException
	{
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		FileSystem fs = FileSystem.get(job);
		Path path = new Path( fnameNew );	
		
		MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
		long rlen = mc.getRows();
		long clen = mc.getCols();
		int brlen = mc.getRowsPerBlock();
		int bclen = mc.getColsPerBlock();
				
		BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path,true)));		
		try
		{
			//for obj reuse and preventing repeated buffer re-allocations
			StringBuilder sb = new StringBuilder();
			
			boolean written=false;
			for(long brow = 1; brow <= (long)Math.ceil(rlen/(double)brlen); brow++)
				for(long bcol = 1; bcol <= (long)Math.ceil(clen/(double)bclen); bcol++)
				{
					File dir = new File(fnameStaging+"/"+brow+"_"+bcol);
					File dir2 = new File(fnameStagingCompare+"/"+brow+"_"+bcol);
					MatrixBlock mb = null;
					
					long row_offset = (brow-1)*brlen + 1;
					long col_offset = (bcol-1)*bclen + 1;
					
					
					if( dir.exists() )
					{
						if( withCompare && dir2.exists() ) //WITH COMPARE BLOCK
						{
							//copy only values that are different from the original
							String[] lnames2 = dir2.list();
							if( lnames2.length != 1 ) //there should be exactly 1 compare block
								throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
							mb = StagingFileUtils.readCellList2BlockFromLocal( dir2+"/"+lnames2[0], brlen, bclen );
							boolean appendOnly = mb.isInSparseFormat();
							double[][] compare = DataConverter.convertToDoubleMatrix(mb);
							
							String[] lnames = dir.list();
							for( String lname : lnames )
							{
								MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(  dir+"/"+lname, brlen, bclen );
								mergeWithComp(mb, tmp, compare);
							}
							
							//sort sparse and exam sparsity due to append-only
							if( appendOnly )
								mb.sortSparseRows();
							
							//change sparsity representation if required
							mb.examSparsity(); 
						}
						else //WITHOUT COMPARE BLOCK
						{
							//copy all non-zeros from all workers
							String[] lnames = dir.list();
							boolean appendOnly = false;
							for( String lname : lnames )
							{
								if( mb == null )
								{
									mb = StagingFileUtils.readCellList2BlockFromLocal( dir+"/"+lname, brlen, bclen );
									appendOnly = mb.isInSparseFormat();
								}
								else
								{
									MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(  dir+"/"+lname, brlen, bclen );
									mergeWithoutComp(mb, tmp, appendOnly);
								}
							}	
							
							//sort sparse due to append-only
							if( appendOnly )
								mb.sortSparseRows();
							
							//change sparsity representation if required
							mb.examSparsity(); 
						}
					}

					//write the block to text cell
					if( mb!=null )
					{
						if( mb.isInSparseFormat() )
						{
							SparseRowsIterator iter = mb.getSparseRowsIterator();
							while( iter.hasNext() )
							{
								IJV lcell = iter.next();
								sb.append(row_offset+lcell.i);
								sb.append(' ');
								sb.append(col_offset+lcell.j);
								sb.append(' ');
								sb.append(lcell.v);
								sb.append('\n');
								out.write( sb.toString() ); 
								sb.setLength(0);
								written = true;
							}							
						}
						else
						{
							for( int i=0; i<mb.getNumRows(); i++ )
								for( int j=0; j<mb.getNumColumns(); j++ )
								{
									double lvalue = mb.getValueDenseUnsafe(i, j);
									if( lvalue != 0 ) //for nnz
									{
										sb.append(row_offset+i);
										sb.append(' ');
										sb.append(col_offset+j);
										sb.append(' ');
										sb.append(lvalue);
										sb.append('\n');
										out.write( sb.toString() ); 
										sb.setLength(0);
										written = true;
									}
								}
						}
					}
				}
			
			//ensure valid output file (text cell requires at least one entry)
			if( !written )
				out.write("1 1 0\n");
		}
		finally
		{
			if( out != null )
				out.close();
		}
	}
	
	/**
	 * Consolidates staged cell lists into the final binary cell result file on HDFS,
	 * optionally merging against the compare blocks.
	 * 
	 * @param fnameStaging staging dir of worker results
	 * @param fnameStagingCompare staging dir of the compare matrix
	 * @param fnameNew target file name of the merged result
	 * @param metadata matrix metadata (dimensions, block sizes)
	 * @param withCompare indicates if a compare matrix is used
	 * @throws IOException
	 * @throws DMLRuntimeException
	 */
	@SuppressWarnings("deprecation")
	private void createBinaryCellResultFile( String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare ) 
		throws IOException, DMLRuntimeException
	{
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		FileSystem fs = FileSystem.get(job);
		Path path = new Path( fnameNew );	
		
		MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
		long rlen = mc.getRows();
		long clen = mc.getCols();
		int brlen = mc.getRowsPerBlock();
		int bclen = mc.getColsPerBlock();
		
		MatrixIndexes indexes = new MatrixIndexes();
		MatrixCell cell = new MatrixCell();
		
		SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class); //beware ca 50ms
		try
		{
			boolean written = false;
			for(long brow = 1; brow <= (long)Math.ceil(rlen/(double)brlen); brow++)
				for(long bcol = 1; bcol <= (long)Math.ceil(clen/(double)bclen); bcol++)
				{
					File dir = new File(fnameStaging+"/"+brow+"_"+bcol);
					File dir2 = new File(fnameStagingCompare+"/"+brow+"_"+bcol);
					MatrixBlock mb = null;
					
					long row_offset = (brow-1)*brlen + 1;
					long col_offset = (bcol-1)*bclen + 1;
					
					if( dir.exists() )
					{
						if( withCompare && dir2.exists() ) //WITH COMPARE BLOCK
						{
							//copy only values that are different from the original
							String[] lnames2 = dir2.list();
							if( lnames2.length != 1 ) //there should be exactly 1 compare block
								throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
							mb = StagingFileUtils.readCellList2BlockFromLocal( dir2+"/"+lnames2[0], brlen, bclen );
							boolean appendOnly = mb.isInSparseFormat();
							double[][] compare = DataConverter.convertToDoubleMatrix(mb);
							
							String[] lnames = dir.list();
							for( String lname : lnames )
							{
								MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal( dir+"/"+lname, brlen, bclen );
								mergeWithComp(mb, tmp, compare);
							}
							
							//sort sparse due to append-only
							if( appendOnly )
								mb.sortSparseRows();
							
							//change sparsity representation if required
							mb.examSparsity(); 
						}
						else //WITHOUT COMPARE BLOCK
						{
							//copy all non-zeros from all workers
							String[] lnames = dir.list();
							boolean appendOnly = false;
							for( String lname : lnames )
							{
								if( mb == null )
								{
									mb = StagingFileUtils.readCellList2BlockFromLocal( dir+"/"+lname, brlen, bclen );
									appendOnly = mb.isInSparseFormat();
								}
								else
								{
									MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal( dir+"/"+lname, brlen, bclen );
									mergeWithoutComp(mb, tmp, appendOnly);
								}
							}	
							
							//sort sparse due to append-only
							if( appendOnly )
								mb.sortSparseRows();
							
							//change sparsity representation if required
							mb.examSparsity(); 
						}
					}
					
					//write the block to binary cell
					if( mb != null )
					{
						if( mb.isInSparseFormat() )
						{
							SparseRowsIterator iter = mb.getSparseRowsIterator();
							while( iter.hasNext() )
							{
								IJV lcell = iter.next();
								indexes.setIndexes(row_offset+lcell.i, col_offset+lcell.j);
								cell.setValue(lcell.v);
								out.append(indexes, cell);
								written = true;
							}
						}
						else
						{
							for( int i=0; i<mb.getNumRows(); i++ )
								for( int j=0; j<mb.getNumColumns(); j++ )
								{
									double lvalue = mb.getValueDenseUnsafe(i, j);
									if( lvalue != 0 ) //for nnz
									{
										indexes.setIndexes(row_offset+i, col_offset+j);
										cell.setValue(lvalue);
										out.append(indexes, cell);
										written = true;
									}
								}
						}
					}
				}
			
			//ensure valid output file (at least one cell)
			if( !written )
			{
				indexes.setIndexes(1, 1);
				cell.setValue(0);
				out.append(indexes, cell);
			}
		}
		finally
		{
			if( out != null )
				out.close();
		}
	}
	
	/**
	 * Moves all input files into the target directory via rename
	 * (only applicable if ALLOW_COPY_CELLFILES is enabled).
	 * 
	 * @param fnameNew target directory of the merged result
	 * @param inMO list of worker result matrix objects
	 * @throws CacheException
	 * @throws IOException
	 */
	private void copyAllFiles( String fnameNew, ArrayList<MatrixObject> inMO ) 
		throws CacheException, IOException
	{
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		FileSystem fs = FileSystem.get(job);
		Path path = new Path( fnameNew );

		//create output dir
		fs.mkdirs(path);
		
		//merge in all input matrix objects
		IDSequence seq = new IDSequence();
		for( MatrixObject in : inMO )
		{			
			LOG.trace("ResultMerge (local, file): Merge input "+in.getVarName()+" (fname="+in.getFileName()+") via file rename.");
			
			//copy over files (just rename file or entire dir)
			Path tmpPath = new Path(in.getFileName());
			String lname = tmpPath.getName();
			fs.rename(tmpPath, new Path(fnameNew+"/"+lname+seq.getNextID()));
		}
	}

}



