All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.sysml.runtime.instructions.cpfile.ParameterizedBuiltinCPFileInstruction Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.instructions.cpfile;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map.Entry;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.util.Cell;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
import org.apache.sysml.runtime.controlprogram.parfor.util.StagingFileUtils;
import org.apache.sysml.runtime.functionobjects.ParameterizedBuiltin;
import org.apache.sysml.runtime.functionobjects.ValueFunction;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.io.MatrixWriter;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.runtime.matrix.operators.SimpleOperator;
import org.apache.sysml.runtime.util.FastStringTokenizer;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;

/**
 * File-based (out-of-core) realization of remove empty for robustness because there is no
 * parallel version due to data-dependent row- and column dependencies.
 * 
 */
public class ParameterizedBuiltinCPFileInstruction extends ParameterizedBuiltinCPInstruction 
{	
	
	/**
	 * Creates a file-based parameterized builtin instruction. All arguments are
	 * passed unchanged to the superclass constructor.
	 * 
	 * @param op operator of the instruction
	 * @param paramsMap parameter name/value pairs (e.g. "target", "margin")
	 * @param out output operand
	 * @param opcode instruction opcode
	 * @param istr original instruction string
	 */
	public ParameterizedBuiltinCPFileInstruction(Operator op, HashMap<String, String> paramsMap, CPOperand out, String opcode, String istr) 
	{
		super(op, paramsMap, out, opcode, istr);
	}

	/**
	 * Parses an instruction string into a file-based parameterized builtin instruction.
	 * The first instruction part is used as opcode, the last part as output operand,
	 * and the parameter map is built from the parts via {@code constructParameterMap}.
	 * Only the opcode "rmempty" (compared case-insensitively) is accepted.
	 * 
	 * @param str instruction string
	 * @return the parsed instruction
	 * @throws DMLRuntimeException if the opcode is not "rmempty"
	 * @throws DMLUnsupportedOperationException declared in the signature; not thrown directly by this method
	 */
	public static ParameterizedBuiltinCPFileInstruction parseInstruction( String str ) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
		// first part is always the opcode, last part is always the output
		String opcode = parts[0];
		CPOperand out = new CPOperand( parts[parts.length-1] ); 

		// process remaining parts and build a hash map
		HashMap<String, String> paramsMap = constructParameterMap(parts);

		// reject every opcode other than remove-empty
		if ( !opcode.equalsIgnoreCase("rmempty") ) {
			throw new DMLRuntimeException("Unknown opcode (" + opcode + ") for ParameterizedBuiltin Instruction.");
		}

		// determine the appropriate value function and wrap it into an operator
		ValueFunction func = ParameterizedBuiltin.getParameterizedBuiltinFnObject(opcode);
		return new ParameterizedBuiltinCPFileInstruction(new SimpleOperator(func), paramsMap, out, opcode, str);
	}
	
	/**
	 * Executes the instruction. For opcode "rmempty" (compared case-insensitively) the
	 * "target" matrix and the output matrix are looked up in the execution context, the
	 * target is exported, {@link RemoveEmpty} is run, and its result is bound to the
	 * output variable name.
	 * 
	 * @param ec execution context holding the variables
	 * @throws DMLRuntimeException if the opcode is not "rmempty", or propagated from the execution
	 * @throws DMLUnsupportedOperationException declared in the signature; not thrown directly by this method
	 */
	@Override 
	public void processInstruction(ExecutionContext ec) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		final String opcode = getOpcode();
		
		//guard: nothing but remove-empty is handled here
		if ( !opcode.equalsIgnoreCase("rmempty") ) {
			throw new DMLRuntimeException("Unknown opcode : " + opcode);
		}
		
		//resolve input matrix, output matrix and margin
		MatrixObject input = (MatrixObject)ec.getVariable( params.get("target") );
		MatrixObject target = (MatrixObject)ec.getVariable( output.getName() );
		String margin = params.get("margin");
		
		//make the input available as a file (if necessary)
		input.exportData();
		
		//run the file-based remove-empty and bind its result
		MatrixObject cleaned = new RemoveEmpty( margin, input, target ).execute();
		ec.setVariable(output.getName(), cleaned);
	}

	/**
	 * Remove empty rows or columns as a static nested class in order to allow testing
	 * independent of the overall SystemML instruction framework.
	 * 
	 */
	public static class RemoveEmpty
	{
		//margin of the operation; compared against "rows" by the methods of this class
		private String _margin;
		//input matrix object
		private MatrixObject _src;
		//output matrix object (provides the output file name and variable name)
		private MatrixObject _out;
		
		/**
		 * Stores the arguments; no work is done until {@code execute()} is called.
		 * 
		 * @param margin margin string, "rows" selects row removal
		 * @param src input matrix object
		 * @param out output matrix object
		 */
		public RemoveEmpty( String margin, MatrixObject src, MatrixObject out )
		{
			this._margin = margin;
			this._src = src;
			this._out = out;
		}
		
		/**
		 * Runs the file-based remove-empty in three phases: (1) the input file is written
		 * to a local staging directory according to its input format, (2) the staged data
		 * is scanned and a key mapping is created, (3) the result file is written to the
		 * output file name (an existing file of that name is deleted first).
		 * 
		 * The staging directory is cleaned up in all cases, i.e., also if one of the
		 * phases fails with an exception.
		 * 
		 * @return new output matrix object; the value returned by the key mapping phase is
		 *         used as its number of rows (margin "rows") or columns (otherwise)
		 * @throws DMLRuntimeException if a phase fails; IOExceptions are wrapped
		 */
		public MatrixObject execute() 
			throws DMLRuntimeException 
		{
			//initial setup
			String fnameOld = _src.getFileName();
			String fnameNew = _out.getFileName();
			InputInfo ii = ((MatrixFormatMetaData)_src.getMetaData()).getInputInfo();
			MatrixCharacteristics mc = _src.getMatrixCharacteristics();
			
			String stagingDir = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_WORK);
			LocalFileUtils.createLocalFileIfNotExist(stagingDir);
			
			long ret = -1;
			try
			{
				boolean diagBlocks = false;
				
				//Phase 1: write file to staging 
				if( ii == InputInfo.TextCellInputInfo )
					createTextCellStagingFile( fnameOld, stagingDir );
				else if( ii == InputInfo.BinaryCellInputInfo )
					createBinaryCellStagingFile( fnameOld, stagingDir );
				else if( ii == InputInfo.BinaryBlockInputInfo )
					diagBlocks = createBinaryBlockStagingFile( fnameOld, stagingDir );
				
				//Phase 2: scan empty rows/cols (specialized variant if only diagonal blocks were staged)
				if( diagBlocks )
					ret = createKeyMappingDiag(stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii);
				else
					ret = createKeyMapping(stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii);
				
				//Phase 3: create output files
				MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
				if(   ii == InputInfo.TextCellInputInfo 
				   || ii == InputInfo.BinaryCellInputInfo )
				{
					createCellResultFile( fnameNew, stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
				}
				else if( ii == InputInfo.BinaryBlockInputInfo )
				{
					if( diagBlocks )
						createBlockResultFileDiag( fnameNew, stagingDir, mc.getRows(), mc.getCols(), ret, mc.getNonZeros(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
					else
						createBlockResultFile( fnameNew, stagingDir, mc.getRows(), mc.getCols(), ret, mc.getNonZeros(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
				}
			}
			catch( IOException ioe )
			{
				throw new DMLRuntimeException( ioe );
			}
			finally
			{
				//final cleanup, also on failure so that the staging directory does not leak
				LocalFileUtils.cleanupWorkingDirectory(stagingDir);
			}
			
			//create and return new output object
			if( _margin.equals("rows") )
				return createNewOutputObject(_src, _out, ret, mc.getCols());
			else
				return createNewOutputObject(_src, _out, mc.getRows(), ret );
		}
		
		/**
		 * Builds the matrix object that describes the result: value type and format 
		 * metadata are taken from the input object, file name and variable name from the 
		 * output object. If one of the given dimensions is 0, both dimensions are raised
		 * to at least 1 and a new MatrixBlock of that size is set as the object's data.
		 * 
		 * @param src input matrix object (value type, input/output info, block sizes, nnz)
		 * @param out output matrix object (file name, variable name)
		 * @param rows number of rows of the result
		 * @param cols number of columns of the result
		 * @return the newly created matrix object
		 * @throws DMLRuntimeException if setting the block for an empty result fails
		 */
		private MatrixObject createNewOutputObject( MatrixObject src, MatrixObject out, long rows, long cols ) 
			throws DMLRuntimeException
		{
			final MatrixFormatMetaData srcMeta = (MatrixFormatMetaData) src.getMetaData();
			
			MatrixObject result = new MatrixObject( src.getValueType(), out.getFileName() );
			result.setVarName( out.getVarName() );
			result.setDataType( DataType.MATRIX );
			
			//an empty result still needs valid dimensions
			long nrow = rows;
			long ncol = cols;
			if( nrow == 0 || ncol == 0 )
			{
				nrow = Math.max(nrow, 1);
				ncol = Math.max(ncol, 1);
				try 
				{
					result.acquireModify(new MatrixBlock((int)nrow, (int)ncol, true));
					result.release();
				} 
				catch (CacheException e) 
				{
					throw new DMLRuntimeException(e);
				}
			}
			
			//fresh metadata objects, so that the input's metadata is not shared
			MatrixCharacteristics mcSrc = srcMeta.getMatrixCharacteristics();
			MatrixCharacteristics mcNew = new MatrixCharacteristics( nrow, ncol, 
					mcSrc.getRowsPerBlock(), mcSrc.getColsPerBlock(), mcSrc.getNonZeros() );
			result.setMetaData( new MatrixFormatMetaData(mcNew, srcMeta.getOutputInfo(), srcMeta.getInputInfo()) );

			return result;
		}

		/**
		 * Stages a text cell input file in the local staging directory. Every input line
		 * is tokenized by ' ' into row index, column index and value. The cells are
		 * buffered and appended to the staging area whenever the buffer grows beyond
		 * {@code StagingFileUtils.CELL_BUFFER_SIZE}, and once more at the end of each 
		 * input split.
		 * 
		 * @param fnameOld file name of the input file
		 * @param stagingDir local staging directory
		 * @throws IOException if the input file does not exist, or propagated from reading it
		 * @throws DMLRuntimeException propagated from appending cells to the staging area
		 */
		public void createTextCellStagingFile( String fnameOld, String stagingDir ) 
			throws IOException, DMLRuntimeException
		{	
			//prepare input
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
			Path path = new Path(fnameOld);
			FileSystem fs = FileSystem.get(job);
			if( !fs.exists(path) )	
				throw new IOException("File "+fnameOld+" does not exist on HDFS.");
			FileInputFormat.addInputPath(job, path); 
			TextInputFormat informat = new TextInputFormat();
			informat.configure(job);
			InputSplit[] splits = informat.getSplits(job, 1);
		
			LinkedList<Cell> buffer = new LinkedList<Cell>();
			
			//key, value and tokenizer are reused for all records
			LongWritable key = new LongWritable();
			Text value = new Text();
			FastStringTokenizer st = new FastStringTokenizer(' ');		
			
			for(InputSplit split: splits)
			{
				RecordReader<LongWritable,Text> reader = informat.getRecordReader(split, job, Reporter.NULL);				
				try
				{
					while( reader.next(key, value) )
					{
						st.reset( value.toString() ); //reset tokenizer
						long row = st.nextLong();
						long col = st.nextLong();
						double lvalue = st.nextDouble();
						
						buffer.add(new Cell(row,col,lvalue));
						
						//flush a full buffer to bound the memory consumption
						if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE )
						{
							appendCellBufferToStagingArea(stagingDir, buffer, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
							buffer.clear();
						}
					}
					
					//flush the remaining cells of this split
					if( !buffer.isEmpty() )
					{
						appendCellBufferToStagingArea(stagingDir, buffer, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
						buffer.clear();
					}
				}
				finally
				{
					if( reader != null )
						reader.close();
				}
			}
		}		

		/**
		 * Stages a binary cell input file in the local staging directory. All sequence
		 * files of the input path are read as (MatrixIndexes, MatrixCell) pairs. The
		 * cells are buffered and appended to the staging area whenever the buffer grows 
		 * beyond {@code StagingFileUtils.CELL_BUFFER_SIZE}, and once more at the end of 
		 * each sequence file.
		 * 
		 * @param fnameOld file name of the input file
		 * @param stagingDir local staging directory
		 * @throws IOException if the input file does not exist, or propagated from reading it
		 * @throws DMLRuntimeException propagated from appending cells to the staging area
		 */
		@SuppressWarnings("deprecation")
		public void createBinaryCellStagingFile( String fnameOld, String stagingDir ) 
			throws IOException, DMLRuntimeException
		{
			//prepare input
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
			Path path = new Path(fnameOld);
			FileSystem fs = FileSystem.get(job);
			if( !fs.exists(path) )	
				throw new IOException("File "+fnameOld+" does not exist on HDFS.");
			
			LinkedList<Cell> buffer = new LinkedList<Cell>();
			
			//key and value are reused for all records
			MatrixIndexes key = new MatrixIndexes();
			MatrixCell value = new MatrixCell();

			for(Path lpath: MatrixReader.getSequenceFilePaths(fs, path))
			{
				SequenceFile.Reader reader = new SequenceFile.Reader(fs,lpath,job);
				try
				{
					while(reader.next(key, value))
					{
						long row = key.getRowIndex();
						long col = key.getColumnIndex();
						double lvalue = value.getValue();
						
						buffer.add(new Cell(row,col,lvalue));
						
						//flush a full buffer to bound the memory consumption
						if( buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE )
						{
							appendCellBufferToStagingArea(stagingDir, buffer, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
							buffer.clear();
						}
					}
					
					//flush the remaining cells of this file
					if( !buffer.isEmpty() )
					{
						appendCellBufferToStagingArea(stagingDir, buffer, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
						buffer.clear();
					}
				}
				finally
				{
					if( reader != null )
						reader.close();
				}
			}
		}

		/**
		 * Stages a binary block input file: every non-empty block of every sequence file
		 * of the input path is written to the local file 
		 * {@code stagingDir/<rowIndex>_<columnIndex>}. While doing so, the method tracks
		 * whether all non-empty blocks have equal row and column index (diag), because 
		 * diag is the primary usecase and there is lots of optimization potential.
		 * 
		 * @param fnameOld file name of the input file
		 * @param stagingDir local staging directory
		 * @return true if every non-empty block had equal row and column block index 
		 *         (also true if no non-empty block was found), false otherwise
		 * @throws IOException if the input file does not exist, or propagated from reading/writing
		 * @throws DMLRuntimeException declared in the signature
		 */
		@SuppressWarnings("deprecation")
		public boolean createBinaryBlockStagingFile( String fnameOld, String stagingDir ) 
			throws IOException, DMLRuntimeException
		{
			//prepare input
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
			Path inPath = new Path(fnameOld);
			FileSystem fs = FileSystem.get(job);
			if( !fs.exists(inPath) )	
				throw new IOException("File "+fnameOld+" does not exist on HDFS.");
			
			//index and block are reused for all records
			MatrixIndexes ix = new MatrixIndexes(); 
			MatrixBlock blk = new MatrixBlock();
			boolean onlyDiag = true;
			
			for(Path part : MatrixReader.getSequenceFilePaths(fs, inPath))
			{
				SequenceFile.Reader in = new SequenceFile.Reader(fs,part,job);
				try
				{
					while( in.next(ix, blk) )
					{
						//skip empty blocks (important for diag)
						if( blk.isEmptyBlock() ) 
							continue;
						
						LocalFileUtils.writeMatrixBlockToLocal(
							stagingDir +"/"+ix.getRowIndex()+"_"+ix.getColumnIndex(), blk);
						
						//a single off-diagonal block disables the diag case
						if( ix.getRowIndex() != ix.getColumnIndex() )
							onlyDiag = false;
					}	
				}
				finally
				{
					in.close();
				}
			}
			
			return onlyDiag;
		}
		
		/**
		 * 
		 * @param dir
		 * @param buffer
		 * @param brlen
		 * @param bclen
		 * @throws DMLRuntimeException
		 * @throws IOException
		 */
		private void appendCellBufferToStagingArea( String dir, LinkedList buffer, int brlen, int bclen ) 
			throws DMLRuntimeException, IOException
		{
			HashMap> sortedBuffer = new HashMap>();
			
			//sort cells in buffer wrt key
			String key = null;
			for( Cell c : buffer )
			{
				key = (c.getRow()/brlen+1) +"_"+(c.getCol()/bclen+1);
				
				if( !sortedBuffer.containsKey(key) )
					sortedBuffer.put(key, new LinkedList());
				sortedBuffer.get(key).addLast(c);
			}	
			
			//write lists of cells to local files
			for( Entry> e : sortedBuffer.entrySet() )
			{
				
				String pfname = dir + "/" + e.getKey();
				StagingFileUtils.writeCellListToLocal(pfname, e.getValue());
			}
		}	

		/**
		 * 
		 * @param stagingDir
		 * @param rlen
		 * @param clen
		 * @param brlen
		 * @param bclen
		 * @param ii
		 * @return
		 * @throws FileNotFoundException
		 * @throws IOException
		 * @throws DMLRuntimeException
		 */
		private long createKeyMapping( String stagingDir, long rlen, long clen, int brlen, int bclen, InputInfo ii) 
			throws FileNotFoundException, IOException, DMLRuntimeException 
		{
			String metaOut = stagingDir+"/meta";
			
			long len = 0;
			long lastKey = 0;
			
			if(_margin.equals("rows"))
			{
				for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
				{	
					boolean[] flags = new boolean[brlen];
					for( int k=0; k buffer = StagingFileUtils.readCellListFromLocal(fname);
							for( Cell c : buffer )
								flags[ (int)c.getRow()-blockRow*brlen-1 ] = false;
						}
					} 
			
					//create and append key mapping
					LinkedList keyMapping = new LinkedList();
					for( int i = 0; i buffer = StagingFileUtils.readCellListFromLocal(fname);
							for( Cell c : buffer )
								flags[ (int)c.getCol()-blockCol*bclen-1 ] = false;
						}
					} 
			
					//create and append key mapping
					LinkedList keyMapping = new LinkedList();
					for( int i = 0; i buffer = StagingFileUtils.readCellListFromLocal(fname);
						for( Cell c : buffer )
							flags[ (int)c.getRow()-blockRow*brlen-1 ] = false;
					}
					 
			
					//create and append key mapping
					LinkedList keyMapping = new LinkedList();
					for( int i = 0; i buffer = StagingFileUtils.readCellListFromLocal(fname);
						for( Cell c : buffer )
							flags[ (int)c.getCol()-blockCol*bclen-1 ] = false;
					}
					 
			
					//create and append key mapping
					LinkedList keyMapping = new LinkedList();
					for( int i = 0; i> keyMap = new HashMap>();
			BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
			try
			{
				if( _margin.equals("rows") )
				{
					for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
					{
						StagingFileUtils.nextKeyMap(fkeyMap, keyMap, blockRow, brlen);		
						for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
						{
							String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
							LinkedList buffer = StagingFileUtils.readCellListFromLocal(fname);
							if( ii == InputInfo.TextCellInputInfo )
								for( Cell c : buffer )
								{
									sb.append(keyMap.get(blockRow).get(c.getRow()-1)+1);
									sb.append(' ');
									sb.append(c.getCol());
									sb.append(' ');
									sb.append(c.getValue());
									sb.append('\n');
									twriter.write( sb.toString() );	
									sb.setLength(0);
								}
							else if( ii == InputInfo.BinaryCellInputInfo )
								for( Cell c : buffer )
								{
									key.setIndexes(keyMap.get(blockRow).get(c.getRow()-1)+1, c.getCol());
									value.setValue(c.getValue());
									bwriter.append(key, value);	
								}
						}
						keyMap.remove(blockRow);
					}
				}
				else
				{
					for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
					{
						StagingFileUtils.nextKeyMap(fkeyMap, keyMap, blockCol, bclen);		
						for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
						{
							String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
							LinkedList buffer = StagingFileUtils.readCellListFromLocal(fname);
							if( ii == InputInfo.TextCellInputInfo )
								for( Cell c : buffer )
								{
									sb.append(c.getRow());
									sb.append(' ');
									sb.append(keyMap.get(blockCol).get(c.getCol()-1)+1);
									sb.append(' ');
									sb.append(c.getValue());
									sb.append('\n');
									twriter.write( sb.toString() );	
									sb.setLength(0);
								}
							else if( ii == InputInfo.BinaryCellInputInfo )
								for( Cell c : buffer )
								{
									key.setIndexes(c.getRow(), keyMap.get(blockCol).get(c.getCol()-1)+1);
									value.setValue(c.getValue());
									bwriter.append(key, value);	
								}
						}
						keyMap.remove(blockCol);
					}
				}

				//Note: no need to handle empty result
			}
			finally
			{
				if( twriter != null )
					twriter.close();	
				if( bwriter != null )
					bwriter.close();	
			}
		}
	
		/**
		 * Writes the remove-empty result for binary block input as a sequence file of
		 * (MatrixIndexes, MatrixBlock) pairs to fnameNew. For margin "rows" the output
		 * blocks are produced per block column, otherwise per block row; in both cases
		 * the key map is re-opened from stagingDir+"/meta" for every iteration of the
		 * outer loop and consumed in chunks via StagingFileUtils.nextSizedKeyMap.
		 * 
		 * NOTE(review): the headers and parts of the bodies of the two innermost for-loops
		 * appear to be truncated in this copy of the file (text between '<' and '>' is 
		 * missing), so the block copy logic itself is not documented here.
		 * 
		 * @param fnameNew file name of the result file
		 * @param stagingDir local staging directory; the key map is opened from its "meta" file
		 * @param rlen number of rows of the input (bounds the block row loops)
		 * @param clen number of columns of the input (bounds the block column loops)
		 * @param newlen length of the reduced dimension; used as row count (margin "rows") 
		 *               or column count (otherwise) when creating the reuse blocks
		 * @param nnz number of non-zeros; passed to the sparse format evaluation and the reuse blocks
		 * @param brlen number of rows per block
		 * @param bclen number of columns per block
		 * @param ii input info; not referenced in the visible part of this method
		 * @throws IOException
		 * @throws DMLRuntimeException
		 */
		@SuppressWarnings("deprecation")
		public void createBlockResultFile( String fnameNew, String stagingDir, long rlen, long clen, long newlen, long nnz, int brlen, int bclen, InputInfo ii ) 
			throws IOException, DMLRuntimeException
		{
			//prepare input
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
			Path path = new Path(fnameNew);
			FileSystem fs = FileSystem.get(job);
			String metaOut = stagingDir+"/meta";
	
			//prepare output
			SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
			
			//output key, reused for all appended blocks
			MatrixIndexes key = new MatrixIndexes(); 
			
			try
			{
				if( _margin.equals("rows") ) 
				{
					MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(newlen, clen, brlen, bclen, 
							                     MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);  
					
					for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
					{
						HashMap> keyMap = new HashMap>();
						BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
						//number of columns of this block column (the last block column may be smaller)
						int maxCol = (int)(((long)blockCol*bclen + bclen < clen) ? bclen : clen - (long)blockCol*bclen);
						
						int blockRowOut = 0;
						int currentSize = -1;
						//one output block per key map chunk; the chunk size is used as its number of rows
						//(presumably nextSizedKeyMap returns 0 once the key map is exhausted -- TODO confirm)
						while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, brlen, brlen)) > 0  )
						{
							int maxRow = currentSize;
							
							//get reuse matrix block
							MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
							block.reset(maxRow, maxCol);
							
							int rowPos = 0;
							//start with the smallest block row index currently held in the key map
							int blockRow = Collections.min(keyMap.keySet());
							
							//NOTE(review): the following loop is truncated in this copy of the file
							for( ; blockRow < (int)Math.ceil(rlen/(double)brlen) && rowPos lkeyMap = keyMap.get(blockRow);
										long row_offset = (long)blockRow*brlen;
										for( int i=0; i lkeyMap = keyMap.get(blockRow);
										rowPos+=lkeyMap.size();
									}
								}				
								keyMap.remove(blockRow);
							}
							
							//block indexes are written 1-based
							key.setIndexes(blockRowOut+1, blockCol+1);
							writer.append(key, block);
							blockRowOut++;
						}
						
						//NOTE(review): not in a finally block, i.e., the key map reader stays open if an exception is thrown above
						if( fkeyMap != null )
							StagingFileUtils.closeKeyMap(fkeyMap);
					}
				}
				else
				{
					MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, newlen, brlen, bclen, 
							                    MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);  
					
					for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
					{
						HashMap> keyMap = new HashMap>();
						BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
						//number of rows of this block row (the last block row may be smaller)
						int maxRow = (int)(((long)blockRow*brlen + brlen < rlen) ? brlen : rlen - (long)blockRow*brlen);
						
						int blockColOut = 0;
						int currentSize = -1;
						//one output block per key map chunk; the chunk size is used as its number of columns
						while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, bclen, bclen)) > 0  )
						{
							int maxCol = currentSize;
							
							//get reuse matrix block
							MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
							block.reset(maxRow, maxCol);
							int colPos = 0;
							
							//start with the smallest block column index currently held in the key map
							int blockCol = Collections.min(keyMap.keySet());
							//NOTE(review): the following loop is truncated in this copy of the file
							for( ; blockCol < (int)Math.ceil(clen/(double)bclen) && colPos lkeyMap = keyMap.get(blockCol);
										//NOTE(review): int multiplication before widening to long; the rows branch 
										//casts to long first ((long)blockRow*brlen) -- likely should be consistent
										long col_offset = blockCol*bclen;
										for( int j=0; j lkeyMap = keyMap.get(blockCol);
										colPos+=lkeyMap.size();
									}
								}							
								keyMap.remove(blockCol);
							}
							
							//block indexes are written 1-based
							key.setIndexes(blockRow+1, blockColOut+1);
							writer.append(key, block);
							blockColOut++;
						}
						
						//NOTE(review): not in a finally block, i.e., the key map reader stays open if an exception is thrown above
						if( fkeyMap != null )
							StagingFileUtils.closeKeyMap(fkeyMap);
					}
				}
				
				//Note: no handling of empty matrices necessary
			}
			finally
			{
				if( writer != null )
					writer.close();
			}
		}
		
		/**
		 * 
		 * @param fnameNew
		 * @param stagingDir
		 * @param rlen
		 * @param clen
		 * @param newlen
		 * @param nnz
		 * @param brlen
		 * @param bclen
		 * @param ii
		 * @throws IOException
		 * @throws DMLRuntimeException
		 */
		@SuppressWarnings("deprecation")
		public void createBlockResultFileDiag( String fnameNew, String stagingDir, long rlen, long clen, long newlen, long nnz, int brlen, int bclen, InputInfo ii ) 
			throws IOException, DMLRuntimeException
		{
			//prepare input
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
			Path path = new Path(fnameNew);
			FileSystem fs = FileSystem.get(job);
			String metaOut = stagingDir+"/meta";
	
			//prepare output
			SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
			MatrixIndexes key = new MatrixIndexes(); 
			HashSet writtenBlocks = new HashSet();
			
			try
			{
				if( _margin.equals("rows") ) 
				{
					MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(newlen, clen, brlen, bclen, 
							                       MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);  
					HashMap> keyMap = new HashMap>();
					BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
					int currentSize = -1;
					int blockRowOut = 0;
					
					while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, brlen, brlen)) > 0  )
					{
						int rowPos = 0;
						int blockRow = Collections.min(keyMap.keySet()); 
						int maxRow = currentSize;
						for( ; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
						{
							int blockCol = blockRow; // for diag known to be equivalent
							int maxCol = (int)(((long)blockCol*bclen + bclen < clen) ? bclen : clen - (long)blockCol*bclen);
							
							//get reuse matrix block
							MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
							block.reset(maxRow, maxCol);
							
							if( keyMap.containsKey(blockRow) )
							{
								String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
								MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
								
								HashMap lkeyMap = keyMap.get(blockRow);
								long row_offset = blockRow*brlen;
								for( int i=0; i> keyMap = new HashMap>();
					BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
					int currentSize = -1;
					int blockColOut = 0;
					
					while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, bclen, bclen)) > 0  )
					{
						int colPos = 0;
						int blockCol = Collections.min(keyMap.keySet()); 
						int maxCol = currentSize;
						for( ; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
						{
							int blockRow = blockCol; // for diag known to be equivalent
							int maxRow = (int)((blockRow*brlen + brlen < rlen) ? brlen : rlen - blockRow*brlen);
							
							//get reuse matrix block
							MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
							block.reset(maxRow, maxCol);
						
							if( keyMap.containsKey(blockCol) )
							{
								String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
								MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
								
								HashMap lkeyMap = keyMap.get(blockCol);
								long col_offset = blockCol*bclen;
								for( int j=0; j




© 2015 - 2024 Weber Informatics LLC | Privacy Policy