org.apache.sysml.runtime.instructions.cpfile.ParameterizedBuiltinCPFileInstruction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions.cpfile;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map.Entry;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.util.Cell;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
import org.apache.sysml.runtime.controlprogram.parfor.util.StagingFileUtils;
import org.apache.sysml.runtime.functionobjects.ParameterizedBuiltin;
import org.apache.sysml.runtime.functionobjects.ValueFunction;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.io.MatrixWriter;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.runtime.matrix.operators.SimpleOperator;
import org.apache.sysml.runtime.util.FastStringTokenizer;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;
/**
* File-based (out-of-core) implementation of the remove-empty ("rmempty") operation.
* It is provided for robustness, because there is no parallel version of this
* operation due to its data-dependent row and column dependencies.
*
*/
public class ParameterizedBuiltinCPFileInstruction extends ParameterizedBuiltinCPInstruction
{
/**
 * Creates a file-based parameterized builtin instruction. All arguments are
 * handed unchanged to the {@link ParameterizedBuiltinCPInstruction} constructor.
 *
 * @param op operator passed to the superclass
 * @param paramsMap parameter map passed to the superclass
 * @param out output operand
 * @param opcode instruction opcode
 * @param istr instruction string
 */
public ParameterizedBuiltinCPFileInstruction(Operator op, HashMap paramsMap, CPOperand out, String opcode, String istr)
{
super(op, paramsMap, out, opcode, istr);
}
/**
 * Builds a {@link ParameterizedBuiltinCPFileInstruction} from its string form.
 * The first instruction part is taken as the opcode, the last part as the
 * output operand, and the parameter map is constructed from the parts array.
 * Only the opcode "rmempty" (compared case-insensitively) is accepted.
 *
 * @param str instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not "rmempty"
 * @throws DMLUnsupportedOperationException declared for callers; not thrown directly here
 */
public static ParameterizedBuiltinCPFileInstruction parseInstruction( String str )
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    final String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);

    // opcode comes first, the output operand comes last
    final String opcode = tokens[0];
    final CPOperand outOperand = new CPOperand( tokens[tokens.length-1] );

    // everything else is turned into the parameter map
    final HashMap<String,String> paramMap = constructParameterMap(tokens);

    // reject anything other than remove-empty
    if( !opcode.equalsIgnoreCase("rmempty") ) {
        throw new DMLRuntimeException("Unknown opcode (" + opcode + ") for ParameterizedBuiltin Instruction.");
    }

    final ValueFunction fn = ParameterizedBuiltin.getParameterizedBuiltinFnObject(opcode);
    return new ParameterizedBuiltinCPFileInstruction(new SimpleOperator(fn), paramMap, outOperand, opcode, str);
}
/**
 * Executes the instruction. For opcode "rmempty" the input matrix named by the
 * "target" parameter is exported, processed by {@link RemoveEmpty} according to
 * the "margin" parameter, and the resulting matrix object is bound to the
 * output variable. Any other opcode is rejected.
 *
 * @param ec execution context used to look up and set variables
 * @throws DMLRuntimeException if the opcode is not "rmempty" or execution fails
 * @throws DMLUnsupportedOperationException declared for callers; not thrown directly here
 */
@Override
public void processInstruction(ExecutionContext ec)
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    final String opcode = getOpcode();
    if( !opcode.equalsIgnoreCase("rmempty") ) {
        throw new DMLRuntimeException("Unknown opcode : " + opcode);
    }

    // look up input and output matrix objects
    final MatrixObject source = (MatrixObject)ec.getVariable( params.get("target") );
    final MatrixObject target = (MatrixObject)ec.getVariable( output.getName() );
    final String margin = params.get("margin");

    // export input matrix (if necessary)
    source.exportData();

    // core execution, then publish the new output object
    final MatrixObject result = new RemoveEmpty( margin, source, target ).execute();
    ec.setVariable(output.getName(), result);
}
/**
* Remove-empty implementation, realized as a nested class in order to allow
* testing independent of the overall SystemML instruction framework.
*
*/
public static class RemoveEmpty
{
// removal direction: the value "rows" selects row removal, any other value selects column removal
private String _margin = null;
// input matrix object; its file name, metadata and matrix characteristics are read
private MatrixObject _src = null;
// output matrix object; supplies the file name and variable name of the result
private MatrixObject _out = null;
/**
 * Stores the removal direction and the input/output matrix objects for a
 * later call to {@code execute()}.
 *
 * @param margin removal direction ("rows" for rows, otherwise columns)
 * @param src input matrix object
 * @param out output matrix object
 */
public RemoveEmpty( String margin, MatrixObject src, MatrixObject out )
{
    this._margin = margin;
    this._src = src;
    this._out = out;
}
/**
 * Runs the file-based remove-empty operation in three phases: (1) the input
 * file is written into a local staging directory, (2) the staged data is
 * scanned to create the key mapping, and (3) the output file is created from
 * the staged data. The staging directory is removed afterwards, regardless of
 * whether the phases succeeded.
 *
 * @return a new matrix object describing the output file; its row count (for
 *         margin "rows") or column count (otherwise) is the value returned by
 *         the key-mapping phase
 * @throws DMLRuntimeException if any phase fails; an {@link IOException} is wrapped
 */
public MatrixObject execute()
    throws DMLRuntimeException
{
    //initial setup
    String fnameOld = _src.getFileName();
    String fnameNew = _out.getFileName();
    InputInfo ii = ((MatrixFormatMetaData)_src.getMetaData()).getInputInfo();
    MatrixCharacteristics mc = _src.getMatrixCharacteristics();

    String stagingDir = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_WORK);
    LocalFileUtils.createLocalFileIfNotExist(stagingDir);

    long ret = -1;
    try
    {
        boolean diagBlocks = false;

        //Phase 1: write file to staging
        if( ii == InputInfo.TextCellInputInfo )
            createTextCellStagingFile( fnameOld, stagingDir );
        else if( ii == InputInfo.BinaryCellInputInfo )
            createBinaryCellStagingFile( fnameOld, stagingDir );
        else if( ii == InputInfo.BinaryBlockInputInfo )
            diagBlocks = createBinaryBlockStagingFile( fnameOld, stagingDir );

        //Phase 2: scan empty rows/cols
        if( diagBlocks )
            ret = createKeyMappingDiag(stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii);
        else
            ret = createKeyMapping(stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii);

        //Phase 3: create output files
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        if( ii == InputInfo.TextCellInputInfo
            || ii == InputInfo.BinaryCellInputInfo )
        {
            createCellResultFile( fnameNew, stagingDir, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
        }
        else if( ii == InputInfo.BinaryBlockInputInfo )
        {
            if( diagBlocks )
                createBlockResultFileDiag( fnameNew, stagingDir, mc.getRows(), mc.getCols(), ret, mc.getNonZeros(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
            else
                createBlockResultFile( fnameNew, stagingDir, mc.getRows(), mc.getCols(), ret, mc.getNonZeros(), mc.getRowsPerBlock(), mc.getColsPerBlock(), ii );
        }
    }
    catch( IOException ioe )
    {
        throw new DMLRuntimeException( ioe );
    }
    finally
    {
        //final cleanup: done in finally so that a failing phase does not
        //leave the staging directory behind on the local file system
        LocalFileUtils.cleanupWorkingDirectory(stagingDir);
    }

    //create and return new output object
    if( _margin.equals("rows") )
        return createNewOutputObject(_src, _out, ret, mc.getCols());
    else
        return createNewOutputObject(_src, _out, mc.getRows(), ret );
}
/**
 * Creates the matrix object that describes the result. The new object takes
 * its variable name and file name from {@code out} and its value type, block
 * sizes, non-zero count and input/output format from {@code src}. If either
 * dimension is zero, both dimensions are raised to at least one and a matrix
 * block of that size is attached to the new object.
 *
 * @param src input matrix object (source of value type and metadata)
 * @param out output matrix object (source of variable name and file name)
 * @param rows number of rows of the result
 * @param cols number of columns of the result
 * @return the newly created matrix object
 * @throws DMLRuntimeException if attaching the block for an empty result fails
 */
private MatrixObject createNewOutputObject( MatrixObject src, MatrixObject out, long rows, long cols )
    throws DMLRuntimeException
{
    final String outVarName = out.getVarName();
    final String outFileName = out.getFileName();
    final ValueType valueType = src.getValueType();
    final MatrixFormatMetaData srcMeta = (MatrixFormatMetaData) src.getMetaData();

    final MatrixObject result = new MatrixObject( valueType, outFileName );
    result.setVarName( outVarName );
    result.setDataType( DataType.MATRIX );

    //handle empty output block (ensure valid dimensions)
    long outRows = rows;
    long outCols = cols;
    if( rows == 0 || cols == 0 )
    {
        outRows = Math.max(rows, 1);
        outCols = Math.max(cols, 1);
        try
        {
            result.acquireModify(new MatrixBlock((int)outRows, (int)outCols, true));
            result.release();
        }
        catch( CacheException e )
        {
            throw new DMLRuntimeException(e);
        }
    }

    //fresh metadata object: new dimensions, everything else taken from the source
    final MatrixCharacteristics srcMc = srcMeta.getMatrixCharacteristics();
    final OutputInfo srcOutputInfo = srcMeta.getOutputInfo();
    final InputInfo srcInputInfo = srcMeta.getInputInfo();
    final MatrixCharacteristics newMc = new MatrixCharacteristics(
        outRows, outCols, srcMc.getRowsPerBlock(), srcMc.getColsPerBlock(), srcMc.getNonZeros());
    result.setMetaData( new MatrixFormatMetaData(newMc, srcOutputInfo, srcInputInfo) );

    return result;
}
/**
 * Reads a text-cell matrix file and writes its cells into the staging
 * directory. Each line is tokenized on blanks into row, column and value.
 * Cells are collected in a buffer that is flushed to the staging area
 * whenever it exceeds {@code StagingFileUtils.CELL_BUFFER_SIZE}, and once
 * more at the end of every input split.
 *
 * @param fnameOld name of the input file
 * @param stagingDir local staging directory
 * @throws IOException if the input file does not exist or reading fails
 * @throws DMLRuntimeException if writing to the staging area fails
 */
public void createTextCellStagingFile( String fnameOld, String stagingDir )
    throws IOException, DMLRuntimeException
{
    //prepare input
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fnameOld);
    final FileSystem hdfs = FileSystem.get(conf);
    if( !hdfs.exists(inputPath) )
        throw new IOException("File "+fnameOld+" does not exist on HDFS.");
    FileInputFormat.addInputPath(conf, inputPath);
    final TextInputFormat format = new TextInputFormat();
    format.configure(conf);
    final InputSplit[] splits = format.getSplits(conf, 1);

    final LinkedList<Cell> cells = new LinkedList<Cell>();
    final LongWritable lineKey = new LongWritable();
    final Text line = new Text();
    final FastStringTokenizer tokenizer = new FastStringTokenizer(' ');

    for( int s = 0; s < splits.length; s++ )
    {
        final RecordReader<LongWritable,Text> reader = format.getRecordReader(splits[s], conf, Reporter.NULL);
        try
        {
            while( reader.next(lineKey, line) )
            {
                tokenizer.reset( line.toString() ); //reset tokenizer
                final long r = tokenizer.nextLong();
                final long c = tokenizer.nextLong();
                final double v = tokenizer.nextDouble();
                cells.add(new Cell(r, c, v));

                //flush once the buffer limit is exceeded
                if( cells.size() > StagingFileUtils.CELL_BUFFER_SIZE )
                {
                    appendCellBufferToStagingArea(stagingDir, cells, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
                    cells.clear();
                }
            }

            //flush the remainder of this split
            if( !cells.isEmpty() )
            {
                appendCellBufferToStagingArea(stagingDir, cells, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
                cells.clear();
            }
        }
        finally
        {
            reader.close();
        }
    }
}
/**
 * Reads a binary-cell matrix (one or more sequence files of
 * {@code MatrixIndexes}/{@code MatrixCell} pairs) and writes its cells into
 * the staging directory. Cells are collected in a buffer that is flushed to
 * the staging area whenever it exceeds {@code StagingFileUtils.CELL_BUFFER_SIZE},
 * and once more at the end of every sequence file.
 *
 * @param fnameOld name of the input file
 * @param stagingDir local staging directory
 * @throws IOException if the input file does not exist or reading fails
 * @throws DMLRuntimeException if writing to the staging area fails
 */
@SuppressWarnings("deprecation")
public void createBinaryCellStagingFile( String fnameOld, String stagingDir )
    throws IOException, DMLRuntimeException
{
    //prepare input
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fnameOld);
    final FileSystem hdfs = FileSystem.get(conf);
    if( !hdfs.exists(inputPath) )
        throw new IOException("File "+fnameOld+" does not exist on HDFS.");

    final LinkedList<Cell> cells = new LinkedList<Cell>();
    final MatrixIndexes index = new MatrixIndexes();
    final MatrixCell cell = new MatrixCell();

    for( Path part : MatrixReader.getSequenceFilePaths(hdfs, inputPath) )
    {
        final SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, part, conf);
        try
        {
            while( reader.next(index, cell) )
            {
                cells.add(new Cell(index.getRowIndex(), index.getColumnIndex(), cell.getValue()));

                //flush once the buffer limit is exceeded
                if( cells.size() > StagingFileUtils.CELL_BUFFER_SIZE )
                {
                    appendCellBufferToStagingArea(stagingDir, cells, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
                    cells.clear();
                }
            }

            //flush the remainder of this file
            if( !cells.isEmpty() )
            {
                appendCellBufferToStagingArea(stagingDir, cells, DMLTranslator.DMLBlockSize, DMLTranslator.DMLBlockSize);
                cells.clear();
            }
        }
        finally
        {
            reader.close();
        }
    }
}
/**
 * Writes every non-empty block of a binary-block matrix to a local file named
 * {@code <rowIndex>_<columnIndex>} in the staging directory and reports
 * whether the input is a diag, because diag is the primary use case and there
 * is a lot of optimization potential for it.
 *
 * @param fnameOld name of the input file
 * @param stagingDir local staging directory
 * @return true if every non-empty block has identical row and column block
 *         index (also true if no non-empty block was found), false otherwise
 * @throws IOException if the input file does not exist or reading fails
 * @throws DMLRuntimeException declared for callers; not thrown directly here
 */
@SuppressWarnings("deprecation")
public boolean createBinaryBlockStagingFile( String fnameOld, String stagingDir )
    throws IOException, DMLRuntimeException
{
    //prepare input
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fnameOld);
    final FileSystem hdfs = FileSystem.get(conf);
    if( !hdfs.exists(inputPath) )
        throw new IOException("File "+fnameOld+" does not exist on HDFS.");

    final MatrixIndexes index = new MatrixIndexes();
    final MatrixBlock block = new MatrixBlock();
    boolean onlyDiagBlocks = true;

    for( Path part : MatrixReader.getSequenceFilePaths(hdfs, inputPath) )
    {
        final SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, part, conf);
        try
        {
            while( reader.next(index, block) )
            {
                //skip empty blocks (important for diag)
                if( block.isEmptyBlock() )
                    continue;

                final String blockFile = stagingDir +"/"+index.getRowIndex()+"_"+index.getColumnIndex();
                LocalFileUtils.writeMatrixBlockToLocal(blockFile, block);

                //a single off-diagonal block disqualifies the input
                if( index.getRowIndex() != index.getColumnIndex() )
                    onlyDiagBlocks = false;
            }
        }
        finally
        {
            reader.close();
        }
    }

    return onlyDiagBlocks;
}
/**
*
* @param dir
* @param buffer
* @param brlen
* @param bclen
* @throws DMLRuntimeException
* @throws IOException
*/
private void appendCellBufferToStagingArea( String dir, LinkedList buffer, int brlen, int bclen )
throws DMLRuntimeException, IOException
{
HashMap> sortedBuffer = new HashMap>();
//sort cells in buffer wrt key
String key = null;
for( Cell c : buffer )
{
key = (c.getRow()/brlen+1) +"_"+(c.getCol()/bclen+1);
if( !sortedBuffer.containsKey(key) )
sortedBuffer.put(key, new LinkedList());
sortedBuffer.get(key).addLast(c);
}
//write lists of cells to local files
for( Entry> e : sortedBuffer.entrySet() )
{
String pfname = dir + "/" + e.getKey();
StagingFileUtils.writeCellListToLocal(pfname, e.getValue());
}
}
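/**
* Scans the staged cell files block by block, marks the rows (margin "rows")
* or columns (otherwise) for which a cell is present, and appends the
* resulting key mapping to the file {@code stagingDir/meta}.
*
* @param stagingDir local staging directory containing the cell files
* @param rlen number of rows of the input matrix
* @param clen number of columns of the input matrix
* @param brlen number of rows per block
* @param bclen number of columns per block
* @param ii input format of the source matrix
* @return value that {@code execute()} uses as the number of rows (margin "rows")
*         or columns (otherwise) of the output
* @throws FileNotFoundException if a staging file cannot be found
* @throws IOException if reading or writing staging files fails
* @throws DMLRuntimeException if reading or writing staging files fails
*/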
private long createKeyMapping( String stagingDir, long rlen, long clen, int brlen, int bclen, InputInfo ii)
throws FileNotFoundException, IOException, DMLRuntimeException
{
String metaOut = stagingDir+"/meta";
long len = 0;
long lastKey = 0;
if(_margin.equals("rows"))
{
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
boolean[] flags = new boolean[brlen];
for( int k=0; k buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getRow()-blockRow*brlen-1 ] = false;
}
}
//create and append key mapping
LinkedList keyMapping = new LinkedList();
for( int i = 0; i buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getCol()-blockCol*bclen-1 ] = false;
}
}
//create and append key mapping
LinkedList keyMapping = new LinkedList();
for( int i = 0; i buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getRow()-blockRow*brlen-1 ] = false;
}
//create and append key mapping
LinkedList keyMapping = new LinkedList();
for( int i = 0; i buffer = StagingFileUtils.readCellListFromLocal(fname);
for( Cell c : buffer )
flags[ (int)c.getCol()-blockCol*bclen-1 ] = false;
}
//create and append key mapping
LinkedList keyMapping = new LinkedList();
for( int i = 0; i> keyMap = new HashMap>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
try
{
if( _margin.equals("rows") )
{
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
StagingFileUtils.nextKeyMap(fkeyMap, keyMap, blockRow, brlen);
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
LinkedList buffer = StagingFileUtils.readCellListFromLocal(fname);
if( ii == InputInfo.TextCellInputInfo )
for( Cell c : buffer )
{
sb.append(keyMap.get(blockRow).get(c.getRow()-1)+1);
sb.append(' ');
sb.append(c.getCol());
sb.append(' ');
sb.append(c.getValue());
sb.append('\n');
twriter.write( sb.toString() );
sb.setLength(0);
}
else if( ii == InputInfo.BinaryCellInputInfo )
for( Cell c : buffer )
{
key.setIndexes(keyMap.get(blockRow).get(c.getRow()-1)+1, c.getCol());
value.setValue(c.getValue());
bwriter.append(key, value);
}
}
keyMap.remove(blockRow);
}
}
else
{
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
StagingFileUtils.nextKeyMap(fkeyMap, keyMap, blockCol, bclen);
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
LinkedList buffer = StagingFileUtils.readCellListFromLocal(fname);
if( ii == InputInfo.TextCellInputInfo )
for( Cell c : buffer )
{
sb.append(c.getRow());
sb.append(' ');
sb.append(keyMap.get(blockCol).get(c.getCol()-1)+1);
sb.append(' ');
sb.append(c.getValue());
sb.append('\n');
twriter.write( sb.toString() );
sb.setLength(0);
}
else if( ii == InputInfo.BinaryCellInputInfo )
for( Cell c : buffer )
{
key.setIndexes(c.getRow(), keyMap.get(blockCol).get(c.getCol()-1)+1);
value.setValue(c.getValue());
bwriter.append(key, value);
}
}
keyMap.remove(blockCol);
}
}
//Note: no need to handle empty result
}
finally
{
if( twriter != null )
twriter.close();
if( bwriter != null )
bwriter.close();
}
}
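/**
* Creates the binary-block output file (a sequence file of
* {@code MatrixIndexes}/{@code MatrixBlock} pairs) from the staged blocks,
* using the key mapping stored in {@code stagingDir/meta}.
*
* @param fnameNew name of the output file
* @param stagingDir local staging directory containing staged blocks and key mapping
* @param rlen number of rows of the input matrix
* @param clen number of columns of the input matrix
* @param newlen number of rows (margin "rows") or columns (otherwise) of the output
* @param nnz number of non-zeros, used to decide the sparse format of the reuse blocks
* @param brlen number of rows per block
* @param bclen number of columns per block
* @param ii input format of the source matrix
* @throws IOException if reading staged data or writing the output fails
* @throws DMLRuntimeException if reading staged data or writing the output fails
*/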
@SuppressWarnings("deprecation")
public void createBlockResultFile( String fnameNew, String stagingDir, long rlen, long clen, long newlen, long nnz, int brlen, int bclen, InputInfo ii )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = FileSystem.get(job);
String metaOut = stagingDir+"/meta";
//prepare output
SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
MatrixIndexes key = new MatrixIndexes();
try
{
if( _margin.equals("rows") )
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(newlen, clen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
for(int blockCol = 0; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
HashMap> keyMap = new HashMap>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int maxCol = (int)(((long)blockCol*bclen + bclen < clen) ? bclen : clen - (long)blockCol*bclen);
int blockRowOut = 0;
int currentSize = -1;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, brlen, brlen)) > 0 )
{
int maxRow = currentSize;
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
int rowPos = 0;
int blockRow = Collections.min(keyMap.keySet());
for( ; blockRow < (int)Math.ceil(rlen/(double)brlen) && rowPos lkeyMap = keyMap.get(blockRow);
long row_offset = (long)blockRow*brlen;
for( int i=0; i lkeyMap = keyMap.get(blockRow);
rowPos+=lkeyMap.size();
}
}
keyMap.remove(blockRow);
}
key.setIndexes(blockRowOut+1, blockCol+1);
writer.append(key, block);
blockRowOut++;
}
if( fkeyMap != null )
StagingFileUtils.closeKeyMap(fkeyMap);
}
}
else
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, newlen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
for(int blockRow = 0; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
HashMap> keyMap = new HashMap>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int maxRow = (int)(((long)blockRow*brlen + brlen < rlen) ? brlen : rlen - (long)blockRow*brlen);
int blockColOut = 0;
int currentSize = -1;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, bclen, bclen)) > 0 )
{
int maxCol = currentSize;
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
int colPos = 0;
int blockCol = Collections.min(keyMap.keySet());
for( ; blockCol < (int)Math.ceil(clen/(double)bclen) && colPos lkeyMap = keyMap.get(blockCol);
long col_offset = blockCol*bclen;
for( int j=0; j lkeyMap = keyMap.get(blockCol);
colPos+=lkeyMap.size();
}
}
keyMap.remove(blockCol);
}
key.setIndexes(blockRow+1, blockColOut+1);
writer.append(key, block);
blockColOut++;
}
if( fkeyMap != null )
StagingFileUtils.closeKeyMap(fkeyMap);
}
}
//Note: no handling of empty matrices necessary
}
finally
{
if( writer != null )
writer.close();
}
}
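/**
* Variant of {@code createBlockResultFile} for diag inputs, where the block
* column index of every staged block is known to equal its block row index.
* Creates the binary-block output file from the staged blocks, using the key
* mapping stored in {@code stagingDir/meta}.
*
* @param fnameNew name of the output file
* @param stagingDir local staging directory containing staged blocks and key mapping
* @param rlen number of rows of the input matrix
* @param clen number of columns of the input matrix
* @param newlen number of rows (margin "rows") or columns (otherwise) of the output
* @param nnz number of non-zeros, used to decide the sparse format of the reuse blocks
* @param brlen number of rows per block
* @param bclen number of columns per block
* @param ii input format of the source matrix
* @throws IOException if reading staged data or writing the output fails
* @throws DMLRuntimeException if reading staged data or writing the output fails
*/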
@SuppressWarnings("deprecation")
public void createBlockResultFileDiag( String fnameNew, String stagingDir, long rlen, long clen, long newlen, long nnz, int brlen, int bclen, InputInfo ii )
throws IOException, DMLRuntimeException
{
//prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = FileSystem.get(job);
String metaOut = stagingDir+"/meta";
//prepare output
SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
MatrixIndexes key = new MatrixIndexes();
HashSet writtenBlocks = new HashSet();
try
{
if( _margin.equals("rows") )
{
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(newlen, clen, brlen, bclen,
MatrixBlock.evalSparseFormatInMemory(rlen, clen, nnz), nnz);
HashMap> keyMap = new HashMap>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int currentSize = -1;
int blockRowOut = 0;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, brlen, brlen)) > 0 )
{
int rowPos = 0;
int blockRow = Collections.min(keyMap.keySet());
int maxRow = currentSize;
for( ; blockRow < (int)Math.ceil(rlen/(double)brlen); blockRow++)
{
int blockCol = blockRow; // for diag known to be equivalent
int maxCol = (int)(((long)blockCol*bclen + bclen < clen) ? bclen : clen - (long)blockCol*bclen);
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
if( keyMap.containsKey(blockRow) )
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
HashMap lkeyMap = keyMap.get(blockRow);
long row_offset = blockRow*brlen;
for( int i=0; i> keyMap = new HashMap>();
BufferedReader fkeyMap = StagingFileUtils.openKeyMap(metaOut);
int currentSize = -1;
int blockColOut = 0;
while( (currentSize = StagingFileUtils.nextSizedKeyMap(fkeyMap, keyMap, bclen, bclen)) > 0 )
{
int colPos = 0;
int blockCol = Collections.min(keyMap.keySet());
int maxCol = currentSize;
for( ; blockCol < (int)Math.ceil(clen/(double)bclen); blockCol++)
{
int blockRow = blockCol; // for diag known to be equivalent
int maxRow = (int)((blockRow*brlen + brlen < rlen) ? brlen : rlen - blockRow*brlen);
//get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
block.reset(maxRow, maxCol);
if( keyMap.containsKey(blockCol) )
{
String fname = stagingDir+"/"+(blockRow+1)+"_"+(blockCol+1);
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(fname);
HashMap lkeyMap = keyMap.get(blockCol);
long col_offset = blockCol*bclen;
for( int j=0; j | | | | | | | |