/*
 * Source listing: org.apache.sysml.runtime.matrix.mapred.ReblockMapper
 * (artifact "systemml" - Declarative Machine Learning).
 */
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.mapred;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.sysml.runtime.instructions.mr.ReblockInstruction;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.AdaptivePartialBlock;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.MatrixValue;
import org.apache.sysml.runtime.matrix.data.PartialBlock;
import org.apache.sysml.runtime.matrix.data.TaggedAdaptivePartialBlock;
import org.apache.sysml.runtime.util.MapReduceTool;
/**
*
*
*/
public class ReblockMapper extends MapperBase
implements Mapper
{
//state of reblock mapper
private OutputCollector cachedCollector = null;
private JobConf cachedJobConf = null;
private HashMap dimensionsOut = new HashMap();
private HashMap dimensionsIn = new HashMap();
private HashMap emptyBlocks = new HashMap();
//reblock buffer
private HashMap buffer = new HashMap();
private int buffersize =-1;
@Override
public void map(Writable rawKey, Writable rawValue, OutputCollector out, Reporter reporter)
throws IOException
{
cachedCollector = out;
commonMap(rawKey, rawValue, out, reporter);
}
@Override
public void configure(JobConf job)
{
MRJobConfiguration.setMatrixValueClass(job, false); //worst-case
super.configure(job);
//cache job conf for use in close
cachedJobConf = job;
try
{
ReblockInstruction[] reblockInstructions = MRJobConfiguration.getReblockInstructions(job);
//get dimension information
for(ReblockInstruction ins: reblockInstructions)
{
dimensionsIn.put(ins.input, MRJobConfiguration.getMatrixCharacteristicsForInput(job, ins.input));
dimensionsOut.put(ins.output, MRJobConfiguration.getMatrixCharactristicsForReblock(job, ins.output));
emptyBlocks.put(ins.output, ins.outputEmptyBlocks);
}
//compute reblock buffer size (according to relevant rblk inst of this task only)
//(buffer size divided by max reblocks per input matrix, because those are shared in JVM)
int maxlen = 1;
for( ArrayList rinst : reblock_instructions )
maxlen = Math.max(maxlen, rinst.size()); //max reblocks per input
buffersize = ReblockBuffer.DEFAULT_BUFFER_SIZE/maxlen;
}
catch (Exception e)
{
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException
{
super.close();
//flush buffered data
for( Entry e : buffer.entrySet() )
{
ReblockBuffer rbuff = e.getValue();
rbuff.flushBuffer(e.getKey(), cachedCollector);
}
//handle empty block output (responsibility distributed over all map tasks)
if( cachedJobConf==null || cachedCollector==null )
return;
long mapID = Long.parseLong(MapReduceTool.getUniqueKeyPerTask(cachedJobConf, true));
long numMap = cachedJobConf.getNumMapTasks();
MatrixIndexes tmpIx = new MatrixIndexes();
TaggedAdaptivePartialBlock tmpVal = new TaggedAdaptivePartialBlock();
AdaptivePartialBlock apb = new AdaptivePartialBlock(new PartialBlock(-1,-1,0));
tmpVal.setBaseObject(apb);
for(Entry e: dimensionsOut.entrySet())
{
tmpVal.setTag(e.getKey());
MatrixCharacteristics mc = e.getValue();
long rlen = mc.getRows();
long clen = mc.getCols();
long brlen = mc.getRowsPerBlock();
long bclen = mc.getColsPerBlock();
long nnz = mc.getNonZeros();
//output empty blocks on demand (not required if nnz ensures that values exist in each block)
if( nnz >= (rlen*clen-Math.min(brlen, rlen)*Math.min(bclen, clen)+1)
|| !emptyBlocks.get(e.getKey()) )
{
continue; //safe to skip empty block output
}
//output part of empty blocks (all mappers contribute for better load balance),
//where mapper responsibility is distributed over row blocks
long numBlocks = (long)Math.ceil((double)rlen/brlen);
long len = (long)Math.ceil((double)numBlocks/numMap);
long start = mapID * len * brlen;
long end = Math.min((mapID+1) * len * brlen, rlen);
for(long i=start, r=start/brlen+1; i out, Reporter reporter)
throws IOException
{
//note: invoked from MapperBase for each cell
//apply all instructions
processMapperInstructionsForMatrix(index);
//apply reblock instructions and output
processReblockInMapperAndOutput(index, out);
}
/**
*
* @param index
* @param indexBuffer
* @param partialBuffer
* @param out
* @throws IOException
*/
protected void processReblockInMapperAndOutput(int index, OutputCollector out)
throws IOException
{
for(ReblockInstruction ins : reblock_instructions.get(index))
{
ArrayList ixvList = cachedValues.get(ins.input);
if( ixvList!=null ) {
for(IndexedMatrixValue inValue : ixvList )
{
if(inValue==null)
continue;
//get buffer
ReblockBuffer rbuff = buffer.get(ins.output);
if( rbuff==null )
{
MatrixCharacteristics mc = dimensionsOut.get(ins.output);
rbuff = new ReblockBuffer( buffersize, mc.getRows(), mc.getCols(), ins.brlen, ins.bclen );
buffer.put(ins.output, rbuff);
}
//append cells and flush buffer if required
MatrixValue mval = inValue.getValue();
if( mval instanceof MatrixBlock )
{
MatrixIndexes inIx = inValue.getIndexes();
MatrixCharacteristics mc = dimensionsIn.get(ins.input);
long row_offset = (inIx.getRowIndex()-1)*mc.getRowsPerBlock() + 1;
long col_offset = (inIx.getColumnIndex()-1)*mc.getColsPerBlock() + 1;
//append entire block incl. flush on demand
rbuff.appendBlock(row_offset, col_offset, (MatrixBlock)mval, ins.output, out );
}
else //if( mval instanceof MatrixCell )
{
rbuff.appendCell( inValue.getIndexes().getRowIndex(),
inValue.getIndexes().getColumnIndex(),
((MatrixCell)mval).getValue() );
//flush buffer if necessary
if( rbuff.getSize() >= rbuff.getCapacity() )
rbuff.flushBuffer( ins.output, out );
}
}
}
}
}
}