org.apache.sysml.runtime.matrix.mapred.MapperBase Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.matrix.mapred;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction;
import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
import org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction;
import org.apache.sysml.runtime.instructions.mr.MRInstruction;
import org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction;
import org.apache.sysml.runtime.instructions.mr.ReblockInstruction;
import org.apache.sysml.runtime.matrix.data.Converter;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.MatrixValue;
import org.apache.sysml.runtime.matrix.data.Pair;
@SuppressWarnings("rawtypes")
public abstract class MapperBase extends MRBaseForCommonInstructions
{
//the indexes that this particular input matrix file represents
protected ArrayList representativeMatrixes=null;
//the dimension for all the representative matrices
//(they are all the same, since coming from the same files)
protected long[] rlens=null;
protected long[] clens=null;
//the block sizes for the representative matrices
protected int[] brlens=null;
protected int[] bclens=null;
//upper boundaries to check
protected long[] rbounds=null;
protected long[] cbounds=null;
//boundary block sizes
protected int[] lastblockrlens=null;
protected int[] lastblockclens=null;
//rand instructions that need to be performed in mapper
protected ArrayList dataGen_instructions=new ArrayList();
//instructions that need to be performed in mapper
protected ArrayList> mapper_instructions=new ArrayList>();
//block instructions that need to be performed in part by mapper
protected ArrayList> reblock_instructions=new ArrayList>();
//csv block instructions that need to be performed in part by mapper
protected ArrayList> csv_reblock_instructions=new ArrayList>();
//the indexes of the matrices that needed to be outputted
protected ArrayList> outputIndexes=new ArrayList>();
//converter to convert the input record into indexes and matrix value (can be a cell or a block)
protected Converter inputConverter=null;
//a counter to measure the time spent in a mapper
protected static enum Counters {
MAP_TIME
};
/**
 * Shared map-side driver: every converted input record is validated, placed into the value
 * cache and handed to the subclass hook; afterwards the elapsed wall-clock time in
 * milliseconds is added to the MAP_TIME counter.
 *
 * @param rawKey raw key of the input record (its use is not visible here, see the note in the body)
 * @param rawValue raw value of the input record (its use is not visible here, see the note in the body)
 * @param out collector passed through to specialOperationsForActualMap
 * @param reporter reporter passed through to the hook and used to increment MAP_TIME
 * @throws IOException if a record fails checkValidity or the hook throws
 */
@SuppressWarnings("unchecked")
protected void commonMap(Writable rawKey, Writable rawValue, OutputCollector out, Reporter reporter)
throws IOException
{
//start of the interval reported through Counters.MAP_TIME
long start=System.currentTimeMillis();
//System.out.println("read in Mapper: "+rawKey+": "+rawValue);
//for each representative matrix, read the record and apply instructions
//NOTE(review): the loop header on the next line is truncated in this copy of the file. The loop
//condition, the declaration of thisMatrix (used below) and the opening of the inner per-record
//block (closed by the two braces after the hook call) are missing and have to be restored
//from the original source before this method compiles.
for(int i=0; i pair = inputConverter.next();
MatrixIndexes indexes=pair.getKey();
MatrixValue value=pair.getValue();
//reject records whose key or boundary-block size is inconsistent with matrix i
checkValidity(indexes, value, i);
//put the input in the cache
cachedValues.reset();
cachedValues.set(thisMatrix, indexes, value);
//special operations for the individual mapper type
specialOperationsForActualMap(i, out, reporter);
}
}
reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis()-start);
}
/**
 * Mapper-specific processing step. It is invoked by commonMap once per input record, right
 * after that record has been validated and stored in the value cache.
 *
 * @param index position of the current representative matrix (the same value commonMap
 *              passes to checkValidity)
 * @param out collector received by commonMap
 * @param reporter reporter received by commonMap
 * @throws IOException if the step fails
 */
protected abstract void specialOperationsForActualMap(int index, OutputCollector out, Reporter reporter)
	throws IOException;
/**
 * Verifies that a record fits the representative matrix it was read for.
 * The key must lie within [1, rbounds[rep]] x [1, cbounds[rep]], and a value located on the
 * last row (or column) index must not have more rows (or columns) than the boundary block
 * size recorded for that matrix.
 *
 * @param indexes key of the record
 * @param value value of the record
 * @param rep position of the representative matrix in the per-matrix arrays
 * @throws IOException if the key is out of range or a boundary block is too large
 */
protected void checkValidity(MatrixIndexes indexes, MatrixValue value, int rep) throws IOException
{
	final long row = indexes.getRowIndex();
	final long col = indexes.getColumnIndex();
	final long maxRow = rbounds[rep];
	final long maxCol = cbounds[rep];

	//key range check
	final boolean inRange = row >= 1 && col >= 1 && row <= maxRow && col <= maxCol;
	if( !inRange ) {
		throw new IOException("key: "+indexes+" is out of range: [1, "+maxRow+"] and [1, "+maxCol+"] (tag="+rep+")!");
	}

	//row count check, only relevant for the last row index
	if( row == maxRow && value.getNumRows() > lastblockrlens[rep] ) {
		throw new IOException("boundary block with "+value.getNumRows()+" rows exceeds the size "+lastblockrlens[rep]+" "
			+ "(tag="+rep+", ix="+indexes+", "+value.getNumRows()+"x"+value.getNumColumns()+")");
	}

	//column count check, only relevant for the last column index
	if( col == maxCol && value.getNumColumns() > lastblockclens[rep] ) {
		throw new IOException("boundary block with "+value.getNumColumns()+" columns exceeds the size "+lastblockclens[rep]+" "
			+ "(tag="+rep+", ix="+indexes+", "+value.getNumRows()+"x"+value.getNumColumns()+")");
	}
}
/**
 * Determines if empty blocks can be discarded on map input. Conceptually, this is true
 * if the individual instructions don't need to output empty blocks and if they are sparse-safe.
 * <p>
 * As implemented, the answer is true only if at least one mapper instruction exists and every
 * mapper instruction is an AggregateBinaryInstruction or a PMMJMRInstruction whose
 * getOutputEmptyBlocks() returns false. Any other instruction type makes the result false.
 *
 * @return true if empty blocks can be discarded on map input
 */
public boolean allowsFilterEmptyInputBlocks()
{
	//without any mapper instructions the filter is never allowed
	if( mapper_instructions == null )
		return false;

	boolean foundInstruction = false;
	for( ArrayList<MRInstruction> vinst : mapper_instructions ) {
		for( MRInstruction inst : vinst ) {
			boolean canSkipEmpty =
				   (inst instanceof AggregateBinaryInstruction && !((AggregateBinaryInstruction)inst).getOutputEmptyBlocks())
				|| (inst instanceof PMMJMRInstruction && !((PMMJMRInstruction)inst).getOutputEmptyBlocks());
			//a single disqualifying instruction decides the result
			if( !canSkipEmpty )
				return false;
			foundInstruction = true; //ensure that mapper instructions exist
		}
	}
	return foundInstruction;
}
/**
 * Initializes this mapper from the job configuration. After delegating to the superclass it
 * reads the representative matrix indexes, the input converter, the data generation, mapper,
 * reblock and CSV reblock instructions and the mapper output indexes, and then builds, per
 * representative matrix, the instruction lists and output index list that apply to it.
 * IOException and DMLRuntimeException raised while reading the configuration are rethrown
 * as RuntimeException with the original exception as cause.
 *
 * @param job job configuration to read from
 * @throws RuntimeException if the configuration cannot be read or if more than one data
 *         generation instruction matches a single input
 */
public void configure(JobConf job)
{
super.configure(job);
//get the indexes that this matrix file represents,
//since one matrix file can occur multiple times in a statement
try {
representativeMatrixes=MRJobConfiguration.getInputMatrixIndexesInMapper(job);
} catch (IOException e) {
throw new RuntimeException(e);
}
//get input converter information (looked up via the first representative matrix)
inputConverter=MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0));
DataGenMRInstruction[] allDataGenIns;
MRInstruction[] allMapperIns;
ReblockInstruction[] allReblockIns;
CSVReblockInstruction[] allCSVReblockIns;
try {
allDataGenIns = MRJobConfiguration.getDataGenInstructions(job);
//parse the instructions on the matrices that this file represents
allMapperIns=MRJobConfiguration.getInstructionsInMapper(job);
//parse the reblock instructions on the matrices that this file represents
allReblockIns=MRJobConfiguration.getReblockInstructions(job);
allCSVReblockIns=MRJobConfiguration.getCSVReblockInstructions(job);
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
//get all the output indexes
byte[] outputs=MRJobConfiguration.getOutputIndexesInMapper(job);
//get the dimension of all the representative matrices
rlens=new long[representativeMatrixes.size()];
clens=new long[representativeMatrixes.size()];
//NOTE(review): the next line is truncated in this copy of the file. Everything between the loop
//header and the declaration of 'set' is missing: the statements that fill rlens/clens and,
//presumably, the initialization of brlens, bclens, rbounds, cbounds, lastblockrlens and
//lastblockclens, none of which is assigned anywhere else in the visible part of this class.
for(int i=0; i set=new HashSet();
//NOTE(review): the next line is truncated as well. The loop condition, the opening brace of
//the per-matrix loop (closed near the end of this method) and whatever seeds 'set' for the
//current representative matrix are missing - restore from the original source.
//From here on 'set' holds the matrix indexes reachable from the current representative
//matrix: each selected instruction adds its output, so later instructions that consume
//that output are selected too.
for(int i=0; i dataGensForThisMatrix=new ArrayList();
if(allDataGenIns!=null)
{
for(DataGenMRInstruction ins:allDataGenIns)
{
if(set.contains(ins.getInput()))
{
dataGensForThisMatrix.add(ins);
set.add(ins.output);
}
}
}
if(dataGensForThisMatrix.size()>1)
throw new RuntimeException("only expects at most one rand instruction per input");
//keep dataGen_instructions aligned with the representative matrices: null marks "none"
if(dataGensForThisMatrix.isEmpty())
dataGen_instructions.add(null);
else
dataGen_instructions.add(dataGensForThisMatrix.get(0));
//collect the relevant instructions for this representative matrix
ArrayList opsForThisMatrix=new ArrayList();
if(allMapperIns!=null)
{
for(MRInstruction ins: allMapperIns)
{
try {
//an instruction is selected as soon as ANY of its inputs is reachable;
//the stricter variant requiring ALL inputs is kept commented out below
/*
boolean toAdd=true;
for(byte input: ins.getInputIndexes())
if(!set.contains(input))
{
toAdd=false;
break;
}
*/
boolean toAdd=false;
for(byte input : ins.getInputIndexes())
if(set.contains(input))
{
toAdd=true;
break;
}
if(toAdd)
{
opsForThisMatrix.add(ins);
set.add(ins.output);
}
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
}
}
mapper_instructions.add(opsForThisMatrix);
//collect the relevant reblock instructions for this representative matrix
ArrayList reblocksForThisMatrix=new ArrayList();
if(allReblockIns!=null)
{
for(ReblockInstruction ins:allReblockIns)
{
if(set.contains(ins.input))
{
reblocksForThisMatrix.add(ins);
set.add(ins.output);
}
}
}
reblock_instructions.add(reblocksForThisMatrix);
//collect the relevant CSV reblock instructions for this representative matrix
ArrayList csvReblocksForThisMatrix=new ArrayList();
if(allCSVReblockIns!=null)
{
for(CSVReblockInstruction ins:allCSVReblockIns)
{
if(set.contains(ins.input))
{
csvReblocksForThisMatrix.add(ins);
set.add(ins.output);
}
}
}
csv_reblock_instructions.add(csvReblocksForThisMatrix);
//collect the output indexes for this representative matrix
//(only those mapper outputs that are reachable from it)
ArrayList outsForThisMatrix=new ArrayList();
for(byte output: outputs)
{
if(set.contains(output))
outsForThisMatrix.add(output);
}
outputIndexes.add(outsForThisMatrix);
}
}
/**
 * Executes the mapper instructions that were collected for one representative matrix.
 *
 * @param index position of the representative matrix in mapper_instructions
 * @throws IOException wrapping any exception raised while looking up or processing
 *         the instructions
 */
protected void processMapperInstructionsForMatrix(int index)
	throws IOException
{
	try {
		//apply all mapper instructions
		processMixedInstructions(mapper_instructions.get(index));
	}
	catch (Exception ex) {
		//report every failure as an IOException, keeping the original exception as cause
		throw new IOException(ex);
	}
}
}