All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.runtime.matrix.mapred.MapperBase Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


package org.apache.sysml.runtime.matrix.mapred;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction;
import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
import org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction;
import org.apache.sysml.runtime.instructions.mr.MRInstruction;
import org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction;
import org.apache.sysml.runtime.instructions.mr.ReblockInstruction;
import org.apache.sysml.runtime.matrix.data.Converter;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.MatrixValue;
import org.apache.sysml.runtime.matrix.data.Pair;

@SuppressWarnings("rawtypes")
public abstract class MapperBase extends MRBaseForCommonInstructions
{
	
	//the indexes that this particular input matrix file represents
	protected ArrayList representativeMatrixes=null;
	
	//the dimension for all the representative matrices 
	//(they are all the same, since coming from the same files)
	protected long[] rlens=null;
	protected long[] clens=null;
	
	//the block sizes for the representative matrices
	protected int[] brlens=null;
	protected int[] bclens=null;
	
	//upper boundaries to check
	protected long[] rbounds=null;
	protected long[] cbounds=null;
	
	//boundary block sizes
	protected int[] lastblockrlens=null;
	protected int[] lastblockclens=null;
	
	//rand instructions that need to be performed in mapper
	protected ArrayList dataGen_instructions=new ArrayList();
	
	//instructions that need to be performed in mapper
	protected ArrayList> mapper_instructions=new ArrayList>();
	
	//block instructions that need to be performed in part by mapper
	protected ArrayList> reblock_instructions=new ArrayList>();
	
	//csv block instructions that need to be performed in part by mapper
	protected ArrayList> csv_reblock_instructions=new ArrayList>();
	
	//the indexes of the matrices that needed to be outputted
	protected ArrayList> outputIndexes=new ArrayList>();
	
	//converter to convert the input record into indexes and matrix value (can be a cell or a block)
	protected Converter inputConverter=null;
	
	//a counter to measure the time spent in a mapper
	protected static enum Counters {
		MAP_TIME 
	};
	
	
	/**
	 * Shared map logic: for every record produced by the input converter, validates the
	 * record, places it into the cache and hands control to the concrete mapper via
	 * {@link #specialOperationsForActualMap(int, OutputCollector, Reporter)}.
	 * The wall-clock time spent in this method is added to {@code Counters.MAP_TIME}
	 * in milliseconds.
	 * 
	 * NOTE(review): the loop header inside this method is truncated in this copy of the
	 * file (see the note in the body); the description above covers only the visible part.
	 * 
	 * @param rawKey the raw input key of the map call
	 * @param rawValue the raw input value of the map call
	 * @param out output collector, passed through to specialOperationsForActualMap
	 * @param reporter reporter used for the MAP_TIME counter, also passed through
	 * @throws IOException if a record fails checkValidity or the concrete mapper fails
	 */
	@SuppressWarnings("unchecked")
	protected void commonMap(Writable rawKey, Writable rawValue, OutputCollector out, Reporter reporter) 
		throws IOException 
	{
		long start=System.currentTimeMillis();
		
		//System.out.println("read in Mapper: "+rawKey+": "+rawValue);
		
		//for each representative matrix, read the record and apply instructions
		//NOTE(review): the next line is incomplete - the text between "i" and " pair" is missing
		//(it looks like everything between a '<' and a later '>' was dropped). The loop condition,
		//the code that feeds rawKey/rawValue into inputConverter, the definition of thisMatrix and
		//the opening of an inner loop (two scopes are closed below) are not visible here and
		//must be restored from the upstream source before this compiles.
		for(int i=0; i pair = inputConverter.next();
				
				MatrixIndexes indexes=pair.getKey();
				MatrixValue value=pair.getValue();
				
				//reject records whose indexes or boundary block sizes are out of range
				checkValidity(indexes, value, i);
				
				//put the input in the cache
				cachedValues.reset();
				cachedValues.set(thisMatrix, indexes, value);
				
				//special operations for individual map type
				specialOperationsForActualMap(i, out, reporter);
			}
		}
		
		//account the elapsed time of this call in milliseconds
		reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis()-start);
	}
	
	/**
	 * Hook implemented by the concrete mapper. It is invoked by commonMap once per
	 * converted input record, after the record passed checkValidity and was stored
	 * in cachedValues.
	 * 
	 * @param index the loop index that commonMap uses for the current representative matrix
	 * @param out the output collector received by commonMap
	 * @param reporter the reporter received by commonMap
	 * @throws IOException if the mapper-specific processing fails
	 */
	protected abstract void specialOperationsForActualMap(int index,
			OutputCollector out, Reporter reporter)throws IOException;

	/**
	 * Verifies that an input record fits the representative matrix it is read for.
	 * The row and column index must lie in [1, rbounds[rep]] and [1, cbounds[rep]];
	 * a record sitting on the last row (column) index must not have more rows (columns)
	 * than lastblockrlens[rep] (lastblockclens[rep]).
	 * 
	 * @param indexes row/column index of the record
	 * @param value the record's value, consulted only for its number of rows and columns
	 * @param rep position of the representative matrix in the bound arrays
	 * @throws IOException if any of the checks above fails
	 */
	protected void checkValidity(MatrixIndexes indexes, MatrixValue value, int rep) throws IOException
	{
		final long row = indexes.getRowIndex();
		final long col = indexes.getColumnIndex();
		
		//all four conditions must hold for a key to be in range
		final boolean inRange = row>0 && col>0 && row<=rbounds[rep] && col<=cbounds[rep];
		if( !inRange ) {
			throw new IOException("key: "+indexes+" is out of range: [1, "+rbounds[rep]+"] and [1, "+cbounds[rep]+"] (tag="+rep+")!");
		}
		
		//value is only inspected for records on the last row index
		if( row==rbounds[rep] ) {
			if( value.getNumRows()>lastblockrlens[rep] ) {
				throw new IOException("boundary block with "+value.getNumRows()+" rows exceeds the size "+lastblockrlens[rep]+" "
						+ describeBoundaryBlock(indexes, value, rep));
			}
		}
		
		//same check for records on the last column index
		if( col==cbounds[rep] ) {
			if( value.getNumColumns()>lastblockclens[rep] ) {
				throw new IOException("boundary block with "+value.getNumColumns()+" columns exceeds the size "+lastblockclens[rep]+" "
						+ describeBoundaryBlock(indexes, value, rep));
			}
		}
	}
	
	//builds the "(tag=..., ix=..., RxC)" suffix shared by both boundary block error messages
	private static String describeBoundaryBlock(MatrixIndexes indexes, MatrixValue value, int rep)
	{
		return "(tag="+rep+", ix="+indexes+", "+value.getNumRows()+"x"+value.getNumColumns()+")";
	}

	/**
	 * Determines if empty blocks can be discarded on map input. Conceptually, this is true
	 * if the individual instructions do not need to output empty blocks and if they are sparse-safe.
	 * In this implementation that means: at least one mapper instruction exists, and every mapper
	 * instruction is an AggregateBinaryInstruction or a PMMJMRInstruction whose
	 * getOutputEmptyBlocks() returns false.
	 * 
	 * @return true if empty blocks can be discarded on map input; false if there are no
	 *         mapper instructions or if any instruction does not satisfy the condition above
	 */
	public boolean allowsFilterEmptyInputBlocks()
	{
		//without any mapper instruction there is nothing that justifies filtering
		if( mapper_instructions==null )
			return false;
		
		boolean foundInstruction = false;
		for( ArrayList<MRInstruction> vinst : mapper_instructions ) {
			for( MRInstruction inst : vinst ) {
				boolean filterSafe = 
					   (inst instanceof AggregateBinaryInstruction && !((AggregateBinaryInstruction)inst).getOutputEmptyBlocks())
					|| (inst instanceof PMMJMRInstruction && !((PMMJMRInstruction)inst).getOutputEmptyBlocks());
				
				//a single instruction that needs empty blocks decides the result
				if( !filterSafe )
					return false;
				
				foundInstruction = true; //ensure that mapper instructions exist
			}
		}
		
		return foundInstruction;
	}
	
	/**
	 * Configures the mapper from the job configuration. After delegating to the superclass,
	 * this method reads the representative matrix indexes, the input converter, the data
	 * generation / mapper / reblock / csv reblock instructions and the mapper output indexes,
	 * and then collects - per representative matrix - the instructions and output indexes
	 * that are relevant for that matrix into dataGen_instructions, mapper_instructions,
	 * reblock_instructions, csv_reblock_instructions and outputIndexes.
	 * 
	 * IOException and DMLRuntimeException raised while reading the configuration are
	 * rethrown as RuntimeException; a RuntimeException is also thrown if more than one
	 * data generation instruction applies to the same input.
	 * 
	 * NOTE(review): two lines in the middle of this method are truncated in this copy of
	 * the file (see the note in the body), so part of the initialization is not visible.
	 * 
	 * @param job the job configuration to read from
	 */
	public void configure(JobConf job)
	{
		super.configure(job);
		
		//get the indexes that this matrix file represents, 
		//since one matrix file can occur multiple times in a statement
		try {
			representativeMatrixes=MRJobConfiguration.getInputMatrixIndexesInMapper(job);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
		
		//get input converter information
		inputConverter=MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0));
		
		DataGenMRInstruction[] allDataGenIns;
		MRInstruction[] allMapperIns;
		ReblockInstruction[] allReblockIns;
		CSVReblockInstruction[] allCSVReblockIns;
		
		try {
			allDataGenIns = MRJobConfiguration.getDataGenInstructions(job);
			
			//parse the instructions on the matrices that this file represent
			allMapperIns=MRJobConfiguration.getInstructionsInMapper(job);
			
			//parse the reblock instructions on the matrices that this file represent
			allReblockIns=MRJobConfiguration.getReblockInstructions(job);
			
			allCSVReblockIns=MRJobConfiguration.getCSVReblockInstructions(job);
			
		} catch (DMLRuntimeException e) {
			throw new RuntimeException(e);
		}
		
		//get all the output indexes
		byte[] outputs=MRJobConfiguration.getOutputIndexesInMapper(job);
		
		//get the dimension of all the representative matrices
		rlens=new long[representativeMatrixes.size()];
		clens=new long[representativeMatrixes.size()];
		//NOTE(review): the next two lines are incomplete - the text between "i" and the following
		//identifier is missing on both (it looks like everything between a '<' and a later '>'
		//was dropped). The code that fills rlens/clens, presumably also the code that sets up
		//brlens, bclens, rbounds, cbounds, lastblockrlens and lastblockclens, and the header of
		//the per-representative-matrix loop are not visible here and must be restored from the
		//upstream source. How "set" is (re)initialized for each representative matrix is part
		//of the missing text as well - TODO confirm.
		for(int i=0; i set=new HashSet();
		for(int i=0; i dataGensForThisMatrix=new ArrayList();
			//"set" tracks the matrix indexes available for the current representative matrix;
			//the output of every selected instruction is added so that dependent instructions
			//are selected, too
			if(allDataGenIns!=null)
			{
				for(DataGenMRInstruction ins:allDataGenIns)
				{
					if(set.contains(ins.getInput()))
					{
						dataGensForThisMatrix.add(ins);
						set.add(ins.output);
					}
				}
			}
			if(dataGensForThisMatrix.size()>1)
				throw new RuntimeException("only expects at most one rand instruction per input");
			//keep the list aligned with the representative matrices: null marks "no datagen instruction"
			if(dataGensForThisMatrix.isEmpty())
				dataGen_instructions.add(null);
			else
				dataGen_instructions.add(dataGensForThisMatrix.get(0));
						
			//collect the relevant instructions for this representative matrix
			ArrayList opsForThisMatrix=new ArrayList();
			
			if(allMapperIns!=null)
			{
				for(MRInstruction ins: allMapperIns)
				{
					try {
						//disabled alternative rule, kept for reference: select an instruction
						//only if ALL of its inputs are available
						/*
						boolean toAdd=true;
						for(byte input: ins.getInputIndexes())
							if(!set.contains(input))
							{
								toAdd=false;
								break;
							}
							*/
						//active rule: select an instruction as soon as ANY of its inputs is available
						boolean toAdd=false;
						for(byte input : ins.getInputIndexes())
							if(set.contains(input))
							{
								toAdd=true;
								break;
							}
						
						if(toAdd)
						{
							opsForThisMatrix.add(ins);
							set.add(ins.output);
						}
					} catch (DMLRuntimeException e) {
						throw new RuntimeException(e);
					}	
				}
			}
			
			mapper_instructions.add(opsForThisMatrix);
			
			//collect the relevant reblock instructions for this representative matrix
			ArrayList reblocksForThisMatrix=new ArrayList();
			if(allReblockIns!=null)
			{
				for(ReblockInstruction ins:allReblockIns)
				{
					if(set.contains(ins.input))
					{
						reblocksForThisMatrix.add(ins);
						set.add(ins.output);
					}
				}
			}
		
			reblock_instructions.add(reblocksForThisMatrix);
			
			//collect the relevant csv reblock instructions for this representative matrix
			ArrayList csvReblocksForThisMatrix=new ArrayList();
			if(allCSVReblockIns!=null)
			{
				for(CSVReblockInstruction ins:allCSVReblockIns)
				{
					if(set.contains(ins.input))
					{
						csvReblocksForThisMatrix.add(ins);
						set.add(ins.output);
					}
				}
			}
		
			csv_reblock_instructions.add(csvReblocksForThisMatrix);
			
			//collect the output indexes for this representative matrix
			//(only those mapper outputs whose index ended up in "set")
			ArrayList outsForThisMatrix=new ArrayList();
			for(byte output: outputs)
			{
				if(set.contains(output))
					outsForThisMatrix.add(output);
			}
			outputIndexes.add(outsForThisMatrix);
		}
	}

	/**
	 * Executes all mapper instructions that were collected for one representative matrix.
	 * 
	 * @param index position of the representative matrix in mapper_instructions
	 * @throws IOException wrapping any exception raised while looking up or
	 *         processing the instructions
	 */
	protected void processMapperInstructionsForMatrix(int index) 
		throws IOException
	{
		try {
			//the lookup stays inside the try block so that its failures are wrapped as well
			final ArrayList<MRInstruction> instructionsForMatrix = mapper_instructions.get(index);
			processMixedInstructions(instructionsForMatrix);
		}
		catch (Exception failure) {
			throw new IOException(failure);
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy