All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.runtime.instructions.spark.RmmSPInstruction Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.instructions.spark;


import java.util.LinkedList;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.functionobjects.Multiply;
import org.apache.sysml.runtime.functionobjects.Plus;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.TripleIndexes;
import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator;
import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
import org.apache.sysml.runtime.matrix.operators.Operator;

/**
 * 
 */
public class RmmSPInstruction extends BinarySPInstruction 
{
	
	public RmmSPInstruction(Operator op, CPOperand in1, CPOperand in2, CPOperand out, String opcode, String istr )
	{
		super(op, in1, in2, out, opcode, istr);
		_sptype = SPINSTRUCTION_TYPE.RMM;		
	}

	/**
	 * 
	 * @param str
	 * @return
	 * @throws DMLRuntimeException
	 */
	public static RmmSPInstruction parseInstruction( String str ) 
		throws DMLRuntimeException 
	{
		String parts[] = InstructionUtils.getInstructionPartsWithValueType(str);
		String opcode = parts[0];

		if ( "rmm".equals(opcode) ) {
			CPOperand in1 = new CPOperand(parts[1]);
			CPOperand in2 = new CPOperand(parts[2]);
			CPOperand out = new CPOperand(parts[3]);
			
			return new RmmSPInstruction(null, in1, in2, out, opcode, str);
		} 
		else {
			throw new DMLRuntimeException("RmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
		}		
	}
	
	@Override
	public void processInstruction(ExecutionContext ec) 
		throws DMLRuntimeException, DMLUnsupportedOperationException
	{	
		SparkExecutionContext sec = (SparkExecutionContext)ec;
		
		//get input rdds
		MatrixCharacteristics mc1 = sec.getMatrixCharacteristics( input1.getName() );
		MatrixCharacteristics mc2 = sec.getMatrixCharacteristics( input2.getName() );
		JavaPairRDD in1 = sec.getBinaryBlockRDDHandleForVariable( input1.getName() );
		JavaPairRDD in2 = sec.getBinaryBlockRDDHandleForVariable( input2.getName() );
		
		//execute Spark RMM instruction
		//step 1: prepare join keys (w/ replication), i/j/k 
		JavaPairRDD tmp1 = in1.flatMapToPair(
				new RmmReplicateFunction(mc2.getCols(), mc2.getColsPerBlock(), true)); 
		JavaPairRDD tmp2 = in2.flatMapToPair(
				new RmmReplicateFunction(mc1.getRows(), mc1.getRowsPerBlock(), false));
		
		//step 2: join prepared datasets, multiply, and aggregate
		JavaPairRDD out = 
				tmp1.join( tmp2 )                              //join by result block 
		            .mapToPair( new RmmMultiplyFunction() );   //do matrix multiplication
		out = RDDAggregateUtils.sumByKeyStable(out);           //aggregation per result block
		
		//put output block into symbol table (no lineage because single block)
		updateBinaryMMOutputMatrixCharacteristics(sec, true);
		sec.setRDDHandleForVariable(output.getName(), out);
		sec.addLineageRDD(output.getName(), input1.getName());
		sec.addLineageRDD(output.getName(), input2.getName());
	}


	/**
	 * 
	 */
	private static class RmmReplicateFunction implements PairFlatMapFunction, TripleIndexes, MatrixBlock> 
	{
		private static final long serialVersionUID = 3577072668341033932L;
		
		private long _len = -1;
		private long _blen = -1;
		private boolean _left = false;
		
		public RmmReplicateFunction(long len, long blen, boolean left)
		{
			_len = len;
			_blen = blen;
			_left = left;
		}
		
		@Override
		public Iterable> call( Tuple2 arg0 ) 
			throws Exception 
		{
			LinkedList> ret = new LinkedList>();
			MatrixIndexes ixIn = arg0._1();
			MatrixBlock blkIn = arg0._2();
			
			long numBlocks = (long) Math.ceil((double)_len/_blen); 
			
			if( _left ) //LHS MATRIX
			{
				//replicate wrt # column blocks in RHS
				long i = ixIn.getRowIndex();
				long k = ixIn.getColumnIndex();
				for( long j=1; j<=numBlocks; j++ ) {
					TripleIndexes tmptix = new TripleIndexes(i, j, k);
					MatrixBlock tmpblk = new MatrixBlock(blkIn);
					ret.add( new Tuple2(tmptix, tmpblk) );
				}
			} 
			else // RHS MATRIX
			{
				//replicate wrt # row blocks in LHS
				long k = ixIn.getRowIndex();
				long j = ixIn.getColumnIndex();
				for( long i=1; i<=numBlocks; i++ ) {
					TripleIndexes tmptix = new TripleIndexes(i, j, k);
					MatrixBlock tmpblk = new MatrixBlock(blkIn);
					ret.add( new Tuple2(tmptix, tmpblk) );
				}
			}
			
			//output list of new tuples
			return ret;
		}
	}

	/**
	 * 
	 * 
	 */
	private static class RmmMultiplyFunction implements PairFunction>, MatrixIndexes, MatrixBlock> 
	{
		private static final long serialVersionUID = -5772410117511730911L;
		
		private AggregateBinaryOperator _op = null;
		
		public RmmMultiplyFunction()
		{
			AggregateOperator agg = new AggregateOperator(0, Plus.getPlusFnObject());
			_op = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), agg);
		}

		@Override
		public Tuple2 call( Tuple2> arg0 ) 
			throws Exception 
		{
			//get input blocks per
			TripleIndexes ixIn = arg0._1(); //i,j,k
			MatrixIndexes ixOut = new MatrixIndexes(ixIn.getFirstIndex(), ixIn.getSecondIndex()); //i,j
			MatrixBlock blkIn1 = arg0._2()._1();
			MatrixBlock blkIn2 = arg0._2()._2();
			MatrixBlock blkOut = new MatrixBlock();
			
			//core block matrix multiplication 
			blkIn1.aggregateBinaryOperations(blkIn1, blkIn2, blkOut, _op);
							
			//output new tuple
			return new Tuple2(ixOut, blkOut);
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy