org.apache.sysml.runtime.instructions.spark.ReblockSPInstruction

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.instructions.spark;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaPairRDD;

import org.apache.sysml.hops.recompile.Recompiler;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.spark.data.RDDProperties;
import org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock;
import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.operators.Operator;

public class ReblockSPInstruction extends UnarySPInstruction 
{
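	//target block sizes and whether empty blocks are materialized in the output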
	private int brlen; 
	private int bclen;
	private boolean outputEmptyBlocks;
	
	public ReblockSPInstruction(Operator op, CPOperand in, CPOperand out, int br, int bc, boolean emptyBlocks,
			String opcode, String instr) 
	{
		super(op, in, out, opcode, instr);
		brlen=br;
		bclen=bc;
		outputEmptyBlocks = emptyBlocks;
	}
	
	public static ReblockSPInstruction parseInstruction(String str)  throws DMLRuntimeException 
	{
		String parts[] = InstructionUtils.getInstructionPartsWithValueType(str);
		String opcode = parts[0];
		
		if(opcode.compareTo("rblk") != 0) {
			throw new DMLRuntimeException("Incorrect opcode for ReblockSPInstruction:" + opcode);
		}
		
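		//remaining fields: input operand, output operand, block row/column sizes, empty-block flag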
		CPOperand in = new CPOperand(parts[1]);
		CPOperand out = new CPOperand(parts[2]);
		int brlen=Integer.parseInt(parts[3]);
		int bclen=Integer.parseInt(parts[4]);
		boolean outputEmptyBlocks = Boolean.parseBoolean(parts[5]);
		
		Operator op = null; // no operator for ReblockSPInstruction
		return new ReblockSPInstruction(op, in, out, brlen, bclen, outputEmptyBlocks, opcode, str);
	}
	

	@Override
	@SuppressWarnings("unchecked")
	public void processInstruction(ExecutionContext ec)
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		SparkExecutionContext sec = (SparkExecutionContext)ec;

		//set the output characteristics
		MatrixObject mo = sec.getMatrixObject(input1.getName());
		MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
		MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
		mcOut.set(mc.getRows(), mc.getCols(), brlen, bclen, mc.getNonZeros());

		//get the source format from the metadata
		MatrixFormatMetaData iimd = (MatrixFormatMetaData) mo.getMetaData();
		if(iimd == null) {
			throw new DMLRuntimeException("Error: Metadata not found");
		}
		
		//check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
		if( Recompiler.checkCPReblock(sec, input1.getName()) ) {
			Recompiler.executeInMemoryReblock(sec, input1.getName(), output.getName());
			return;
		}
		
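		//dispatch on the source format: text/matrix-market and binary cell inputs are converted
		//to binary blocks, csv inputs are delegated to a CSVReblockSPInstruction, and binary
		//block inputs are re-tiled to the requested block size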
		if(iimd.getInputInfo() == InputInfo.TextCellInputInfo || iimd.getInputInfo() == InputInfo.MatrixMarketInputInfo ) 
		{
			//check jdk version (prevent double.parseDouble contention on <jdk8)
			sec.checkAndRaiseValidationWarningJDKVersion();
			
			//get the input textcell rdd
			JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) 
					sec.getRDDHandleForVariable(input1.getName(), iimd.getInputInfo());
			
			//convert textcell to binary block
			JavaPairRDD<MatrixIndexes, MatrixBlock> out = 
					RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
			
			//put output RDD handle into symbol table
			sec.setRDDHandleForVariable(output.getName(), out);
			sec.addLineageRDD(output.getName(), input1.getName());
		}
		else if(iimd.getInputInfo() == InputInfo.CSVInputInfo) {
			// HACK ALERT: Until we introduce the rewrite to insert csvrblock for non-persistent reads
			// throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
			RDDProperties properties = mo.getRddProperties();
			CSVReblockSPInstruction csvInstruction = null;
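			//default csv read properties, overridden below if the matrix object carries RDDProperties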
			boolean hasHeader = false;
			String delim = ",";
			boolean fill = false;
			double missingValue = 0;
			if(properties != null) {
				hasHeader = properties.isHasHeader();
				delim = properties.getDelim();
				fill = properties.isFill();
				missingValue = properties.getMissingValue();
			}
			
			csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, missingValue, "csvrblk", instString);
			csvInstruction.processInstruction(sec);
			return;
		}
		else if(iimd.getInputInfo()==InputInfo.BinaryCellInputInfo) 
		{
			JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iimd.getInputInfo());
			JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
			
			//put output RDD handle into symbol table
			sec.setRDDHandleForVariable(output.getName(), out);
			sec.addLineageRDD(output.getName(), input1.getName());
		}
		else if(iimd.getInputInfo()==InputInfo.BinaryBlockInputInfo) 
		{
			/// HACK ALERT: Workaround for MLContext 
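			//if input and output block sizes already match, reuse the existing binary block RDD as-is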
			if(mc.getRowsPerBlock() == mcOut.getRowsPerBlock() && mc.getColsPerBlock() == mcOut.getColsPerBlock()) {
				if(mo.getRDDHandle() != null) {
					JavaPairRDD<MatrixIndexes, MatrixBlock> out = (JavaPairRDD<MatrixIndexes, MatrixBlock>) mo.getRDDHandle().getRDD();
					
					//put output RDD handle into symbol table
					sec.setRDDHandleForVariable(output.getName(), out);
					sec.addLineageRDD(output.getName(), input1.getName());
					return;
				}
				else {
					throw new DMLRuntimeException("Input RDD is not accessible through buffer pool for ReblockSPInstruction:" + iimd.getInputInfo());
				}
			}
			else 
			{
				//BINARY BLOCK <- BINARY BLOCK (different sizes)
				JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
				
				JavaPairRDD<MatrixIndexes, MatrixBlock> out = 
						in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
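				//merge partial blocks that map to the same output block index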
				out = RDDAggregateUtils.mergeByKey( out );
				
				//put output RDD handle into symbol table
				sec.setRDDHandleForVariable(output.getName(), out);
				sec.addLineageRDD(output.getName(), input1.getName());
			}
		}
		else {
			throw new DMLRuntimeException("The given InputInfo is not implemented for ReblockSPInstruction:" + iimd.getInputInfo());
		}		
	}
}
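
For orientation, a minimal usage sketch follows. The instruction string, the variable names A and B, and the execution context sec are placeholders; the '°' field delimiter, the '·' type separators, and the leading execution-type token are assumptions about SystemML's instruction encoding rather than something shown in this file.

// Hypothetical sketch: parse a reblock instruction and run it against a
// SparkExecutionContext 'sec' that already holds a matrix bound to variable "A".
// Field order follows parseInstruction above: opcode, input, output, brlen, bclen, emptyBlocks.
String instr = "SPARK°rblk°A·MATRIX·DOUBLE°B·MATRIX·DOUBLE°1000°1000°true";
ReblockSPInstruction rblk = ReblockSPInstruction.parseInstruction(instr);
rblk.processInstruction(sec); //reblocks A into 1000x1000 binary blocks bound to variable B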



