/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.hops;

import org.apache.sysml.lops.Data;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.util.LocalFileUtils;

import java.util.HashMap;
import java.util.Map.Entry;


public class DataOp extends Hop 
{
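	//Summary (added for readability, not part of upstream documentation): a DataOp represents
	//read, write, checkpoint, and function-output data operations (persistent or transient)
	//in the HOP DAG, carrying file name, input format, and blocking metadata.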

	private DataOpTypes _dataop;
	private String _fileName = null;
	
	//read dataop properties
	private FileFormatTypes _inFormat = FileFormatTypes.TEXT;
	private long _inRowsInBlock = -1;
	private long _inColsInBlock = -1;
	
	private boolean _recompileRead = true;
	
	/**
	 * List of "named" input parameters. They are maintained as a hashmap:
	 * parameter names (String) are mapped to indices (Integer) into the getInput()
	 * array list.
	 * 
	 * i.e., getInput().get(_paramIndexMap.get(parameterName)) refers to the Hop
	 * that is associated with parameterName.
	 */
	private HashMap<String, Integer> _paramIndexMap = new HashMap<String, Integer>();
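	//illustration only (parameter names are defined by the caller): if a read is constructed
	//with parameters {"rows" -> hRows, "cols" -> hCols}, both Hops are appended to getInput()
	//and getInput().get(_paramIndexMap.get("rows")) returns hRows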

	private DataOp() {
		//default constructor for clone
	}
	
	/**
	 * READ operation for a matrix with given dim1 and dim2.
	 * This constructor does not support any expressions in parameters.
	 */
	public DataOp(String l, DataType dt, ValueType vt, DataOpTypes dop,
			String fname, long dim1, long dim2, long nnz, long rowsPerBlock, long colsPerBlock) {
		super(l, dt, vt);
		_dataop = dop;
		
		_fileName = fname;
		setDim1(dim1);
		setDim2(dim2);
		setRowsInBlock(rowsPerBlock);
		setColsInBlock(colsPerBlock);
		setNnz(nnz);
		
		if( dop == DataOpTypes.TRANSIENTREAD )
			setInputFormatType(FileFormatTypes.BINARY);
	}

	/**
	 * READ operation for a matrix.
	 * This constructor supports expressions in parameters.
	 */
	public DataOp(String l, DataType dt, ValueType vt, 
			DataOpTypes dop, HashMap<String, Hop> inputParameters) {
		super(l, dt, vt);

		_dataop = dop;

		int index = 0;
		for( Entry<String, Hop> e : inputParameters.entrySet() ) 
		{
			String s = e.getKey();
			Hop input = e.getValue();
			getInput().add(input);
			input.getParent().add(this);

			_paramIndexMap.put(s, index);
			index++;
		}
		if (dop == DataOpTypes.TRANSIENTREAD ){
			setInputFormatType(FileFormatTypes.BINARY);
		}
	}
	
	// WRITE operation
	// This constructor does not support any expression in parameters
	public DataOp(String l, DataType dt, ValueType vt, Hop in,
			DataOpTypes dop, String fname) {
		super(l, dt, vt);
		_dataop = dop;
		getInput().add(0, in);
		in.getParent().add(this);
		_fileName = fname;

		if (dop == DataOpTypes.TRANSIENTWRITE || dop == DataOpTypes.FUNCTIONOUTPUT )
			setInputFormatType(FileFormatTypes.BINARY);
	}
	
	// CHECKPOINT operation
	// This constructor does not support any expression in parameters
	public DataOp(String l, DataType dt, ValueType vt, Hop in,
			LiteralOp level, DataOpTypes dop, String fname) {
		super(l, dt, vt);
		_dataop = dop;
		getInput().add(0, in);
		getInput().add(1, level);
		in.getParent().add(this);
		level.getParent().add(this);
		_fileName = fname;

		if (dop == DataOpTypes.TRANSIENTWRITE || dop == DataOpTypes.FUNCTIONOUTPUT )
			setInputFormatType(FileFormatTypes.BINARY);
	}
	
	
	/**
	 * WRITE operation for a matrix.
	 * This constructor supports expressions in parameters.
	 */
	public DataOp(String l, DataType dt, ValueType vt, 
		DataOpTypes dop, Hop in, HashMap<String, Hop> inputParameters) {
		super(l, dt, vt);

		_dataop = dop;
		
		getInput().add(0, in);
		in.getParent().add(this);
		
		if (inputParameters != null){
			int index = 1;
			for( Entry<String, Hop> e : inputParameters.entrySet() ) 
			{
				String s = e.getKey();
				Hop input = e.getValue();
				getInput().add(input);
				input.getParent().add(this);

				_paramIndexMap.put(s, index);
				index++;
			}
		
		}

		if (dop == DataOpTypes.TRANSIENTWRITE)
			setInputFormatType(FileFormatTypes.BINARY);
	}
	
	public DataOpTypes getDataOpType()
	{
		return _dataop;
	}
	
	public void setDataOpType( DataOpTypes type )
	{
		_dataop = type;
	}
	
	public void setOutputParams(long dim1, long dim2, long nnz, long rowsPerBlock, long colsPerBlock) {
		setDim1(dim1);
		setDim2(dim2);
		setNnz(nnz);
		setRowsInBlock(rowsPerBlock);
		setColsInBlock(colsPerBlock);
	}

	public void setFileName(String fn) {
		_fileName = fn;
	}

	public String getFileName() {
		return _fileName;
	}

	public int getParameterIndex(String name)
	{
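		//position of the named parameter within getInput(), as recorded during construction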
		return _paramIndexMap.get(name);
	}
	
	@Override
	public Lop constructLops()
			throws HopsException, LopsException 
	{	
		//return already created lops
		if( getLops() != null )
			return getLops();

		ExecType et = optFindExecType();
		Lop l = null;
		
		// construct lops for all input parameters
		HashMap<String, Lop> inputLops = new HashMap<String, Lop>();
		for (Entry<String, Integer> cur : _paramIndexMap.entrySet()) {
			inputLops.put(cur.getKey(), getInput().get(cur.getValue())
					.constructLops());
		}

		// Create the lop
		switch(_dataop) 
		{
			case TRANSIENTREAD:
				l = new Data(HopsData2Lops.get(_dataop), null, inputLops, getName(), null, 
						getDataType(), getValueType(), true, getInputFormatType());
				setOutputDimensions(l);
				break;
				
			case PERSISTENTREAD:
				l = new Data(HopsData2Lops.get(_dataop), null, inputLops, getName(), null, 
						getDataType(), getValueType(), false, getInputFormatType());
				l.getOutputParameters().setDimensions(getDim1(), getDim2(), _inRowsInBlock, _inColsInBlock, getNnz());
				break;
				
			case PERSISTENTWRITE:
				l = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), inputLops, getName(), null, 
						getDataType(), getValueType(), false, getInputFormatType());
				((Data)l).setExecType(et);
				setOutputDimensions(l);
				break;
				
			case TRANSIENTWRITE:
				l = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), inputLops, getName(), null,
						getDataType(), getValueType(), true, getInputFormatType());
				setOutputDimensions(l);
				break;
				
			case FUNCTIONOUTPUT:
				l = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), inputLops, getName(), null, 
						getDataType(), getValueType(), true, getInputFormatType());
				((Data)l).setExecType(et);
				setOutputDimensions(l);
				break;
			
			default:
				throw new LopsException("Invalid operation type for Data LOP: " + _dataop);	
		}
		
		setLineNumbers(l);
		setLops(l);
		
		//add reblock/checkpoint lops if necessary
		constructAndSetLopsDataFlowProperties();
	
		return getLops();

	}

	public void setInputFormatType(FileFormatTypes ft) {
		_inFormat = ft;
	}

	public FileFormatTypes getInputFormatType() {
		return _inFormat;
	}
	
	public void setInputBlockSizes( long brlen, long bclen ){
		setInputRowsInBlock(brlen);
		setInputColsInBlock(bclen);
	}
	
	public void setInputRowsInBlock( long brlen ){
		_inRowsInBlock = brlen;
	}
	
	public long getInputRowsInBlock(){
		return _inRowsInBlock;
	}
	
	public void setInputColsInBlock( long bclen ){
		_inColsInBlock = bclen;
	}
	
	public long getInputColsInBlock(){
		return _inColsInBlock;
	}
	
	public boolean isRead()
	{
		return( _dataop == DataOpTypes.PERSISTENTREAD || _dataop == DataOpTypes.TRANSIENTREAD );
	}
	
	public boolean isWrite()
	{
		return( _dataop == DataOpTypes.PERSISTENTWRITE || _dataop == DataOpTypes.TRANSIENTWRITE );
	}
	
	public boolean isPersistentReadWrite()
	{
		return( _dataop == DataOpTypes.PERSISTENTREAD || _dataop == DataOpTypes.PERSISTENTWRITE );
	}

	@Override
	public String getOpString() {
		String s = "";
		s += HopsData2String.get(_dataop);
		s += " "+getName();
		return s;
	}

	public void printMe() throws HopsException {
		if (LOG.isDebugEnabled()){
			if (getVisited() != VisitStatus.DONE) {
				super.printMe();
				LOG.debug("  DataOp: " + _dataop);
				if (_fileName != null) {
					LOG.debug(" file: " + _fileName);
				}
				LOG.debug(" format: " + getInputFormatType());
				for (Hop h : getInput()) {
					h.printMe();
				}
			}
			setVisited(VisitStatus.DONE);
		}
	}

	@Override
	public boolean allowsAllExecTypes()
	{
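		//the exec type of a data op is determined explicitly in optFindExecType()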
		return false;
	}	
	
	@Override
	protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
	{		
		double ret = 0;
		
		if ( getDataType() == DataType.SCALAR ) 
		{
			switch( getValueType() ) 
			{
				case INT:
					ret = OptimizerUtils.INT_SIZE; break;
				case DOUBLE:
					ret = OptimizerUtils.DOUBLE_SIZE; break;
				case BOOLEAN:
					ret = OptimizerUtils.BOOLEAN_SIZE; break;
				case STRING: 
					// by default, it estimates the size of string[100]
					ret = 100 * OptimizerUtils.CHAR_SIZE; break;
				case OBJECT:
					ret = OptimizerUtils.DEFAULT_SIZE; break;
				default:
					ret = 0;
			}
		}
		else //MATRIX 
		{
			if(   _dataop == DataOpTypes.PERSISTENTREAD 
			   || _dataop == DataOpTypes.TRANSIENTREAD ) 
			{
				double sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
				ret = OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity);	
			}
			// output memory estimate is not required for "write" nodes (just input)
		}
		
		return ret;
	}
	
	@Override
	protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz )
	{
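		//account for the local file I/O buffer used by reads/writes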
		return LocalFileUtils.BUFFER_SIZE;
	}
	
	@Override
	protected long[] inferOutputCharacteristics( MemoTable memo )
	{
		long[] ret = null;
		
		if(   _dataop == DataOpTypes.PERSISTENTWRITE
			|| _dataop == DataOpTypes.TRANSIENTWRITE ) 
		{
			MatrixCharacteristics mc = memo.getAllInputStats(getInput().get(0));
			if( mc.dimsKnown() )
				ret = new long[]{ mc.getRows(), mc.getCols(), mc.getNonZeros() };
		}
		else if( _dataop == DataOpTypes.TRANSIENTREAD )
		{
			//prepare statistics, passed from cross-dag transient writes
			MatrixCharacteristics mc = memo.getAllInputStats(this);
			if( mc.dimsKnown() )
				ret = new long[]{ mc.getRows(), mc.getCols(), mc.getNonZeros() };
		}
		
		return ret;
	}
	
	
	
	@Override
	protected ExecType optFindExecType() 
		throws HopsException 
	{
		//MB: find exec type has two meanings here: (1) for write it means the actual
		//exec type, while (2) for read it affects the recompilation decision as needed,
		//e.g., for sum(X) where the memory consumption is solely determined by the DataOp
		
		ExecType letype = (OptimizerUtils.isMemoryBasedOptLevel()) ? findExecTypeByMemEstimate() : null;
		ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;
		
		//NOTE: independent of etype executed in MR (piggybacked) if input to persistent write is MR
		if( _dataop == DataOpTypes.PERSISTENTWRITE || _dataop == DataOpTypes.TRANSIENTWRITE )
		{
			checkAndSetForcedPlatform();

			//additional check for write only
			if( getDataType()==DataType.SCALAR )
				_etypeForced = ExecType.CP;
			
			if( _etypeForced != null ) 			
			{
				_etype = _etypeForced;
			}
			else 
			{
				if ( OptimizerUtils.isMemoryBasedOptLevel() ) 
				{
					_etype = letype;
				}
				else if ( getInput().get(0).areDimsBelowThreshold() )
				{
					_etype = ExecType.CP;
				}
				else
				{
					_etype = REMOTE;
				}
			
				//check for valid CP dimensions and matrix size
				checkAndSetInvalidCPDimsAndSize();
			}
			
			//mark for recompile (forever)
			if( OptimizerUtils.ALLOW_DYN_RECOMPILATION && !dimsKnown(true) && _etype==REMOTE ) {
				setRequiresRecompile();
			}
		}
	    else //READ
		{
	    	//mark for recompile (forever)
			if( OptimizerUtils.ALLOW_DYN_RECOMPILATION && !dimsKnown(true) && letype==REMOTE 
				&& (_recompileRead || _requiresCheckpoint) ) 
			{
				setRequiresRecompile();
			}
			
			_etype = letype;
		}
		
		return _etype;
	}
	
	@Override
	public void refreshSizeInformation()
	{
		if( _dataop == DataOpTypes.PERSISTENTWRITE || _dataop == DataOpTypes.TRANSIENTWRITE )
		{
			Hop input1 = getInput().get(0);
			setDim1(input1.getDim1());
			setDim2(input1.getDim2());
			setNnz(input1.getNnz());
		}
		else //READ
		{
			//do nothing; dimensions updated via set output params
		}
	}
		
	
	/**
	 * Explicitly disables recompilation of transient reads. This additional information 
	 * is required because requiresRecompile is set in a top-down manner, hence any value
	 * set from a consuming operator would be overwritten by optFindExecType.
	 */
	public void disableRecompileRead()
	{
		_recompileRead = false;
	}
	
	
	@Override
	@SuppressWarnings("unchecked")
	public Object clone() throws CloneNotSupportedException 
	{
		DataOp ret = new DataOp();	
		
		//copy generic attributes
		ret.clone(this, false);
		
		//copy specific attributes
		ret._dataop = _dataop;
		ret._fileName = _fileName;
		ret._inFormat = _inFormat;
		ret._inRowsInBlock = _inRowsInBlock;
		ret._inColsInBlock = _inColsInBlock;
		ret._recompileRead = _recompileRead;
		ret._paramIndexMap = (HashMap<String, Integer>) _paramIndexMap.clone();
		//note: no deep cp of params since read-only 
		
		return ret;
	}
	
	@Override
	public boolean compare( Hop that )
	{
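		//data ops are never considered identical to other hops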
		return false;
	}
}