// Source listing of org.apache.sysml.hops.DataOp (Maven / Gradle / Ivy artifact "systemml").
// SystemML: Declarative Machine Learning.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.hops;
import java.util.HashMap;
import java.util.Map.Entry;
import org.apache.sysml.conf.CompilerConfig.ConfigType;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.lops.Data;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.util.LocalFileUtils;
/**
 * High-level operator (hop) for data access: persistent and transient reads
 * and writes as well as function outputs (see {@link #getDataOpType()}).
 *
 * Besides an optional data input (writes), a DataOp can carry "named" input
 * parameters whose positions in {@code getInput()} are tracked by a
 * name-to-index map.
 */
public class DataOp extends Hop
{
	/** Kind of data operation represented by this hop. */
	private DataOpTypes _dataop;

	/** File name of the operation; stays null unless passed to a constructor or set via setFileName. */
	private String _fileName = null;

	//read dataop properties
	private FileFormatTypes _inFormat = FileFormatTypes.TEXT;
	private long _inRowsInBlock = -1;
	private long _inColsInBlock = -1;

	/** Cleared via disableRecompileRead(); consulted in optFindExecType for reads. */
	private boolean _recompileRead = true;

	/**
	 * List of "named" input parameters. They are maintained as a hashmap:
	 * parameter names (String) are mapped as indices (Integer) into getInput()
	 * arraylist.
	 *
	 * i.e., getInput().get(_paramIndexMap.get(parameterName)) refers to the Hop
	 * that is associated with parameterName.
	 */
	private HashMap<String, Integer> _paramIndexMap = new HashMap<String, Integer>();

	private DataOp() {
		//default constructor for clone
	}

	/**
	 * READ operation for Matrix w/ dim1, dim2.
	 * This constructor does not support any expression in parameters
	 *
	 * @param l passed through to the Hop constructor (presumably the hop name)
	 * @param dt data type
	 * @param vt value type
	 * @param dop data operator type
	 * @param fname file name
	 * @param dim1 dimension 1
	 * @param dim2 dimension 2
	 * @param nnz number of non-zeros
	 * @param rowsPerBlock rows per block
	 * @param colsPerBlock cols per block
	 */
	public DataOp(String l, DataType dt, ValueType vt, DataOpTypes dop,
			String fname, long dim1, long dim2, long nnz, long rowsPerBlock, long colsPerBlock) {
		super(l, dt, vt);
		_dataop = dop;

		_fileName = fname;
		setDim1(dim1);
		setDim2(dim2);
		setRowsInBlock(rowsPerBlock);
		setColsInBlock(colsPerBlock);
		setNnz(nnz);

		//transient reads always use the binary input format
		if( dop == DataOpTypes.TRANSIENTREAD )
			setInputFormatType(FileFormatTypes.BINARY);
	}

	/**
	 * READ operation for Matrix w/ dim1, dim2 and an explicit update type.
	 * Delegates to the dimension-based read constructor and then sets the update type.
	 *
	 * @param l passed through to the Hop constructor (presumably the hop name)
	 * @param dt data type
	 * @param vt value type
	 * @param dop data operator type
	 * @param fname file name
	 * @param dim1 dimension 1
	 * @param dim2 dimension 2
	 * @param nnz number of non-zeros
	 * @param update update type
	 * @param rowsPerBlock rows per block
	 * @param colsPerBlock cols per block
	 */
	public DataOp(String l, DataType dt, ValueType vt, DataOpTypes dop,
			String fname, long dim1, long dim2, long nnz, UpdateType update, long rowsPerBlock, long colsPerBlock) {
		this(l, dt, vt, dop, fname, dim1, dim2, nnz, rowsPerBlock, colsPerBlock);
		setUpdateType(update);
	}

	/**
	 * READ operation for Matrix
	 * This constructor supports expressions in parameters
	 *
	 * @param l passed through to the Hop constructor (presumably the hop name)
	 * @param dt data type
	 * @param vt value type
	 * @param dop data operator type
	 * @param inputParameters named input parameters (name to hop); null is treated as "no parameters"
	 */
	public DataOp(String l, DataType dt, ValueType vt,
			DataOpTypes dop, HashMap<String, Hop> inputParameters) {
		super(l, dt, vt);
		_dataop = dop;

		//named parameters start at input position 0 (no data input for reads)
		addNamedInputs(inputParameters, 0);

		if( dop == DataOpTypes.TRANSIENTREAD )
			setInputFormatType(FileFormatTypes.BINARY);
	}

	/**
	 * WRITE operation
	 * This constructor does not support any expression in parameters
	 *
	 * @param l passed through to the Hop constructor (presumably the hop name)
	 * @param dt data type
	 * @param vt value type
	 * @param in high-level operator providing the data to write (becomes input 0)
	 * @param dop data operator type
	 * @param fname file name
	 */
	public DataOp(String l, DataType dt, ValueType vt, Hop in,
			DataOpTypes dop, String fname) {
		super(l, dt, vt);
		_dataop = dop;
		getInput().add(0, in);
		in.getParent().add(this);
		_fileName = fname;

		if( dop == DataOpTypes.TRANSIENTWRITE || dop == DataOpTypes.FUNCTIONOUTPUT )
			setInputFormatType(FileFormatTypes.BINARY);
	}

	/**
	 * WRITE operation for Matrix
	 * This constructor supports expression in parameters
	 *
	 * @param l passed through to the Hop constructor (presumably the hop name)
	 * @param dt data type
	 * @param vt value type
	 * @param dop data operator type
	 * @param in high-level operator providing the data to write (becomes input 0)
	 * @param inputParameters named input parameters (name to hop); may be null
	 */
	public DataOp(String l, DataType dt, ValueType vt,
			DataOpTypes dop, Hop in, HashMap<String, Hop> inputParameters) {
		super(l, dt, vt);
		_dataop = dop;

		getInput().add(0, in);
		in.getParent().add(this);

		//position 0 is taken by the data input, named parameters follow
		addNamedInputs(inputParameters, 1);

		if( dop == DataOpTypes.TRANSIENTWRITE )
			setInputFormatType(FileFormatTypes.BINARY);
	}

	/**
	 * Appends all given named parameters to the input list, links this hop as
	 * their parent, and records each parameter's input position by name.
	 *
	 * @param inputParameters named input parameters; null adds nothing
	 * @param firstIndex input position assigned to the first added parameter
	 */
	private void addNamedInputs(HashMap<String, Hop> inputParameters, int firstIndex) {
		if( inputParameters == null )
			return;

		int index = firstIndex;
		for( Entry<String, Hop> e : inputParameters.entrySet() ) {
			Hop input = e.getValue();
			getInput().add(input);
			input.getParent().add(this);
			_paramIndexMap.put(e.getKey(), index);
			index++;
		}
	}

	/**
	 * @return the data operator type of this hop
	 */
	public DataOpTypes getDataOpType()
	{
		return _dataop;
	}

	/**
	 * @param type the new data operator type of this hop
	 */
	public void setDataOpType( DataOpTypes type )
	{
		_dataop = type;
	}

	/**
	 * Sets dimensions, number of non-zeros, update type, and block sizes in one call.
	 *
	 * @param dim1 dimension 1
	 * @param dim2 dimension 2
	 * @param nnz number of non-zeros
	 * @param update update type
	 * @param rowsPerBlock rows per block
	 * @param colsPerBlock cols per block
	 */
	public void setOutputParams(long dim1, long dim2, long nnz, UpdateType update, long rowsPerBlock, long colsPerBlock) {
		setDim1(dim1);
		setDim2(dim2);
		setNnz(nnz);
		setUpdateType(update);
		setRowsInBlock(rowsPerBlock);
		setColsInBlock(colsPerBlock);
	}

	/**
	 * @param fn the file name of this data operation
	 */
	public void setFileName(String fn) {
		_fileName = fn;
	}

	/**
	 * @return the file name of this data operation, possibly null
	 */
	public String getFileName() {
		return _fileName;
	}

	/**
	 * Returns the position in getInput() of the hop registered under the given
	 * parameter name.
	 *
	 * @param name parameter name
	 * @return index into getInput()
	 * @throws NullPointerException if no parameter with that name is registered
	 */
	public int getParameterIndex(String name)
	{
		Integer index = _paramIndexMap.get(name);
		//an unknown name previously failed with a message-less NPE on unboxing;
		//the exception type is kept for compatibility, but now names the parameter
		if( index == null )
			throw new NullPointerException("No input parameter named '" + name + "' registered for data op " + getName());
		return index;
	}

	/**
	 * Constructs (once) and returns the Data lop for this hop, including the
	 * lops of all named input parameters.
	 *
	 * @return the lop of this hop
	 * @throws HopsException propagated from exec type selection or input lop construction
	 * @throws LopsException if the data operator type is not supported
	 */
	@Override
	public Lop constructLops()
		throws HopsException, LopsException
	{
		//return already created lops
		if( getLops() != null )
			return getLops();

		ExecType et = optFindExecType();
		Lop l = null;

		// construct lops for all input parameters
		HashMap<String, Lop> inputLops = new HashMap<String, Lop>();
		for( Entry<String, Integer> cur : _paramIndexMap.entrySet() ) {
			inputLops.put(cur.getKey(), getInput().get(cur.getValue()).constructLops());
		}

		// Create the lop
		switch(_dataop)
		{
			case TRANSIENTREAD:
				l = new Data(HopsData2Lops.get(_dataop), null, inputLops, getName(), null,
						getDataType(), getValueType(), true, getInputFormatType());
				setOutputDimensions(l);
				break;

			case PERSISTENTREAD:
				l = new Data(HopsData2Lops.get(_dataop), null, inputLops, getName(), null,
						getDataType(), getValueType(), false, getInputFormatType());
				//persistent reads use the input block sizes instead of the hop's output block sizes
				l.getOutputParameters().setDimensions(getDim1(), getDim2(), _inRowsInBlock, _inColsInBlock, getNnz(), getUpdateType());
				break;

			case PERSISTENTWRITE:
				l = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), inputLops, getName(), null,
						getDataType(), getValueType(), false, getInputFormatType());
				((Data)l).setExecType(et);
				setOutputDimensions(l);
				break;

			case TRANSIENTWRITE:
				l = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), inputLops, getName(), null,
						getDataType(), getValueType(), true, getInputFormatType());
				setOutputDimensions(l);
				break;

			case FUNCTIONOUTPUT:
				l = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), inputLops, getName(), null,
						getDataType(), getValueType(), true, getInputFormatType());
				((Data)l).setExecType(et);
				setOutputDimensions(l);
				break;

			default:
				throw new LopsException("Invalid operation type for Data LOP: " + _dataop);
		}

		setLineNumbers(l);
		setLops(l);

		//add reblock/checkpoint lops if necessary
		constructAndSetLopsDataFlowProperties();

		return getLops();
	}

	/**
	 * @param ft the input file format type
	 */
	public void setInputFormatType(FileFormatTypes ft) {
		_inFormat = ft;
	}

	/**
	 * @return the input file format type (TEXT unless changed)
	 */
	public FileFormatTypes getInputFormatType() {
		return _inFormat;
	}

	/**
	 * Sets both input block sizes.
	 *
	 * @param brlen input rows per block
	 * @param bclen input columns per block
	 */
	public void setInputBlockSizes( long brlen, long bclen ){
		setInputRowsInBlock(brlen);
		setInputColsInBlock(bclen);
	}

	/**
	 * @param brlen input rows per block
	 */
	public void setInputRowsInBlock( long brlen ){
		_inRowsInBlock = brlen;
	}

	/**
	 * @return input rows per block (-1 unless set)
	 */
	public long getInputRowsInBlock(){
		return _inRowsInBlock;
	}

	/**
	 * @param bclen input columns per block
	 */
	public void setInputColsInBlock( long bclen ){
		_inColsInBlock = bclen;
	}

	/**
	 * @return input columns per block (-1 unless set)
	 */
	public long getInputColsInBlock(){
		return _inColsInBlock;
	}

	/**
	 * @return true if this is a persistent or transient read
	 */
	public boolean isRead()
	{
		return( _dataop == DataOpTypes.PERSISTENTREAD || _dataop == DataOpTypes.TRANSIENTREAD );
	}

	/**
	 * @return true if this is a persistent or transient write (function outputs are not included)
	 */
	public boolean isWrite()
	{
		return( _dataop == DataOpTypes.PERSISTENTWRITE || _dataop == DataOpTypes.TRANSIENTWRITE );
	}

	/**
	 * @return true if this is a persistent read or a persistent write
	 */
	public boolean isPersistentReadWrite()
	{
		return( _dataop == DataOpTypes.PERSISTENTREAD || _dataop == DataOpTypes.PERSISTENTWRITE );
	}

	/**
	 * @return the string form of the data operator type followed by a space and the hop name
	 */
	@Override
	public String getOpString() {
		return HopsData2String.get(_dataop) + " " + getName();
	}

	/**
	 * @return always false for data ops
	 */
	@Override
	public boolean allowsAllExecTypes()
	{
		return false;
	}

	/**
	 * Estimates the output memory: a per-value-type constant for scalars, a
	 * sparsity-based size for matrix/frame reads, and 0 otherwise.
	 *
	 * @param dim1 dimension 1
	 * @param dim2 dimension 2
	 * @param nnz number of non-zeros
	 * @return output memory estimate
	 */
	@Override
	protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
	{
		double ret = 0;

		if( getDataType() == DataType.SCALAR )
		{
			switch( getValueType() )
			{
				case INT:
					ret = OptimizerUtils.INT_SIZE; break;
				case DOUBLE:
					ret = OptimizerUtils.DOUBLE_SIZE; break;
				case BOOLEAN:
					ret = OptimizerUtils.BOOLEAN_SIZE; break;
				case STRING:
					// by default, it estimates the size of string[100]
					ret = 100 * OptimizerUtils.CHAR_SIZE; break;
				case OBJECT:
					ret = OptimizerUtils.DEFAULT_SIZE; break;
				default:
					ret = 0;
			}
		}
		else //MATRIX / FRAME
		{
			if( _dataop == DataOpTypes.PERSISTENTREAD
				|| _dataop == DataOpTypes.TRANSIENTREAD )
			{
				double sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
				ret = OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity);
			}
			// output memory estimate is not required for "write" nodes (just input)
		}

		return ret;
	}

	/**
	 * @param dim1 dimension 1 (unused)
	 * @param dim2 dimension 2 (unused)
	 * @param nnz number of non-zeros (unused)
	 * @return the constant LocalFileUtils.BUFFER_SIZE
	 */
	@Override
	protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz )
	{
		return LocalFileUtils.BUFFER_SIZE;
	}

	/**
	 * Infers rows, columns, and non-zeros from the memo table: writes use the
	 * statistics of input 0, transient reads use the statistics recorded for
	 * this hop itself.
	 *
	 * @param memo memo table
	 * @return {rows, cols, nnz}, or null for other operator types or unknown dimensions
	 */
	@Override
	protected long[] inferOutputCharacteristics( MemoTable memo )
	{
		Hop statsSource = null;
		if( _dataop == DataOpTypes.PERSISTENTWRITE
			|| _dataop == DataOpTypes.TRANSIENTWRITE )
		{
			statsSource = getInput().get(0);
		}
		else if( _dataop == DataOpTypes.TRANSIENTREAD )
		{
			//prepare statistics, passed from cross-dag transient writes
			statsSource = this;
		}

		if( statsSource == null )
			return null;

		MatrixCharacteristics mc = memo.getAllInputStats(statsSource);
		return mc.dimsKnown() ?
			new long[]{ mc.getRows(), mc.getCols(), mc.getNonZeros() } : null;
	}

	/**
	 * Determines the execution type of this hop and marks it for recompilation
	 * where required.
	 *
	 * @return the selected execution type (may be null for reads without memory-based optimization)
	 * @throws HopsException declared by the overridden method
	 */
	@Override
	protected ExecType optFindExecType()
		throws HopsException
	{
		//MB: find exec type has two meanings here: (1) for write it means the actual
		//exec type, while (2) for read it affects the recompilation decision as needed
		//for example for sum(X) where the memory consumption is solely determined by the DataOp

		ExecType letype = (OptimizerUtils.isMemoryBasedOptLevel()) ? findExecTypeByMemEstimate() : null;
		ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;

		//NOTE: independent of etype executed in MR (piggybacked) if input to persistent write is MR
		if( _dataop == DataOpTypes.PERSISTENTWRITE || _dataop == DataOpTypes.TRANSIENTWRITE )
		{
			checkAndSetForcedPlatform();

			//additional check for write only
			if( getDataType()==DataType.SCALAR || (getDataType()==DataType.FRAME && REMOTE==ExecType.MR) )
				_etypeForced = ExecType.CP;

			if( _etypeForced != null )
			{
				_etype = _etypeForced;
			}
			else
			{
				if( OptimizerUtils.isMemoryBasedOptLevel() )
				{
					_etype = letype;
				}
				else if( getInput().get(0).areDimsBelowThreshold() )
				{
					_etype = ExecType.CP;
				}
				else
				{
					_etype = REMOTE;
				}

				//check for valid CP dimensions and matrix size
				checkAndSetInvalidCPDimsAndSize();
			}

			//mark for recompile (forever)
			if( ConfigurationManager.isDynamicRecompilation() && !dimsKnown(true) && _etype==REMOTE ) {
				setRequiresRecompile();
			}
		}
		else //READ
		{
			//mark for recompile (forever)
			if( ConfigurationManager.isDynamicRecompilation() && !dimsKnown(true) && letype==REMOTE
				&& (_recompileRead || _requiresCheckpoint) )
			{
				setRequiresRecompile();
			}

			_etype = letype;
		}

		return _etype;
	}

	/**
	 * Refreshes dimensions and non-zeros of writes from input 0; reads are left
	 * untouched.
	 */
	@Override
	public void refreshSizeInformation()
	{
		if( _dataop == DataOpTypes.PERSISTENTWRITE || _dataop == DataOpTypes.TRANSIENTWRITE )
		{
			Hop input1 = getInput().get(0);
			setDim1(input1.getDim1());
			setDim2(input1.getDim2());
			setNnz(input1.getNnz());
		}
		else //READ
		{
			//do nothing; dimensions updated via set output params
		}
	}

	/**
	 * Explicitly disables recompilation of transient reads, this additional information
	 * is required because requiresRecompile is set in a top-down manner, hence any value
	 * set from a consuming operator would be overwritten by optFindExecType.
	 */
	public void disableRecompileRead()
	{
		_recompileRead = false;
	}

	/**
	 * Creates a copy of this hop: generic attributes are copied via
	 * clone(Hop, boolean), the parameter index map is cloned shallowly.
	 *
	 * @return the cloned DataOp
	 * @throws CloneNotSupportedException declared by the overridden method
	 */
	@Override
	@SuppressWarnings("unchecked")
	public Object clone() throws CloneNotSupportedException
	{
		DataOp ret = new DataOp();

		//copy generic attributes
		ret.clone(this, false);

		//copy specific attributes
		ret._dataop = _dataop;
		ret._fileName = _fileName;
		ret._inFormat = _inFormat;
		ret._inRowsInBlock = _inRowsInBlock;
		ret._inColsInBlock = _inColsInBlock;
		ret._recompileRead = _recompileRead;
		ret._paramIndexMap = (HashMap<String, Integer>) _paramIndexMap.clone();
		//note: no deep cp of params since read-only

		return ret;
	}

	/**
	 * Checks whether the given hop is a redundant persistent read of the same
	 * input, i.e., a candidate for common subexpression elimination.
	 *
	 * @param that hop to compare with
	 * @return true only if both are persistent reads with equal file name, format,
	 *         input block sizes, and identical named-parameter hops
	 */
	@Override
	public boolean compare( Hop that )
	{
		if( !(that instanceof DataOp) )
			return false;

		//common subexpression elimination for redundant persistent reads, in order
		//to avoid unnecessary read and reblocks as well as to prevent specific anomalies, e.g.,
		//with multiple piggybacked csvreblock of the same input w/ unknown input sizes

		DataOp that2 = (DataOp)that;
		boolean ret = ( OptimizerUtils.ALLOW_COMMON_SUBEXPRESSION_ELIMINATION
			&& ConfigurationManager.getCompilerConfigFlag(ConfigType.ALLOW_CSE_PERSISTENT_READS)
			&& _dataop == that2._dataop
			&& _dataop == DataOpTypes.PERSISTENTREAD
			//an unset file name cannot be matched (and must not fail with an NPE)
			&& _fileName != null && _fileName.equals(that2._fileName)
			&& _inFormat == that2._inFormat
			&& _inRowsInBlock == that2._inRowsInBlock
			&& _inColsInBlock == that2._inColsInBlock
			&& _paramIndexMap!=null && that2._paramIndexMap!=null
			//different parameter sets are never equal; keeps the comparison symmetric
			&& _paramIndexMap.size() == that2._paramIndexMap.size() );

		//above conditions also ensure consistency with regard to
		//(1) checkpointing, (2) reblock and (3) recompile.

		if( !ret )
			return false;

		for( Entry<String, Integer> e : _paramIndexMap.entrySet() ) {
			//a parameter missing on the other side means "different" rather than an unboxing NPE
			Integer pos2 = that2._paramIndexMap.get(e.getKey());
			if( pos2 == null )
				return false;

			//parameters match only if both sides reference the very same hop instance
			Hop in1 = getInput().get(e.getValue());
			Hop in2 = that2.getInput().get(pos2);
			if( in2 == null || in1 != in2 )
				return false;
		}

		return true;
	}

	/**
	 * Remove an input from the list of inputs and from the parameter index map.
	 * Parameter index map values higher than the index of the removed input
	 * will be decremented by one.
	 *
	 * NOTE(review): the parent list of the removed hop is not updated here;
	 * confirm that callers take care of it if required.
	 *
	 * @param inputName The name of the input to remove
	 * @throws NullPointerException if no input with that name is registered
	 */
	public void removeInput(String inputName) {
		int inputIndex = getParameterIndex(inputName);
		_input.remove(inputIndex);
		_paramIndexMap.remove(inputName);

		//shift the positions of all parameters behind the removed input;
		//setValue updates in place without writing to the map during iteration
		for( Entry<String, Integer> entry : _paramIndexMap.entrySet() ) {
			if( entry.getValue() > inputIndex ) {
				entry.setValue(entry.getValue() - 1);
			}
		}
	}
}