org.apache.sysml.hops.ReorgOp Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.hops;
import java.util.ArrayList;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.hops.Hop.MultiThreadedHop;
import org.apache.sysml.hops.rewrite.HopRewriteUtils;
import org.apache.sysml.lops.Aggregate;
import org.apache.sysml.lops.Group;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.lops.SortKeys;
import org.apache.sysml.lops.Transform;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.lops.Transform.OperationTypes;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
/**
* Reorg (cell) operation: aij
* Properties:
* Symbol: ', rdiag, rshape, rsort
* 1 Operand (except sort and reshape take additional arguments)
*
* Semantic: change indices (in mapper or reducer)
*
*
* NOTE MB: reshape integrated here because (1) ParameterizedBuiltinOp requires name-value pairs for params
* and (2) most importantly semantic of reshape is exactly a reorg op.
*/
public class ReorgOp extends Hop implements MultiThreadedHop
{
public static boolean FORCE_DIST_SORT_INDEXES = false;
public boolean bSortSPRewriteApplicable = false;
private ReOrgOp op;
private int _maxNumThreads = -1; //-1 for unlimited
private ReorgOp() {
//default constructor for clone
}
public ReorgOp(String l, DataType dt, ValueType vt, ReOrgOp o, Hop inp)
{
super(l, dt, vt);
op = o;
getInput().add(0, inp);
inp.getParent().add(this);
//compute unknown dims and nnz
refreshSizeInformation();
}
public ReorgOp(String l, DataType dt, ValueType vt, ReOrgOp o, ArrayList inp)
{
super(l, dt, vt);
op = o;
for( int i=0; i OptimizerUtils.getLocalMemBudget()
|| FORCE_DIST_SORT_INDEXES )
{
//large vector, fallback to MR sort
//sort indexes according to given values
SortKeys sort = new SortKeys(
vinput.constructLops(), HopRewriteUtils.getBooleanValueSafe((LiteralOp)desc),
SortKeys.OperationTypes.Indexes,
vinput.getDataType(), vinput.getValueType(), ExecType.MR);
sort.getOutputParameters().setDimensions(vinput.getDim1(), 1,
vinput.getRowsInBlock(), vinput.getColsInBlock(), vinput.getNnz());
setLineNumbers(sort);
//note: this sortindexes includes also the shift by offsets and
//final aggregate because sideways passing of offsets would
//not nicely fit the current instruction model
setLops(sort);
voutput = this;
}
else
{
//small vector, use in-memory sort
ArrayList sinputs = new ArrayList();
sinputs.add(vinput);
sinputs.add(new LiteralOp(1)); //by (always vector)
sinputs.add(desc);
sinputs.add(new LiteralOp(true)); //indexreturn (always indexes)
voutput = new ReorgOp("tmp3", getDataType(), getValueType(), ReOrgOp.SORT, sinputs);
HopRewriteUtils.copyLineNumbers(this, voutput);
//explicitly construct CP lop; otherwise there is danger of infinite recursion if forced runtime platform.
voutput.setLops( constructCPOrSparkSortLop(vinput, sinputs.get(1), sinputs.get(2), sinputs.get(3), ExecType.CP, false) );
voutput.getLops().getOutputParameters().setDimensions(vinput.getDim1(), vinput.getDim2(), vinput.getRowsInBlock(), vinput.getColsInBlock(), vinput.getNnz());
setLops( voutput.constructLops() );
}
//Step 3: Data permutation (only required for sorting data)
// -- done via X' = table(seq(), IX') %*% X;
if( !HopRewriteUtils.getBooleanValueSafe((LiteralOp)ixret) )
{
//generate seq
DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(voutput);
seq.setName("tmp4");
seq.refreshSizeInformation();
seq.computeMemEstimate(new MemoTable()); //select exec type
HopRewriteUtils.copyLineNumbers(this, seq);
//generate table
TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, seq, voutput, new LiteralOp(1L) );
table.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
table.refreshSizeInformation();
table.setForcedExecType(ExecType.MR); //force MR
HopRewriteUtils.copyLineNumbers(this, table);
table.setDisjointInputs(true);
table.setOutputEmptyBlocks(false);
//generate matrix mult
AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(table, input);
mmult.setForcedExecType(ExecType.MR); //force MR
setLops( mmult.constructLops() );
//cleanups
HopRewriteUtils.removeChildReference(table, input);
}
}
else //CP or Spark
{
if( et==ExecType.SPARK && !FORCE_DIST_SORT_INDEXES)
bSortSPRewriteApplicable = isSortSPRewriteApplicable();
Lop transform1 = constructCPOrSparkSortLop(input, by, desc, ixret, et, bSortSPRewriteApplicable);
setOutputDimensions(transform1);
setLineNumbers(transform1);
setLops(transform1);
}
break;
}
default:
throw new HopsException("Unsupported lops construction for operation type '"+op+"'.");
}
//add reblock/checkpoint lops if necessary
constructAndSetLopsDataFlowProperties();
return getLops();
}
private static Lop constructCPOrSparkSortLop( Hop input, Hop by, Hop desc, Hop ixret, ExecType et, boolean bSortIndInMem )
throws HopsException, LopsException
{
Transform transform1 = new Transform( input.constructLops(), HopsTransf2Lops.get(ReOrgOp.SORT),
input.getDataType(), input.getValueType(), et, bSortIndInMem);
for( Hop c : new Hop[]{by,desc,ixret} ) {
Lop ltmp = c.constructLops();
transform1.addInput(ltmp);
ltmp.addOutput(transform1);
}
transform1.setLevel(); //force order of added lops
return transform1;
}
@Override
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
{
//no dedicated mem estimation per op type, because always propagated via refreshSizeInformation
double sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity);
}
@Override
protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz )
{
if( op == ReOrgOp.SORT )
{
Hop ixreturn = getInput().get(3);
if( !(ixreturn instanceof LiteralOp && !HopRewriteUtils.getBooleanValueSafe((LiteralOp)ixreturn)
&& (dim2==1 || nnz==0) ) ) //NOT early abort case
{
//Version 2: memory requirements for temporary index int[] array,
//(temporary double[] array already covered by output)
return dim1 * 4;
//Version 1: memory requirements for temporary index Integer[] array
//8-16 (12) bytes for object, 4byte int payload, 4-8 (8) byte pointers.
//return dim1 * 24;
}
}
//default: no intermediate memory requirements
return 0;
}
@Override
protected long[] inferOutputCharacteristics( MemoTable memo )
{
long[] ret = null;
Hop input = getInput().get(0);
MatrixCharacteristics mc = memo.getAllInputStats(input);
switch(op)
{
case TRANSPOSE:
{
// input is a [k1,k2] matrix and output is a [k2,k1] matrix
// #nnz in output is exactly the same as in input
if( mc.dimsKnown() )
ret = new long[]{ mc.getCols(), mc.getRows(), mc.getNonZeros() };
break;
}
case REV:
{
// dims and nnz are exactly the same as in input
if( mc.dimsKnown() )
ret = new long[]{ mc.getRows(), mc.getCols(), mc.getNonZeros() };
break;
}
case DIAG:
{
// NOTE: diag is overloaded according to the number of columns of the input
long k = mc.getRows();
// CASE a) DIAG V2M
// input is a [1,k] or [k,1] matrix, and output is [k,k] matrix
// #nnz in output is in the worst case k => sparsity = 1/k
if( k == 1 )
ret = new long[]{k, k, ((mc.getNonZeros()>=0) ? mc.getNonZeros() : k)};
// CASE b) DIAG M2V
// input is [k,k] matrix and output is [k,1] matrix
// #nnz in the output is likely to be k (a dense matrix)
if( k > 1 )
ret = new long[]{k, 1, ((mc.getNonZeros()>=0) ? Math.min(k,mc.getNonZeros()) : k) };
break;
}
case RESHAPE:
{
// input is a [k1,k2] matrix and output is a [k3,k4] matrix with k1*k2=k3*k4
// #nnz in output is exactly the same as in input
if( mc.dimsKnown() ) {
if( _dim1 > 0 )
ret = new long[]{ _dim1, mc.getRows()*mc.getCols()/_dim1, mc.getNonZeros()};
else if( _dim2 > 0 )
ret = new long[]{ mc.getRows()*mc.getCols()/_dim2, _dim2, mc.getNonZeros()};
}
break;
}
case SORT:
{
// input is a [k1,k2] matrix and output is a [k1,k3] matrix, where k3=k2 if no index return;
// otherwise k3=1 (for the index vector)
Hop input4 = getInput().get(3); //indexreturn
boolean unknownIxRet = !(input4 instanceof LiteralOp);
if( !unknownIxRet ) {
boolean ixret = HopRewriteUtils.getBooleanValueSafe((LiteralOp)input4);
long dim2 = ixret ? 1 : mc.getCols();
long nnz = ixret ? mc.getRows() : mc.getNonZeros();
ret = new long[]{ mc.getRows(), dim2, nnz};
}
else {
ret = new long[]{ mc.getRows(), -1, -1};
}
}
}
return ret;
}
@Override
public boolean allowsAllExecTypes()
{
return true;
}
@Override
protected ExecType optFindExecType() throws HopsException {
checkAndSetForcedPlatform();
ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;
if( _etypeForced != null )
{
_etype = _etypeForced;
}
else
{
if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
_etype = findExecTypeByMemEstimate();
}
// Choose CP, if the input dimensions are below threshold or if the input is a vector
else if ( getInput().get(0).areDimsBelowThreshold() || getInput().get(0).isVector() )
{
_etype = ExecType.CP;
}
else
{
_etype = REMOTE;
}
//check for valid CP dimensions and matrix size
checkAndSetInvalidCPDimsAndSize();
}
//mark for recompile (forever)
setRequiresRecompileIfNecessary();
return _etype;
}
@Override
public void refreshSizeInformation()
{
Hop input1 = getInput().get(0);
switch(op)
{
case TRANSPOSE:
{
// input is a [k1,k2] matrix and output is a [k2,k1] matrix
// #nnz in output is exactly the same as in input
setDim1(input1.getDim2());
setDim2(input1.getDim1());
setNnz(input1.getNnz());
break;
}
case REV:
{
// dims and nnz are exactly the same as in input
setDim1(input1.getDim1());
setDim2(input1.getDim2());
setNnz(input1.getNnz());
break;
}
case DIAG:
{
// NOTE: diag is overloaded according to the number of columns of the input
long k = input1.getDim1();
setDim1(k);
// CASE a) DIAG_V2M
// input is a [1,k] or [k,1] matrix, and output is [k,k] matrix
// #nnz in output is in the worst case k => sparsity = 1/k
if( input1.getDim2()==1 ) {
setDim2(k);
setNnz( (input1.getNnz()>=0) ? input1.getNnz() : k );
}
// CASE b) DIAG_M2V
// input is [k,k] matrix and output is [k,1] matrix
// #nnz in the output is likely to be k (a dense matrix)
if( input1.getDim2()>1 ){
setDim2(1);
setNnz( (input1.getNnz()>=0) ? Math.min(k,input1.getNnz()) : k );
}
break;
}
case RESHAPE:
{
// input is a [k1,k2] matrix and output is a [k3,k4] matrix with k1*k2=k3*k4
// #nnz in output is exactly the same as in input
Hop input2 = getInput().get(1); //rows
Hop input3 = getInput().get(2); //cols
refreshRowsParameterInformation(input2); //refresh rows
refreshColsParameterInformation(input3); //refresh cols
setNnz(input1.getNnz());
if( !dimsKnown() &&input1.dimsKnown() ) { //reshape allows to infer dims, if input and 1 dim known
if(_dim1 > 0)
_dim2 = (input1._dim1*input1._dim2)/_dim1;
else if(_dim2 > 0)
_dim1 = (input1._dim1*input1._dim2)/_dim2;
}
break;
}
case SORT:
{
// input is a [k1,k2] matrix and output is a [k1,k3] matrix, where k3=k2 if no index return;
// otherwise k3=1 (for the index vector)
Hop input4 = getInput().get(3); //indexreturn
boolean unknownIxRet = !(input4 instanceof LiteralOp);
_dim1 = input1.getDim1();
if( !unknownIxRet ) {
boolean ixret = HopRewriteUtils.getBooleanValueSafe((LiteralOp)input4);
_dim2 = ixret ? 1 : input1.getDim2();
_nnz = ixret ? input1.getDim1() : input1.getNnz();
}
else {
_dim2 = -1;
_nnz = -1;
}
break;
}
}
}
@Override
public Object clone() throws CloneNotSupportedException
{
ReorgOp ret = new ReorgOp();
//copy generic attributes
ret.clone(this, false);
//copy specific attributes
ret.op = op;
ret._maxNumThreads = _maxNumThreads;
return ret;
}
@Override
public boolean compare( Hop that )
{
if( !(that instanceof ReorgOp) )
return false;
ReorgOp that2 = (ReorgOp)that;
boolean ret = (op == that2.op)
&& (_maxNumThreads == that2._maxNumThreads)
&& (getInput().size()==that.getInput().size());
//compare all childs (see reshape, sort)
if( ret ) //sizes matched
for( int i=0; i<_input.size(); i++ )
ret &= getInput().get(i) == that2.getInput().get(i);
return ret;
}
/**
* This will check if there is sufficient memory locally (twice the size of second matrix, for original and sort data), and remotely (size of second matrix (sorted data)).
* @return true if sufficient memory locally
*/
private boolean isSortSPRewriteApplicable()
{
boolean ret = false;
Hop input = getInput().get(0);
//note: both cases (partitioned matrix, and sorted double array), require to
//fit the broadcast twice into the local memory budget. Also, the memory
//constraint only needs to take the rhs into account because the output is
//guaranteed to be an aggregate of <=16KB
double size = input.dimsKnown() ?
OptimizerUtils.estimateSize(input.getDim1(), 1) : //dims known and estimate fits
input.getOutputMemEstimate(); //dims unknown but worst-case estimate fits
if( OptimizerUtils.checkSparkBroadcastMemoryBudget(size) ) {
ret = true;
}
return ret;
}
}