org.apache.sysml.runtime.controlprogram.parfor.opt.OptimizerRuleBased Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.controlprogram.parfor.opt;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.hops.AggBinaryOp;
import org.apache.sysml.hops.DataOp;
import org.apache.sysml.hops.FunctionOp;
import org.apache.sysml.hops.Hop;
import org.apache.sysml.hops.AggBinaryOp.MMultMethod;
import org.apache.sysml.hops.Hop.MultiThreadedHop;
import org.apache.sysml.hops.Hop.ParamBuiltinOp;
import org.apache.sysml.hops.Hop.ReOrgOp;
import org.apache.sysml.hops.HopsException;
import org.apache.sysml.hops.IndexingOp;
import org.apache.sysml.hops.LeftIndexingOp;
import org.apache.sysml.hops.LiteralOp;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.hops.ParameterizedBuiltinOp;
import org.apache.sysml.hops.ReorgOp;
import org.apache.sysml.hops.rewrite.HopRewriteUtils;
import org.apache.sysml.hops.rewrite.ProgramRewriteStatus;
import org.apache.sysml.hops.rewrite.ProgramRewriter;
import org.apache.sysml.hops.rewrite.RewriteInjectSparkLoopCheckpointing;
import org.apache.sysml.hops.recompile.Recompiler;
import org.apache.sysml.lops.LopProperties;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.FunctionStatementBlock;
import org.apache.sysml.parser.LanguageException;
import org.apache.sysml.parser.ParForStatement;
import org.apache.sysml.parser.ParForStatementBlock;
import org.apache.sysml.parser.StatementBlock;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.ForProgramBlock;
import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.ProgramBlock;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.POptMode;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PResultMerge;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PTaskPartitioner;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile;
import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType;
import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.NodeType;
import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ParamType;
import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.data.SparseRow;
import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
/**
* Rule-Based ParFor Optimizer (time: O(n)):
*
* Applied rule-based rewrites
* - 1) rewrite set data partitioner (incl. recompile RIX)
* - 2) rewrite remove unnecessary compare matrix
* - 3) rewrite result partitioning (incl. recompile LIX)
* - 4) rewrite set execution strategy
* - 5) rewrite set operations exec type (incl. recompile)
* - 6) rewrite use data colocation
* - 7) rewrite set partition replication factor
* - 8) rewrite set export replication factor
* - 9) rewrite use nested parallelism
* - 10) rewrite set degree of parallelism
* - 11) rewrite set task partitioner
* - 12) rewrite set fused data partitioning and execution
* - 13) rewrite transpose vector operations (for sparse)
* - 14) rewrite set in-place result indexing
* - 15) rewrite disable caching (prevent sparse serialization)
* - 16) rewrite enable runtime piggybacking
* - 17) rewrite inject spark loop checkpointing
* - 18) rewrite inject spark repartition (for zipmm)
* - 19) rewrite set spark eager rdd caching
* - 20) rewrite set result merge
* - 21) rewrite set recompile memory budget
* - 22) rewrite remove recursive parfor
* - 23) rewrite remove unnecessary parfor
*
* TODO fuse also result merge into fused data partitioning and execute
* (for writing the result directly from execute we need to partition
* columns/rows according to blocksize -> rewrite (only applicable if
* numCols/blocksize>numreducers)+custom MR partitioner)
*
*
* TODO take remote memory into account in data/result partitioning rewrites (smaller/larger)
* TODO memory estimates with shared reads
* TODO memory estimates of result merge into plan tree
* TODO blockwise partitioning
*
*/
public class OptimizerRuleBased extends Optimizer
{
//--- static thresholds and limits used by the rewrites ---
//iteration-count threshold used by isLargeProblem (compared against _N and, scaled by 10, against _Nmax)
public static final double PROB_SIZE_THRESHOLD_REMOTE = 100; //wrt # top-level iterations (min)
//iteration-count threshold that _N or _Nmax must reach before data partitioning is considered
public static final double PROB_SIZE_THRESHOLD_PARTITIONING = 2; //wrt # top-level iterations (min)
//NOTE: despite the '_MB' suffix the value is 256*1024*1024, i.e., 256MB expressed in bytes
public static final double PROB_SIZE_THRESHOLD_MB = 256*1024*1024; //wrt overall memory consumption (min)
//upper bounds for the replication-factor rewrites (their consumers are not part of this section of the file)
public static final int MAX_REPLICATION_FACTOR_PARTITIONING = 5;
public static final int MAX_REPLICATION_FACTOR_EXPORT = 7;
//switches for the nested-parallelism rewrite (both disabled)
public static final boolean ALLOW_REMOTE_NESTED_PARALLELISM = false;
public static final boolean APPLY_REWRITE_NESTED_PARALLELISM = false;
//name prefix constant; presumably used for unfolded recursive functions - TODO confirm against its usage
public static final String FUNCTION_UNFOLD_NAMEPREFIX = "__unfold_";
//scaling factors applied to the local/remote parallelism to obtain the max degrees of parallelism
public static final double PAR_K_FACTOR = OptimizationWrapper.PAR_FACTOR_INFRASTRUCTURE;
public static final double PAR_K_MR_FACTOR = 1.0 * OptimizationWrapper.PAR_FACTOR_INFRASTRUCTURE;
//problem and infrastructure properties
//(all initialized to -1 and populated by analyzeProblemAndInfrastructure)
protected long _N = -1; //problemsize
protected long _Nmax = -1; //max problemsize (including subproblems)
protected int _lk = -1; //local par
protected int _lkmaxCP = -1; //local max par (if only CP inst)
protected int _lkmaxMR = -1; //local max par (if also MR inst)
protected int _rnk = -1; //remote num nodes
protected int _rk = -1; //remote par (mappers)
protected int _rk2 = -1; //remote par (reducers)
protected int _rkmax = -1; //remote max par (mappers)
protected int _rkmax2 = -1; //remote max par (reducers)
protected double _lm = -1; //local memory constraint
protected double _rm = -1; //remote memory constraint (mappers)
protected double _rm2 = -1; //remote memory constraint (reducers)
//cost estimator handed to optimize(); used for all memory estimates in the rewrites
protected CostEstimator _cost = null;
/**
 * Reports the cost model this optimizer works with.
 *
 * @return always {@code CostModelType.STATIC_MEM_METRIC}
 */
@Override
public CostModelType getCostModelType() {
    return CostModelType.STATIC_MEM_METRIC;
}
/**
 * Reports the kind of plan this optimizer expects as input.
 *
 * @return always {@code PlanInputType.ABSTRACT_PLAN}
 */
@Override
public PlanInputType getPlanInputType() {
    return PlanInputType.ABSTRACT_PLAN;
}
/**
 * Reports the optimization mode implemented by this optimizer; the value
 * is also used as prefix of this class' log messages.
 *
 * @return always {@code POptMode.RULEBASED}
 */
@Override
public POptMode getOptMode() {
    return POptMode.RULEBASED;
}
/**
 * Main optimization procedure.
 *
 * Transformation-based heuristic (rule-based) optimization
 * (no use of sb, direct change of pb).
 *
 * The rewrites are applied in a fixed order: rewrites 1-4 always run; depending on
 * the execution type chosen by rewrite 4, either the remote (MR/Spark) specific or
 * the local (CP) specific rewrites follow; rewrites 20-23 (result merge, recompile
 * memory budget, cleanup) run in both cases.
 *
 * @param sb parfor statement block (not accessed in this method)
 * @param pb parfor program block (not accessed directly in this method)
 * @param plan abstract plan whose root node is optimized
 * @param est cost estimator used for all memory estimates
 * @param ec execution context providing the current variables
 * @return always true (also on early abort for an empty parfor body)
 * @throws DMLRuntimeException
 * @throws DMLUnsupportedOperationException
 */
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec)
throws DMLRuntimeException, DMLUnsupportedOperationException
{
LOG.debug("--- "+getOptMode()+" OPTIMIZER -------");
OptNode pn = plan.getRoot();
//M0: estimate before any rewrite, M1: re-estimated after rewrites, M2: estimate assuming all operations in CP
double M0 = -1, M1 = -1, M2 = -1; //memory consumption
//early abort for empty parfor body
if( pn.isLeaf() )
return true;
//ANALYZE infrastructure properties
analyzeProblemAndInfrastructure( pn );
_cost = est;
//debug and warnings output
LOG.debug(getOptMode()+" OPT: Optimize w/ max_mem="+toMB(_lm)+"/"+toMB(_rm)+"/"+toMB(_rm2)+", max_k="+_lk+"/"+_rk+"/"+_rk2+")." );
if( _rnk<=0 || _rk<=0 )
LOG.warn(getOptMode()+" OPT: Optimize for inactive cluster (num_nodes="+_rnk+", num_map_slots="+_rk+")." );
//ESTIMATE memory consumption
pn.setSerialParFor(); //for basic mem consumption
M0 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
LOG.debug(getOptMode()+" OPT: estimated mem (serial exec) M="+toMB(M0) );
//OPTIMIZE PARFOR PLAN
// rewrite 1: data partitioning (incl. log. recompile RIX)
//(partitionedMatrices is filled by rewrite 1 and handed to later rewrites)
HashMap partitionedMatrices = new HashMap();
rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices );
M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
// rewrite 2: remove unnecessary compare matrix (before result partitioning)
rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
// rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX)
boolean flagLIX = rewriteSetResultPartitioning( pn, M1, ec.getVariables() );
M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
LOG.debug(getOptMode()+" OPT: estimated new mem (serial exec) M="+toMB(M1) );
LOG.debug(getOptMode()+" OPT: estimated new mem (serial exec, all CP) M="+toMB(M2) );
// rewrite 4: execution strategy
boolean flagRecompMR = rewriteSetExecutionStategy( pn, M0, M1, M2, flagLIX );
//exec-type-specific rewrites
if( pn.getExecType() == ExecType.MR || pn.getExecType()==ExecType.SPARK )
{
if( flagRecompMR ){
//rewrite 5: set operations exec type
rewriteSetOperationsExecType( pn, flagRecompMR );
M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
}
// rewrite 6: data colocation
rewriteDataColocation( pn, ec.getVariables() );
// rewrite 7: rewrite set partition replication factor
rewriteSetPartitionReplicationFactor( pn, partitionedMatrices, ec.getVariables() );
// rewrite 8: rewrite set export replication factor
rewriteSetExportReplicationFactor( pn, ec.getVariables() );
// rewrite 9: nested parallelism (incl exec types)
boolean flagNested = rewriteNestedParallelism( pn, M1, flagLIX );
// rewrite 10: determine parallelism
rewriteSetDegreeOfParallelism( pn, M1, flagNested );
// rewrite 11: task partitioning
rewriteSetTaskPartitioner( pn, flagNested, flagLIX );
// rewrite 12: fused data partitioning and execution
rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables());
// rewrite 13: transpose sparse vector operations
rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
// rewrite 14: set in-place result indexing
HashSet inplaceResultVars = new HashSet();
rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars);
// rewrite 15: disable caching
rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
}
else //if( pn.getExecType() == ExecType.CP )
{
// rewrite 10: determine parallelism
rewriteSetDegreeOfParallelism( pn, M1, false );
// rewrite 11: task partitioning
rewriteSetTaskPartitioner( pn, false, false ); //flagLIX always false
// rewrite 14: set in-place result indexing
HashSet inplaceResultVars = new HashSet();
rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars);
if( !OptimizerUtils.isSparkExecutionMode() ) {
// rewrite 16: runtime piggybacking
rewriteEnableRuntimePiggybacking( pn, ec.getVariables(), partitionedMatrices );
}
else {
//rewrite 17: checkpoint injection for parfor loop body
rewriteInjectSparkLoopCheckpointing( pn );
//rewrite 18: repartition read-only inputs for zipmm
rewriteInjectSparkRepartition( pn, ec.getVariables() );
//rewrite 19: eager caching for checkpoint rdds
rewriteSetSparkEagerRDDCaching( pn, ec.getVariables() );
}
}
// rewrite 20: set result merge
rewriteSetResultMerge( pn, ec.getVariables(), true );
// rewrite 21: set local recompile memory budget
rewriteSetRecompileMemoryBudget( pn );
///////
//Final rewrites for cleanup / minor improvements
// rewrite 22: parfor (in recursive functions) to for
rewriteRemoveRecursiveParFor( pn, ec.getVariables() );
// rewrite 23: parfor (par=1) to for
rewriteRemoveUnnecessaryParFor( pn );
//info optimization result
_numTotalPlans = -1; //_numEvaluatedPlans maintained in rewrites;
return true;
}
/**
 * Collects the problem size of the given parfor node as well as the local and
 * remote infrastructure properties (degrees of parallelism and memory budgets)
 * and stores them in the corresponding member variables.
 *
 * The defaults obtained from the infrastructure analyzer are corrected if
 * YARN is enabled and overridden if the Spark execution mode is active.
 *
 * @param pn parfor node providing the number of iterations and max problem size
 */
protected void analyzeProblemAndInfrastructure( OptNode pn )
{
    //problem size
    _N = Long.parseLong(pn.getParam(ParamType.NUM_ITERATIONS));
    _Nmax = pn.getMaxProblemSize();

    //local and remote parallelism
    _lk = InfrastructureAnalyzer.getLocalParallelism();
    _lkmaxCP = (int) Math.ceil( PAR_K_FACTOR * _lk );
    _lkmaxMR = (int) Math.ceil( PAR_K_MR_FACTOR * _lk );
    _rnk = InfrastructureAnalyzer.getRemoteParallelNodes();
    _rk = InfrastructureAnalyzer.getRemoteParallelMapTasks();
    _rk2 = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
    _rkmax = (int) Math.ceil( PAR_K_FACTOR * _rk );
    _rkmax2 = (int) Math.ceil( PAR_K_FACTOR * _rk2 );

    //local and remote memory budgets
    _lm = OptimizerUtils.getLocalMemBudget();
    _rm = OptimizerUtils.getRemoteMemBudgetMap(false);
    _rm2 = OptimizerUtils.getRemoteMemBudgetReduce();

    //correction of max parallelism if yarn enabled because yarn
    //does not have the notion of map/reduce slots and hence returns
    //small constants of map=10*nodes, reduce=2*nodes
    //(not doing this correction would lose available degree of parallelism)
    //NOTE: _rkmax/_rkmax2 are intentionally left as computed above (original behavior)
    if( InfrastructureAnalyzer.isYarnEnabled() ) {
        long tmprk = YarnClusterAnalyzer.getNumCores();
        _rk = (int) Math.max( _rk, tmprk );
        _rk2 = (int) Math.max( _rk2, tmprk/2 );
    }

    //correction of max parallelism and memory if spark runtime enabled because
    //spark limits the available parallelism by its own executor configuration
    if( OptimizerUtils.isSparkExecutionMode() ) {
        _rk = (int) SparkExecutionContext.getDefaultParallelism(true);
        _rk2 = _rk; //equal map/reduce unless we find counter-examples
        _rkmax = (int) Math.ceil( PAR_K_FACTOR * _rk );
        _rkmax2 = (int) Math.ceil( PAR_K_FACTOR * _rk2 );

        //guard against a zero executor count (integer division by zero)
        int numExecutors = Math.max( 1, SparkExecutionContext.getNumExecutors() );
        int cores = SparkExecutionContext.getDefaultParallelism(true) / numExecutors;

        //concurrently used cores per executor; at least one, because zero
        //iterations or fewer cores than executors would otherwise lead to a
        //division by zero and hence an invalid remote memory budget
        int ccores = (int) Math.max( 1, Math.min(cores, _N) );
        _rm = SparkExecutionContext.getBroadcastMemoryBudget() / ccores;
        _rm2 = SparkExecutionContext.getBroadcastMemoryBudget() / ccores;
    }
}
///////
//REWRITE set data partitioner
///
/**
 * Rewrite 'set data partitioner': decides whether read-only input matrices of
 * the parfor are partitioned remotely and records the decision in both the
 * parfor program block and the plan node.
 *
 * Candidates are the read-only parent variables of the parfor statement block
 * for which a partition format other than NONE and BLOCK_WISE_M_N was
 * determined; partitioning is applied if at least one MR/Spark indexing
 * operation on such a candidate is found in the plan.
 *
 * @param n parfor node (a warning is logged for any other node type)
 * @param vars current variables, used for memory estimates of partitions
 * @param partitionedMatrices output map (variable name to partition format),
 *        extended by all candidates if partitioning is applied
 * @return always false (blockwise partitioning is not supported yet)
 * @throws DMLRuntimeException
 */
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PDataPartitionFormat> partitionedMatrices )
    throws DMLRuntimeException
{
    if( n.getNodeType() != NodeType.PARFOR )
        LOG.warn(getOptMode()+" OPT: Data partitioner can only be set for a ParFor node.");

    //preparations
    long id = n.getID();
    Object[] o = OptTreeConverter.getAbstractPlanMapping().getMappedProg(id);
    ParForStatementBlock pfsb = (ParForStatementBlock) o[0];
    ParForProgramBlock pfpb = (ParForProgramBlock) o[1];

    //search for candidates
    boolean apply = false;
    if( OptimizerUtils.isHybridExecutionMode() //only if we are allowed to recompile
        && (_N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING) ) //only if beneficial wrt problem size
    {
        ArrayList<String> cand = pfsb.getReadOnlyParentVars();
        HashMap<String, PDataPartitionFormat> cand2 = new HashMap<String, PDataPartitionFormat>();
        for( String c : cand )
        {
            PDataPartitionFormat dpf = pfsb.determineDataPartitionFormat( c );
            if( dpf != PDataPartitionFormat.NONE
                && dpf != PDataPartitionFormat.BLOCK_WISE_M_N ) //FIXME
            {
                cand2.put( c, dpf );
            }
        }

        apply = rFindDataPartitioningCandidates(n, cand2, vars);
        if( apply )
            partitionedMatrices.putAll(cand2);
    }

    //NOTE: since partitioning is only applied in case of MR index access, we assume a large
    //      matrix and hence always apply REMOTE_MR (the benefit for large matrices outweighs
    //      potentially unnecessary MR jobs for smaller matrices)
    PDataPartitioner REMOTE = OptimizerUtils.isSparkExecutionMode() ?
        PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    PDataPartitioner pdp = (apply)? REMOTE : PDataPartitioner.NONE;

    // modify rtprog
    pfpb.setDataPartitioner( pdp );
    // modify plan
    n.addParam(ParamType.DATA_PARTITIONER, pdp.toString());

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'set data partitioner' - result="+pdp.toString()+
        " ("+ProgramConverter.serializeStringCollection(partitionedMatrices.keySet())+")" );

    //blockwise partitioning is never chosen by this rewrite
    return false;
}
/**
 * Recursively searches the plan for right-indexing operations on partitioning
 * candidates that are currently planned for MR/Spark execution. Every match is
 * switched to CP, annotated with its partition format, and its hop memory
 * estimate is replaced by the estimate of a single partition.
 *
 * Function call nodes are not traversed in order to prevent conflicts with
 * variable aliases.
 *
 * @param n current plan node
 * @param cand partitioning candidates (variable name to partition format)
 * @param vars current variables, used to estimate the size of one partition
 * @return true if at least one indexing operation was modified
 * @throws DMLRuntimeException
 */
protected boolean rFindDataPartitioningCandidates( OptNode n, HashMap<String, PDataPartitionFormat> cand, LocalVariableMap vars )
    throws DMLRuntimeException
{
    boolean ret = false;

    if( !n.isLeaf() )
    {
        for( OptNode cn : n.getChilds() )
            if( cn.getNodeType() != NodeType.FUNCCALL ) //prevent conflicts with aliases
                ret |= rFindDataPartitioningCandidates( cn, cand, vars );
    }
    else if( n.getNodeType()== NodeType.HOP
        && IndexingOp.OPSTRING.equals(n.getParam(ParamType.OPSTRING)) ) //null-safe: OPSTRING param may be absent
    {
        Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
        String inMatrix = h.getInput().get(0).getName();
        if( cand.containsKey(inMatrix) ) //Required Condition: partitioning applicable
        {
            PDataPartitionFormat dpf = cand.get(inMatrix);
            double mnew = getNewRIXMemoryEstimate( n, inMatrix, dpf, vars );

            //NOTE: for the moment, we do not partition according to the remote mem, because we can execute
            //it even without partitioning in CP. However, advanced optimizers should reason about this
            if( n.getExecType() == ExecType.MR || n.getExecType()==ExecType.SPARK ) //Opt Condition: MR/Spark
            {
                //NOTE: subsequent rewrites will still use the MR mem estimate
                //(guarded by subsequent operations that have at least the memory req of one partition)
                n.setExecType(ExecType.CP);
                n.addParam(ParamType.DATA_PARTITION_FORMAT, dpf.toString());
                h.setMemEstimate( mnew ); //CP vs CP_FILE in ProgramRecompiler bases on mem_estimate
                ret = true;
            }
        }
    }

    return ret;
}
/**
 * Estimates the memory required for a single partition of the given variable
 * under the given partition format.
 *
 * TODO consolidate mem estimation with Indexing Hop
 *
 * NOTE: Using the dimensions without sparsity is a conservative worst-case
 * (dense) consideration.
 *
 * @param n indexing node (not accessed by this estimate)
 * @param varName name of the partitioned input variable
 * @param dpf partition format of the variable
 * @param vars current variables
 * @return estimated size of one partition; {@code Integer.MAX_VALUE} for
 *         BLOCK_WISE_M_N; -1 if the variable is unknown at optimization time
 *         or the format is not handled
 * @throws DMLRuntimeException
 */
protected double getNewRIXMemoryEstimate( OptNode n, String varName, PDataPartitionFormat dpf, LocalVariableMap vars )
    throws DMLRuntimeException
{
    //not all intermediates need to be known on optimize
    Data dat = vars.get( varName );
    if( dat == null )
        return -1;

    MatrixObject mo = (MatrixObject) dat;

    //those are worst-case (dense) estimates
    switch( dpf )
    {
        case COLUMN_WISE:
            return OptimizerUtils.estimateSize(mo.getNumRows(), 1);
        case ROW_WISE:
            return OptimizerUtils.estimateSize(1, mo.getNumColumns());
        case BLOCK_WISE_M_N:
            return Integer.MAX_VALUE; //TODO
        default:
            return -1; //format not handled
    }
}
/**
 * Convenience variant of {@link #getRIXExecType(MatrixObject, PDataPartitionFormat, boolean)}
 * that ignores sparsity, i.e., uses the worst-case estimate.
 *
 * @param mo partitioned matrix object
 * @param dpf partition format
 * @return execution type of right indexing on a single partition
 * @throws DMLRuntimeException
 */
protected static LopProperties.ExecType getRIXExecType( MatrixObject mo, PDataPartitionFormat dpf )
    throws DMLRuntimeException
{
    final boolean withSparsity = false;
    return getRIXExecType(mo, dpf, withSparsity);
}
/**
 * Decides whether right indexing on a single partition of the given matrix can
 * run in memory (CP) or has to use the file-based variant (CP_FILE), by
 * comparing the estimated partition size with the local memory budget.
 *
 * NOTE(review): for formats not covered by the switch the estimate stays at -1
 * and hence CP is returned - confirm that this is intended.
 *
 * @param mo partitioned matrix object
 * @param dpf partition format
 * @param withSparsity if true, nnz/rows/cols of the matrix is used as sparsity,
 *        otherwise a sparsity of 1.0 is assumed
 * @return CP if the estimate is below the local memory budget, CP_FILE otherwise
 * @throws DMLRuntimeException
 */
protected static LopProperties.ExecType getRIXExecType( MatrixObject mo, PDataPartitionFormat dpf, boolean withSparsity )
    throws DMLRuntimeException
{
    //matrix characteristics
    long rlen = mo.getNumRows();
    long clen = mo.getNumColumns();
    long brlen = mo.getNumRowsPerBlock();
    long bclen = mo.getNumColumnsPerBlock();
    long nnz = mo.getNnz();

    //sparsity used for the estimate (1.0 unless explicitly requested)
    double sparsity = 1.0;
    if( withSparsity )
        sparsity = ((double)nnz)/rlen/clen;

    //size estimate of a single partition
    double mem;
    switch( dpf )
    {
        case COLUMN_WISE:
            mem = OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), 1, sparsity);
            break;
        case COLUMN_BLOCK_WISE:
            mem = OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), bclen, sparsity);
            break;
        case ROW_WISE:
            mem = OptimizerUtils.estimateSizeExactSparsity(1, mo.getNumColumns(), sparsity);
            break;
        case ROW_BLOCK_WISE:
            mem = OptimizerUtils.estimateSizeExactSparsity(brlen, mo.getNumColumns(), sparsity);
            break;
        default:
            mem = -1; //no estimate available
    }

    boolean fitsInMemory = ( mem < OptimizerUtils.getLocalMemBudget() );
    return fitsInMemory ? LopProperties.ExecType.CP : LopProperties.ExecType.CP_FILE;
}
/**
 * Decides if a row-wise or column-wise partition format should be coarsened to
 * the corresponding block-wise format. This happens only if a partition of the
 * given format fits in memory, the number of row (column) blocks exceeds four
 * times the number of remote map tasks, and a block-wise partition fits in
 * memory as well.
 *
 * @param mo partitioned matrix object
 * @param dpf originally chosen partition format
 * @return ROW_BLOCK_WISE or COLUMN_BLOCK_WISE if coarsening applies, otherwise
 *         the given format
 * @throws DMLRuntimeException
 */
public static PDataPartitionFormat decideBlockWisePartitioning( MatrixObject mo, PDataPartitionFormat dpf )
    throws DMLRuntimeException
{
    long rlen = mo.getNumRows();
    long clen = mo.getNumColumns();
    long brlen = mo.getNumRowsPerBlock();
    long bclen = mo.getNumColumnsPerBlock();
    long k = InfrastructureAnalyzer.getRemoteParallelMapTasks();

    //nothing to decide if a single partition does not fit in memory
    if( getRIXExecType(mo, dpf) != LopProperties.ExecType.CP ) {
        return dpf;
    }

    if( dpf == PDataPartitionFormat.ROW_WISE ) {
        //note: average sparsity, read must deal with it
        if( rlen/brlen > 4*k
            && getRIXExecType(mo, PDataPartitionFormat.ROW_BLOCK_WISE, false)==LopProperties.ExecType.CP )
        {
            return PDataPartitionFormat.ROW_BLOCK_WISE;
        }
    }
    else if( dpf == PDataPartitionFormat.COLUMN_WISE ) {
        //note: average sparsity, read must deal with it
        if( clen/bclen > 4*k
            && getRIXExecType(mo, PDataPartitionFormat.COLUMN_BLOCK_WISE, false)==LopProperties.ExecType.CP )
        {
            return PDataPartitionFormat.COLUMN_BLOCK_WISE;
        }
    }

    return dpf;
}
/**
 * Checks if right indexing on a column-block-wise partition of the given
 * matrix (worst-case estimate without sparsity) can run in memory.
 *
 * NOTE: the given partition format is not consulted; the check is always
 * done for COLUMN_BLOCK_WISE.
 *
 * @param mo partitioned matrix object
 * @param dpf partition format (unused)
 * @return true if the execution type of such an indexing operation is CP
 * @throws DMLRuntimeException
 */
public static boolean allowsBinaryCellPartitions( MatrixObject mo, PDataPartitionFormat dpf )
    throws DMLRuntimeException
{
    LopProperties.ExecType colBlockExecType =
        getRIXExecType(mo, PDataPartitionFormat.COLUMN_BLOCK_WISE, false);
    return ( colBlockExecType == LopProperties.ExecType.CP );
}
///////
//REWRITE set result partitioning
///
/**
 * Rewrite 'set result partitioning': if the given memory estimate fits into
 * the remote memory budget and all MR operations of the plan are partitionable
 * left-indexing operations on result variables, these operations are forced
 * to CP and their parent program blocks are recompiled.
 *
 * @param n parfor node
 * @param M memory estimate of the parfor body
 * @param vars current variables
 * @return true if the rewrite has been applied
 * @throws DMLRuntimeException if the recompilation of a left indexing fails
 */
protected boolean rewriteSetResultPartitioning(OptNode n, double M, LocalVariableMap vars)
    throws DMLRuntimeException
{
    //preparations
    long id = n.getID();
    Object[] o = OptTreeConverter.getAbstractPlanMapping().getMappedProg(id);
    ParForProgramBlock pfpb = (ParForProgramBlock) o[1];

    //search for candidates
    Collection<OptNode> cand = n.getNodeList(ExecType.MR);

    //determine if applicable
    boolean apply = M < _rm //ops fit in remote memory budget
        && !cand.isEmpty() //at least one MR
        && isResultPartitionableAll(cand,pfpb.getResultVariables(),vars, pfpb.getIterablePredicateVars()[0]); // check candidates

    //recompile LIX
    if( apply )
    {
        try
        {
            for(OptNode lix : cand)
                recompileLIX( lix, vars );
        }
        catch(Exception ex)
        {
            //wrap all recompile failures but preserve the cause
            throw new DMLRuntimeException("Unable to recompile LIX.", ex);
        }
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'set result partitioning' - result="+apply );

    return apply;
}
/**
 * Checks if all given nodes are partitionable left-indexing operations
 * (see {@link #isResultPartitionable}). The check stops at the first node
 * that is not partitionable.
 *
 * @param nlist candidate nodes
 * @param resultVars names of the parfor result variables
 * @param vars current variables
 * @param iterVarname name of the parfor iteration variable
 * @return true if every node is result partitionable (also for an empty collection)
 * @throws DMLRuntimeException
 */
protected boolean isResultPartitionableAll( Collection<OptNode> nlist, ArrayList<String> resultVars, LocalVariableMap vars, String iterVarname )
    throws DMLRuntimeException
{
    for( OptNode n : nlist )
    {
        if( !isResultPartitionable(n, resultVars, vars, iterVarname) )
            return false; //early abort
    }

    return true;
}
/**
 * Checks if the given node is a left-indexing operation on a result variable
 * that qualifies for result partitioning, i.e., the iteration variable is used as
 * lower and upper bound of the row and/or column index range, the result matrix
 * has no non-zeros, is evaluated as sparse, and the estimated memory of a single
 * task fits into the remote memory budget. On success, the estimated number of
 * tasks is stored as TASK_SIZE parameter of the node.
 *
 * @param n candidate node
 * @param resultVars names of the parfor result variables
 * @param vars current variables (must contain the result variable)
 * @param iterVarname name of the parfor iteration variable
 * @return true if the node is result partitionable
 * @throws DMLRuntimeException
 */
protected boolean isResultPartitionable( OptNode n, ArrayList resultVars, LocalVariableMap vars, String iterVarname )
throws DMLRuntimeException
{
boolean ret = true;
//check left indexing operator
String opStr = n.getParam(ParamType.OPSTRING);
if( opStr==null || !opStr.equals(LeftIndexingOp.OPSTRING) )
ret = false;
Hop h = null;
Hop base = null;
if( ret ) {
h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
//first input of the left indexing is the matrix that is written to
base = h.getInput().get(0);
//check result variable
if( !resultVars.contains(base.getName()) )
ret = false;
}
//check access pattern, memory budget
if( ret ) {
//access pattern code: 0=none, 1=rowwise, 2=colwise, 3=cellwise
int dpf = 0;
//inputs 2-5 are the lower/upper bounds of the row and column index ranges
Hop inpRowL = h.getInput().get(2);
Hop inpRowU = h.getInput().get(3);
Hop inpColL = h.getInput().get(4);
Hop inpColU = h.getInput().get(5);
if( (inpRowL.getName().equals(iterVarname) && inpRowU.getName().equals(iterVarname)) )
dpf = 1; //rowwise
if( (inpColL.getName().equals(iterVarname) && inpColU.getName().equals(iterVarname)) )
dpf = (dpf==0) ? 2 : 3; //colwise or cellwise
if( dpf == 0 )
ret = false;
else
{
//check memory budget
MatrixObject mo = (MatrixObject)vars.get(base.getName());
//only result matrices without non-zeros qualify
//NOTE(review): ret=false here does not skip the estimation below, hence
//TASK_SIZE may still be added to the node although false is returned
if( mo.getNnz() != 0 ) //-1 valid because result var known during opt
ret = false;
//Note: for memory estimation the common case is sparse since remote_mr and individual tasks;
//and in the dense case, we would not benefit from result partitioning
boolean sparse = MatrixBlock.evalSparseFormatInMemory(base.getDim1(), base.getDim2(),base.getDim1());
if( sparse )
{
//custom memory estimatation in order to account for structural properties
//e.g., for rowwise we know that we only pay one sparserow overhead per task
double memSparseBlock = estimateSizeSparseRowBlock(base.getDim1());
double memSparseRow1 = estimateSizeSparseRow(base.getDim2(), base.getDim2());
double memSparseRowMin = estimateSizeSparseRowMin(base.getDim2());
//memTask1: estimated memory of a single task, taskN: estimated number of tasks per remote budget
double memTask1 = -1;
int taskN = -1;
switch(dpf) {
case 1: //rowwise
//sparse block and one sparse row per task
memTask1 = memSparseBlock + memSparseRow1;
taskN = (int) ((_rm-memSparseBlock) / memSparseRow1);
break;
case 2: //colwise
//sparse block, sparse row per row but shared over tasks
memTask1 = memSparseBlock + memSparseRowMin * base.getDim1();
taskN = estimateNumTasksSparseCol(_rm-memSparseBlock, base.getDim1());
break;
case 3: //cellwise
//sparse block and one minimal sparse row per task
memTask1 = memSparseBlock + memSparseRowMin;
taskN = (int) ((_rm-memSparseBlock) / memSparseRowMin);
break;
}
//a single task must fit into the remote memory budget
if( memTask1>_rm || memTask1<0 )
ret = false;
else
n.addParam(ParamType.TASK_SIZE, String.valueOf(taskN));
}
else
{
//dense (no result partitioning possible)
ret = false;
}
}
}
return ret;
}
/**
 * Estimates the size of a sparse block without its rows as a constant
 * part of 44 plus 8 per row.
 *
 * @param rows number of rows
 * @return estimated size
 */
private double estimateSizeSparseRowBlock( long rows ) {
    //see MatrixBlock.estimateSizeSparseInMemory
    long perRowPart = rows * 8;
    return 44 + perRowPart;
}
/**
 * Estimates the size of a single sparse row as a constant part of 116 plus
 * 12 per entry, where the number of entries is the maximum of the initial
 * sparse row capacity, the number of columns, and the number of non-zeros.
 *
 * @param cols number of columns
 * @param nnz number of non-zeros
 * @return estimated size
 */
private double estimateSizeSparseRow( long cols, long nnz ) {
    //see MatrixBlock.estimateSizeSparseInMemory
    long entries = Math.max(cols, nnz);
    if( entries < SparseRow.initialCapacity )
        entries = SparseRow.initialCapacity;
    return 116 + 12 * entries; //sparse row
}
/**
 * Estimates the size of a minimal sparse row as a constant part of 116 plus
 * 12 per entry, where the number of entries is the minimum of the initial
 * sparse row capacity and the number of columns.
 *
 * @param cols number of columns
 * @return estimated size
 */
private double estimateSizeSparseRowMin( long cols ) {
    //see MatrixBlock.estimateSizeSparseInMemory
    long entries = ( SparseRow.initialCapacity <= cols ) ? SparseRow.initialCapacity : cols;
    return 116 + 12 * entries; //sparse row
}
/**
 * Estimates the number of tasks for column-wise result partitioning: the
 * budget is reduced by 116 per row and the remainder is divided by 12
 * (rounded down).
 *
 * @param budget available memory budget
 * @param rows number of rows
 * @return estimated number of tasks (negative if the budget is too small)
 */
private int estimateNumTasksSparseCol( double budget, long rows ) {
    //see MatrixBlock.estimateSizeSparseInMemory
    double remaining = budget - rows * 116;
    double tasks = Math.floor( remaining / 12 );
    return (int) tasks;
}
/**
 * Forces the left-indexing hop mapped to the given node to CP and recompiles the
 * instructions of its parent program block. Memory estimates of partitioned
 * right-indexing operations below the same parent are saved before and restored
 * after the recompilation; finally, the memory estimate of the left-indexing hop
 * is set to just below the remote memory budget.
 *
 * @param n left-indexing node
 * @param vars current variables, passed to the recompiler
 * @throws DMLRuntimeException
 * @throws HopsException
 * @throws LopsException
 * @throws DMLUnsupportedOperationException
 * @throws IOException
 */
protected void recompileLIX( OptNode n, LocalVariableMap vars )
throws DMLRuntimeException, HopsException, LopsException, DMLUnsupportedOperationException, IOException
{
Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
//set forced exec type
h.setForcedExecType(LopProperties.ExecType.CP);
n.setExecType(ExecType.CP);
//recompile parent pb
long pid = OptTreeConverter.getAbstractPlanMapping().getMappedParentID(n.getID());
OptNode nParent = OptTreeConverter.getAbstractPlanMapping().getOptNode(pid);
Object[] o = OptTreeConverter.getAbstractPlanMapping().getMappedProg(pid);
StatementBlock sb = (StatementBlock) o[0];
ProgramBlock pb = (ProgramBlock) o[1];
//keep modified estimates of partitioned rix (in same dag as lix)
HashMap estRix = getPartitionedRIXEstimates(nParent);
//construct new instructions
ArrayList newInst = Recompiler.recompileHopsDag(sb, sb.get_hops(), vars, null, false, 0);
pb.setInstructions( newInst );
//reset all rix estimates (modified by recompile)
resetPartitionRIXEstimates( estRix );
//set new mem estimate (last, otherwise overwritten from recompile)
//(_rm-1, i.e., just below the remote memory budget)
h.setMemEstimate(_rm-1);
}
/**
 * Collects the current memory estimates of all hops mapped to children of
 * the given node that carry a data partition format, i.e., that have been
 * modified by the data partitioning rewrite.
 *
 * @param parent parent node whose direct children are inspected
 * @return map from hop to its current memory estimate
 */
protected HashMap<Hop, Double> getPartitionedRIXEstimates(OptNode parent)
{
    HashMap<Hop, Double> estimates = new HashMap<Hop, Double>();
    for( OptNode n : parent.getChilds() )
    {
        if( n.getParam(ParamType.DATA_PARTITION_FORMAT) != null )
        {
            Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
            estimates.put( h, h.getMemEstimate() );
        }
    }
    return estimates;
}
/**
 * Restores previously saved memory estimates by setting, for every entry,
 * the stored estimate on the corresponding hop.
 *
 * @param estimates map from hop to the memory estimate to restore
 */
protected void resetPartitionRIXEstimates( HashMap<Hop, Double> estimates )
{
    for( Entry<Hop, Double> e : estimates.entrySet() )
    {
        Hop h = e.getKey();
        double val = e.getValue();
        h.setMemEstimate(val);
    }
}
///////
//REWRITE set execution strategy
///
/**
 * Rewrite 'set execution strategy': decides between local (CP) and remote
 * (MR or Spark, depending on the execution mode) execution of the parfor,
 * and sets the decision on both the plan node and the parfor program block.
 *
 * @param n parfor node
 * @param M0 memory estimate of the original (serial) plan
 * @param M current memory estimate of the plan
 * @param M2 memory estimate if all operations are executed in CP
 * @param flagLIX true if result partitioning has been applied
 * @return true if remote execution was chosen although the body is not
 *         CP-only, i.e., if a recompilation w.r.t. the remote memory budget
 *         is required
 * @throws DMLRuntimeException
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, boolean flagLIX)
    throws DMLRuntimeException
{
    boolean isCPOnly = n.isCPOnly();
    boolean isCPOnlyPossible = isCPOnly || isCPOnlyPossible(n, _rm);
    String datapartitioner = n.getParam(ParamType.DATA_PARTITIONER);

    //remote backend according to the execution mode
    final boolean spark = OptimizerUtils.isSparkExecutionMode();
    ExecType REMOTE = spark ? ExecType.SPARK : ExecType.MR;
    PDataPartitioner REMOTE_DP = spark ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;

    //required conditions for remote execution
    boolean remoteFeasible =
           (isCPOnly && M <= _rm )          //all instruction can be be executed in CP
        || (isCPOnlyPossible && M2 <= _rm); //cp inst fit into remote JVM mem

    //optimization decision (only if remote is feasible; otherwise local parfor
    //due to mr instructions in body, or rm too small)
    boolean useRemote = false;
    if( remoteFeasible )
    {
        int cpk = (int) Math.min( _lk, Math.floor( _lm / M ) ); //estimated local exploited par

        useRemote =
            //local par cannot be exploited due to mem constraints (this implies that we work on large data)
            //(the factor of 2 is to account for hyper-threading and in order prevent too eager remote parfor)
               ( 2*cpk < _lk && 2*cpk < _N && 2*cpk < _rk )
            //problem is large enough and remote parallelism is larger than local
            || ( _lk < _N && _lk < _rk && isLargeProblem(n, M0) )
            //MR operations in local, but CP only in remote (less overall MR jobs)
            || ( (!isCPOnly) && isCPOnlyPossible )
            //necessary for LIX rewrite (LIX true iff cp only and rm valid)
            || flagLIX
            //remote data partitioning, because data will be distributed on all nodes
            || ( datapartitioner!=null && datapartitioner.equals(REMOTE_DP.toString())
                 && !InfrastructureAnalyzer.isLocalMode() );
    }
    n.setExecType( useRemote ? REMOTE : ExecType.CP );

    //actual programblock modification
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];
    PExecMode mode = n.getExecType().toParForExecMode();
    pfpb.setExecMode( mode );

    //decide if recompilation according to remote mem budget necessary
    boolean remoteMode = (mode == PExecMode.REMOTE_MR || mode == PExecMode.REMOTE_SPARK);
    boolean requiresRecompile = remoteMode && !isCPOnly;

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'set execution strategy' - result="+mode+" (recompile="+requiresRecompile+")" );

    return requiresRecompile;
}
/**
 * Indicates whether the current parfor counts as a large problem, i.e., the
 * iteration counts reach the remote problem-size threshold and the given
 * memory estimate exceeds the memory threshold.
 *
 * @param pn plan node of the parfor (not inspected by this check)
 * @param M0 memory estimate compared against {@code PROB_SIZE_THRESHOLD_MB}
 * @return true if both the iteration and the memory criterion are met
 */
protected boolean isLargeProblem(OptNode pn, double M0)
{
    //enough iterations, or a tenfold larger maximum iteration count
    boolean manyIterations = _N >= PROB_SIZE_THRESHOLD_REMOTE
        || _Nmax >= 10 * PROB_SIZE_THRESHOLD_REMOTE;

    //original operations larger than the memory threshold
    boolean largeOperations = M0 > PROB_SIZE_THRESHOLD_MB;

    return manyIterations && largeOperations;
}
/**
 * Recursively checks whether the plan rooted at {@code n} could be executed
 * with CP operations only under the given memory budget.
 *
 * @param n plan node to check
 * @param memBudget memory budget a leaf's CP memory estimate must not exceed
 * @return true if the node and all of its descendants qualify for CP execution
 * @throws DMLRuntimeException
 */
protected boolean isCPOnlyPossible( OptNode n, double memBudget )
    throws DMLRuntimeException
{
    final ExecType et = n.getExecType();

    if( n.isLeaf() )
    {
        //already CP: nothing to check
        if( et == ExecType.CP )
            return true;

        //only MR/Spark leafs are candidates for a switch to CP
        if( et != ExecType.MR && et != ExecType.SPARK )
            return false;

        //forced remote execution (e.g., -exec=hadoop) cannot be overridden
        Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop( n.getID() );
        if( h.getForcedExecType()==LopProperties.ExecType.MR
            || h.getForcedExecType()==LopProperties.ExecType.SPARK )
        {
            return false;
        }

        //the operation qualifies if its CP memory estimate fits into the budget
        double mem = _cost.getLeafNodeEstimate(TestMeasure.MEMORY_USAGE, n, LopProperties.ExecType.CP);
        return mem <= memBudget;
    }

    //inner node: must be CP itself, and so must be every child (abort on first failure)
    if( et != ExecType.CP )
        return false;
    for( OptNode c : n.getChilds() )
    {
        if( !isCPOnlyPossible(c, memBudget) )
            return false;
    }
    return true;
}
///////
//REWRITE set operations exec type
///
/**
 * Forces all operations below the given parfor node to exec type CP, both in
 * the internal plan tree and (via forced recompilation of the child program
 * blocks) in the runtime program.
 *
 * @param pn plan node of the parfor
 * @param recompile if true, a warning is logged when no operation changed its exec type
 * @throws DMLRuntimeException
 */
protected void rewriteSetOperationsExecType(OptNode pn, boolean recompile)
    throws DMLRuntimeException
{
    //update the plan tree and count the operations whose exec type changed
    final int numChanged = setOperationExecType(pn, ExecType.CP);

    if( recompile && numChanged<=0 )
        LOG.warn("OPT: Forced set operations exec type 'CP', but no operation requires recompile.");

    //update the runtime program: forced CP recompile of the parfor body
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID());
    ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];
    HashSet fnStack = new HashSet();
    Recompiler.recompileProgramBlockHierarchy2Forced(pfpb.getChildBlocks(), 0, fnStack, LopProperties.ExecType.CP);

    LOG.debug(getOptMode()+" OPT: rewrite 'set operation exec type CP' - result="+numChanged);
}
/**
 * Recursively sets the exec type of all HOP nodes in the plan rooted at
 * {@code n} to {@code et}.
 *
 * @param n root of the (sub)plan to modify
 * @param et exec type to assign to every HOP node
 * @return number of HOP nodes whose exec type actually changed
 */
protected int setOperationExecType( OptNode n, ExecType et )
{
    int count = 0;

    //set the requested exec type (previously hard-coded to CP regardless of 'et'),
    //counting only nodes that actually change
    if( n.getExecType()!=et && n.getNodeType()==NodeType.HOP ) {
        n.setExecType( et );
        count = 1;
    }

    //recursively process all children
    if( !n.isLeaf() )
        for( OptNode c : n.getChilds() )
            count += setOperationExecType(c, et);

    return count;
}
///////
//REWRITE enable data colocation
///
/**
 * Enables co-location of the largest partitioned matrix that is accessed via
 * the parfor iteration variable. The rewrite only applies for data partitioner
 * REMOTE_MR combined with exec type MR.
 *
 * NOTE: if MAX_REPLICATION_FACTOR_PARTITIONING is set larger than 10, co-location may
 * throw warnings per split since this exceeds "max block locations"
 *
 * @param n plan node of the parfor
 * @param vars variable map used to look up the candidate matrices
 * @throws DMLRuntimeException
 */
protected void rewriteDataColocation( OptNode n, LocalVariableMap vars )
    throws DMLRuntimeException
{
    String partitioner = n.getParam(ParamType.DATA_PARTITIONER);
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(n.getID())[1];

    boolean apply = false;
    String varname = null;

    boolean remoteMRPartitioning = partitioner!=null
        && partitioner.equals(PDataPartitioner.REMOTE_MR.toString());
    if( remoteMRPartitioning && n.getExecType()==ExecType.MR )
    {
        //candidates: matrices with at least one partitioned access via the iteration variable
        HashSet cand = new HashSet();
        rFindDataColocationCandidates(n, cand, pfpb.getIterablePredicateVars()[0]);

        //pick the candidate with the most non-zeros (accounts for sparsity)
        long nnzMax = Long.MIN_VALUE;
        for( String c : cand ) {
            MatrixObject tmp = (MatrixObject)vars.get(c);
            if( tmp == null )
                continue; //unknown variable: not selectable
            long nnzTmp = tmp.getNnz();
            if( nnzTmp > nnzMax ) {
                nnzMax = nnzTmp;
                varname = c;
                apply = true;
            }
        }
    }

    //modify the runtime plan if at least one candidate was selected
    if( apply )
        pfpb.enableColocatedPartitionedMatrix( varname );

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'enable data colocation' - result="+apply+((apply)?" ("+varname+")":"") );
}
/**
 * Recursively collects the names of all matrices that are read by a partitioned
 * indexing operation whose row (ROW_WISE) or column (COLUMN_WISE) index is the
 * given iteration variable.
 *
 * @param n root of the (sub)plan to scan
 * @param cand output set receiving the candidate matrix names
 * @param iterVarname name of the parfor iteration variable
 * @throws DMLRuntimeException
 */
protected void rFindDataColocationCandidates( OptNode n, HashSet cand, String iterVarname )
    throws DMLRuntimeException
{
    //inner node: descend into all children
    if( !n.isLeaf() )
    {
        for( OptNode cn : n.getChilds() )
            rFindDataColocationCandidates( cn, cand, iterVarname );
        return;
    }

    //leaf: only partitioned indexing hops are of interest
    if( n.getNodeType()!=NodeType.HOP
        || !n.getParam(ParamType.OPSTRING).equals(IndexingOp.OPSTRING)
        || n.getParam(ParamType.DATA_PARTITION_FORMAT) == null )
    {
        return;
    }

    PDataPartitionFormat dpf = PDataPartitionFormat.valueOf(n.getParam(ParamType.DATA_PARTITION_FORMAT));
    Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
    String inMatrix = h.getInput().get(0).getName();

    //position of the relevant index input: 1 for row-wise (inputs 1 and 2 equal),
    //3 for column-wise (inputs 3 and 4 equal), none for all other formats
    int indexPos = -1;
    if( dpf == PDataPartitionFormat.ROW_WISE )
        indexPos = 1;
    else if( dpf == PDataPartitionFormat.COLUMN_WISE )
        indexPos = 3;

    String indexAccess = null;
    if( indexPos >= 0 && h.getInput().get(indexPos) instanceof DataOp )
        indexAccess = h.getInput().get(indexPos).getName();

    if( indexAccess != null && indexAccess.equals(iterVarname) )
        cand.add( inMatrix );
}
///////
//REWRITE set partition replication factor
///
/**
 * Increasing the partition replication factor is beneficial if partitions are
 * read multiple times (e.g., in nested loops) because partitioning (done once)
 * gets slightly slower but there is a higher probability for local access
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed in order to
 * leverage the partitioning information in the plan tree.
 *
 * @param n plan node of the parfor
 * @param partitionedMatrices partitioned matrices; the keys are used as variable names
 * @param vars variable map used to look up the partitioned matrix objects
 * @throws DMLRuntimeException if the remaining hdfs capacity cannot be analyzed
 */
protected void rewriteSetPartitionReplicationFactor( OptNode n, HashMap partitionedMatrices, LocalVariableMap vars )
    throws DMLRuntimeException
{
    boolean apply = false;
    double sizeReplicated = 0;
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(n.getID())[1];

    //null-safe comparison: a missing data partitioner parameter means the rewrite
    //does not apply (instead of failing with a NullPointerException)
    if( n.getExecType()==ExecType.MR
        && PDataPartitioner.REMOTE_MR.toString().equals(n.getParam(ParamType.DATA_PARTITIONER))
        && n.hasNestedParallelism(false)
        && n.hasNestedPartitionReads(false) )
    {
        apply = true;

        //account for problem and cluster constraints
        replication = (int)Math.min( _N, _rnk );

        //account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int)Math.min( replication, MAX_REPLICATION_FACTOR_EXPORT );

        //account for remaining hdfs capacity
        try {
            FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
            long hdfsCapacityRemain = fs.getStatus().getRemaining();
            long sizeInputs = 0; //sum of all input sizes (w/o replication)
            for( String var : partitionedMatrices.keySet() )
            {
                MatrixObject mo = (MatrixObject)vars.get(var);
                Path fname = new Path(mo.getFileName());
                if( fs.exists( fname ) ) //non-existing (e.g., CP) -> small file
                    sizeInputs += fs.getContentSummary(fname).getLength();
            }
            //use at most 90% of the remaining capacity for the replicated inputs
            replication = (int) Math.min(replication, Math.floor(0.9*hdfsCapacityRemain/sizeInputs));

            //never go below the default write replication factor
            replication = Math.max( replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * sizeInputs;
        }
        catch(Exception ex)
        {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }
    }

    //modify the runtime plan
    if( apply )
        pfpb.setPartitionReplicationFactor( replication );

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'set partition replication factor' - result="+apply+
        ((apply)?" ("+replication+", "+toMB(sizeReplicated)+")":"") );
}
///////
//REWRITE set export replication factor
///
/**
 * Increasing the export replication factor is beneficial for remote execution
 * because each task will read the full input data set. This only applies to
 * matrices that are created as in-memory objects before parfor execution.
 *
 * NOTE: this rewrite requires 'set execution strategy' to be executed.
 *
 * @param n plan node of the parfor
 * @param vars variable map (not read by this rewrite)
 * @throws DMLRuntimeException
 */
protected void rewriteSetExportReplicationFactor( OptNode n, LocalVariableMap vars )
    throws DMLRuntimeException
{
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(n.getID())[1];

    //the rewrite applies to remote parfor only
    final boolean apply = ( n.getExecType()==ExecType.MR || n.getExecType()==ExecType.SPARK );

    int replication = -1;
    if( apply )
    {
        //bounded by problem size and cluster size ...
        replication = (int)Math.min( _N, _rnk );
        //... and by the internal maximum (note hadoop will warn if max > 10)
        replication = (int)Math.min( replication, MAX_REPLICATION_FACTOR_EXPORT );

        //modify the runtime plan
        pfpb.setExportReplicationFactor( replication );
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'set export replication factor' - result="+apply+((apply)?" ("+replication+")":"") );
}
///////
//REWRITE enable nested parallelism
///
/**
 * Splits the parfor behind {@code n} into an outer parfor executed with mode
 * REMOTE_MR and a newly created inner parfor executed with mode LOCAL. Both the
 * plan tree and the runtime program are modified.
 *
 * The rewrite is applied only if all guard conditions hold; otherwise nothing is
 * modified. The counter of evaluated plans is incremented in either case.
 *
 * @param n plan node of the parfor to split
 * @param M memory estimate; {@code M * _lkmaxCP} must not exceed {@code _rm}
 * @param flagLIX if true, the rewrite is not applied
 * @return true if the nested parfor was created, false otherwise
 * @throws DMLRuntimeException
 * @throws DMLUnsupportedOperationException
 */
@SuppressWarnings("all")
protected boolean rewriteNestedParallelism(OptNode n, double M, boolean flagLIX )
throws DMLRuntimeException, DMLUnsupportedOperationException
{
boolean nested = false;
if( APPLY_REWRITE_NESTED_PARALLELISM
&& !flagLIX // if not applied left indexing rewrite
&& _N >= _rnk // at least exploit all nodes
&& !n.hasNestedParallelism(false)// only for 1D problems, otherwise potentially bad load balance
&& M * _lkmaxCP <= _rm ) // only if we can exploit full local parallelism in the map task JVM memory
{
//modify tree: insert a new CP parfor node between n and its former children
ArrayList tmpOld = n.getChilds();
OptNode nest = new OptNode(NodeType.PARFOR, ExecType.CP);
ArrayList tmpNew = new ArrayList();
tmpNew.add(nest);
n.setChilds(tmpNew);
nest.setChilds(tmpOld);
//modify rtprog
long id = n.getID();
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
.getAbstractPlanMapping().getMappedProg(id)[1];
ArrayList tmpPBOld = pfpb.getChildBlocks();
//create new program block structure and modify parameters (from, to, incr, types,)
//NOTE(review): the array positions appear to be 0=iteration variable, 1=from,
//2=to, 3=increment (inferred from the inline comments) - confirm against ParForProgramBlock
String[] iterVars = pfpb.getIterablePredicateVars(); //from, to stay original
String[] iterVars2 = iterVars.clone(); //itervar, incr stay original
//outer increment: number of iterations divided by _rnk, rounded up
int outIncr = (int)Math.ceil(((double)_N)/_rnk);
//outer parfor: iterates over the internal index variable with the enlarged increment
iterVars[ 0 ] = ParForStatementBlock.INTERAL_FN_INDEX_ROW; // already checked for uniqueness in ParForStatementBlock
iterVars[ 3 ] = String.valueOf(outIncr);
//inner parfor: starts at the current value of the outer index variable; position 2
//is cleared here and the 'to' instructions are set explicitly further below
iterVars2[ 1 ] = ParForStatementBlock.INTERAL_FN_INDEX_ROW; //sub start
iterVars2[ 2 ] = null;
//the inner parfor gets a clone of the outer parfor's parameter map
HashMap params = pfpb.getParForParams();
HashMap params2 = (HashMap)params.clone();
ParForProgramBlock pfpb2 = new ParForProgramBlock(pfpb.getProgram(),iterVars2, params2);
//register the new program block for the new plan node (no statement block given)
OptTreeConverter.getAbstractPlanMapping().putProgMapping(null, pfpb2, nest);
//outer parfor: its only child becomes the new inner parfor
ArrayList tmpPBNew = new ArrayList();
tmpPBNew.add(pfpb2);
pfpb.setChildBlocks(tmpPBNew);
pfpb.setIterablePredicateVars(iterVars);
pfpb.setIncrementInstructions(new ArrayList());
pfpb.setExecMode(PExecMode.REMOTE_MR);
//inner parfor: takes over the original body and the result variables
pfpb2.setChildBlocks(tmpPBOld);
pfpb2.setResultVariables(pfpb.getResultVariables());
pfpb2.setFromInstructions(new ArrayList());
//'to' instructions are generated from the outer index variable and outIncr-1
pfpb2.setToInstructions(ProgramRecompiler.createNestedParallelismToInstructionSet( ParForStatementBlock.INTERAL_FN_INDEX_ROW, String.valueOf(outIncr-1) ));
pfpb2.setIncrementInstructions(new ArrayList());
pfpb2.setExecMode(PExecMode.LOCAL);
nested = true;
}
_numEvaluatedPlans++;
LOG.debug(getOptMode()+" OPT: rewrite 'enable nested parallelism' - result="+nested );
return nested;
}
///////
//REWRITE set degree of parallelism
///
/**
*
* @param n
* @param M
* @param kMax
* @param mMax (per node)
* @param nested
* @throws DMLRuntimeException
*/
protected void rewriteSetDegreeOfParallelism(OptNode n, double M, boolean flagNested)
throws DMLRuntimeException
{
ExecType type = n.getExecType();
long id = n.getID();
//special handling for different exec models (CP, MR, MR nested)
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
.getAbstractPlanMapping().getMappedProg(id)[1];
if( type == ExecType.CP )
{
//determine local max parallelism constraint
int kMax = -1;
if( n.isCPOnly() )
kMax = _lkmaxCP;
else
kMax = _lkmaxMR;
//ensure local memory constraint (for spark more conservative in order to
//prevent unnecessary guarded collect)
double mem = OptimizerUtils.isSparkExecutionMode() ? _lm/2 : _lm;
kMax = Math.min( kMax, (int)Math.floor( mem / M ) );
kMax = Math.max( kMax, 1);
//constrain max parfor parallelism by problem size
int parforK = (int)((_N= 1 (see exec strategy)
if( kMax < 1 )
kMax = 1;
//disable nested parallelism, if required
if( !ALLOW_REMOTE_NESTED_PARALLELISM )
kMax = 1;
//distribute remaining parallelism and recompile parallel instructions
rAssignRemainingParallelism( n, kMax, 1 );
}
_numEvaluatedPlans++;
LOG.debug(getOptMode()+" OPT: rewrite 'set degree of parallelism' - result=(see EXPLAIN)" );
}
/**
*
* @param n
* @param par
* @throws DMLRuntimeException
*/
protected void rAssignRemainingParallelism(OptNode n, int parforK, int opsK)
throws DMLRuntimeException
{
ArrayList childs = n.getChilds();
if( childs != null )
{
boolean recompileSB = false;
for( OptNode c : childs )
{
//NOTE: we cannot shortcut with c.setSerialParFor() on par=1 because
//this would miss to recompile multi-threaded hop operations
if( c.getNodeType() == NodeType.PARFOR )
{
//constrain max parfor parallelism by problem size
int tmpN = Integer.parseInt(c.getParam(ParamType.NUM_ITERATIONS));
int tmpK = (tmpN= pn.getK() ) //to prevent imbalance due to ceiling
{
setTaskPartitioner( pn, PTaskPartitioner.FACTORING );
}
else
{
setTaskPartitioner( pn, PTaskPartitioner.NAIVE );
}
}
/**
 * Applies the given task partitioner to the parfor behind {@code n}, in the
 * runtime program as well as in the plan. For FACTORING_CMAX, the maximum task
 * size is additionally set as a constraint and JVM reuse is disabled.
 *
 * @param n plan node of the parfor
 * @param partitioner task partitioner to set
 */
protected void setTaskPartitioner( OptNode n, PTaskPartitioner partitioner )
{
    //runtime program
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(n.getID())[1];
    pfpb.setTaskPartitioner(partitioner);

    //plan
    n.addParam(ParamType.TASK_PARTITIONER, partitioner.toString());

    //specific case of LIX recompile: constrained factoring
    final boolean flagLIX = (partitioner == PTaskPartitioner.FACTORING_CMAX);
    if( flagLIX )
    {
        long maxc = n.getMaxC( _N );
        pfpb.setTaskSize( maxc ); //used as constraint
        pfpb.disableJVMReuse();
        n.addParam(ParamType.TASK_SIZE, String.valueOf(maxc));
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'set task partitioner' - result="+partitioner+((flagLIX) ? ","+n.getParam(ParamType.TASK_SIZE) : "") );
}
///////
//REWRITE set fused data partitioning / execution
///
/**
 * This dedicated execution mode can only be applied if all of the
 * following conditions are true:
 * - Only cp instructions in the parfor body
 * - Only one partitioned input
 * - number of iterations is equal to number of partitions (nrow/ncol)
 * - partitioned matrix access via plain iteration variables (no composed expressions)
 *   (this ensures that each partition is exactly read once)
 * - no left indexing (since by default static task partitioning)
 *
 * Furthermore, it should be only chosen if we already decided for remote partitioning
 * and otherwise would create a large number of partition files.
 *
 * NOTE: We already respect the reducer memory budget for plan correctness. However,
 * we miss optimization potential if the reducer budget is larger than the mapper budget
 * (if we were not able to select REMOTE_MR as execution strategy wrt mapper budget)
 * TODO modify 'set exec strategy' and related rewrites for conditional data partitioning.
 *
 * @param pn plan node of the parfor
 * @param M memory estimate that must be below the remote reducer budget
 * @param flagLIX left indexing flag (not read by this rewrite)
 * @param partitionedMatrices partitioned matrices with their partition formats
 * @param vars variable map used to look up the partitioned matrix
 * @throws DMLRuntimeException
 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap partitionedMatrices, LocalVariableMap vars)
    throws DMLRuntimeException
{
    //sanity check (warning of corrupt optimizer decisions)
    if( pn.getNodeType() != NodeType.PARFOR )
        LOG.warn(getOptMode()+" OPT: Fused data partitioning and execution is only applicable for a ParFor node.");

    String partitioner = pn.getParam(ParamType.DATA_PARTITIONER);
    PDataPartitioner remoteDP = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    PExecMode fusedMode = OptimizerUtils.isSparkExecutionMode() ? PExecMode.REMOTE_SPARK_DP : PExecMode.REMOTE_MR_DP;

    boolean apply = false;

    //precondition: remote exec type (which also implies a CP-only body);
    //try to merge remote data partitioning and remote execution
    if( (pn.getExecType()==ExecType.MR || pn.getExecType()==ExecType.SPARK) //MR/SP EXEC and CP body
        && M < _rm2 //fits into remote memory of reducers
        && partitioner!=null && partitioner.equals(remoteDP.toString()) //MR/SP partitioning
        && partitionedMatrices.size()==1 ) //only one partitioned matrix
    {
        ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
            .getAbstractPlanMapping().getMappedProg(pn.getID())[1];

        //the single partitioned matrix
        String moVarname = partitionedMatrices.keySet().iterator().next();
        PDataPartitionFormat moDpf = partitionedMatrices.get(moVarname);
        MatrixObject mo = (MatrixObject)vars.get(moVarname);

        //access must go through the iteration variable ...
        String iterVarname = pfpb.getIterablePredicateVars()[0];
        if( rIsAccessByIterationVariable(pn, moVarname, iterVarname) )
        {
            //... and the number of partitions must match the number of iterations
            boolean sizesMatch =
                (moDpf==PDataPartitionFormat.ROW_WISE && mo.getNumRows()==_N )
                || (moDpf==PDataPartitionFormat.COLUMN_WISE && mo.getNumColumns()==_N);
            if( sizesMatch )
            {
                int k = (int)Math.min(_N,_rk2);
                pn.addParam(ParamType.DATA_PARTITIONER, fusedMode.toString()+"(fused)");
                pn.setK( k );
                pfpb.setExecMode(fusedMode); //set fused exec type
                pfpb.setDataPartitioner(PDataPartitioner.NONE);
                pfpb.enableColocatedPartitionedMatrix( moVarname );
                pfpb.setDegreeOfParallelism(k);
                apply = true;
            }
        }
    }

    LOG.debug(getOptMode()+" OPT: rewrite 'set fused data partitioning and execution' - result="+apply );
}
/**
 * Recursively checks that every partitioned indexing operation in the plan
 * rooted at {@code n} reads the matrix {@code varName} and uses the iteration
 * variable {@code iterVarname} as its row (ROW_WISE) or column (COLUMN_WISE)
 * index.
 *
 * @param n root of the (sub)plan to check
 * @param varName name of the partitioned matrix
 * @param iterVarname name of the parfor iteration variable
 * @return false if any partitioned indexing operation violates the condition, true otherwise
 * @throws DMLRuntimeException
 */
protected boolean rIsAccessByIterationVariable( OptNode n, String varName, String iterVarname )
    throws DMLRuntimeException
{
    boolean ret = true;

    if( !n.isLeaf() )
    {
        //a violation anywhere below must propagate to the caller; the result of
        //the recursive call was previously discarded, so inner nodes always returned true
        for( OptNode cn : n.getChilds() )
            ret &= rIsAccessByIterationVariable( cn, varName, iterVarname );
    }
    else if( n.getNodeType()== NodeType.HOP
        && n.getParam(ParamType.OPSTRING).equals(IndexingOp.OPSTRING)
        && n.getParam(ParamType.DATA_PARTITION_FORMAT) != null )
    {
        PDataPartitionFormat dpf = PDataPartitionFormat.valueOf(n.getParam(ParamType.DATA_PARTITION_FORMAT));
        Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
        String inMatrix = h.getInput().get(0).getName();
        String indexAccess = null;
        switch( dpf )
        {
            case ROW_WISE: //input 1 and 2 eq
                if( h.getInput().get(1) instanceof DataOp )
                    indexAccess = h.getInput().get(1).getName();
                break;
            case COLUMN_WISE: //input 3 and 4 eq
                if( h.getInput().get(3) instanceof DataOp )
                    indexAccess = h.getInput().get(3).getName();
                break;
            default:
                //other formats: no index access via a plain variable
        }

        ret &= ( (inMatrix!=null && inMatrix.equals(varName))
            && (indexAccess!=null && indexAccess.equals(iterVarname)));
    }

    return ret;
}
///////
//REWRITE transpose sparse vector operations
///
/**
 * Enables transposed sparse column vector handling for a parfor that runs in
 * mode REMOTE_MR_DP over exactly one column-wise partitioned, sparse matrix
 * whose partition reads are transpose-safe.
 *
 * @param pn plan node of the parfor
 * @param partitionedMatrices partitioned matrices with their partition formats
 * @param vars variable map used to look up the partitioned matrix
 * @throws DMLRuntimeException
 */
protected void rewriteSetTranposeSparseVectorOperations(OptNode pn, HashMap partitionedMatrices, LocalVariableMap vars)
    throws DMLRuntimeException
{
    //sanity check (warning of corrupt optimizer decisions)
    if( pn.getNodeType() != NodeType.PARFOR )
        LOG.warn(getOptMode()+" OPT: Transpose sparse vector operations is only applicable for a ParFor node.");

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(pn.getID())[1];

    boolean apply = false;

    //generally applicable only for fused MR execution over a single partitioned matrix
    if( pfpb.getExecMode() == PExecMode.REMOTE_MR_DP
        && partitionedMatrices.size()==1 )
    {
        String moVarname = partitionedMatrices.keySet().iterator().next();
        PDataPartitionFormat moDpf = partitionedMatrices.get(moVarname);
        Data dat = vars.get(moVarname);

        boolean columnWiseMatrix = dat instanceof MatrixObject
            && moDpf == PDataPartitionFormat.COLUMN_WISE;
        if( columnWiseMatrix
            && ((MatrixObject)dat).getSparsity()<= MatrixBlock.SPARSITY_TURN_POINT //sparse matrix
            && rIsTransposeSafePartition(pn, moVarname) ) //transpose-safe
        {
            pfpb.setTransposeSparseColumnVector( true );
            apply = true;
        }
    }

    LOG.debug(getOptMode()+" OPT: rewrite 'set transpose sparse vector operations' - result="+apply );
}
/**
 * Recursively checks that every partitioned indexing operation on the matrix
 * {@code varName} in the plan rooted at {@code n} is consumed exclusively by
 * transpose-safe operations.
 *
 * @param n root of the (sub)plan to check
 * @param varName name of the partitioned matrix
 * @return false if any parent of a partitioned read of {@code varName} is not
 *         transpose-safe, true otherwise
 * @throws DMLRuntimeException
 */
protected boolean rIsTransposeSafePartition( OptNode n, String varName )
    throws DMLRuntimeException
{
    boolean ret = true;

    if( !n.isLeaf() )
    {
        //a violation anywhere below must propagate to the caller; the result of
        //the recursive call was previously discarded, so inner nodes always returned true
        for( OptNode cn : n.getChilds() )
            ret &= rIsTransposeSafePartition( cn, varName );
    }
    else if( n.getNodeType()== NodeType.HOP
        && n.getParam(ParamType.OPSTRING).equals(IndexingOp.OPSTRING)
        && n.getParam(ParamType.DATA_PARTITION_FORMAT) != null )
    {
        Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
        String inMatrix = h.getInput().get(0).getName();
        if( inMatrix.equals(varName) )
        {
            //check that all parents are transpose-safe operations
            //(even a transient write would not be safe due to indirection into other DAGs)
            ArrayList parent = h.getParent();
            for( Hop p : parent )
                ret &= p.isTransposeSafe();
        }
    }

    return ret;
}
///////
//REWRITE set in-place result indexing
///
/**
 * Enables in-place updates for the result variables of a parfor if all left
 * indexing operations on them are in-place safe and the estimated total memory
 * fits into the remote (remote modes) or local (LOCAL mode) memory budget.
 *
 * @param pn plan node of the parfor
 * @param M memory estimate added to the size of the result variables
 * @param vars variable map used to look up the result variables
 * @param inPlaceResultVars output set receiving the result variables if the rewrite applies
 * @throws DMLRuntimeException
 */
protected void rewriteSetInPlaceResultIndexing(OptNode pn, double M, LocalVariableMap vars, HashSet inPlaceResultVars)
    throws DMLRuntimeException
{
    //sanity check (warning of corrupt optimizer decisions)
    if( pn.getNodeType() != NodeType.PARFOR )
        LOG.warn(getOptMode()+" OPT: Set in-place result update is only applicable for a ParFor node.");

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(pn.getID())[1];

    //the decision is taken jointly for all result variables: the rewrite applies
    //only if all of them fit pinned into the remaining budget
    ArrayList retVars = pfpb.getResultVariables();
    double sum = computeTotalSizeResultVariables(retVars, vars, pfpb.getDegreeOfParallelism());

    //NOTE: currently this rule is too conservative (the result variable is assumed to be dense and
    //most importantly counted twice if this is part of the maximum operation)
    double totalMem = Math.max((M+sum), rComputeSumMemoryIntermediates(pn, new HashSet()));

    boolean apply = false;
    if( rHasOnlyInPlaceSafeLeftIndexing(pn, retVars) ) //basic correctness constraint
    {
        PExecMode mode = pfpb.getExecMode();
        boolean remote = ( mode == PExecMode.REMOTE_MR_DP || mode == PExecMode.REMOTE_MR
            || mode == PExecMode.REMOTE_SPARK_DP || mode == PExecMode.REMOTE_SPARK );

        if( remote ) {
            //MR/Spark: remote memory constraint
            apply = totalMem < _rm;
        }
        else {
            //CP: local memory constraint and no forced mr/spark execution
            apply = mode == PExecMode.LOCAL
                && totalMem * pfpb.getDegreeOfParallelism() < _lm
                && pn.isCPOnly();
        }
    }

    //modify result variable meta data, if rewrite applied
    if( apply )
    {
        //flag the result matrices; the state is serialized and transferred via the symbol table
        for( String var : retVars ){
            Data dat = vars.get(var);
            if( dat instanceof MatrixObject )
                ((MatrixObject)dat).enableUpdateInPlace(true);
        }
        inPlaceResultVars.addAll(retVars);
    }

    LOG.debug(getOptMode()+" OPT: rewrite 'set in-place result indexing' - result="+
        apply+" ("+ProgramConverter.serializeStringCollection(inPlaceResultVars)+", M="+toMB(totalMem)+")" );
}
/**
 * Recursively checks that every left indexing operation on a result variable
 * in the plan rooted at {@code n} has exactly one parent, and that this parent
 * carries the same name as the indexed variable.
 *
 * @param n root of the (sub)plan to check
 * @param retVars names of the parfor result variables
 * @return true if all left indexing operations on result variables satisfy the condition
 * @throws DMLRuntimeException
 */
protected boolean rHasOnlyInPlaceSafeLeftIndexing( OptNode n, ArrayList retVars )
    throws DMLRuntimeException
{
    //inner node: combine the results of all children (every child is visited)
    if( !n.isLeaf() )
    {
        boolean allSafe = true;
        for( OptNode cn : n.getChilds() )
            allSafe &= rHasOnlyInPlaceSafeLeftIndexing( cn, retVars );
        return allSafe;
    }

    //leaf: anything but a left indexing hop is irrelevant
    if( n.getNodeType()!=NodeType.HOP
        || !n.getParam(ParamType.OPSTRING).equals(LeftIndexingOp.OPSTRING) )
    {
        return true;
    }

    //left indexing on a non-result variable is irrelevant as well
    Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
    if( !retVars.contains( h.getInput().get(0).getName() ) )
        return true;

    return h.getParent().size()==1
        && h.getParent().get(0).getName().equals(h.getInput().get(0).getName());
}
/**
 * Estimates the total memory of all matrix result variables. The accumulator
 * starts at 1; non-matrix variables do not contribute.
 *
 * @param retVars names of the result variables
 * @param vars variable map used to look up the result variables
 * @param k degree of parallelism used to scale the estimate of non-empty matrices
 * @return estimated total size of the result variables
 */
private double computeTotalSizeResultVariables(ArrayList retVars, LocalVariableMap vars, int k)
{
    double total = 1;
    for( String var : retVars )
    {
        Data dat = vars.get(var);
        if( !(dat instanceof MatrixObject) )
            continue;

        MatrixObject mo = (MatrixObject)dat;
        double nnz = mo.getNnz();
        if( nnz == 0.0 ) {
            //empty matrix: counted once with a dense estimate
            total += OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), 1.0);
            continue;
        }

        //non-zero nnz: every worker will consume memory for (MatrixSize/k + nnz) data
        double sp = mo.getSparsity();
        total += (k+1) * (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(),
            Math.min((1.0/k)+sp, 1.0)));
    }
    return total;
}
///////
//REWRITE disable CP caching
///
/**
 * Disables CP caching for a parfor executed in mode REMOTE_MR or REMOTE_MR_DP
 * if the sum of all intermediates fits into the remote memory budget.
 *
 * @param pn plan node of the parfor
 * @param inplaceResultVars in-place result variables, excluded from the read inputs
 * @param vars variable map (not read by this rewrite)
 * @throws DMLRuntimeException
 */
protected void rewriteDisableCPCaching(OptNode pn, HashSet inplaceResultVars, LocalVariableMap vars)
    throws DMLRuntimeException
{
    //sanity check (warning of corrupt optimizer decisions)
    if( pn.getNodeType() != NodeType.PARFOR )
        LOG.warn(getOptMode()+" OPT: Disable caching is only applicable for a ParFor node.");

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(pn.getID())[1];
    double M_sumInterm = rComputeSumMemoryIntermediates(pn, inplaceResultVars);

    //all intermediates and operations must fit into the remote memory budget
    PExecMode mode = pfpb.getExecMode();
    boolean remoteMR = (mode == PExecMode.REMOTE_MR_DP || mode == PExecMode.REMOTE_MR);
    boolean apply = remoteMR && M_sumInterm < _rm;
    if( apply )
        pfpb.setCPCaching(false); //default is true

    LOG.debug(getOptMode()+" OPT: rewrite 'disable CP caching' - result="+apply+" (M="+toMB(M_sumInterm)+")" );
}
/**
 * Recursively sums up the memory estimates of all HOP leafs in the plan rooted
 * at {@code n}: output and intermediate memory plus the memory of read inputs
 * that are not in-place result variables. Partitioned indexing operations
 * contribute their overall memory estimate instead.
 *
 * @param n root of the (sub)plan
 * @param inplaceResultVars names of in-place result variables to exclude from read inputs
 * @return sum of the memory estimates
 * @throws DMLRuntimeException
 */
protected double rComputeSumMemoryIntermediates( OptNode n, HashSet inplaceResultVars )
    throws DMLRuntimeException
{
    double total = 0;

    //inner node: sum over all children
    if( !n.isLeaf() )
    {
        for( OptNode cn : n.getChilds() )
            total += rComputeSumMemoryIntermediates( cn, inplaceResultVars );
        return total;
    }

    //non-hop leafs do not contribute
    if( n.getNodeType()!=NodeType.HOP )
        return total;

    Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());

    boolean partitionedIndexing = n.getParam(ParamType.OPSTRING).equals(IndexingOp.OPSTRING)
        && n.getParam(ParamType.DATA_PARTITION_FORMAT) != null;
    if( partitionedIndexing )
    {
        //estimate set during partitioning rewrite
        total += h.getMemEstimate();
        return total;
    }

    //base intermediate (worst-case w/ materialized intermediates)
    total += h.getOutputMemEstimate()
        + h.getIntermediateMemEstimate();

    //inputs not represented in the planopttree (worst-case no CSE)
    if( h.getInput() != null )
    {
        for( Hop cn : h.getInput() )
        {
            boolean readData = cn instanceof DataOp && ((DataOp)cn).isRead();
            if( readData && !inplaceResultVars.contains(cn.getName()) ) //except in-place result vars
                total += cn.getMemEstimate();
        }
    }
    return total;
}
///////
//REWRITE enable runtime piggybacking
///
/**
 * Enables runtime piggybacking for a parfor if it is globally allowed, at least
 * one MR operation reads a shared input variable, and the total degree of
 * parallelism is larger than one.
 *
 * @param n plan node of the parfor
 * @param vars variable map; its key set is the set of input variables
 * @param partitionedMatrices partitioned matrices; the key set is passed on to the shared input check
 * @throws DMLRuntimeException
 */
protected void rewriteEnableRuntimePiggybacking( OptNode n, LocalVariableMap vars, HashMap partitionedMatrices )
    throws DMLRuntimeException
{
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping().getMappedProg(n.getID())[1];

    HashSet sharedVars = new HashSet();
    boolean apply = false;

    //enable runtime piggybacking if MR jobs on shared read-only data set
    if( OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING )
    {
        //a hop in MR must read a shared input variable ...
        boolean sharedInput = rHasSharedMRInput(n, vars.keySet(), partitionedMatrices.keySet(), sharedVars);
        //... and the degree of parallelism must be > 1
        apply = sharedInput && n.getTotalK() > 1;
    }

    if( apply )
        pfpb.setRuntimePiggybacking(true);

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'enable runtime piggybacking' - result="+apply+
        " ("+ProgramConverter.serializeStringCollection(sharedVars)+")" );
}
/**
 * Recursively checks whether any MR operation in the plan rooted at {@code n}
 * reads a matrix input variable, either directly or below a transpose, and
 * collects the names of all such variables.
 *
 * @param n root of the (sub)plan to scan
 * @param inputVars names of the input variables
 * @param partitionedVars names of the partitioned variables (currently not used as a filter)
 * @param sharedVars output set receiving the names of the shared input variables
 * @return true if at least one shared input was found
 * @throws DMLRuntimeException
 */
protected boolean rHasSharedMRInput( OptNode n, Set inputVars, Set partitionedVars, HashSet sharedVars )
    throws DMLRuntimeException
{
    boolean found = false;

    if( !n.isLeaf() )
    {
        //visit all children (no short-circuit) in order to collect all shared variables
        for( OptNode cn : n.getChilds() )
            found |= rHasSharedMRInput( cn, inputVars, partitionedVars, sharedVars );
    }
    else if( n.getNodeType()== NodeType.HOP && n.getExecType()==ExecType.MR )
    {
        Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
        for( Hop ch : h.getInput() )
        {
            //note: the constraint of non-partitioned inputs was relaxed for additional
            //latency hiding and scan sharing of partitions which are read multiple times
            Hop shared = null;
            if( ch instanceof DataOp && ch.getDataType() == DataType.MATRIX
                && inputVars.contains(ch.getName()) )
            {
                //direct read of an input matrix
                shared = ch;
            }
            else if( ch instanceof ReorgOp && ((ReorgOp)ch).getOp()==ReOrgOp.TRANSPOSE
                && ch.getInput().get(0) instanceof DataOp && ch.getInput().get(0).getDataType() == DataType.MATRIX
                && inputVars.contains(ch.getInput().get(0).getName()) )
            {
                //read of an input matrix below a transpose
                shared = ch.getInput().get(0);
            }

            if( shared != null )
            {
                found = true;
                sharedVars.add(shared.getName());
            }
        }
    }

    return found;
}
///////
//REWRITE inject spark loop checkpointing
///
/**
 * Applies the hop rewrite for spark loop checkpoint injection to the parfor
 * behind {@code n} and regenerates the runtime program of the parfor body if
 * checkpoints were injected.
 *
 * @param n plan node of the root parfor
 * @throws DMLRuntimeException if the rewrite or the recompilation fails
 * @throws DMLUnsupportedOperationException
 */
protected void rewriteInjectSparkLoopCheckpointing(OptNode n)
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    //statement block, statement and program block of the root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock pfsb = (ParForStatementBlock)mapped[0];
    ParForStatement fs = (ParForStatement) pfsb.getStatement(0);
    ParForProgramBlock pfpb = (ParForProgramBlock)mapped[1];

    boolean applied = false;
    try
    {
        //hop rewrite 'inject spark checkpoints' (but without context awareness)
        ProgramRewriter rewriter = new ProgramRewriter(
            new RewriteInjectSparkLoopCheckpointing(false));
        ProgramRewriteStatus state = new ProgramRewriteStatus();
        rewriter.rewriteStatementBlockHopDAGs( pfsb, state );
        fs.setBody(rewriter.rewriteStatementBlocks(fs.getBody(), state));

        //regenerate the runtime program only if additional checkpoints were introduced
        if( state.getInjectedCheckpoints() )
        {
            pfpb.setChildBlocks(ProgramRecompiler.generatePartitialRuntimeProgram(pfpb.getProgram(), fs.getBody()));
            applied = true;
        }
    }
    catch(Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    LOG.debug(getOptMode()+" OPT: rewrite 'inject spark loop checkpointing' - result="+applied );
}
///////
//REWRITE inject spark repartition for zipmm
///
/**
 * Selects the input matrices of a local parfor (in spark execution mode, with
 * at least two iterations) that should be repartitioned: read-only inputs of
 * zipmm/cpmm operations whose estimated size exceeds the local memory budget.
 *
 * @param n plan node of the root parfor
 * @param vars variable map used to look up the candidate matrices
 * @throws DMLRuntimeException
 * @throws DMLUnsupportedOperationException
 */
protected void rewriteInjectSparkRepartition(OptNode n, LocalVariableMap vars)
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    //statement block and program block of the root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock pfsb = (ParForStatementBlock)mapped[0];
    ParForProgramBlock pfpb = (ParForProgramBlock)mapped[1];

    ArrayList ret = new ArrayList();

    if( OptimizerUtils.isSparkExecutionMode() //spark exec mode
        && n.getExecType() == ExecType.CP //local parfor
        && _N > 1 ) //at least 2 iterations
    {
        //candidates from zipmm spark instructions
        HashSet cand = new HashSet();
        rCollectZipmmPartitioningCandidates(n, cand);

        HashSet probe = new HashSet(pfsb.getReadOnlyParentVars());
        for( String var : cand )
        {
            //prune updated candidates
            if( !probe.contains( var ) )
                continue;
            //prune non-matrix candidates
            if( !(vars.get(var) instanceof MatrixObject) )
                continue;

            //prune small candidates
            MatrixObject mo = (MatrixObject) vars.get(var);
            double sp = OptimizerUtils.getSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getNnz());
            double size = OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), sp);
            if( size > OptimizerUtils.getLocalMemBudget() )
                ret.add(var);
        }

        //apply rewrite to parfor pb
        if( !ret.isEmpty() )
            pfpb.setSparkRepartitionVariables(ret);
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode()+" OPT: rewrite 'inject spark input repartition' - result="+ret.size()+
        " ("+ProgramConverter.serializeStringCollection(ret)+")" );
}
/**
 * Recursively collects the names of variables that are inputs of matrix
 * multiplications with method ZIPMM or CPMM in the subtree rooted at the
 * given node. An input is collected if it is a data operator, or a transpose
 * whose own first input is a data operator.
 * 
 * @param n current optimizer tree node
 * @param cand output set of candidate variable names (modified in place)
 */
private void rCollectZipmmPartitioningCandidates( OptNode n, HashSet<String> cand )
{
	//collect zipmm inputs
	if( n.getNodeType()==NodeType.HOP )
	{
		Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
		if( h instanceof AggBinaryOp )
		{
			//zipmm, or cpmm (unknowns) which might turn into zipmm
			MMultMethod method = ((AggBinaryOp)h).getMMultMethod();
			if( method==MMultMethod.ZIPMM || method==MMultMethod.CPMM )
			{
				//check for dataop or dataops under transpose on both sides
				for( Hop in : h.getInput() ) {
					if( in instanceof DataOp ) {
						cand.add( in.getName() );
					}
					else if( in instanceof ReorgOp 
						&& ((ReorgOp)in).getOp()==ReOrgOp.TRANSPOSE
						&& in.getInput().get(0) instanceof DataOp ) 
					{
						cand.add( in.getInput().get(0).getName() );
					}
				}
			}
		}
	}
	
	//recursively process childs
	if( !n.isLeaf() ) {
		for( OptNode c : n.getChilds() )
			rCollectZipmmPartitioningCandidates(c, cand);
	}
}
///////
//REWRITE set spark eager rdd caching
///
/**
 * Rewrite 'set spark eager rdd caching': determines the variables read in the
 * parfor that should be eagerly cached and registers them at the parfor
 * program block.
 * 
 * The rewrite only applies in spark execution mode, for a parfor node with
 * exec type CP, and more than one iteration. A variable qualifies if it is a
 * matrix object with an RDD handle, is not a spark repartition variable, its
 * RDD reports checkpoint RDD childs, and its estimated size exceeds the local
 * memory budget divided by the degree of parallelism of the node.
 * 
 * @param n optimizer tree node of the root parfor
 * @param vars variable map used to look up the candidate matrix objects
 * @throws DMLRuntimeException part of the declared signature
 * @throws DMLUnsupportedOperationException part of the declared signature
 */
protected void rewriteSetSparkEagerRDDCaching(OptNode n, LocalVariableMap vars) 
	throws DMLRuntimeException, DMLUnsupportedOperationException 
{
	//get program blocks of root parfor
	Object[] progobj = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
	ParForStatementBlock pfsb = (ParForStatementBlock)progobj[0];
	ParForProgramBlock pfpb = (ParForProgramBlock)progobj[1];
	ArrayList<String> ret = new ArrayList<String>();
	
	if(    OptimizerUtils.isSparkExecutionMode() //spark exec mode
		&& n.getExecType() == ExecType.CP        //local parfor
		&& _N > 1                            )   //at least 2 iterations
	{
		Set<String> cand = pfsb.variablesRead().getVariableNames();
		Collection<String> rpVars = pfpb.getSparkRepartitionVariables();
		for( String var : cand )
		{
			//instanceof already covers the null case
			Data dat = vars.get(var);
			if( dat instanceof MatrixObject 
				&& ((MatrixObject)dat).getRDDHandle()!=null )
			{
				MatrixObject mo = (MatrixObject)dat;
				MatrixCharacteristics mc = mo.getMatrixCharacteristics();
				RDDObject rdd = mo.getRDDHandle();
				if(   (rpVars==null || !rpVars.contains(var)) //not a repartition var
					&& rdd.rHasCheckpointRDDChilds()          //is cached rdd 
					&& _lm / n.getK() <                       //is out-of-core dataset
					   OptimizerUtils.estimateSizeExactSparsity(mc))
				{
					ret.add(var);
				}
			}
		}
		
		//apply rewrite to parfor pb
		if( !ret.isEmpty() ) {
			pfpb.setSparkEagerCacheVariables(ret);
		}
	}
	
	_numEvaluatedPlans++;
	LOG.debug(getOptMode()+" OPT: rewrite 'set spark eager rdd caching' - result="+ret.size()+
		" ("+ProgramConverter.serializeStringCollection(ret)+")" );
}
///////
//REWRITE remove compare matrix (for result merge, needs to be invoked before setting result merge)
///
/**
 * Rewrite 'remove unnecessary compare matrix': replaces existing result
 * matrices by empty matrices of the same dimensions whenever the parfor is
 * guaranteed to fully overwrite them. Needs to be invoked before the result
 * merge is set.
 * 
 * A result variable is cleaned if it is a matrix object with non-zeros, the
 * node has only simple childs, the body contains a full row- or column-wise
 * replace of the variable indexed by the iteration variable, the variable is
 * never read via right indexing in the body, and both dimensions fit into an
 * integer.
 * 
 * @param n optimizer tree node of the parfor
 * @param ec execution context holding the result variables (modified in place)
 * @throws DMLRuntimeException part of the declared signature
 */
protected void rewriteRemoveUnnecessaryCompareMatrix( OptNode n, ExecutionContext ec ) 
	throws DMLRuntimeException
{
	ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
		    .getAbstractPlanMapping().getMappedProg(n.getID())[1];

	ArrayList<String> cleanedVars = new ArrayList<String>();
	ArrayList<String> resultVars = pfpb.getResultVariables();
	String itervar = pfpb.getIterablePredicateVars()[0]; 
	
	for( String rvar : resultVars ) {
		Data dat = ec.getVariable(rvar);
		if(    dat instanceof MatrixObject && ((MatrixObject)dat).getNnz()!=0     //subject to result merge with compare
			&& n.hasOnlySimpleChilds()                                            //guaranteed no conditional indexing	
			&& rContainsResultFullReplace(n, rvar, itervar, (MatrixObject)dat)    //guaranteed full matrix replace 
			&& !rIsReadInRightIndexing(n, rvar)                                   //never read variable in loop body
			&& ((MatrixObject)dat).getNumRows()<=Integer.MAX_VALUE
			&& ((MatrixObject)dat).getNumColumns()<=Integer.MAX_VALUE )
		{
			//replace existing matrix object with empty matrix
			MatrixObject mo = (MatrixObject)dat;
			ec.cleanupMatrixObject(mo);
			ec.setMatrixOutput(rvar, new MatrixBlock((int)mo.getNumRows(), (int)mo.getNumColumns(),false));
			
			//keep track of cleaned result variables
			cleanedVars.add(rvar);
		}
	}

	_numEvaluatedPlans++;
	LOG.debug(getOptMode()+" OPT: rewrite 'remove unnecessary compare matrix' - result="+(!cleanedVars.isEmpty())+" ("+ProgramConverter.serializeStringCollection(cleanedVars)+")" );
}
/**
 * Checks whether the subtree rooted at the given node contains at least one
 * hop node for which {@code isResultFullReplace} holds.
 * 
 * @param n current optimizer tree node
 * @param resultVar name of the result variable
 * @param iterVarname name of the parfor iteration variable
 * @param mo matrix object of the result variable
 * @return true if any hop node in the subtree is a full replace of the result variable
 * @throws DMLRuntimeException part of the declared signature
 */
protected boolean rContainsResultFullReplace( OptNode n, String resultVar, String iterVarname, MatrixObject mo ) 
	throws DMLRuntimeException
{
	//check the current node itself (only hop nodes are inspected)
	boolean found = ( n.getNodeType()==NodeType.HOP )
		&& isResultFullReplace(n, resultVar, iterVarname, mo);
	
	//check all childs (no early abort, every child is visited)
	if( !n.isLeaf() ) {
		for( OptNode child : n.getChilds() ) {
			boolean inChild = rContainsResultFullReplace(child, resultVar, iterVarname, mo);
			found = found || inChild;
		}
	}
	
	return found;
}
/**
 * Checks whether the given hop node is a left indexing operation on the
 * result variable that overwrites a full row or a full column selected by
 * the iteration variable, i.e., either rows [iter,iter] with columns
 * [1,ncol] or columns [iter,iter] with rows [1,nrow], where the constant
 * bounds must be literals.
 * 
 * @param n optimizer tree node (expected to map to a hop)
 * @param resultVar name of the result variable
 * @param iterVarname name of the parfor iteration variable
 * @param mo matrix object of the result variable, provides nrow and ncol
 * @return true if the node is a row-wise or column-wise full replace
 * @throws DMLRuntimeException part of the declared signature
 */
protected boolean isResultFullReplace( OptNode n, String resultVar, String iterVarname, MatrixObject mo ) 
	throws DMLRuntimeException
{
	//only left indexing operators qualify
	String op = n.getParam(ParamType.OPSTRING);
	if( op==null || !op.equals(LeftIndexingOp.OPSTRING) )
		return false;

	//the indexed target (first input) must be the result variable
	Hop lix = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
	Hop target = lix.getInput().get(0);
	if( !resultVar.equals(target.getName()) )
		return false;

	//index range inputs: row lower/upper, column lower/upper
	Hop rowLower = lix.getInput().get(2);
	Hop rowUpper = lix.getInput().get(3);
	Hop colLower = lix.getInput().get(4);
	Hop colUpper = lix.getInput().get(5);
	
	//row-wise overwrite: X[i,1:ncol]
	boolean rowwise = rowLower.getName().equals(iterVarname) 
		&& rowUpper.getName().equals(iterVarname)
		&& colLower instanceof LiteralOp 
		&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)colLower)==1
		&& colUpper instanceof LiteralOp 
		&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)colUpper)==mo.getNumColumns();
	if( rowwise )
		return true;
	
	//column-wise overwrite: X[1:nrow,i]
	return colLower.getName().equals(iterVarname) 
		&& colUpper.getName().equals(iterVarname)
		&& rowLower instanceof LiteralOp 
		&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)rowLower)==1
		&& rowUpper instanceof LiteralOp 
		&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)rowUpper)==mo.getNumRows();
}
/**
 * Checks whether the given variable is read by a right indexing operation
 * anywhere in the subtree rooted at the given node, i.e., whether there is
 * an indexing hop whose first input is a data operator with that name.
 * 
 * NOTE: Only right indexing is checked. According to the original author,
 * this is sufficient for 'remove unnecessary compare matrix' because the
 * full replace check has already been done by the caller.
 * 
 * @param n current optimizer tree node
 * @param var variable name to probe
 * @return true if the variable is read in a right indexing operation
 */
protected boolean rIsReadInRightIndexing(OptNode n, String var) 
{
	boolean found = false;
	
	//probe the current node
	if( n.getNodeType()==NodeType.HOP ) {
		Hop hop = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
		if( hop instanceof IndexingOp ) {
			Hop source = hop.getInput().get(0);
			found = ( source instanceof DataOp && source.getName().equals(var) );
		}
	}
	
	//probe all childs (no early abort, every child is visited)
	if( !n.isLeaf() ) {
		for( OptNode child : n.getChilds() ) {
			if( rIsReadInRightIndexing(child, var) )
				found = true;
		}
	}
	
	return found;
}
///////
//REWRITE set result merge
///
/**
 * Rewrite 'set result merge': decides on the result merge strategy of the
 * parfor mapped to the given node, sets it in the runtime program block and
 * the plan node, and recursively applies the rewrite to nested parfor nodes.
 * 
 * Decision order:
 * (1) remote merge, if the parfor is remote (MR/SPARK) and the total results are large,
 * (2) local in-memory merge, if all results qualify for in-memory merge,
 * (3) remote merge, if the parfor is remote or contains remote left indexing
 *     on a result variable, unless all results are in cell format without
 *     compare and copying cell files is allowed,
 * (4) local automatic merge otherwise.
 * 
 * @param n optimizer tree node of the parfor
 * @param vars variable map used to look up the result variables
 * @param inLocal flag passed on (and-ed with 'not remote') to nested parfor nodes
 * @throws DMLRuntimeException part of the declared signature
 */
protected void rewriteSetResultMerge( OptNode n, LocalVariableMap vars, boolean inLocal ) 
	throws DMLRuntimeException
{
	ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
								.getAbstractPlanMapping().getMappedProg(n.getID())[1];
	
	//remote result merge variant according to the execution mode
	PResultMerge remoteMerge = OptimizerUtils.isSparkExecutionMode() ? 
			PResultMerge.REMOTE_SPARK : PResultMerge.REMOTE_MR;
	
	//collect properties of the current parfor node
	//NOTE(review): the in-memory check below is invoked with a constant 'true'
	//rather than the inLocal argument - confirm this is intended
	boolean remoteParfor  = (n.getExecType() == ExecType.MR || n.getExecType() == ExecType.SPARK);
	boolean largeResult   = hasLargeTotalResults( n, pfpb.getResultVariables(), vars, true );
	boolean remoteLix     = hasResultMRLeftIndexing( n, pfpb.getResultVariables(), vars, true );
	boolean cellWoCompare = determineFlagCellFormatWoCompare(pfpb.getResultVariables(), vars); 
	boolean allInMemory   = hasOnlyInMemoryResults(n, pfpb.getResultVariables(), vars, true );
	
	//decide on the result merge strategy
	PResultMerge merge;
	if( remoteParfor && largeResult ) {
		//remote exec and large results (prevent huge transfer/merge costs)
		merge = remoteMerge;
	}
	else if( allInMemory ) {
		//all results can be merged in memory
		merge = PResultMerge.LOCAL_MEM;
	}
	else if( (remoteParfor || remoteLix) 
		&& (!cellWoCompare || !ResultMergeLocalFile.ALLOW_COPY_CELLFILES) ) 
	{
		//result partitioning and copy not possible; 'at least one' remote
		//left indexing is sufficient because the benefit for large matrices
		//outweighs potentially unnecessary remote jobs for small matrices
		merge = remoteMerge;
	}
	else {
		//local merge, in-memory or file-based is decided later
		merge = PResultMerge.LOCAL_AUTOMATIC;
	}
	
	//modify runtime program and plan
	pfpb.setResultMerge(merge);
	n.addParam(ParamType.RESULT_MERGE, merge.toString());

	//recursively apply rewrite for nested parfor nodes
	if( n.getChilds() != null )
		rInvokeSetResultMerge(n.getChilds(), vars, inLocal && !remoteParfor);
	
	_numEvaluatedPlans++;
	LOG.debug(getOptMode()+" OPT: rewrite 'set result merge' - result="+merge );
}
/**
 * Determines whether all result variables are matrix objects that are not
 * in binary block format and have zero non-zeros according to their
 * metadata.
 * 
 * @param resultVars names of the parfor result variables
 * @param vars variable map used to look up the result variables
 * @return true if every result variable is an existing matrix object with an
 *         output info other than binary block and zero non-zeros; also true
 *         for an empty list of result variables
 */
protected boolean determineFlagCellFormatWoCompare( ArrayList<String> resultVars, LocalVariableMap vars  )
{
	for( String rVar : resultVars )
	{
		//missing variables and non-matrix objects disqualify
		//(instanceof already covers the null case)
		Data dat = vars.get(rVar);
		if( !(dat instanceof MatrixObject) )
			return false;
		
		//binary block format or existing non-zeros disqualify
		MatrixObject mo = (MatrixObject)dat;
		MatrixFormatMetaData meta = (MatrixFormatMetaData) mo.getMetaData();
		OutputInfo oi = meta.getOutputInfo();
		long nnz = meta.getMatrixCharacteristics().getNonZeros();
		if( oi == OutputInfo.BinaryBlockOutputInfo || nnz != 0 )
			return false;
	}
	
	return true;
}
/**
 * Checks whether the subtree rooted at the given node contains a left
 * indexing leaf with exec type MR or SPARK whose target is one of the
 * result variables. If {@code checkSize} is set and the variable exists in
 * the variable map, the leaf only counts if the result does not qualify
 * for in-memory result merge w.r.t. the remote map memory budget.
 * 
 * @param n current optimizer tree node
 * @param resultVars names of the parfor result variables
 * @param vars variable map used to look up the result matrix dimensions
 * @param checkSize if true, additionally apply the size-based check
 * @return true if at least one such left indexing operation exists
 * @throws DMLRuntimeException part of the declared signature
 */
protected boolean hasResultMRLeftIndexing( OptNode n, ArrayList<String> resultVars, LocalVariableMap vars, boolean checkSize ) 
	throws DMLRuntimeException
{
	boolean ret = false;
	
	if( n.isLeaf() )
	{
		String opName = n.getParam(ParamType.OPSTRING);
		//check opstring and exec type
		if( opName !=null && opName.equals(LeftIndexingOp.OPSTRING) && 
			(n.getExecType() == ExecType.MR || n.getExecType() == ExecType.SPARK) )
		{
			LeftIndexingOp hop = (LeftIndexingOp) OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
			//check against set of result variable names
			String varName = hop.getInput().get(0).getName();
			if( resultVars.contains(varName) )
			{
				ret = true;
				if( checkSize && vars.keySet().contains(varName) )
				{
					//dims of result vars must be known at this point in time
					MatrixObject mo = (MatrixObject) vars.get( varName );
					long rows = mo.getNumRows();
					long cols = mo.getNumColumns();
					ret = !isInMemoryResultMerge(rows, cols, OptimizerUtils.getRemoteMemBudgetMap(false));
				}
			}
		}
	}
	else
	{
		//every child is visited (no early abort)
		for( OptNode c : n.getChilds() )
			ret |= hasResultMRLeftIndexing(c, resultVars, vars, checkSize);
	}
	
	return ret;
}
/**
 * Heuristically computes the total size of all result matrices; the results
 * are considered large if this total is at least the local memory budget.
 * 
 * For each result variable that is a matrix object, the dense size estimate
 * is counted once if the matrix has no non-zeros, and once per estimated
 * task otherwise (result merge with compare). Variables that are missing or
 * not matrix objects are ignored.
 * 
 * @param pn optimizer tree node of the parfor (provides task partitioner and k)
 * @param resultVars names of the parfor result variables
 * @param vars variable map used to look up the result variables
 * @param checkSize currently not used by this method
 * @return true if the estimated total result size is >= the local memory budget
 * @throws DMLRuntimeException part of the declared signature
 */
protected boolean hasLargeTotalResults( OptNode pn, ArrayList<String> resultVars, LocalVariableMap vars, boolean checkSize ) 
	throws DMLRuntimeException
{
	double totalSize = 0;
	
	//get num tasks according to task partitioning 
	PTaskPartitioner tp = PTaskPartitioner.valueOf(pn.getParam(ParamType.TASK_PARTITIONER));
	int k = pn.getK();
	long W = estimateNumTasks(tp, _N, k); 
	
	for( String var : resultVars )
	{
		//Potential unknowns: for local result var of child parfor (but we're only interested in top level)
		//Potential scalars: for disabled dependency analysis and unbounded scoping
		//(instanceof already covers the null case)
		Data dat = vars.get( var );
		if( dat instanceof MatrixObject )
		{
			MatrixObject mo = (MatrixObject) dat;
			long rows = mo.getNumRows();
			long cols = mo.getNumColumns();
			long nnz = mo.getNnz();
			
			if( nnz > 0 ) //w/ compare
			{
				totalSize += W * OptimizerUtils.estimateSizeExactSparsity(rows, cols, 1.0);
			}
			else //in total at most as dimensions (due to disjoint results)
			{
				totalSize += OptimizerUtils.estimateSizeExactSparsity(rows, cols, 1.0);
			}
		}
	}
	
	return ( totalSize >= _lm ); //heuristic: large if >= local mem budget 
}
/**
 * Estimates the number of tasks for the given task partitioner:
 * N for NAIVE and FIXED (and as worst-case estimate for any other
 * partitioner), N/k for STATIC, and k * (long)log2(N/k) for the FACTORING
 * variants.
 * 
 * NOTE(review): for N &lt; k the factoring estimate can become zero or
 * negative, and k == 0 raises an ArithmeticException for STATIC - presumably
 * callers guarantee N >= k >= 1; confirm.
 * 
 * @param tp task partitioner
 * @param N number of iterations
 * @param k degree of parallelism
 * @return estimated number of tasks
 */
protected long estimateNumTasks( PTaskPartitioner tp, long N, int k )
{
	switch( tp )
	{
		case STATIC:
			return N / k;
		
		case FACTORING:
		case FACTORING_CMIN:
		case FACTORING_CMAX: {
			double log2 = Math.log(((double)N)/k) / Math.log(2.0);
			return k * (long)log2;
		}
		
		case NAIVE:
		case FIXED:
		default: //N as worst case estimate
			return N;
	}
}
/**
 * Checks whether all left indexing operations on result variables in the
 * subtree rooted at the given node qualify for in-memory result merge.
 * Left indexing leafs whose target is not a result variable, or is not
 * contained in the variable map, do not affect the result.
 * 
 * @param n current optimizer tree node
 * @param resultVars names of the parfor result variables
 * @param vars variable map used to look up the result matrix dimensions
 * @param inLocal if true, the local memory budget is used; otherwise the
 *        remote map memory budget
 * @return true if no left indexing on a result variable violates the
 *         in-memory result merge condition
 * @throws DMLRuntimeException part of the declared signature
 */
protected boolean hasOnlyInMemoryResults( OptNode n, ArrayList<String> resultVars, LocalVariableMap vars, boolean inLocal ) 
	throws DMLRuntimeException
{
	boolean ret = true;
	
	if( n.isLeaf() )
	{
		String opName = n.getParam(ParamType.OPSTRING);
		//check opstring (null-safe because a leaf node is not guaranteed to
		//carry an opstring; consistent with hasResultMRLeftIndexing)
		if( opName != null && opName.equals(LeftIndexingOp.OPSTRING) )
		{
			LeftIndexingOp hop = (LeftIndexingOp) OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
			//check against set of result variable names
			String varName = hop.getInput().get(0).getName();
			if( resultVars.contains(varName) && vars.keySet().contains(varName) )
			{
				//dims of result vars must be known at this point in time
				MatrixObject mo = (MatrixObject) vars.get( varName );
				long rows = mo.getNumRows();
				long cols = mo.getNumColumns();
				double memBudget = inLocal ? OptimizerUtils.getLocalMemBudget() : 
					                         OptimizerUtils.getRemoteMemBudgetMap();
				ret &= isInMemoryResultMerge(rows, cols, memBudget);
			}
		}
	}
	else
	{
		//every child is visited (no early abort)
		for( OptNode c : n.getChilds() )
			ret &= hasOnlyInMemoryResults(c, resultVars, vars, inLocal);
	}
	
	return ret;
}
/**
 * Applies the rewrite 'set result merge' to all parfor nodes reachable from
 * the given nodes. For non-parfor nodes, the search descends into their
 * childs; for parfor nodes, the rewrite itself handles nested nodes.
 * 
 * NOTE(review): once a remote (MR/SPARK) parfor has been processed,
 * {@code inLocal} stays false for all subsequent siblings in this
 * collection as well - confirm this is intended.
 * 
 * @param nodes optimizer tree nodes to process
 * @param vars variable map passed on to the rewrite
 * @param inLocal flag passed on to the rewrite
 * @throws DMLRuntimeException part of the declared signature
 */
protected void rInvokeSetResultMerge( Collection<OptNode> nodes, LocalVariableMap vars, boolean inLocal) 
	throws DMLRuntimeException
{
	for( OptNode n : nodes )
	{
		if( n.getNodeType() == NodeType.PARFOR )
		{
			rewriteSetResultMerge(n, vars, inLocal);
			if( n.getExecType()==ExecType.MR || n.getExecType()==ExecType.SPARK )
				inLocal = false;
		}
		else if( n.getChilds()!=null )
		{
			rInvokeSetResultMerge(n.getChilds(), vars, inLocal);
		}
	}
}
/**
 * Decides whether a result matrix of the given dimensions qualifies for
 * in-memory result merge.
 * 
 * Without parallel result merge, the dense in-memory size estimate must be
 * smaller than 1/4 of the memory budget (2x output incl. sparse-dense
 * change, 1x input, 1x compare). With parallel result merge, the number of
 * cells must be smaller than the squared CP threshold. Negative (unknown)
 * dimensions never qualify.
 * 
 * @param rows number of rows, negative if unknown
 * @param cols number of columns, negative if unknown
 * @param memBudget memory budget (only used without parallel result merge)
 * @return true if the result merge can be done in memory
 */
public static boolean isInMemoryResultMerge( long rows, long cols, double memBudget )
{
	//unknown dimensions never qualify
	if( rows < 0 || cols < 0 )
		return false;
	
	if( !ParForProgramBlock.USE_PARALLEL_RESULT_MERGE )
	{
		//1/4 mem budget because: 2xout (incl sparse-dense change), 1xin, 1xcompare  
		return ( MatrixBlock.estimateSizeInMemory(rows, cols, 1.0) < memBudget/4 );
	}
	else
	{
		//compute the number of cells in double precision because the long
		//product rows*cols can overflow (and wrap to a small or negative
		//value) for very large dimensions
		return ( ((double)rows) * cols < Math.pow(Hop.CPThreshold, 2) );
	}
}
///////
//REWRITE set recompile memory budget
///
/**
 * Rewrite 'set recompile memory budget': for a parfor node with exec type
 * CP, sets the recompile memory budget of the mapped parfor program block
 * to the local memory budget divided by the total degree of parallelism of
 * the node. Nodes with other exec types are left unchanged because
 * recompilation only happens at the master node.
 * 
 * @param n optimizer tree node of the parfor
 */
protected void rewriteSetRecompileMemoryBudget( OptNode n )
{
	//default (only used for logging if the rewrite does not apply)
	double budget = _lm;
	
	if( n.getExecType() == ExecType.CP )
	{
		//local recompile memory budget per parallel worker
		budget = _lm / n.getTotalK();
		
		//modify runtime plan
		Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
		ParForProgramBlock parforPB = (ParForProgramBlock) mapped[1];
		parforPB.setRecompileMemoryBudget( budget );
	}
	
	_numEvaluatedPlans++;
	LOG.debug(getOptMode()+" OPT: rewrite 'set recompile memory budget' - result="+toMB(budget) );
}
///////
//REWRITE remove recursive parfor
///
/**
 * Rewrite 'remove recursive parfor': finds all parfor program blocks below
 * recursive function calls and converts them into for program blocks. If
 * the root parfor itself is among them, the recursive function containing
 * it is unfolded first.
 * 
 * @param n optimizer tree node of the root parfor
 * @param vars variable map passed on to the function unfolding
 * @throws DMLRuntimeException wraps any exception raised during function unfolding
 * @throws DMLUnsupportedOperationException part of the declared signature
 */
protected void rewriteRemoveRecursiveParFor(OptNode n, LocalVariableMap vars) 
	throws DMLRuntimeException, DMLUnsupportedOperationException 
{
	int count = 0; //num removed parfor
	
	//find recursive parfor
	HashSet<ParForProgramBlock> recPBs = new HashSet<ParForProgramBlock>();
	rFindRecursiveParFor( n, recPBs, false );

	if( !recPBs.isEmpty() )
	{
		//unfold if necessary
		try 
		{
			ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
		        						.getAbstractPlanMapping().getMappedProg(n.getID())[1];
			if( recPBs.contains(pfpb) ) 
				rFindAndUnfoldRecursiveFunction(n, pfpb, recPBs, vars);
		}
		catch(Exception ex)
		{
			throw new DMLRuntimeException(ex);
		}
		
		//remove recursive parfor (parfor to for)
		count = removeRecursiveParFor(n, recPBs);
	}
	
	_numEvaluatedPlans++;
	LOG.debug(getOptMode()+" OPT: rewrite 'remove recursive parfor' - result="+recPBs.size()+"/"+count );
}
/**
 * Recursively collects the program blocks of all parfor nodes that are
 * located in a recursive context, i.e., below a recursive function call
 * node (or below the given node if {@code recContext} is already set).
 * 
 * @param n current optimizer tree node
 * @param cand output set of parfor program blocks in recursive context (modified in place)
 * @param recContext true if the given node is already in a recursive context
 */
protected void rFindRecursiveParFor( OptNode n, HashSet<ParForProgramBlock> cand, boolean recContext )
{
	//recursive invocation
	if( !n.isLeaf() ) 
	{
		for( OptNode c : n.getChilds() )
		{
			//a recursive function call opens a recursive context for its subtree
			if( c.getNodeType() == NodeType.FUNCCALL && c.isRecursive() )
				rFindRecursiveParFor(c, cand, true);
			else
				rFindRecursiveParFor(c, cand, recContext);
		}
	}
	
	//add candidate program blocks
	if( recContext && n.getNodeType()==NodeType.PARFOR )
	{
		ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
								    .getAbstractPlanMapping().getMappedProg(n.getID())[1];
		cand.add(pfpb);
	}
}
/**
* Searches the subtree rooted at the given node for a recursive function call
* whose subtree contains the given parfor. If found, the called function is
* deep-copied and registered under a new name (unfold prefix + original name),
* the old subtree is relinked to the new function, and a new function call
* node replaces the old node in its parent. The search does not descend below
* recursive function call nodes.
*
* @param n current optimizer tree node
* @param parfor parfor program block that must be contained in the function to unfold
* @param recPBs set of parfor program blocks in recursive context (extended in place)
* @param vars variable map passed on to recursive invocations
* @throws DMLRuntimeException
* @throws DMLUnsupportedOperationException
* @throws HopsException
* @throws LanguageException
*/
protected void rFindAndUnfoldRecursiveFunction( OptNode n, ParForProgramBlock parfor, HashSet recPBs, LocalVariableMap vars )
throws DMLRuntimeException, DMLUnsupportedOperationException, HopsException, LanguageException
{
//unfold if found
if( n.getNodeType() == NodeType.FUNCCALL && n.isRecursive())
{
boolean exists = rContainsNode(n, parfor);
if( exists )
{
//split the function key into namespace and name, and derive the name of the unfolded copy
String fnameKey = n.getParam(ParamType.OPSTRING);
String[] names = fnameKey.split(Program.KEY_DELIM);
String fnamespace = names[0];
String fname = names[1];
String fnameNew = FUNCTION_UNFOLD_NAMEPREFIX + fname;
//unfold function: deep copy the function program block and register the copy
//under the new name in both the runtime program and the DML program
FunctionOp fop = (FunctionOp) OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
Program prog = parfor.getProgram();
DMLProgram dmlprog = parfor.getStatementBlock().getDMLProg();
FunctionProgramBlock fpb = prog.getFunctionProgramBlock(fnamespace, fname);
FunctionProgramBlock copyfpb = ProgramConverter.createDeepCopyFunctionProgramBlock(fpb, new HashSet(), new HashSet());
prog.addFunctionProgramBlock(fnamespace, fnameNew, copyfpb);
dmlprog.addFunctionStatementBlock(fnamespace, fnameNew, (FunctionStatementBlock)copyfpb.getStatementBlock());
//replace function names in old subtree (link to new function)
rReplaceFunctionNames(n, fname, fnameNew);
//recreate sub opttree: new function call node mapped to the same function hop,
//exchanged against the old node in the parent
String fnameNewKey = fnamespace + Program.KEY_DELIM + fnameNew;
OptNode nNew = new OptNode(NodeType.FUNCCALL);
OptTreeConverter.getAbstractPlanMapping().putHopMapping(fop, nNew);
nNew.setExecType(ExecType.CP);
nNew.addParam(ParamType.OPSTRING, fnameNewKey);
long parentID = OptTreeConverter.getAbstractPlanMapping().getMappedParentID(n.getID());
OptTreeConverter.getAbstractPlanMapping().getOptNode(parentID).exchangeChild(n, nNew);
HashSet memo = new HashSet();
memo.add(fnameKey); //required if functionop not shared (because not replaced yet)
memo.add(fnameNewKey); //required if functionop shared (indirectly replaced)
//NOTE(review): the following loop statement appears truncated in this copy of
//the file (loop condition and body are partially missing) - restore it from
//the original source before relying on this method
for( int i=0; i()) );
//register all parfor program blocks of the new subtree as recursive
recPBs.addAll( rGetAllParForPBs(nNew, new HashSet()) );
//replace function names in new subtree (recursive link to new function)
rReplaceFunctionNames(nNew, fname, fnameNew);
}
//else, we can return anyway because we will not find that parfor
return;
}
//recursive invocation (only for non-recursive functions)
if( !n.isLeaf() )
for( OptNode c : n.getChilds() )
rFindAndUnfoldRecursiveFunction(c, parfor, recPBs, vars);
}
/**
 * Checks whether the subtree rooted at the given node contains a parfor
 * node that is mapped to exactly the given parfor program block (reference
 * equality). The search stops at the first match.
 * 
 * @param n current optimizer tree node
 * @param parfor parfor program block to search for
 * @return true if the program block is mapped to a parfor node in the subtree
 */
protected boolean rContainsNode( OptNode n, ParForProgramBlock parfor )
{
	//probe the current node
	if( n.getNodeType() == NodeType.PARFOR )
	{
		Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
		ParForProgramBlock current = (ParForProgramBlock) mapped[1];
		if( parfor == current )
			return true;
	}
	
	//probe the childs with early abort on the first match
	if( n.isLeaf() )
		return false;
	for( OptNode child : n.getChilds() ) {
		if( rContainsNode(child, parfor) )
			return true;
	}
	
	return false;
}
/**
 * Recursively collects the program blocks of all parfor nodes in the
 * subtree rooted at the given node.
 * 
 * @param n current optimizer tree node
 * @param pbs output set of parfor program blocks (modified in place)
 * @return the given set {@code pbs}, extended by all parfor program blocks found
 */
protected HashSet<ParForProgramBlock> rGetAllParForPBs( OptNode n, HashSet<ParForProgramBlock> pbs )
{
	//collect parfor
	if( n.getNodeType()==NodeType.PARFOR )
	{
		ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
								    .getAbstractPlanMapping().getMappedProg(n.getID())[1];
		pbs.add(pfpb);
	}
	
	//recursive invocation
	if( !n.isLeaf() ) {
		for( OptNode c : n.getChilds() )
			rGetAllParForPBs(c, pbs);
	}
	
	return pbs;
}
/**
* For a function call node whose function name equals the old or the new
* name, sets the opstring of the node to the function key built from the
* namespace and the new name, and fetches the instructions of the parent
* program block in order to update the function name there.
*
* NOTE(review): in this copy of the file the remainder of this method, and
* the Javadoc and header of the subsequent method (presumably
* removeRecursiveParFor, as invoked recursively below), are truncated; only
* the visible parts are documented here.
*
* @param n optimizer tree node to process
* @param oldName function name to be replaced
* @param newName replacement function name
* @throws DMLRuntimeException
* @throws DMLUnsupportedOperationException
* @throws HopsException
*/
protected void rReplaceFunctionNames( OptNode n, String oldName, String newName )
throws DMLRuntimeException, DMLUnsupportedOperationException, HopsException
{
if( n.getNodeType() == NodeType.FUNCCALL)
{
FunctionOp fop = (FunctionOp) OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
//opstring is of the form namespace + key delimiter + function name
String[] names = n.getParam(ParamType.OPSTRING).split(Program.KEY_DELIM);
String fnamespace = names[0];
String fname = names[1];
if( fname.equals(oldName) || fname.equals(newName) ) //newName if shared hop
{
//set opttree function name
n.addParam(ParamType.OPSTRING, DMLProgram.constructFunctionKey(fnamespace,newName));
//set instruction function name
long parentID = OptTreeConverter.getAbstractPlanMapping().getMappedParentID(n.getID());
ProgramBlock pb = (ProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(parentID)[1];
ArrayList instArr = pb.getInstructions();
//NOTE(review): the following line is truncated in this copy of the file; it
//fuses the start of the instruction loop with the end of the parameter list of
//the next method - restore both from the original source
for( int i=0; i recPBs )
throws DMLUnsupportedOperationException, DMLRuntimeException
{
//visible remainder: converts every child parfor whose program block is
//contained in recPBs into a for loop and returns the number of conversions
int count = 0;
if( !n.isLeaf() )
{
for( OptNode sub : n.getChilds() )
{
if( sub.getNodeType() == NodeType.PARFOR )
{
long id = sub.getID();
Object[] progobj = OptTreeConverter.getAbstractPlanMapping().getMappedProg(id);
ParForStatementBlock pfsb = (ParForStatementBlock)progobj[0];
ParForProgramBlock pfpb = (ParForProgramBlock)progobj[1];
if( recPBs.contains(pfpb) )
{
//create for pb as replacement
Program prog = pfpb.getProgram();
ForProgramBlock fpb = ProgramConverter.createShallowCopyForProgramBlock(pfpb, prog);
//replace parfor with for, and update objectmapping
OptTreeConverter.replaceProgramBlock(n, sub, pfpb, fpb, false);
//update link to statement block
fpb.setStatementBlock(pfsb);
//update node (now a sequential for loop with k=1)
sub.setNodeType(NodeType.FOR);
sub.setK(1);
count++;
}
}
//descend into the child regardless of whether it was converted
count += removeRecursiveParFor(sub, recPBs);
}
}
return count;
}
///////
//REWRITE remove unnecessary parfor
///
/**
 * Rewrite 'remove unnecessary parfor': converts all parfor nodes with a
 * degree of parallelism of one below the given node into for loops and logs
 * the number of converted nodes.
 * 
 * @param n optimizer tree node whose subtree is processed
 * @throws DMLRuntimeException part of the declared signature
 * @throws DMLUnsupportedOperationException part of the declared signature
 */
protected void rewriteRemoveUnnecessaryParFor(OptNode n) 
	throws DMLRuntimeException, DMLUnsupportedOperationException 
{
	int numRemoved = removeUnnecessaryParFor( n );
	
	_numEvaluatedPlans++;
	LOG.debug(getOptMode()+" OPT: rewrite 'remove unnecessary parfor' - result="+numRemoved );
}
/**
 * Recursively replaces every child parfor node with a degree of parallelism
 * of one by a for loop: a shallow-copied for program block replaces the
 * parfor program block, is linked to the parfor statement block, and the
 * node type is changed to FOR. The given node itself is never converted.
 * 
 * @param n optimizer tree node whose childs are processed
 * @return number of parfor nodes converted in the subtree
 * @throws DMLUnsupportedOperationException part of the declared signature
 * @throws DMLRuntimeException part of the declared signature
 */
protected int removeUnnecessaryParFor( OptNode n ) 
	throws DMLUnsupportedOperationException, DMLRuntimeException
{
	int removed = 0;
	if( n.isLeaf() )
		return removed;
	
	for( OptNode child : n.getChilds() )
	{
		boolean sequentialParfor = child.getNodeType() == NodeType.PARFOR 
			&& child.getK() == 1;
		
		if( sequentialParfor )
		{
			//resolve statement block and program block of the parfor
			Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(child.getID());
			ParForStatementBlock parforSB = (ParForStatementBlock) mapped[0];
			ParForProgramBlock parforPB = (ParForProgramBlock) mapped[1];
			
			//create for program block as replacement
			ForProgramBlock forPB = ProgramConverter
				.createShallowCopyForProgramBlock(parforPB, parforPB.getProgram());
			
			//replace parfor with for, and update object mapping
			OptTreeConverter.replaceProgramBlock(n, child, parforPB, forPB, false);
			
			//update link to statement block
			forPB.setStatementBlock(parforSB);
			
			//update node
			child.setNodeType(NodeType.FOR);
			child.setK(1);
			removed++;
		}
		
		//descend into the child regardless of whether it was converted
		removed += removeUnnecessaryParFor(child);
	}
	
	return removed;
}
////////////////////////
// Helper methods //
////////////////////////
/**
 * Formats the given number of bytes via {@code OptimizerUtils.toMB} and
 * appends the unit suffix "MB".
 * 
 * @param inB size in bytes
 * @return formatted size followed by "MB"
 */
public static String toMB( double inB )
{
	StringBuilder sb = new StringBuilder();
	sb.append(OptimizerUtils.toMB(inB));
	sb.append("MB");
	return sb.toString();
}
}