org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.controlprogram.parfor.opt;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.Map.Entry;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import org.apache.sysml.api.DMLException;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.MMTSJ.MMTSJType;
import org.apache.sysml.parser.DMLProgram;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.parser.DataIdentifier;
import org.apache.sysml.parser.ExternalFunctionStatement;
import org.apache.sysml.parser.ParseException;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
import org.apache.sysml.runtime.controlprogram.Program;
import org.apache.sysml.runtime.controlprogram.ProgramBlock;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
import org.apache.sysml.runtime.instructions.CPInstructionParser;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction;
import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.util.LocalFileUtils;
import org.apache.sysml.runtime.util.MapReduceTool;
/**
* DML Instructions Performance Test Tool:
*
* Creates an offline performance profile (required once per installation) of DML instructions.
* The profile is a combination of all individual statistical models trained per combination of
* instruction and test configuration. In order to train those models, we execute and measure
* real executions of DML instructions on random input data. Finally, at runtime, the profile
* is used by the cost estimator to produce statistical estimates for cost-based optimization.
*
*
*/
public class PerfTestTool
{
//public parameters (used for estimation)
//Data size is the number of matrix cells: the 1D test case uses it directly as the row count
//of a column vector, or takes its square root as the side length of a square matrix.
public static final long MIN_DATASIZE = 1000;
public static final long MAX_DATASIZE = 1000000;
//data size used while another variable (e.g., sparsity) is varied;
//note: the literal is a rounded midpoint, not exactly the formula in the trailing comment
public static final long DEFAULT_DATASIZE = 500000;//(MAX_DATASIZE-MIN_DATASIZE)/2;
//multiplier applied to DEFAULT_DATASIZE when SORT_IO_MEM is the test variable
public static final long DATASIZE_MR_SCALE = 20;
//dimension bounds; in the visible code only referenced by the disabled multi-dim test definitions
public static final double MIN_DIMSIZE = 1;
public static final double MAX_DIMSIZE = 1000;
public static final double MIN_SPARSITY = 0.1;
public static final double MAX_SPARSITY = 1.0;
//sparsity used while data size is varied;
//note: the literal does not equal the formula in the trailing comment (which would give 0.45)
public static final double DEFAULT_SPARSITY = 0.5;//(MAX_SPARSITY-MIN_SPARSITY)/2;
//sort buffer bounds; in the visible code only referenced by the disabled MR test definitions
public static final double MIN_SORT_IO_MEM = 10;
public static final double MAX_SORT_IO_MEM = 500;
public static final double DEFAULT_SORT_IO_MEM = 256; //BI: default 256MB, hadoop: default 100MB
//internal parameters
//if true, the static initializer loads the profile from PERF_PROFILE_FNAME at class-load time
private static final boolean READ_STATS_ON_STARTUP = false;
//NOTE(review): presumably the number of runs averaged per sample point; the loop using it is not visible here
private static final int TEST_REPETITIONS = 10;
//number of sample points per test definition (also the row count of the regression input)
private static final int NUM_SAMPLES_PER_TEST = 11;
//regression model shape: columns = MODEL_MAX_ORDER (+1 if MODEL_INTERCEPT)
private static final int MODEL_MAX_ORDER = 2;
private static final boolean MODEL_INTERCEPT = true;
//working directory for generated data, CSV exchange files and the profile
private static final String PERF_TOOL_DIR = "./conf/PerfTestTool/";
// private static final String PERF_RESULTS_FNAME = PERF_TOOL_DIR + "%id%.dat";
private static final String PERF_PROFILE_FNAME = PERF_TOOL_DIR + "performance_profile.xml";
//DML regression template (relative to the working directory) and its instantiated temporary copy
private static final String DML_SCRIPT_FNAME = "./src/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml";
private static final String DML_TMP_FNAME = PERF_TOOL_DIR + "temp.dml";
//XML profile tags and attributes
private static final String XML_PROFILE = "profile";
private static final String XML_DATE = "date";
private static final String XML_INSTRUCTION = "instruction";
private static final String XML_ID = "id";
private static final String XML_NAME = "name";
private static final String XML_COSTFUNCTION = "cost_function";
private static final String XML_MEASURE = "measure";
private static final String XML_VARIABLE = "lvariable";
private static final String XML_INTERNAL_VARIABLES = "pvariables";
private static final String XML_DATAFORMAT = "dataformat";
//separator for list-valued XML attributes (a comma, written as a unicode escape)
private static final String XML_ELEMENT_DELIMITER = "\u002c"; //",";
//ID sequences for instructions and test definitions
private static IDSequence _seqInst = null;
private static IDSequence _seqTestDef = null;
//registered instructions and test definitions
private static HashMap _regTestDef = null;
private static HashMap _regInst = null;
private static HashMap _regInst_IDNames = null;
private static HashMap _regInst_NamesID = null;
private static HashMap _regInst_IDTestDef = null;
private static HashMap _regInst_IDVectors = null;
private static HashMap _regInst_IDIOSchema = null;
protected static final Log LOG = LogFactory.getLog(PerfTestTool.class.getName());
private static Integer[] _defaultConf = null;
// private static Integer[] _MRConf = null;
//raw measurement data (instID, physical defID, results)
private static HashMap>> _results = null;
//profile data
private static boolean _flagReadData = false;
private static HashMap> _profile = null;
/**
 * Quantity that a test definition measures. Only EXEC_TIME is currently
 * registered by registerTestConfigurations.
 */
public enum TestMeasure //logical test measure
{
//elapsed time of the program block execution, taken with Timing
EXEC_TIME,
//maximum memory consumption reported by PerfTestMemoryObserver during execution
MEMORY_USAGE
}
/**
 * Logical variable that a test definition varies. Only DATA_SIZE and SPARSITY
 * are used by the currently registered test definitions.
 */
public enum TestVariable //logical test variable
{
DATA_SIZE,
SPARSITY,
PARALLELISM,
//some MR-specific configuration properties
SORT_IO_MEM
}
/**
 * Physical variable(s) behind a logical test variable. The one-dimensional
 * test case handles DATA_SIZE, SPARSITY and SORT_IO_MEM explicitly; all other
 * values fall through its default branch without setting any test variable.
 */
public enum InternalTestVariable //physical test variable
{
DATA_SIZE,
DIM1_SIZE,
DIM2_SIZE,
DIM3_SIZE,
SPARSITY,
NUM_THREADS,
NUM_MAPPERS,
NUM_REDUCERS,
SORT_IO_MEM
}
/**
 * Input/output arity of a registered instruction, named INPUTS_OUTPUTS.
 * The one-dimensional test case uses it to decide which variables to create:
 * input "A" for every schema except NONE_*, input "B" for BINARY_* (and also
 * for UNARY_UNARY), and result "C" for every *_UNARY schema.
 */
public enum IOSchema
{
NONE_NONE,
NONE_UNARY,
UNARY_NONE,
UNARY_UNARY,
BINARY_NONE,
BINARY_UNARY
}
/**
 * Format of the generated test data. Only DENSE is used by the currently
 * registered test definitions.
 */
public enum DataFormat //logical data format
{
DENSE,
SPARSE
}
/**
 * Names of constant (non-modelled) system characteristics.
 * NOTE(review): not referenced by the code visible in this part of the file;
 * DFS/LFS presumably mean distributed/local file system - confirm.
 */
public enum TestConstants //logical test constants
{
DFS_READ_THROUGHPUT,
DFS_WRITE_THROUGHPUT,
LFS_READ_THROUGHPUT,
LFS_WRITE_THROUGHPUT
}
//Class initializer: creates the empty ID sequences, registries, result store
//and profile, and optionally loads an existing profile from disk.
static
{
	//init repository (type arguments restored from how the maps are used in this class)
	_seqInst = new IDSequence();
	_seqTestDef = new IDSequence();
	_regTestDef = new HashMap<Integer, PerfTestDef>();
	_regInst = new HashMap<Integer, Instruction>();
	_regInst_IDNames = new HashMap<Integer, String>();
	_regInst_NamesID = new HashMap<String, Integer>();
	_regInst_IDTestDef = new HashMap<Integer, Integer[]>();
	_regInst_IDVectors = new HashMap<Integer, Boolean>();
	_regInst_IDIOSchema = new HashMap<Integer, IOSchema>();
	_results = new HashMap<Integer, HashMap<Integer, LinkedList<Double>>>();
	_profile = new HashMap<Integer, HashMap<Integer, CostFunction>>();
	_flagReadData = false;
	//load existing profile if required
	try
	{
		if( READ_STATS_ON_STARTUP )
			readProfile( PERF_PROFILE_FNAME );
	}
	catch(Exception ex)
	{
		//a failure here surfaces as a class initialization error
		throw new RuntimeException(ex);
	}
}
/**
 * Makes sure the performance profile is available before it is used.
 * While the read flag is unset, all test definitions and instructions are
 * (re-)registered and the default profile file is read.
 *
 * @throws DMLRuntimeException if registration or reading fails (the cause is
 *         wrapped), or if no profile object exists afterwards
 */
public static void lazyInit()
	throws DMLRuntimeException
{
	//first access: nothing has been read so far
	final boolean alreadyRead = _flagReadData;
	if( !alreadyRead )
	{
		try
		{
			//registration must precede reading the profile
			registerTestConfigurations();
			registerInstructions();
			readProfile( PERF_PROFILE_FNAME );
		}
		catch(Exception e)
		{
			throw new DMLRuntimeException(e);
		}
	}
	if( null == _profile )
		throw new DMLRuntimeException("Performance test results have not been loaded completely.");
}
/**
 * Tells whether an instruction name is known to the performance test tool.
 *
 * @param opStr instruction name as used at registration
 * @return true if the name is contained in the name-to-ID registry
 * @throws DMLRuntimeException if the lazy initialization fails
 */
public static boolean isRegisteredInstruction(String opStr)
	throws DMLRuntimeException
{
	//the registry is only populated after initialization
	lazyInit();
	final boolean registered = _regInst_NamesID.containsKey(opStr);
	return registered;
}
/**
 * Looks up the cost function trained for an instruction and test configuration.
 *
 * @param instName   instruction name as used at registration
 * @param measure    logical test measure
 * @param variable   logical test variable
 * @param dataformat logical data format
 * @return the matching cost function, or null if the instruction is unknown,
 *         the profile has no entry for it, or no test definition matches
 * @throws DMLRuntimeException if the lazy initialization fails
 */
public static CostFunction getCostFunction( String instName, TestMeasure measure, TestVariable variable, DataFormat dataformat )
	throws DMLRuntimeException
{
	//init if required
	lazyInit();
	int instID = getInstructionID( instName );
	if( instID == -1 ) //no existing profile
		return null;
	//a registered instruction may still be missing from the loaded profile;
	//report that as "no cost function" instead of failing with a NullPointerException
	if( !_profile.containsKey(instID) || _profile.get(instID) == null )
		return null;
	int tdefID = getMappedTestDefID(instID, measure, variable, dataformat);
	return _profile.get(instID).get(tdefID);
}
/**
 * Placeholder for cost functions over several test variables; always fails.
 *
 * @param measure    logical test measure
 * @param variable   logical test variables
 * @param dataformat logical data format
 * @return never returns normally
 */
public CostFunction getInvariantCostFunction( TestMeasure measure, TestVariable[] variable, DataFormat dataformat )
{
	//TODO: implement for additional rewrites
	final String msg = "Not implemented yet.";
	throw new RuntimeException(msg);
}
/**
 * Runs the complete profiling pipeline: registers test definitions and
 * instructions, executes all tests, writes the raw results, computes and
 * reads back the regression models, and writes the final XML profile.
 *
 * @return true if every step completed; false if any step threw
 *         (the exception is logged, not propagated)
 */
@SuppressWarnings("all")
public static boolean runTest()
{
	try
	{
		Timing timer = new Timing();
		timer.start();
		//caching has to be initialized before any instruction is executed
		LazyWriteBuffer.init();
		//register all testdefs and instructions, then run every combination
		registerTestConfigurations();
		registerInstructions();
		executeTest();
		//regression input shape: one row per sample, one column per model term
		final int numRows = NUM_SAMPLES_PER_TEST;
		final int numCols = MODEL_MAX_ORDER + (MODEL_INTERCEPT ? 1 : 0);
		HashMap idMapping = writeResults( PERF_TOOL_DIR );
		computeRegressionModels( DML_SCRIPT_FNAME, DML_TMP_FNAME, PERF_TOOL_DIR, idMapping.size(), numRows, numCols);
		readRegressionModels( PERF_TOOL_DIR, idMapping);
		//execConstantRuntimeTest();
		//execConstantMemoryTest();
		//write final profile to XML file
		writeProfile(PERF_TOOL_DIR, PERF_PROFILE_FNAME);
		System.out.format("SystemML PERFORMANCE TEST TOOL: finished profiling (in %.2f min), profile written to "+PERF_PROFILE_FNAME+"%n", timer.stop()/60000);
		return true;
	}
	catch(Exception ex)
	{
		LOG.error("Failed to run performance test.", ex);
		return false;
	}
}
/**
 * Registers the default test definitions and remembers their IDs.
 * For each measure/data format combination, two definitions are created in
 * this order: one varying the data size and one varying the sparsity.
 */
private static void registerTestConfigurations()
{
	//restart numbering so that test definition IDs are consistent across calls
	_seqTestDef.reset();
	//register default testdefs //TODO
	final TestMeasure[] measures = new TestMeasure[]{ TestMeasure.EXEC_TIME/*, TestMeasure.MEMORY_USAGE*/ };
	final DataFormat[] formats = new DataFormat[]{DataFormat.DENSE/*,DataFormat.SPARSE*/};
	final Integer[] confIDs = new Integer[measures.length*formats.length*2];
	int pos = 0;
	for( int mi=0; mi<measures.length; mi++ ) //for all measures
	{
		final TestMeasure m = measures[mi];
		for( int di=0; di<formats.length; di++ ) //for all data formats
		{
			final DataFormat d = formats[di];
			PerfTestDef sizeDef = new PerfTestDef(m, TestVariable.DATA_SIZE, d, InternalTestVariable.DATA_SIZE,
				MIN_DATASIZE, MAX_DATASIZE, NUM_SAMPLES_PER_TEST );
			confIDs[pos] = registerTestDef( sizeDef );
			pos++;
			PerfTestDef sparsityDef = new PerfTestDef(m, TestVariable.SPARSITY, d, InternalTestVariable.SPARSITY,
				MIN_SPARSITY, MAX_SPARSITY, NUM_SAMPLES_PER_TEST );
			confIDs[pos] = registerTestDef( sparsityDef );
			pos++;
		}
	}
	//register advanced (multi-dim) test defs
	//FIXME enable
	/*for( TestMeasure m : M ) //for all measures
	for( DataFormat d : D ) //for all data formats
	{
	registerTestDef( new PerfTestDef( m, TestVariable.DATA_SIZE, d,
	new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE},
	MIN_DIMSIZE, MAX_DIMSIZE, NUM_SAMPLES_PER_TEST ) );
	}*/
	//register MR specific instructions FIXME: just for test
	/*Integer[] mrConf = new Integer[D.length];
	i = 0;
	for( DataFormat d : D )
	{
	mrConf[i++] = registerTestDef( new PerfTestDef(TestMeasure.EXEC_TIME, TestVariable.SORT_IO_MEM, d,
	InternalTestVariable.SORT_IO_MEM,
	MIN_SORT_IO_MEM, MAX_SORT_IO_MEM, NUM_SAMPLES_PER_TEST ) );
	}*/
	//set default testdefs
	_defaultConf = confIDs;
	//_MRConf = mrConf;
}
/**
 * Registers all instructions to be profiled. Currently only the CP "tsmm"
 * instruction is active; the remaining registrations are kept as disabled code.
 *
 * @throws DMLUnsupportedOperationException declared for the instruction parser
 * @throws DMLRuntimeException declared for the instruction parser
 */
private static void registerInstructions()
	throws DMLUnsupportedOperationException, DMLRuntimeException
{
	//restart numbering so that instruction IDs are consistent across calls
	_seqInst.reset();
	///////
	// CP instructions
	//matrix multiply mmtsj: matrix operand A in, matrix operand C out
	final String matrixDouble = Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE";
	final String tsmmName = "CP"+Lop.OPERAND_DELIMITOR+"tsmm";
	final String tsmmInst = tsmmName+Lop.OPERAND_DELIMITOR+"A"+matrixDouble+Lop.OPERAND_DELIMITOR+"C"+matrixDouble+Lop.OPERAND_DELIMITOR+MMTSJType.LEFT;
	registerInstruction( tsmmName, CPInstructionParser.parseSingleInstruction(tsmmInst),
		getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
	/*
	//matrix multiply
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
	getDefaultTestDefs(), false, IOSchema.BINARY_UNARY );
	////registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
	//// changeToMuliDimTestDefs(TestVariable.DATA_SIZE, getDefaultTestDefs()) );
	//rand
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"Rand", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"Rand"+Lops.OPERAND_DELIMITOR+"rows=1"+Lops.OPERAND_DELIMITOR+"cols=1"+Lops.OPERAND_DELIMITOR+"rowsInBlock=1000"+Lops.OPERAND_DELIMITOR+"colsInBlock=1000"+Lops.OPERAND_DELIMITOR+"min=1.0"+Lops.OPERAND_DELIMITOR+"max=100.0"+Lops.OPERAND_DELIMITOR+"sparsity=1.0"+Lops.OPERAND_DELIMITOR+"seed=7"+Lops.OPERAND_DELIMITOR+"pdf=uniform"+Lops.OPERAND_DELIMITOR+"dir=."+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
	getDefaultTestDefs(), false, IOSchema.NONE_UNARY );
	//matrix transpose
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"r'", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"r'"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
	getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
	//sum
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"uak+", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"uak+"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), //needs B instead of C
	getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
	//external function
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"extfunct", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"extfunct"+Lops.OPERAND_DELIMITOR+DMLProgram.DEFAULT_NAMESPACE+""+Lops.OPERAND_DELIMITOR+"execPerfTestExtFunct"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"A"+Lops.OPERAND_DELIMITOR+"C"),
	getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
	//central moment
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cm", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cm"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"2"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"INT"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
	getDefaultTestDefs(), true, IOSchema.UNARY_NONE );
	//co-variance
	registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cov", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cov"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
	getDefaultTestDefs(), true, IOSchema.BINARY_NONE );
	*/
	/*
	///////
	// MR instructions
	registerInstruction( "jobtypeMMRJ", createMRJobInstruction(JobType.MMRJ,
	MRInstructionParser.parseSingleInstruction("MR"+Lops.OPERAND_DELIMITOR+
	"rmm"+Lops.OPERAND_DELIMITOR+
	"0"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
	"1"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
	"2"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE ")),
	_MRConf, false, IOSchema.BINARY_UNARY );
	*/
	/*ADD ADDITIONAL INSTRUCTIONS HERE*/
	//extend list to all (expensive) instructions; maybe also: createvar, assignvar, cpvar, rm, mv, setfilename, rmfilevar
}
/*
private static Instruction createMRJobInstruction(JobType type, MRInstruction inst)
{
MRJobInstruction mrinst = new MRJobInstruction(type);
if( type == JobType.MMRJ )
{
ArrayList inLab = new ArrayList();
ArrayList outLab = new ArrayList();
inLab.add("A");
inLab.add("B");
outLab.add("C");
mrinst.setMMRJInstructions(new String[]{"A","B"},
"",
inst.toString(),
"",
"",
new String[]{"C"},
new byte[]{2},
10, 1 );
}
return mrinst;
}
*/
/**
 * Stores a test definition under the next ID of the test definition sequence.
 *
 * @param def test definition to register
 * @return the ID assigned to the definition
 */
private static int registerTestDef( PerfTestDef def )
{
	final int assignedID = (int)_seqTestDef.getNextID();
	_regTestDef.put( assignedID, def );
	return assignedID;
}
/**
 * Registers an instruction under the next ID of the instruction sequence.
 *
 * @param iname      instruction name used for lookups
 * @param inst       instruction to profile
 * @param testDefIDs IDs of the test definitions to run for this instruction
 * @param vectors    whether the instruction is tested on vector inputs
 * @param schema     input/output schema of the instruction
 */
private static void registerInstruction( String iname, Instruction inst, Integer[] testDefIDs, boolean vectors, IOSchema schema )
{
	//draw the ID first, then delegate to the explicit-ID variant
	final int nextID = (int)_seqInst.getNextID();
	registerInstruction(nextID, iname, inst, testDefIDs, vectors, schema);
}
/**
 * Records an instruction and its meta data in all instruction registries
 * under the given ID; an existing entry with the same ID or name is replaced.
 *
 * @param ID         instruction ID
 * @param iname      instruction name used for lookups
 * @param inst       instruction to profile
 * @param testDefIDs IDs of the test definitions to run for this instruction
 * @param vector     whether the instruction is tested on vector inputs
 * @param schema     input/output schema of the instruction
 */
private static void registerInstruction( int ID, String iname, Instruction inst, Integer[] testDefIDs, boolean vector, IOSchema schema )
{
	//name <-> ID mapping (both directions)
	_regInst_NamesID.put( iname, ID );
	_regInst_IDNames.put( ID, iname );
	//instruction and its test meta data, all keyed by ID
	_regInst.put( ID, inst );
	_regInst_IDTestDef.put( ID, testDefIDs );
	_regInst_IDVectors.put( ID, vector );
	_regInst_IDIOSchema.put( ID, schema );
}
/**
 * Finds, among the test definitions registered for an instruction, the first
 * one with the given measure, variable and data format.
 *
 * @param instID     instruction ID
 * @param measure    logical test measure
 * @param variable   logical test variable
 * @param dataformat logical data format
 * @return ID of the first matching test definition, or -1 if none matches
 */
private static int getMappedTestDefID( int instID, TestMeasure measure, TestVariable variable, DataFormat dataformat )
{
	final Integer[] candidates = _regInst_IDTestDef.get(instID);
	for( int k=0; k<candidates.length; k++ )
	{
		final Integer defID = candidates[k];
		final PerfTestDef def = _regTestDef.get(defID);
		final boolean differs = def.getMeasure()!=measure
			|| def.getVariable()!=variable
			|| def.getDataformat()!=dataformat;
		if( !differs )
			return defID; //first hit wins
	}
	return -1;
}
/**
 * Convenience variant for a single physical test variable; wraps it into a
 * one-element array and delegates to the array-based lookup.
 *
 * @param measure    logical test measure
 * @param lvariable  logical test variable
 * @param dataformat logical data format
 * @param pvariable  physical test variable
 * @return result of the array-based lookup
 */
@SuppressWarnings("unused")
private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable pvariable )
{
	final InternalTestVariable[] single = new InternalTestVariable[]{ pvariable };
	return getTestDefID(measure, lvariable, dataformat, single);
}
/**
 * Looks up a registered test definition by measure, logical variable, data
 * format and physical variables.
 *
 * NOTE(review): this region is damaged. The text between a '&lt;' and a later
 * '&gt;' was lost at the two loop headers marked below, so the end of this method
 * and the beginning of the following test-execution method (including its
 * signature) are missing. Restore from the upstream source before changing
 * any logic here.
 *
 * @param measure logical test measure
 * @param lvariable logical test variable
 * @param dataformat logical data format
 * @param pvariables physical test variables
 * @return presumably the ID of the matching test definition, or -1 (initial value of ret) - confirm
 */
private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable[] pvariables )
{
int ret = -1;
for( Entry e : _regTestDef.entrySet() )
{
PerfTestDef def = e.getValue();
TestMeasure tmp1 = def.getMeasure();
TestVariable tmp2 = def.getVariable();
DataFormat tmp3 = def.getDataformat();
InternalTestVariable[] tmp4 = def.getInternalVariables();
if( tmp1==measure && tmp2==lvariable && tmp3==dataformat )
{
boolean flag = true;
//NOTE(review): truncated line - the comparison loop of getTestDefID ends here and the
//loop over all registered instructions of the test-execution method starts mid-statement
for( int i=0; i inst : _regInst.entrySet() )
{
int instID = inst.getKey();
System.out.println( "Running INSTRUCTION "+_regInst_IDNames.get(instID) );
Integer[] testDefIDs = _regInst_IDTestDef.get(instID);
boolean vectors = _regInst_IDVectors.get(instID);
IOSchema schema = _regInst_IDIOSchema.get(instID);
//create tmp program block and set instruction
Program prog = new Program();
ProgramBlock pb = new ProgramBlock( prog );
ArrayList ainst = new ArrayList();
ainst.add( inst.getValue() );
pb.setInstructions(ainst);
ExecutionContext ec = ExecutionContextFactory.createContext();
//foreach registered test configuration
for( Integer defID : testDefIDs )
{
PerfTestDef def = _regTestDef.get(defID);
TestMeasure m = def.getMeasure();
TestVariable lv = def.getVariable();
DataFormat df = def.getDataformat();
InternalTestVariable[] pv = def.getInternalVariables();
double min = def.getMin();
double max = def.getMax();
double samples = def.getNumSamples();
System.out.println( "Running TESTDEF(measure="+m+", variable="+String.valueOf(lv)+" "+pv.length+", format="+String.valueOf(df)+")" );
//vary input variable
LinkedList dmeasure = new LinkedList();
LinkedList dvariable = generateSequence(min, max, samples);
int plen = pv.length;
if( plen == 1 ) //1D function
{
//one measurement per sample value of the single physical variable
for( Double var : dvariable )
{
dmeasure.add(executeTestCase1D(m, pv[0], df, var, pb, vectors, schema, ec));
}
}
else //multi-dim function
{
//init index stack
int[] index = new int[plen];
//NOTE(review): truncated line - the index initialization and the enumeration of
//all value combinations are missing; only the index increment logic remains
for( int i=0; i=0; i-- )
{
if(i==plen-1)
index[i]++;
else if( index[i+1] >= dlen )
{
index[i]++;
index[i+1]=0;
}
}
}
}
//append values to results
if( !_results.containsKey(instID) )
_results.put(instID, new HashMap>());
_results.get(instID).put(defID, dmeasure);
}
}
}
/**
 * Executes one sample of a one-dimensional test: derives data size, sparsity
 * and dimensions from the varied value, configures the instruction, creates
 * the input/output variables and measures the program block.
 *
 * NOTE(review): this method is damaged. The text between a '&lt;' and a later
 * '&gt;' was lost at the line marked below, which removed the repetition loop
 * and the start of the result reporting. Restore from the upstream source
 * before changing any logic here.
 *
 * @param m test measure
 * @param v physical test variable that is varied
 * @param df data format of generated inputs
 * @param varValue current value of the varied variable
 * @param pb program block holding exactly one instruction
 * @param vectors if true, inputs are column vectors (datasize x 1), otherwise square matrices
 * @param schema input/output schema deciding which variables are created
 * @param ec execution context whose variable map is cleared and refilled
 *
 * @return the measured value
 * @throws DMLRuntimeException
 * @throws DMLUnsupportedOperationException
 * @throws IOException
 */
private static double executeTestCase1D( TestMeasure m, InternalTestVariable v, DataFormat df, double varValue, ProgramBlock pb, boolean vectors, IOSchema schema, ExecutionContext ec )
throws DMLRuntimeException, DMLUnsupportedOperationException, IOException
{
double datasize = -1;
double dim1 = -1, dim2 = -1;
double sparsity = -1;
//double sortio = -1;
System.out.println( "VAR VALUE "+varValue );
//set test variables
switch ( v )
{
case DATA_SIZE:
datasize = varValue;
sparsity = DEFAULT_SPARSITY;
break;
case SPARSITY:
datasize = DEFAULT_DATASIZE;
sparsity = varValue;
break;
case SORT_IO_MEM: //FIXME
datasize = DEFAULT_DATASIZE * DATASIZE_MR_SCALE;
sparsity = DEFAULT_SPARSITY;
//sortio = varValue;
break;
default:
//do nothing
//(datasize and sparsity keep their initial value of -1 for all other variables)
}
//set specific dimensions
if( vectors )
{
dim1 = datasize;
dim2 = 1;
}
else
{
//square matrix with datasize cells
dim1 = Math.sqrt( datasize );
dim2 = dim1;
}
//instruction-specific configurations
Instruction inst = pb.getInstruction(0); //always exactly one instruction
if( inst instanceof DataGenCPInstruction )
{
//data generation has no inputs; its output shape is set on the instruction itself
DataGenCPInstruction rand = (DataGenCPInstruction) inst;
rand.setRows((long)dim1);
rand.setCols((long)dim2);
rand.setSparsity(sparsity);
}
else if ( inst instanceof FunctionCallCPInstruction ) //ExternalFunctionInvocationInstruction
{
//register the external test function (matrix A in, matrix C out) with the program
Program prog = pb.getProgram();
ArrayList in = new ArrayList();
DataIdentifier dat1 = new DataIdentifier("A");
dat1.setDataType(DataType.MATRIX);
dat1.setValueType(ValueType.DOUBLE);
in.add(dat1);
ArrayList out = new ArrayList();
DataIdentifier dat2 = new DataIdentifier("C");
dat2.setDataType(DataType.MATRIX);
dat2.setValueType(ValueType.DOUBLE);
out.add(dat2);
HashMap params = new HashMap();
params.put(ExternalFunctionStatement.CLASS_NAME, PerfTestExtFunctCP.class.getName());
ExternalFunctionProgramBlockCP fpb = new ExternalFunctionProgramBlockCP(prog, in, out, params, PERF_TOOL_DIR);
prog.addFunctionProgramBlock(DMLProgram.DEFAULT_NAMESPACE, "execPerfTestExtFunct", fpb);
}
else if ( inst instanceof MRJobInstruction )
{
//FIXME hardcoded for test
//MMRJMR.SORT_IO_MEM = sortio;
}
//generate input and output matrices
LocalVariableMap vars = ec.getVariables();
vars.removeAll();
//memory is sampled before and after creating the variables (mem2-mem1 is used as a correction below)
double mem1 = PerfTestMemoryObserver.getUsedMemory();
if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
//NOTE(review): "B" is also created for UNARY_UNARY; possibly because some unary
//instructions write their result to "B" instead of "C" - confirm
if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim1, dim2, sparsity, df));
if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim2, df));
double mem2 = PerfTestMemoryObserver.getUsedMemory();
//foreach repetition
double value = 0;
//NOTE(review): truncated line - the repetition loop and the beginning of the
//result reporting switch are missing
for( int i=0; i 0 )
value = value + mem2-mem1; //correction: input sizes added
System.out.println("--- RESULT: "+value+" byte"); break;
default: System.out.println("--- RESULT: "+value); break;
}
return value;
}
/**
 * Executes one sample of a multi-dimensional test, where several physical
 * variables are set at once.
 *
 * NOTE(review): this method is damaged. The text between a '&lt;' and a later
 * '&gt;' was lost at the line marked below, which removed almost the entire
 * body (variable assignment, data generation, repetitions). Restore from
 * the upstream source before changing any logic here.
 *
 * @param m test measure
 * @param v physical test variables
 * @param df data format of generated inputs
 * @param varValue current values, presumably one per entry of v - confirm
 * @param pb program block to execute
 * @param schema input/output schema
 * @param ec execution context
 * @return the measured value
 * @throws DMLRuntimeException
 * @throws DMLUnsupportedOperationException
 * @throws IOException
 */
private static double executeTestCaseMD( TestMeasure m, InternalTestVariable[] v, DataFormat df, double[] varValue, ProgramBlock pb, IOSchema schema, ExecutionContext ec )
throws DMLRuntimeException, DMLUnsupportedOperationException, IOException
{
//double datasize = DEFAULT_DATASIZE;
double sparsity = DEFAULT_SPARSITY;
double dim1 = -1;
double dim2 = -1;
double dim3 = -1;
//NOTE(review): truncated line - everything between the loop over the variables and
//the memory correction of the result reporting is missing
for( int i=0; i 0 )
value = value + mem2-mem1; //correction: input sizes added
System.out.println("--- RESULT: "+value+" byte"); break;
default: System.out.println("--- RESULT: "+value); break;
}
return value;
}
/**
 * Executes a program block once and returns the requested measure, then
 * clears the data of all matrix objects in the execution context.
 *
 * @param measure EXEC_TIME returns the value reported by the timer around the
 *                execution; MEMORY_USAGE returns the maximum memory consumption
 *                reported by a memory observer running in a separate thread
 * @param pb program block to execute
 * @param ec execution context providing the variables
 * @return the measured value
 * @throws DMLRuntimeException wrapping any exception raised during execution or measurement
 * @throws DMLUnsupportedOperationException declared for compatibility with callers
 */
public static double executeGenericProgramBlock( TestMeasure measure, ProgramBlock pb, ExecutionContext ec )
	throws DMLRuntimeException, DMLUnsupportedOperationException
{
	double value = 0;
	try
	{
		switch( measure )
		{
			case EXEC_TIME:
				Timing time = new Timing();
				time.start();
				pb.execute( ec );
				value = time.stop();
				break;
			case MEMORY_USAGE:
				PerfTestMemoryObserver mo = new PerfTestMemoryObserver();
				mo.measureStartMem();
				Thread t = new Thread(mo);
				t.start();
				try
				{
					pb.execute( ec );
				}
				finally
				{
					//always signal the observer, otherwise its thread keeps
					//running after a failed execution
					mo.setStopped();
				}
				value = mo.getMaxMemConsumption();
				t.join();
				break;
		}
	}
	catch(Exception ex)
	{
		//keep the interrupt status visible to callers when join was interrupted
		if( ex instanceof InterruptedException )
			Thread.currentThread().interrupt();
		throw new DMLRuntimeException(ex);
	}
	//clear matrixes from cache
	for( String str : ec.getVariables().keySet() )
	{
		Data dat = ec.getVariable(str);
		if( dat instanceof MatrixObject )
			((MatrixObject)dat).clearData();
	}
	return value;
}
/**
 * Generates a sequence of sample values between min and max, using the step
 * width (max-min)/(num-1).
 *
 * NOTE(review): this region is damaged. The text between a '&lt;' and a later
 * '&gt;' was lost at the line marked below, so the body of the sequence loop
 * and the methods that followed are missing; the remaining lines belong to
 * a different method that fills a random matrix. Restore from the upstream
 * source before changing any logic here.
 *
 * @param min first value of the sequence
 * @param max presumably the last value of the sequence - confirm
 * @param num number of samples; NOTE(review): num==1 makes the step width a division by zero
 * @return list of sample values
 */
public static LinkedList generateSequence( double min, double max, double num )
{
LinkedList data = new LinkedList();
double increment = (max-min)/(num-1);
//NOTE(review): truncated line - the rest belongs to a random matrix generator
//that leaves a cell unset when a condition involving sparsity holds
for( int i=0; i sparsity)
continue;
matrix[i][j] = (random.nextDouble() * (max - min) + min);
}
}
return matrix;
}
/**
 * Reads a performance profile from a caller-supplied file. The file name is
 * validated first; then all test definitions and instructions are registered
 * and the profile is read.
 *
 * @param fname name of the external profile file
 * @throws DMLUnsupportedOperationException declared for the instruction registration
 * @throws DMLRuntimeException if the file name is rejected by the validation
 * @throws XMLStreamException declared for reading the profile
 * @throws IOException declared for reading the profile
 */
public static void externalReadProfile( String fname )
	throws DMLUnsupportedOperationException, DMLRuntimeException, XMLStreamException, IOException
{
	//validate external name (security issue)
	final boolean trusted = LocalFileUtils.validateExternalFilename(fname, false);
	if( !trusted )
		throw new DMLRuntimeException("Invalid (non-trustworthy) external profile filename.");
	//internals must be registered before the profile is read
	registerTestConfigurations();
	registerInstructions();
	readProfile( fname );
}
/**
 * Writes the raw measurements as CSV input for the regression script. For
 * every (instruction, test definition) pair, two files are created in the
 * given directory, numbered by a running counter starting at 1:
 * "N_in1.csv" with the variable values and "N_in2.csv" with one measurement
 * per line.
 *
 * NOTE(review): this method is damaged. The text between a '&lt;' and a later
 * '&gt;' was lost at the line marked below, which removed the code that
 * writes the rows of the variable file. Restore from the upstream source
 * before changing any logic here.
 * NOTE(review): neither writer is closed if writing throws.
 *
 * @param dirname target directory; used as a plain prefix, so it needs a trailing separator
 * @return mapping from the running file counter to the combined instruction/test definition ID
 * @throws IOException
 * @throws DMLUnsupportedOperationException
 */
@SuppressWarnings("all")
private static HashMap writeResults( String dirname )
throws IOException, DMLUnsupportedOperationException
{
HashMap map = new HashMap();
int count = 1;
int offset = (MODEL_INTERCEPT ? 1 : 0);
int cols = MODEL_MAX_ORDER + offset;
for( Entry>> inst : _results.entrySet() )
{
int instID = inst.getKey();
HashMap> instCF = inst.getValue();
for( Entry> cfun : instCF.entrySet() )
{
int tDefID = cfun.getKey();
//both IDs are packed into one long so the file counter can be mapped back later
long ID = IDHandler.concatIntIDsToLong(instID, tDefID);
LinkedList dmeasure = cfun.getValue();
PerfTestDef def = _regTestDef.get(tDefID);
//regenerate the sample values of the definition (they are not stored with the results)
LinkedList dvariable = generateSequence(def.getMin(), def.getMax(), NUM_SAMPLES_PER_TEST);
int dlen = dvariable.size();
int plen = def.getInternalVariables().length;
//write variable data set
CSVWriter writer1 = new CSVWriter( new FileWriter( dirname+count+"_in1.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);
if( plen == 1 ) //one dimensional function
{
//write 1, x, x^2, x^3, ...
String[] sbuff = new String[cols];
for( Double val : dvariable )
{
//NOTE(review): truncated line - the row construction for the 1D case and the
//enumeration for the multi-dim case are missing; only the index increment logic remains
for( int j=0; j=0; i-- )
{
if(i==plen-1)
index[i]++;
else if( index[i+1] >= dlen )
{
index[i]++;
index[i+1]=0;
}
}
}
}
writer1.close();
//write measure data set
CSVWriter writer2 = new CSVWriter( new FileWriter( dirname+count+"_in2.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);
String[] buff2 = new String[1];
for( Double val : dmeasure )
{
buff2[0] = String.valueOf( val );
writer2.writeNext(buff2);
}
writer2.close();
map.put(count, ID);
count++;
}
}
return map;
}
/**
 * Instantiates the DML regression template and executes it.
 * The placeholders %numModels%, %numRows%, %numCols% and %indir% of the
 * template are replaced, the result is written to a temporary script file,
 * and the script is run via the DMLScript entry point.
 *
 * @param dmlname    file name of the DML template
 * @param dmltmpname file name of the instantiated (temporary) DML script
 * @param dir        directory holding the regression input files
 * @param models     number of models to compute
 * @param rows       number of rows per regression input
 * @param cols       number of columns per regression input
 * @throws IOException if the template cannot be read or the script cannot be written
 * @throws ParseException declared for the script execution
 * @throws DMLException declared for the script execution
 */
private static void computeRegressionModels( String dmlname, String dmltmpname, String dir, int models, int rows, int cols )
	throws IOException, ParseException, DMLException
{
	//clean scratch space
	//AutomatedTestBase.cleanupScratchSpace();
	//read DML template
	StringBuilder buffer = new StringBuilder();
	BufferedReader br = new BufferedReader( new FileReader(new File( dmlname )) );
	try
	{
		String line = null;
		while( (line=br.readLine()) != null )
		{
			buffer.append(line);
			buffer.append("\n");
		}
	}
	finally
	{
		br.close();
	}
	//replace parameters; literal replacement is required because replaceAll
	//would interpret '$' and '\' in the replacement (e.g., in a directory name)
	String template = buffer.toString();
	template = template.replace("%numModels%", String.valueOf(models));
	template = template.replace("%numRows%", String.valueOf(rows));
	template = template.replace("%numCols%", String.valueOf(cols));
	template = template.replace("%indir%", String.valueOf(dir));
	// write temp DML file
	File fout = new File(dmltmpname);
	FileOutputStream fos = new FileOutputStream(fout);
	try
	{
		fos.write(template.getBytes());
	}
	finally
	{
		fos.close();
	}
	// execute DML script
	DMLScript.main(new String[] { "-f", dmltmpname });
}
/**
 * Reads the regression results back and stores them as cost functions in the
 * profile. For every entry of the mapping, the file "N_out.csv" (N being the
 * entry's key) is read from the given directory; the first column of each
 * line is one model parameter.
 *
 * @param dname     directory of the result files; used as a plain prefix
 * @param IDMapping mapping from file counter to combined instruction/test definition ID
 * @throws IOException if a result file cannot be read
 */
private static void readRegressionModels( String dname, HashMap<Integer,Long> IDMapping )
	throws IOException
{
	for( Entry<Integer,Long> e : IDMapping.entrySet() )
	{
		int count = e.getKey();
		long ID = e.getValue();
		//split the combined ID back into its two parts
		int instID = IDHandler.extractIntIDFromLong(ID, 1);
		int tDefID = IDHandler.extractIntIDFromLong(ID, 2);
		//read file and parse
		LinkedList<Double> params = new LinkedList<Double>();
		CSVReader reader1 = new CSVReader( new FileReader(dname+count+"_out.csv"), ',' );
		try
		{
			String[] nextline = null;
			while( (nextline = reader1.readNext()) != null )
			{
				params.add(Double.parseDouble(nextline[0]));
			}
		}
		finally
		{
			//release the file handle also if a line cannot be parsed
			reader1.close();
		}
		double[] dparams = new double[params.size()];
		int i=0;
		for( Double d : params )
		{
			dparams[i] = d;
			i++;
		}
		//create new cost function
		boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
		CostFunction cf = new CostFunction(dparams, multidim);
		//append to profile
		if( !_profile.containsKey(instID) )
			_profile.put(instID, new HashMap<Integer, CostFunction>());
		_profile.get(instID).put(tDefID, cf);
	}
}
/**
*
* @param vars
* @return
*/
private static String serializeTestVariables( InternalTestVariable[] vars )
{
StringBuilder sb = new StringBuilder();
for( int i=0; i0 )
sb.append( XML_ELEMENT_DELIMITER );
sb.append( String.valueOf(vars[i]) );
}
return sb.toString();
}
/**
*
* @param vars
* @return
*/
private static InternalTestVariable[] parseTestVariables(String vars)
{
StringTokenizer st = new StringTokenizer(vars, XML_ELEMENT_DELIMITER);
InternalTestVariable[] v = new InternalTestVariable[st.countTokens()];
for( int i=0; i0 )
sb.append( XML_ELEMENT_DELIMITER );
sb.append( String.valueOf(vals[i]) );
}
return sb.toString();
}
/**
*
* @param valStr
* @return
*/
private static double[] parseParams( String valStr )
{
StringTokenizer st = new StringTokenizer(valStr, XML_ELEMENT_DELIMITER);
double[] params = new double[st.countTokens()];
for( int i=0; i>();
//read existing profile
FileInputStream fis = new FileInputStream( fname );
try
{
//xml parsing
XMLInputFactory xif = XMLInputFactory.newInstance();
XMLStreamReader xsr = xif.createXMLStreamReader( fis );
int e = xsr.nextTag(); // profile start
while( true ) //read all instructions
{
e = xsr.nextTag(); // instruction start
if( e == XMLStreamConstants.END_ELEMENT )
break; //reached profile end tag
//parse instruction
int ID = Integer.parseInt( xsr.getAttributeValue(null, XML_ID) );
//String name = xsr.getAttributeValue(null, XML_NAME).trim().replaceAll(" ", Lops.OPERAND_DELIMITOR);
HashMap tmp = new HashMap();
_profile.put( ID, tmp );
while( true )
{
e = xsr.nextTag(); // cost function start
if( e == XMLStreamConstants.END_ELEMENT )
break; //reached instruction end tag
//parse cost function
TestMeasure m = TestMeasure.valueOf( xsr.getAttributeValue(null, XML_MEASURE) );
TestVariable lv = TestVariable.valueOf( xsr.getAttributeValue(null, XML_VARIABLE) );
InternalTestVariable[] pv = parseTestVariables( xsr.getAttributeValue(null, XML_INTERNAL_VARIABLES) );
DataFormat df = DataFormat.valueOf( xsr.getAttributeValue(null, XML_DATAFORMAT) );
int tDefID = getTestDefID(m, lv, df, pv);
xsr.next(); //read characters
double[] params = parseParams(xsr.getText());
boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
CostFunction cf = new CostFunction( params, multidim );
tmp.put(tDefID, cf);
xsr.nextTag(); // cost function end
//System.out.println("added cost function");
}
}
xsr.close();
}
finally
{
IOUtilFunctions.closeSilently(fis);
}
//mark profile as successfully read
_flagReadData = true;
}
/**
* StAX for efficient streaming XML writing.
*
* @throws IOException
* @throws XMLStreamException
*/
private static void writeProfile( String dname, String fname )
throws IOException, XMLStreamException
{
//create initial directory and file
File dir = new File( dname );
if( !dir.exists() )
dir.mkdir();
File f = new File( fname );
f.createNewFile();
FileOutputStream fos = new FileOutputStream( f );
try
{
//create document
XMLOutputFactory xof = XMLOutputFactory.newInstance();
XMLStreamWriter xsw = xof.createXMLStreamWriter( fos );
//TODO use an alternative way for intentation
//xsw = new IndentingXMLStreamWriter( xsw ); //remove this line if no indenting required
//write document content
xsw.writeStartDocument();
xsw.writeStartElement( XML_PROFILE );
xsw.writeAttribute(XML_DATE, String.valueOf(new Date()) );
//foreach instruction (boundle of cost functions)
for( Entry> inst : _profile.entrySet() )
{
int instID = inst.getKey();
String instName = _regInst_IDNames.get( instID );
xsw.writeStartElement( XML_INSTRUCTION );
xsw.writeAttribute(XML_ID, String.valueOf( instID ));
xsw.writeAttribute(XML_NAME, instName.replaceAll(Lop.OPERAND_DELIMITOR, " "));
//foreach testdef cost function
for( Entry cfun : inst.getValue().entrySet() )
{
int tdefID = cfun.getKey();
PerfTestDef def = _regTestDef.get(tdefID);
CostFunction cf = cfun.getValue();
xsw.writeStartElement( XML_COSTFUNCTION );
xsw.writeAttribute( XML_ID, String.valueOf( tdefID ));
xsw.writeAttribute( XML_MEASURE, def.getMeasure().toString() );
xsw.writeAttribute( XML_VARIABLE, def.getVariable().toString() );
xsw.writeAttribute( XML_INTERNAL_VARIABLES, serializeTestVariables(def.getInternalVariables()) );
xsw.writeAttribute( XML_DATAFORMAT, def.getDataformat().toString() );
xsw.writeCharacters(serializeParams( cf.getParams() ));
xsw.writeEndElement();// XML_COSTFUNCTION
}
xsw.writeEndElement(); //XML_INSTRUCTION
}
xsw.writeEndElement();//XML_PROFILE
xsw.writeEndDocument();
xsw.close();
}
finally
{
IOUtilFunctions.closeSilently(fos);
}
}
/**
 * Command line entry point: runs the performance test in order to
 * produce profile.xml.
 *
 * @param args command line arguments (not read by this method)
 */
public static void main(String[] args)
{
	//kick off the local / remote performance test
	runTest();
}
}