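// NOTE: the following lines are residue of the web page this source was copied from;
// they are kept as comments so that the file remains valid Java.
// org.apache.sysml.runtime.instructions.MRJobInstruction Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of systemml Show documentation
// Show all versions of systemml Show documentation
// Declarative Machine Learning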
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.lops.DataGen;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.compile.JobType;
import org.apache.sysml.lops.runtime.RunMRJobs;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.matrix.JobReturn;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.NumItemsByEachReducerMetaData;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.sysml.utils.Statistics;
/*
---------------------------------------------------------------------------------------
JobType Rand RecordReader Mapper Shuffle AggInReducer OtherInReducer
---------------------------------------------------------------------------------------
GMR * * * *
RAND * * * *
REBLOCK * * *
MMCJ * *
MMRJ * * *
CM_COV * *
GROUPED_AGG * *
COMBINE *
SORT * *
PARTITION
---------------------------------------------------------------------------------------
*/
public class MRJobInstruction extends Instruction
{
//public enum JobType {MMCJ, MMRJ, GMR, Partition, RAND, ReBlock, SortKeys, Combine, CMCOV, GroupedAgg};
/** Type of this MR job (assigned by the constructor). */
private JobType jobType;

// Instruction strings of the individual job phases; all default to the empty string.
private String _randInstructions = "";
private String _recordReaderInstructions = "";
private String _mapperInstructions = "";
private String _shuffleInstructions = "";
private String _aggInstructions = "";
private String _otherInstructions = "";

// Labels (variable names) of the job inputs and outputs.
private String[] inputVars;
private String[] outputVars;
// Byte indices of the job results.
private byte [] _resultIndices;
private int iv_numReducers;
private int iv_replication;
private String dimsUnknownFilePrefix;
// Memory requirement of the mapper instructions; -1 until explicitly set.
private double _mapperMem = -1;

/**
 * This structure contains the DML script line number
 * of each MR instruction within this MR job.
 * The element type is Integer, as required by the iteration in findMRInstructions.
 */
private ArrayList<Integer> MRJobInstructionsLineNumbers;

/*
 * Following attributes are populated by pulling out information from Symbol Table.
 * This is done just before a job is submitted/spawned.
 */
private String[] inputs;
private InputInfo[] inputInfos;
private long[] rlens;
private long[] clens;
private int[] brlens;
private int[] bclens;
private String[] outputs;
private OutputInfo[] outputInfos;

// Member variables to store partitioning-related information for all input matrices
private boolean[] partitioned;
private PDataPartitionFormat[] pformats;
private int[] psizes;

/*
 * These members store references to MatrixObjects corresponding to different
 * MATRIX variables in inputVars and outputVars, respectively. Note that the
 * references to SCALAR input variables are not stored in inputMatrices.
 * Every reference in outputMatrices always points to a MATRIX,
 * since MR jobs always produce matrices.
 */
private MatrixObject[] inputMatrices, outputMatrices;

// Indicates the data type of inputVars
private DataType[] inputDataTypes;
/**
 * Creates an MR job instruction of the given job type. The instruction type is
 * set to MAPREDUCE_JOB and the opcode is derived from the job type.
 *
 * @param type job type of this MR job
 */
public MRJobInstruction(JobType type)
{
    this.setType(Instruction.INSTRUCTION_TYPE.MAPREDUCE_JOB);
    this.jobType = type;
    // opcode is the fixed prefix followed by the job type's string form
    this.instOpcode = "MR-Job_" + this.getJobType();
}
/**
 * Copy constructor, primarily used in parfor.
 * <p>
 * The instruction strings and scalar settings are copied by value, the label
 * and result-index arrays are cloned, and all remaining attributes (including
 * the line-number list) are shared with the source instruction. No pattern
 * replacement is performed by this constructor.
 *
 * @param that MR job instruction to copy from
 * @throws IllegalArgumentException if IllegalArgumentException occurs
 * @throws IllegalAccessException if IllegalAccessException occurs
 */
public MRJobInstruction(MRJobInstruction that)
    throws IllegalArgumentException, IllegalAccessException
{
    this( that.jobType );

    //copy basic variables
    _randInstructions = that._randInstructions;
    _recordReaderInstructions = that._recordReaderInstructions;
    _mapperInstructions = that._mapperInstructions;
    _shuffleInstructions = that._shuffleInstructions;
    _aggInstructions = that._aggInstructions;
    _otherInstructions = that._otherInstructions;
    iv_numReducers = that.iv_numReducers;
    iv_replication = that.iv_replication;
    dimsUnknownFilePrefix = that.dimsUnknownFilePrefix;
    _mapperMem = that._mapperMem;
    MRJobInstructionsLineNumbers = that.MRJobInstructionsLineNumbers;

    //copy array variables (via clone); tolerate a source instruction whose
    //labels or result indices have not been set yet instead of failing with an NPE
    inputVars = (that.inputVars != null) ? that.inputVars.clone() : null;
    outputVars = (that.outputVars != null) ? that.outputVars.clone() : null;
    _resultIndices = (that._resultIndices != null) ? that._resultIndices.clone() : null;

    //copy all remaining attributes (these are overwritten by
    //extractInputMatrices/extractOutputMatrices but we need to
    //shallow copy them for special cases of runtime piggybacking)
    inputs = that.inputs;
    inputInfos = that.inputInfos;
    rlens = that.rlens;
    clens = that.clens;
    brlens = that.brlens;
    bclens = that.bclens;
    outputs = that.outputs;
    outputInfos = that.outputInfos;
    partitioned = that.partitioned;
    pformats = that.pformats;
    psizes = that.psizes;
    inputMatrices = that.inputMatrices;
    outputMatrices = that.outputMatrices;
    inputDataTypes = that.inputDataTypes;
}
/**
 * Returns the type of this MR job.
 *
 * @return job type
 */
public JobType getJobType() {
    return this.jobType;
}
/** @return the mapper instruction string */
public String getIv_instructionsInMapper() {
    return this._mapperInstructions;
}

/** @param inst new mapper instruction string */
public void setIv_instructionsInMapper(String inst) {
    this._mapperInstructions = inst;
}

/** @return the record reader instruction string */
public String getIv_recordReaderInstructions() {
    return this._recordReaderInstructions;
}

/** @param inst new record reader instruction string */
public void setIv_recordReaderInstructions(String inst) {
    this._recordReaderInstructions = inst;
}

/** @return the rand instruction string */
public String getIv_randInstructions() {
    return this._randInstructions;
}

/** @param inst new rand instruction string */
public void setIv_randInstructions(String inst) {
    this._randInstructions = inst;
}

/** @return the shuffle instruction string */
public String getIv_shuffleInstructions() {
    return this._shuffleInstructions;
}

/** @param inst new shuffle instruction string */
public void setIv_shuffleInstructions(String inst) {
    this._shuffleInstructions = inst;
}

/** @return the aggregate instruction string */
public String getIv_aggInstructions() {
    return this._aggInstructions;
}

/** @param inst new aggregate instruction string */
public void setIv_aggInstructions(String inst) {
    this._aggInstructions = inst;
}

/** @return the "other" instruction string */
public String getIv_otherInstructions() {
    return this._otherInstructions;
}

/** @param inst new "other" instruction string */
public void setIv_otherInstructions(String inst) {
    this._otherInstructions = inst;
}
/** @return the result index array (the internal array, not a copy) */
public byte[] getIv_resultIndices() {
    return this._resultIndices;
}

/** @return the configured number of reducers */
public int getIv_numReducers() {
    return this.iv_numReducers;
}

/** @return the configured replication factor */
public int getIv_replication() {
    return this.iv_replication;
}

/** @return the stored mapper memory requirement (-1 unless it was set) */
public double getMemoryRequirements() {
    return this._mapperMem;
}

/** @param mem mapper memory requirement to store */
public void setMemoryRequirements(double mem) {
    this._mapperMem = mem;
}

/** @return the file prefix stored for unknown dimensions */
public String getDimsUnknownFilePrefix() {
    return this.dimsUnknownFilePrefix;
}

/** @param prefix file prefix to store for unknown dimensions */
public void setDimsUnknownFilePrefix(String prefix) {
    this.dimsUnknownFilePrefix = prefix;
}

/** @return the input labels (the internal array, not a copy) */
public String[] getInputVars() {
    return this.inputVars;
}

/** @return the output labels (the internal array, not a copy) */
public String[] getOutputVars() {
    return this.outputVars;
}
/**
 * Getter for MRJobInstructionsLineNumbers.
 *
 * @return list with the DML script line number of each MR instruction in this
 *         MR job (the internal list, not a copy); may be null if never set
 */
public ArrayList<Integer> getMRJobInstructionsLineNumbers()
{
    return MRJobInstructionsLineNumbers;
}
/**
 * Sets the outputs (output indices) of this MapReduce instruction.
 *
 * @param outputIndices output indices
 */
public void setOutputs(byte[] outputIndices)
{
    this._resultIndices = outputIndices;
}

/**
 * Sets the number of reducers of this MapReduce instruction.
 *
 * @param numReducers number of reducers
 */
public void setNumberOfReducers(int numReducers)
{
    this.iv_numReducers = numReducers;
}

/**
 * Sets the replication factor for outputs produced by this MapReduce instruction.
 *
 * @param replication replication factor
 */
public void setReplication(int replication)
{
    this.iv_replication = replication;
}

/**
 * Sets the input and output labels of this MapReduce instruction.
 * Both arrays are stored as given (no copy is made).
 *
 * @param inputLabels input labels
 * @param outputLabels output labels
 */
public void setInputOutputLabels(String[] inputLabels, String[] outputLabels)
{
    inputVars = inputLabels;
    outputVars = outputLabels;
}
/** @param rrInstructions new record reader instruction string */
public void setRecordReaderInstructions(String rrInstructions)
{
    this._recordReaderInstructions = rrInstructions;
}

/** @param mapperInstructions new mapper instruction string */
public void setMapperInstructions(String mapperInstructions)
{
    this._mapperInstructions = mapperInstructions;
}

/** @param shuffleInstructions new shuffle instruction string */
public void setShuffleInstructions(String shuffleInstructions)
{
    this._shuffleInstructions = shuffleInstructions;
}

/** @param aggInstructions new aggregate instruction string */
public void setAggregateInstructionsInReducer(String aggInstructions)
{
    this._aggInstructions = aggInstructions;
}

/** @param otherInstructions new "other" instruction string */
public void setOtherInstructionsInReducer(String otherInstructions)
{
    this._otherInstructions = otherInstructions;
}

/** @param randInstructions new rand instruction string */
public void setRandInstructions(String randInstructions)
{
    this._randInstructions = randInstructions;
}
/**
 * Setter for the MRJobInstructionsLineNumbers field.
 * The list is stored as given (no copy is made).
 *
 * @param MRJobLineNumbers DML script line numbers, one per MR instruction in this MR job
 */
public void setMRJobInstructionsLineNumbers(ArrayList<Integer> MRJobLineNumbers) {
    MRJobInstructionsLineNumbers = MRJobLineNumbers;
}
/**
 * Configures this instruction from the given labels, instruction strings and
 * job settings. The shuffle instructions are reset to the empty string; the
 * rand instructions are left untouched.
 *
 * @param inLabels input labels
 * @param recordReaderInstructions record reader instruction string
 * @param mapperInstructions mapper instruction string
 * @param aggInstructions aggregate instruction string
 * @param otherInstructions "other" instruction string
 * @param outLabels output labels
 * @param resultIndex result indices
 * @param numReducers number of reducers
 * @param replication replication factor
 */
public void setGMRInstructions(String[] inLabels,
    String recordReaderInstructions, String mapperInstructions,
    String aggInstructions, String otherInstructions, String [] outLabels, byte [] resultIndex,
    int numReducers, int replication)
{
    // labels and result indices
    setInputOutputLabels(inLabels, outLabels);
    setOutputs(resultIndex);

    // per-phase instruction strings
    setRecordReaderInstructions(recordReaderInstructions);
    setMapperInstructions(mapperInstructions);
    setShuffleInstructions("");
    setAggregateInstructionsInReducer(aggInstructions);
    setOtherInstructionsInReducer(otherInstructions);

    // job settings
    setNumberOfReducers(numReducers);
    setReplication(replication);
}
/**
 * Configures this instruction from the given labels, instruction strings and
 * job settings. The reblock instructions are stored as shuffle instructions and
 * the aggregate instructions are reset to the empty string.
 *
 * @param inLabels input labels
 * @param mapperInstructions mapper instruction string
 * @param reblockInstructions reblock instruction string (stored as shuffle instructions)
 * @param otherInstructions "other" instruction string
 * @param outLabels output labels
 * @param resultIndex result indices
 * @param numReducers number of reducers
 * @param replication replication factor
 */
public void setReBlockInstructions(String[] inLabels,
    String mapperInstructions, String reblockInstructions, String otherInstructions,
    String[] outLabels, byte [] resultIndex,
    int numReducers, int replication)
{
    // labels and result indices
    setInputOutputLabels(inLabels, outLabels);
    setOutputs(resultIndex);

    // per-phase instruction strings
    setMapperInstructions(mapperInstructions);
    setShuffleInstructions(reblockInstructions);
    setAggregateInstructionsInReducer("");
    setOtherInstructionsInReducer(otherInstructions);

    // job settings
    setNumberOfReducers(numReducers);
    setReplication(replication);
}
/**
 * Search whether or not this MR job contains at least one
 * MR instruction with the specified line number.
 * <p>
 * Only available in debug mode: if debug mode is disabled, an error is printed
 * to stderr and false is returned. Returns false as well if no line numbers
 * have been set for this job.
 *
 * @param lineNum Line number in DML script
 * @return Return true if found, otherwise return false
 */
public boolean findMRInstructions(int lineNum) {
    if (!DMLScript.ENABLE_DEBUG_MODE) {
        System.err.println("Error: Expecting debug mode to be enabled for this functionality");
        return false;
    }
    // line numbers are optional; an unset list simply contains nothing
    if (MRJobInstructionsLineNumbers == null) {
        return false;
    }
    // contains() compares via equals(), so this works independently of the
    // declared element type of the list and tolerates null entries
    return MRJobInstructionsLineNumbers.contains(Integer.valueOf(lineNum));
}
/**
 * Formats a byte array as a string in which every element is preceded by a
 * comma, e.g. {1,2} becomes ",1,2". The leading comma is kept for
 * compatibility with existing output.
 *
 * @param arr byte array to format; may be null
 * @return formatted string; empty for a null or empty array
 */
public String getString(byte [] arr)
{
    // result indices may not have been set yet (e.g. toString() on a fresh
    // instruction); treat a missing array like an empty one
    if (arr == null) {
        return "";
    }
    StringBuilder sb = new StringBuilder();
    for (byte b : arr) {
        sb.append(",");
        sb.append(Byte.toString(b));
    }
    return sb.toString();
}
/**
 * Builds a multi-line description of this job: job type, input labels, the
 * instruction strings of all phases, output labels, result indices, number of
 * reducers and replication factor (one entry per line).
 *
 * @return textual description of this MR job instruction
 */
public String toString()
{
    StringBuilder text = new StringBuilder();
    text.append("jobtype = ").append(jobType).append(" \n");
    text.append("input labels = ").append(Arrays.toString(inputVars)).append(" \n");
    text.append("recReader inst = ").append(_recordReaderInstructions).append(" \n");
    text.append("rand inst = ").append(_randInstructions).append(" \n");
    text.append("mapper inst = ").append(_mapperInstructions).append(" \n");
    text.append("shuffle inst = ").append(_shuffleInstructions).append(" \n");
    text.append("agg inst = ").append(_aggInstructions).append(" \n");
    text.append("other inst = ").append(_otherInstructions).append(" \n");
    text.append("output labels = ").append(Arrays.toString(outputVars)).append(" \n");
    text.append("result indices = ").append(getString(_resultIndices)).append(" \n");
    //text.append("result dims unknown ").append(getString(iv_resultDimsUnknown)).append(" \n");
    text.append("num reducers = ").append(iv_numReducers).append(" \n");
    text.append("replication = ").append(iv_replication).append(" \n");
    return text.toString();
}
/**
 * Method for displaying MR instructions interspersed with source code
 * ONLY USED IN DEBUG MODE
 * <p>
 * If debug mode is disabled, an error is printed to stderr and the empty
 * string is returned. Otherwise the job type and one section per non-empty
 * phase (record reader, rand, mapper, shuffle, agg, other) are rendered. With
 * debug=false the input/output labels and the raw instructions are printed;
 * with debug=true the labels are omitted and each instruction is reduced to
 * the first two tokens of its prepared form.
 *
 * @param debug Flag for displaying instructions in debugger test integration
 * @return MR string
 */
public String getMRString(boolean debug)
{
    if (!DMLScript.ENABLE_DEBUG_MODE) {
        System.err.println("Error: Expecting debug mode to be enabled for this functionality");
        return "";
    }

    StringBuilder sb = new StringBuilder();
    sb.append("MR-Job[\n");
    sb.append("\t\t\t\tjobtype = " + jobType + " \n");

    if (!debug) {
        sb.append("\t\t\t\tinput labels = ");
        sb.append(Arrays.toString(inputVars));
        sb.append(" \n");
    }

    // one section per phase, in fixed order
    appendInstructionSection(sb, "\t\t\t\trecReader inst = ", _recordReaderInstructions, debug);
    appendInstructionSection(sb, "\t\t\t\trand inst = ", _randInstructions, debug);
    appendInstructionSection(sb, "\t\t\t\tmapper inst = ", _mapperInstructions, debug);
    appendInstructionSection(sb, "\t\t\t\tshuffle inst = ", _shuffleInstructions, debug);
    appendInstructionSection(sb, "\t\t\t\tagg inst = ", _aggInstructions, debug);
    appendInstructionSection(sb, "\t\t\t\tother inst = ", _otherInstructions, debug);

    if (!debug) {
        sb.append("\t\t\t\toutput labels = ");
        sb.append(Arrays.toString(outputVars));
        sb.append(" \n");
    }

    sb.append("\t\t\t ]");
    return sb.toString();
}

/**
 * Appends one phase section to the MR string: the first instruction is
 * prefixed with the given label, every further instruction with a
 * continuation prefix. Null or empty instruction strings produce no output.
 *
 * @param sb target buffer
 * @param label prefix of the first line of the section
 * @param instructions delimiter-separated instruction string of the phase
 * @param debug if true, print only the first two tokens of each prepared instruction
 */
private static void appendInstructionSection(StringBuilder sb, String label, String instructions, boolean debug)
{
    if (instructions == null || instructions.length() == 0) {
        return;
    }

    String[] instArray = instructions.split(Lop.INSTRUCTION_DELIMITOR);
    for (int i = 0; i < instArray.length; i++) {
        sb.append((i == 0) ? label : "\t\t\t\t ");
        if (!debug) {
            sb.append(instArray[i]);
        }
        else {
            // split() may yield fewer than two tokens; print what is available
            // instead of failing with an ArrayIndexOutOfBoundsException
            String[] instStr = prepareInstruction(instArray[i]).split(" ");
            if (instStr.length > 0) {
                sb.append(instStr[0]);
            }
            if (instStr.length > 1) {
                sb.append(" ");
                sb.append(instStr[1]);
            }
        }
        sb.append(" \n");
    }
}
/**
 * Logs the string representation of this instruction at debug level.
 */
public void printMe()
{
    // string concatenation invokes toString() on this instruction
    LOG.debug("\nMRInstructions: \n" + this);
}
/**
 * Collects the first operand of every instruction in the given instruction
 * string. Each collected entry is preceded by a comma (including the first).
 *
 * @param inst delimiter-separated instruction string
 * @return comma-prefixed list of the first operands
 */
private String getOps(String inst)
{
    String[] instructions = inst.split(Lop.INSTRUCTION_DELIMITOR);
    StringBuilder ops = new StringBuilder();
    for (int k = 0; k < instructions.length; k++) {
        String firstOperand = instructions[k].split(Lop.OPERAND_DELIMITOR)[0];
        ops.append(",").append(firstOperand);
    }
    return ops.toString();
}
/**
 * Builds a compact description of this job: the job type followed by one
 * entry per non-empty phase (map, shuffle, agg, other), each entry listing
 * the result of getOps for that phase.
 *
 * @return graph string of this MR job instruction
 */
@Override
public String getGraphString()
{
    // phase prefixes and their instruction strings, in output order
    final String[] prefixes = { ",map(", ",shuffle(", ",agg(", ",other(" };
    final String[] phases = {
        _mapperInstructions, _shuffleInstructions, _aggInstructions, _otherInstructions };

    StringBuilder graph = new StringBuilder();
    graph.append(jobType);
    for (int p = 0; p < phases.length; p++) {
        if (phases[p].equals("")) {
            continue;
        }
        graph.append(prefixes[p]);
        graph.append(getOps(phases[p]));
        graph.append(")");
    }
    return graph.toString();
}
/**
 * Indicates whether the shuffle, aggregate and "other" instruction strings
 * are all null or blank.
 *
 * @return true if none of these three instruction strings has content
 */
public boolean isMapOnly()
{
    final String[] candidates = { _shuffleInstructions, _aggInstructions, _otherInstructions };
    for (String candidate : candidates) {
        // any non-blank instruction string disqualifies the job
        if (candidate != null && candidate.trim().length() != 0) {
            return false;
        }
    }
    return true;
}
/** @return input file names (the internal array, not a copy) */
public String[] getInputs()
{
    return this.inputs;
}

/** @return input infos of the inputs (the internal array, not a copy) */
public InputInfo[] getInputInfos()
{
    return this.inputInfos;
}

/** @return row counts of the inputs (the internal array, not a copy) */
public long[] getRlens()
{
    return this.rlens;
}

/** @return column counts of the inputs (the internal array, not a copy) */
public long[] getClens()
{
    return this.clens;
}

/** @return rows per block of the inputs (the internal array, not a copy) */
public int[] getBrlens()
{
    return this.brlens;
}

/** @return columns per block of the inputs (the internal array, not a copy) */
public int[] getBclens()
{
    return this.bclens;
}

/** @return output file names (the internal array, not a copy) */
public String[] getOutputs()
{
    return this.outputs;
}

/** @return output infos of the outputs (the internal array, not a copy) */
public OutputInfo[] getOutputInfos()
{
    return this.outputInfos;
}

/** @return input matrix objects (the internal array, not a copy) */
public MatrixObject[] getInputMatrices()
{
    return this.inputMatrices;
}

/** @return partitioned flags of the inputs (the internal array, not a copy) */
public boolean[] getPartitioned()
{
    return this.partitioned;
}

/** @param partitioned partitioned flags to store (no copy is made) */
public void setPartitioned(boolean[] partitioned)
{
    this.partitioned = partitioned;
}

/** @return partition formats of the inputs (the internal array, not a copy) */
public PDataPartitionFormat[] getPformats()
{
    return this.pformats;
}

/** @param pformats partition formats to store (no copy is made) */
public void setPformats(PDataPartitionFormat[] pformats)
{
    this.pformats = pformats;
}

/** @return partition sizes of the inputs (the internal array, not a copy) */
public int[] getPsizes()
{
    return this.psizes;
}

/** @param psizes partition sizes to store (no copy is made) */
public void setPsizes(int[] psizes)
{
    this.psizes = psizes;
}
/**
 * Extracts input variables with MATRIX data type, and stores references to
 * corresponding matrix objects in inputMatrices. Also, stores
 * the data types in inputDataTypes.
 * <p>
 * FRAME inputs are represented by a newly created MatrixObject that carries
 * the frame's value type, file name, meta data and file format properties.
 * Inputs of any other data type only contribute their data type. Finally, the
 * auxiliary input data structures are populated.
 *
 * @param ec execution context
 * @return array of matrix objects
 */
public MatrixObject[] extractInputMatrices(ExecutionContext ec) {
    // typed list: with a raw list, toArray(...) would return Object[]
    ArrayList<MatrixObject> inputmat = new ArrayList<MatrixObject>();
    inputDataTypes = new DataType[inputVars.length];
    for ( int i=0; i < inputVars.length; i++ ) {
        Data d = ec.getVariable(inputVars[i]);
        inputDataTypes[i] = d.getDataType();
        if ( d.getDataType() == DataType.MATRIX ) {
            inputmat.add((MatrixObject) d);
        }
        else if( d.getDataType() == DataType.FRAME ) {
            //FIXME conversion from frame to matrix object (meta data only) to adhere to
            //the given matrix-based mr job submission framework
            FrameObject fo = (FrameObject) d;
            MatrixObject mo = new MatrixObject(fo.getValueType(), fo.getFileName(), fo.getMetaData());
            mo.setFileFormatProperties(fo.getFileFormatProperties());
            inputmat.add(mo);
        }
    }
    inputMatrices = inputmat.toArray(new MatrixObject[inputmat.size()]);

    // populate auxiliary data structures
    populateInputs();
    return inputMatrices;
}
/**
 * Returns the output matrix objects (the internal array, not a copy).
 *
 * @return array of output matrix objects
 */
public MatrixObject[] getOutputMatrices()
{
    return this.outputMatrices;
}
/**
 * Looks up every output variable in the execution context and stores the
 * resulting MatrixObject references in outputMatrices. All output variables
 * must be of MATRIX data type. Afterwards the auxiliary output data
 * structures are populated.
 *
 * @param ec execution context
 * @return array of matrix objects
 * @throws DMLRuntimeException if an output variable is not of MATRIX data type
 */
public MatrixObject[] extractOutputMatrices(ExecutionContext ec) throws DMLRuntimeException {
    outputMatrices = new MatrixObject[getOutputVars().length];
    String[] outVars = getOutputVars();
    for (int pos = 0; pos < outVars.length; pos++) {
        String name = outVars[pos];
        Data value = ec.getVariable(name);
        // guard clause: anything but a matrix is rejected
        if ( value.getDataType() != DataType.MATRIX ) {
            throw new DMLRuntimeException(getJobType() + ": invalid datatype (" + value.getDataType() + ") for output variable " + name);
        }
        outputMatrices[pos] = (MatrixObject) value;
    }

    // populate auxiliary data structures
    populateOutputs();
    return outputMatrices;
}
/**
 * Fills the auxiliary input data structures (file names, dimensions, block
 * sizes, input infos and partitioning information) from the matrix objects
 * currently stored in inputMatrices.
 */
private void populateInputs() {
    // Since inputVars can potentially contain scalar variables, all auxiliary
    // arrays are sized by the number of input matrices, not by inputVars.length
    final int numInputs = inputMatrices.length;

    // Allocate space
    inputs = new String[numInputs];
    inputInfos = new InputInfo[numInputs];
    rlens = new long[numInputs];
    clens = new long[numInputs];
    brlens = new int[numInputs];
    bclens = new int[numInputs];
    partitioned = new boolean[numInputs];
    pformats = new PDataPartitionFormat[numInputs];
    psizes = new int[numInputs];

    // populate information
    for ( int pos = 0; pos < numInputs; pos++ ) {
        MatrixObject mo = inputMatrices[pos];
        inputs[pos] = mo.getFileName();

        // dimensions and block sizes
        MatrixCharacteristics dims = mo.getMatrixCharacteristics();
        rlens[pos] = dims.getRows();
        clens[pos] = dims.getCols();
        brlens[pos] = dims.getRowsPerBlock();
        bclens[pos] = dims.getColsPerBlock();

        // input info depends on the kind of meta data attached to the matrix
        if ( mo.getMetaData() instanceof MatrixFormatMetaData ) {
            inputInfos[pos] = ((MatrixFormatMetaData) mo.getMetaData()).getInputInfo();
        }
        else if ( mo.getMetaData() instanceof NumItemsByEachReducerMetaData ) {
            inputInfos[pos] = InputInfo.InputInfoForSortOutput;
            inputInfos[pos].metadata = mo.getMetaData();
        }

        // partitioning information
        partitioned[pos] = mo.isPartitioned();
        pformats[pos] = mo.getPartitionFormat();
        psizes[pos] = mo.getPartitionSize();
    }
}
/**
 * Fills the auxiliary output data structures (file names and output infos)
 * from the matrix objects currently stored in outputMatrices.
 */
private void populateOutputs() {
    // Note: (outputVars.length == outputMatrices.length) -> true
    final int numOutputs = outputVars.length;

    // Allocate space
    outputs = new String[numOutputs];
    outputInfos = new OutputInfo[numOutputs];

    // Populate information
    for (int pos = 0; pos < numOutputs; pos++) {
        MatrixObject mo = outputMatrices[pos];
        outputs[pos] = mo.getFileName();
        outputInfos[pos] = ((MatrixFormatMetaData) mo.getMetaData()).getOutputInfo();
    }
}
/**
 * Makes an instruction string printable by substituting the internal
 * delimiters: operand delimiters become blanks, data type prefixes become
 * dots and instruction delimiters become ", " (applied in this order).
 *
 * @param inst Instruction to be displayed
 * @return Post-processed instruction in string format
 */
private static String prepareInstruction(String inst)
{
    return inst
        .replaceAll(Lop.OPERAND_DELIMITOR, " ")
        .replaceAll(Lop.DATATYPE_PREFIX, ".")
        .replaceAll(Lop.INSTRUCTION_DELIMITOR, ", ");
}
/**
 * Logs a complete description of this MR job at trace level: job type, all
 * inputs (with file name, dimensions, nnz, block sizes and input info for
 * matrices), the non-empty instruction strings, all outputs (using the given
 * result statistics), the number of reducers and the replication factor.
 * <p>
 * Relies on the auxiliary data structures filled by extractInputMatrices and
 * extractOutputMatrices.
 *
 * @param resultStats matrix characteristics of the outputs, one per output variable
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
public void printCompleteMRJobInstruction(MatrixCharacteristics[] resultStats) throws DMLRuntimeException {
    LOG.trace("jobtype" + jobType);
    LOG.trace("Inputs: \n");
    // i walks over all input variables, mi over the entries of the auxiliary
    // input arrays (which have no slot for scalars)
    for(int i=0, mi=0; i < inputVars.length; i++ ) {
        if(inputDataTypes[i] == DataType.SCALAR) {
            LOG.trace(" " + inputVars[i] + " - SCALAR input (replaced w/ value)");
        }
        else if ( inputDataTypes[i] == DataType.MATRIX ) {
            LOG.trace(" " + inputVars[i] +
                " - [" + inputs[mi] +
                "] [" + rlens[mi] + ", " + clens[mi] +
                "] nnz[" + inputMatrices[mi].getNnz() +
                "] block[" + brlens[mi] + ", " + bclens[mi] +
                "] [" + InputInfo.inputInfoToString(inputInfos[mi]) +
                "]");
            mi++;
        }
        else {
            LOG.trace(" " + inputVars[i] + " - " + inputDataTypes[i]);
            // extractInputMatrices adds a matrix object for every FRAME input,
            // so frames occupy a slot in the auxiliary arrays and must be skipped
            if( inputDataTypes[i] == DataType.FRAME )
                mi++;
        }
    }

    LOG.trace(" Instructions:");
    if ( !_recordReaderInstructions.equals(""))
        LOG.trace(" recReader inst - " + _recordReaderInstructions );
    if ( !_randInstructions.equals(""))
        LOG.trace(" rand inst - " + _randInstructions );
    if ( !_mapperInstructions.equals(""))
        LOG.trace(" mapper inst - " + _mapperInstructions );
    if ( !_shuffleInstructions.equals(""))
        LOG.trace(" shuffle inst - " + _shuffleInstructions );
    if ( !_aggInstructions.equals(""))
        LOG.trace(" agg inst - " + _aggInstructions );
    if ( !_otherInstructions.equals(""))
        LOG.trace(" other inst - " + _otherInstructions );

    LOG.trace(" Outputs:");
    for(int i=0; i < outputVars.length; i++ ) {
        // block[...] reports the block sizes (rows per block, cols per block)
        LOG.trace(" " + _resultIndices[i] + " : " + outputVars[i] +
            " - [" + outputs[i] +
            "] [" + resultStats[i].getRows() + ", " + resultStats[i].getCols() +
            "] nnz[" + outputMatrices[i].getNnz() +
            "] block[" + resultStats[i].getRowsPerBlock() + ", " + resultStats[i].getColsPerBlock() +
            "] [" + OutputInfo.outputInfoToString(outputInfos[i]) +
            "]");
    }
    LOG.trace(" #Reducers - " + iv_numReducers);
    LOG.trace(" Replication - " + iv_replication);
}
@Override
public void updateInstructionThreadID(String pattern, String replace)
throws DMLRuntimeException
{
if( dimsUnknownFilePrefix!=null )
dimsUnknownFilePrefix = dimsUnknownFilePrefix.replaceAll(pattern, replace);
if( getJobType() == JobType.DATAGEN )
{
//update string representation (because parsing might fail due to pending instruction patching)
String rndinst = getIv_randInstructions().trim();
StringBuilder rndinst2 = new StringBuilder();
if( rndinst!=null && rndinst.length()>0 )
{
String[] instSet = rndinst.split( Lop.INSTRUCTION_DELIMITOR );
for( String dginst : instSet )
{
if( rndinst2.length()>0 )
rndinst2.append(Lop.INSTRUCTION_DELIMITOR);
//handle single instruction
String[] parts = dginst.split(Lop.OPERAND_DELIMITOR);
if( parts==null || parts.length<2 )
throw new DMLRuntimeException("Invalid datagen instruction: "+dginst);
int pos = -1;
if( parts[1].equals(DataGen.RAND_OPCODE) ) pos = 13;
if( parts[1].equals(DataGen.SEQ_OPCODE) ) pos = 11;
if( pos>0 )
{
StringBuilder sb = new StringBuilder();
for( int i=0; i0 )
sb.append(Lop.OPERAND_DELIMITOR);
if( i==pos )
sb.append(ProgramConverter.saveReplaceFilenameThreadID(parts[i], pattern, replace));
else
sb.append(parts[i]);
}
rndinst2.append( sb.toString() );
}
else
rndinst2.append( dginst );
}
setRandInstructions(rndinst2.toString());
}
}
}
/**
 * Checks whether the given MR job instruction can be merged with this one.
 * All of the following conditions must hold: same job type, same derived
 * input representation, summed mapper memory within the remote map memory
 * budget, summed maximum result indices within the byte range, and both jobs
 * agreeing on being map-only or not.
 *
 * @param that MR job instruction to compare with
 * @return true if all merge conditions hold
 */
public boolean isMergableMRJobInstruction( MRJobInstruction that )
{
    //compatible job type (just in case its called with wrong assumptions)
    final boolean sameJobType = ( jobType == that.jobType );

    //consistent input representation (other forced into common cell representation)
    final boolean blockedThis = MRJobConfiguration.deriveRepresentation(inputInfos);
    final boolean blockedThat = MRJobConfiguration.deriveRepresentation(that.inputInfos);
    final boolean sameRepresentation = ( blockedThis == blockedThat );

    //max memory requirements of mapper instructions
    final boolean memoryFits =
        !( (_mapperMem + that._mapperMem) > OptimizerUtils.getRemoteMemBudgetMap(true) );

    //max possible byte indexes (worst-case: no sharing)
    final int maxIx1 = UtilFunctions.max(_resultIndices);
    final int maxIx2 = UtilFunctions.max(that._resultIndices);
    final boolean indexesFit = !( (maxIx1+maxIx2) > Byte.MAX_VALUE );

    //TODO conceptually this check should not be necessary
    //map only jobs versus full map-reduce jobs
    final boolean sameMapOnly = ( isMapOnly() == that.isMapOnly() );

    //all conditions have been evaluated above; combine them
    return sameJobType && sameRepresentation && memoryFits && indexesFit && sameMapOnly;
}
// NOTE(review): the text of this span is truncated. Everything between a '<' and the
// following '>' appears to have been lost when the source was extracted (see the three
// broken 'for' headers below), so most of the body of mergeMRJobInstruction and the
// header of the following index-translation helper are missing. The code is left
// untouched; restore it from the original source before relying on it.
public void mergeMRJobInstruction( MRJobInstruction that )
{
// log both instructions before merging (debug level only)
if( LOG.isDebugEnabled() ){
LOG.debug("Current instruction:\n"+this.toString());
LOG.debug("Next instruction:\n"+that.toString());
}
//compute offsets (inputs1, inputs2, intermediates1, intermediates2, outputs1, outputs2)
byte maxIxInst1 = UtilFunctions.max(_resultIndices);
byte maxIxInst2 = UtilFunctions.max(that._resultIndices);
byte sharedIx = 0;
//compute input index map (based on distinct filenames)
HashMap inMap = new HashMap();
// NOTE(review): the next three 'for' headers are truncated; presumably they built the
// input map and the two translation maps (transMap1/transMap2) - confirm against the original.
for( int i=0; i transMap1 = new HashMap();
for( int i=0; i transMap2 = new HashMap();
byte nextIX = (byte)inputs.length;
// NOTE(review): from here on the text belongs to a different method that takes an
// instruction string 'inst' and a map 'transMap'; its name and full signature are lost.
for( int i=0; i transMap )
{
//prevent unnecessary parsing and reconstruction
if( inst == null || inst.isEmpty() || transMap.isEmpty() )
return inst;
String[] pinst = inst.split(Lop.INSTRUCTION_DELIMITOR);
StringBuilder instOut = new StringBuilder();
for( String lpinst : pinst ){ //for each instruction
//split instruction into parts
String[] parts = InstructionUtils.getInstructionPartsWithValueType(lpinst);
//replace instruction parts
// NOTE(review): truncated loop header; the definitions of 'lpart' and 'pos' are missing.
for( int i=0; i0 ){
// replace the leading byte index of the part by its mapping in transMap
String index = lpart.substring(0, pos);
String newindex = String.valueOf(transMap.get(Byte.parseByte(index)));
parts[i] = newindex + lpart.substring(pos);
}
}
if( instOut.length()>0 )
instOut.append(Lop.INSTRUCTION_DELIMITOR);
//reconstruct instruction
instOut.append("MR");
for( String lpart : parts ){
instOut.append(Lop.OPERAND_DELIMITOR);
instOut.append(lpart);
}
}
return instOut.toString();
}
/**
 * Concatenates two instruction strings with the instruction delimiter.
 * Null or empty strings are skipped; if both are null or empty, the empty
 * string is returned.
 *
 * @param inst1 first instruction string, may be null or empty
 * @param inst2 second instruction string, may be null or empty
 * @return concatenated instruction string (never null)
 */
private String concatenateInstructions(String inst1, String inst2)
{
    final boolean hasInst1 = (inst1 != null && inst1.length() != 0);
    final boolean hasInst2 = (inst2 != null && inst2.length() != 0);

    if( hasInst1 && hasInst2 ) {
        return inst1 + Lop.INSTRUCTION_DELIMITOR + inst2;
    }
    if( hasInst1 ) {
        return inst1;
    }
    if( hasInst2 ) {
        return inst2;
    }
    return "";
}
/**
 * Submits this MR job via RunMRJobs and writes the returned meta data back
 * into the execution context: for SORT jobs the meta data objects are set
 * as-is, for all other jobs the matrix characteristics of the output
 * variables are updated. Finally the counter of executed MR jobs is
 * incremented.
 *
 * @param ec execution context
 * @throws DMLRuntimeException if the execution mode is singlenode
 */
@Override
public void processInstruction(ExecutionContext ec)
    throws DMLRuntimeException
{
    if ( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE)
        throw new DMLRuntimeException("MapReduce jobs cannot be executed when execution mode = singlenode");

    //execute MR job
    JobReturn jb = RunMRJobs.prepareAndSubmitJob(this, ec);

    //specific post processing
    final boolean sortJob = ( getJobType() == JobType.SORT );
    final int numMetaData = jb.getMetaData().length;

    /* Populate returned stats into symbol table of matrices */
    for ( int index = 0; index < numMetaData; index++ ) {
        String varname = getOutputVars()[index];
        if ( sortJob ) {
            ec.setMetaData(varname, jb.getMetaData()[index]);
        }
        else {
            MatrixCharacteristics mc = ((MatrixDimensionsMetaData)jb.getMetaData(index)).getMatrixCharacteristics();
            ec.getVariable(varname).updateMatrixCharacteristics(mc);
        }
    }

    Statistics.incrementNoOfExecutedMRJobs();
}
}