org.apache.sysml.runtime.instructions.cp.VariableCPInstruction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions.cp;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.UnaryCP;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.io.WriterMatrixMarket;
import org.apache.sysml.runtime.io.WriterTextCSV;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.MetaData;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.runtime.util.UtilFunctions;
public class VariableCPInstruction extends CPInstruction
{
/*
* Supported Operations
* --------------------
* 1) assignvar x:type y:type
* assign value of y to x (both types should match)
* 2) rmvar x
* remove variable x
* 3) cpvar x y
* copy x to y (same as assignvar followed by rmvar, types are not required)
* 4) rmfilevar x:type b:type
* remove variable x, and if b=true then the file object associated with x (b's type should be boolean)
* 5) assignvarwithfile FN x
* assign x with the first value from the file whose name=FN
* 6) attachfiletovar FP x
* allocate a new file object with name FP, and associate it with variable x
* 7) createvar x FP [dimensions] [formatinfo]
* create a matrix variable x backed by file FP, with optional dimensions and format info
*/
/**
 * Internal operation codes of variable instructions. Each constant is
 * selected from an opcode string by getVariableOperationCode; the opcode
 * string is noted next to the constant.
 */
private enum VariableOperationCode
{
CreateVariable, // "createvar"
AssignVariable, // "assignvar"
CopyVariable, // "cpvar"
MoveVariable, // "mvvar"
RemoveVariable, // "rmvar"
RemoveVariableAndFile, // "rmfilevar"
CastAsScalarVariable, // UnaryCP.CAST_AS_SCALAR_OPCODE
CastAsMatrixVariable, // UnaryCP.CAST_AS_MATRIX_OPCODE
CastAsDoubleVariable, // UnaryCP.CAST_AS_DOUBLE_OPCODE
CastAsIntegerVariable, // UnaryCP.CAST_AS_INT_OPCODE
CastAsBooleanVariable, // UnaryCP.CAST_AS_BOOLEAN_OPCODE
Write, // "write"
Read, // "read"
SetFileName, // "setfilename"
}
private static IDSequence _uniqueVarID;
private static final int CREATEVAR_FILE_NAME_VAR_POS=3;
private VariableOperationCode opcode;
private CPOperand input1;
private CPOperand input2;
private CPOperand input3;
private CPOperand output;
private MetaData metadata;
// CSV related members (used only in createvar instructions)
private FileFormatProperties formatProperties;
static {
_uniqueVarID = new IDSequence(true);
}
/**
 * Resolves an opcode string to its operation code. The comparison ignores case.
 *
 * @param str opcode string taken from the instruction
 * @return the operation code registered for the opcode
 * @throws DMLUnsupportedOperationException if the opcode is not a variable operation
 */
private static VariableOperationCode getVariableOperationCode ( String str ) throws DMLUnsupportedOperationException {
    // variable life cycle
    if ( str.equalsIgnoreCase("createvar") ) {
        return VariableOperationCode.CreateVariable;
    }
    if ( str.equalsIgnoreCase("assignvar") ) {
        return VariableOperationCode.AssignVariable;
    }
    if ( str.equalsIgnoreCase("cpvar") ) {
        return VariableOperationCode.CopyVariable;
    }
    if ( str.equalsIgnoreCase("mvvar") ) {
        return VariableOperationCode.MoveVariable;
    }
    if ( str.equalsIgnoreCase("rmvar") ) {
        return VariableOperationCode.RemoveVariable;
    }
    if ( str.equalsIgnoreCase("rmfilevar") ) {
        return VariableOperationCode.RemoveVariableAndFile;
    }

    // casts
    if ( str.equalsIgnoreCase(UnaryCP.CAST_AS_SCALAR_OPCODE) ) {
        return VariableOperationCode.CastAsScalarVariable;
    }
    if ( str.equalsIgnoreCase(UnaryCP.CAST_AS_MATRIX_OPCODE) ) {
        return VariableOperationCode.CastAsMatrixVariable;
    }
    if ( str.equalsIgnoreCase(UnaryCP.CAST_AS_DOUBLE_OPCODE) ) {
        return VariableOperationCode.CastAsDoubleVariable;
    }
    if ( str.equalsIgnoreCase(UnaryCP.CAST_AS_INT_OPCODE) ) {
        return VariableOperationCode.CastAsIntegerVariable;
    }
    if ( str.equalsIgnoreCase(UnaryCP.CAST_AS_BOOLEAN_OPCODE) ) {
        return VariableOperationCode.CastAsBooleanVariable;
    }

    // file access
    if ( str.equalsIgnoreCase("write") ) {
        return VariableOperationCode.Write;
    }
    if ( str.equalsIgnoreCase("read") ) {
        return VariableOperationCode.Read;
    }
    if ( str.equalsIgnoreCase("setfilename") ) {
        return VariableOperationCode.SetFileName;
    }

    throw new DMLUnsupportedOperationException("Invalid function: " + str);
}
/**
 * Checks if this instruction is a remove instruction (rmvar or rmfilevar)
 * for the given variable. The variable name is compared ignoring case.
 *
 * @param varName name of the variable to test
 * @return true if this instruction removes varName
 */
public boolean isRemoveVariable(String varName) {
    boolean removes = ( opcode == VariableOperationCode.RemoveVariable
        || opcode == VariableOperationCode.RemoveVariableAndFile );
    // the operand is only inspected for remove instructions
    return removes && input1.getName().equalsIgnoreCase(varName);
}
/**
 * Checks if this instruction is a remove instruction (rmvar or rmfilevar).
 *
 * @return true for RemoveVariable and RemoveVariableAndFile operations
 */
public boolean isRemoveVariable() {
    return opcode == VariableOperationCode.RemoveVariable
        || opcode == VariableOperationCode.RemoveVariableAndFile;
}
/**
 * Creates a variable instruction with up to three inputs and one output.
 * No file format properties are attached.
 *
 * @param op operation code
 * @param in1 first input operand (may be null)
 * @param in2 second input operand (may be null)
 * @param in3 third input operand (may be null)
 * @param out output operand (may be null)
 * @param _arity operand count; not used by this constructor
 * @param sopcode opcode string, passed to the super constructor
 * @param istr instruction string, passed to the super constructor
 */
public VariableCPInstruction (VariableOperationCode op, CPOperand in1, CPOperand in2, CPOperand in3, CPOperand out, int _arity, String sopcode, String istr )
{
    super(sopcode, istr);
    this._cptype = CPINSTRUCTION_TYPE.Variable;
    this.opcode = op;
    this.input1 = in1;
    this.input2 = in2;
    this.input3 = in3;
    this.output = out;
    this.formatProperties = null;
}
/**
 * Constructor used only for CreateVariable: carries the meta data of the
 * variable to create, has no output operand and no file format properties.
 *
 * @param op operation code
 * @param in1 first input operand
 * @param in2 second input operand
 * @param in3 third input operand
 * @param md meta data of the variable to create
 * @param _arity operand count; not used
 * @param sopcode opcode string
 * @param istr instruction string
 */
public VariableCPInstruction (VariableOperationCode op, CPOperand in1, CPOperand in2, CPOperand in3, MetaData md, int _arity, String sopcode, String istr)
{
    // same as the constructor with format properties, given null properties
    this(op, in1, in2, in3, md, _arity, (FileFormatProperties)null, sopcode, istr);
}
/**
 * Constructor used only for CreateVariable: carries the meta data and the
 * file format properties of the variable to create; there is no output operand.
 *
 * @param op operation code
 * @param in1 first input operand
 * @param in2 second input operand
 * @param in3 third input operand
 * @param md meta data of the variable to create
 * @param _arity operand count; not used
 * @param formatProperties file format properties (may be null)
 * @param sopcode opcode string
 * @param istr instruction string
 */
public VariableCPInstruction (VariableOperationCode op, CPOperand in1, CPOperand in2, CPOperand in3, MetaData md, int _arity, FileFormatProperties formatProperties, String sopcode, String istr)
{
    this(op, in1, in2, in3, (CPOperand)null, _arity, sopcode, istr);
    this.metadata = md;
    this.formatProperties = formatProperties;
}
/**
 * @return the file format properties attached to this instruction, or null
 */
public FileFormatProperties getFormatProperties() {
    return this.formatProperties;
}

/**
 * @param prop file format properties to attach to this instruction
 */
public void setFormatProperties(FileFormatProperties prop) {
    this.formatProperties = prop;
}

/**
 * @return the first input operand
 */
public CPOperand getInput1() {
    return this.input1;
}

/**
 * @return the second input operand
 */
public CPOperand getInput2() {
    return this.input2;
}

/**
 * @return the third input operand
 */
public CPOperand getInput3() {
    return this.input3;
}
/**
 * @return the name of the output operand, or null if this instruction has no output operand
 */
public String getOutputVariableName(){
    return ( output != null ) ? output.getName() : null;
}
/**
 * Returns the number of operands expected for an operation:
 * 1 for RemoveVariable, 3 for Write and SetFileName, 2 otherwise.
 *
 * @param op operation code
 * @return expected number of operands
 */
private static int getArity(VariableOperationCode op) {
    int arity;
    switch(op) {
        case RemoveVariable:
            arity = 1;
            break;
        case Write:
        case SetFileName:
            arity = 3;
            break;
        default:
            arity = 2;
            break;
    }
    return arity;
}
/**
 * Parses the string representation of a variable instruction.
 *
 * The operand count is validated first: createvar, mvvar and write accept a
 * variable number of operands and are checked individually, all other
 * operations are checked against getArity. createvar and write are built by
 * dedicated helpers because they carry meta data / file format properties.
 *
 * @param str instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the number or type of operands is invalid
 * @throws DMLUnsupportedOperationException if the opcode is not a variable operation
 */
public static VariableCPInstruction parseInstruction ( String str )
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    String[] parts = InstructionUtils.getInstructionPartsWithValueType ( str );
    String opcode = parts[0];
    VariableOperationCode voc = getVariableOperationCode(opcode);
    int _arity = -1;

    if ( voc == VariableOperationCode.CreateVariable ) {
        // the exact count depends on the format and is checked in parseCreateVariableInstruction
        if ( parts.length < 5 )
            throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
    }
    else if ( voc == VariableOperationCode.MoveVariable ) {
        // mvvar tempA A; or mvvar mvar5 "data/out.mtx" "binary"
        if ( parts.length != 3 && parts.length != 4 )
            throw new DMLRuntimeException("Invalid number of operands in mvvar instruction: " + str);
    }
    else if ( voc == VariableOperationCode.Write ) {
        // All write instructions have 3 parameters, except in case of delimited/csv file.
        // Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse)
        if ( parts.length != 4 && parts.length != 7 )
            throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
    }
    else {
        _arity = getArity(voc);
        InstructionUtils.checkNumFields ( parts, _arity ); // no output
    }

    CPOperand in1=null, in2=null, in3=null, out=null;

    switch (voc) {
        case CreateVariable:
            return parseCreateVariableInstruction(parts, opcode, str);

        case AssignVariable:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            break;

        case CopyVariable:
            // Value types are not given here
            in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.UNKNOWN);
            in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
            break;

        case MoveVariable:
            in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.UNKNOWN);
            in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN);
            // the optional third operand is the target format
            if ( parts.length > 3 )
                in3 = new CPOperand(parts[3], ValueType.UNKNOWN, DataType.UNKNOWN);
            break;

        case RemoveVariable:
            in1 = new CPOperand(parts[1], ValueType.UNKNOWN, DataType.SCALAR);
            break;

        case RemoveVariableAndFile:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            // second argument must be a boolean
            if ( in2.getValueType() != ValueType.BOOLEAN )
                throw new DMLRuntimeException("Unexpected value type for second argument in: " + str);
            break;

        case CastAsScalarVariable:
        case CastAsMatrixVariable:
        case CastAsDoubleVariable:
        case CastAsIntegerVariable:
        case CastAsBooleanVariable:
            in1 = new CPOperand(parts[1]); // input variable
            out = new CPOperand(parts[2]); // output variable name
            break;

        case Write:
            return parseWriteInstruction(voc, parts, _arity, opcode, str);

        case Read:
            in1 = new CPOperand(parts[1]);
            in2 = new CPOperand(parts[2]);
            break;

        case SetFileName:
            in1 = new CPOperand(parts[1]); // variable name
            in2 = new CPOperand(parts[2], ValueType.UNKNOWN, DataType.UNKNOWN); // file name
            in3 = new CPOperand(parts[3], ValueType.UNKNOWN, DataType.UNKNOWN); // option: remote or local
            break;
    }

    return new VariableCPInstruction(voc, in1, in2, in3, out, _arity, opcode, str);
}

/**
 * Builds a createvar instruction from its instruction parts.
 *
 * Expected parts: opcode, variable name, file name, file name override flag,
 * format, optionally followed by the matrix characteristics (rows, cols,
 * rows per block, cols per block, non-zeros). For the csv format the
 * characteristics are followed by the csv properties:
 * 13 parts (createvar corresponding to WRITE): hasHeader, delim, sparse;
 * 14 or 15 parts (createvar corresponding to READ): hasHeader, delim, fill,
 * fillValue and optionally naStrings.
 */
private static VariableCPInstruction parseCreateVariableInstruction ( String[] parts, String opcode, String str )
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    // variable name (only supports Matrices, and only w/ double value type)
    CPOperand in1 = new CPOperand(parts[1], ValueType.DOUBLE, DataType.MATRIX);
    // file name
    CPOperand in2 = new CPOperand(parts[2], ValueType.STRING, DataType.SCALAR);
    // file name override flag
    CPOperand in3 = new CPOperand(parts[3], ValueType.BOOLEAN, DataType.SCALAR);

    // format
    String fmt = parts[4];
    boolean isCSV = fmt.equalsIgnoreCase("csv");
    if ( isCSV ) {
        if ( parts.length < 13 || parts.length > 15 )
            throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
    }
    else {
        if ( parts.length != 5 && parts.length != 10 )
            throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str);
    }

    OutputInfo oi = OutputInfo.stringToOutputInfo(fmt);
    InputInfo ii = OutputInfo.getMatchingInputInfo(oi);

    // matrix characteristics are only given if there are at least 10 parts
    MatrixCharacteristics mc = new MatrixCharacteristics();
    if ( parts.length >= 10 ) {
        mc.setDimension(Long.parseLong(parts[5]), Long.parseLong(parts[6]));
        mc.setBlockSize(Integer.parseInt(parts[7]), Integer.parseInt(parts[8]));
        mc.setNonZeros(Long.parseLong(parts[9]));
    }
    MatrixFormatMetaData iimd = new MatrixFormatMetaData(mc, oi, ii);

    if ( !isCSV ) {
        return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, parts.length, opcode, str);
    }

    boolean hasHeader = Boolean.parseBoolean(parts[10]);
    String delim = parts[11];
    FileFormatProperties fmtProperties = null;
    if ( parts.length == 13 ) {
        boolean sparse = Boolean.parseBoolean(parts[12]);
        fmtProperties = new CSVFileFormatProperties(hasHeader, delim, sparse);
    }
    else {
        boolean fill = Boolean.parseBoolean(parts[12]);
        double fillValue = UtilFunctions.parseToDouble(parts[13]);
        String naStrings = null;
        if ( parts.length == 15 )
            naStrings = parts[14];
        fmtProperties = new CSVFileFormatProperties(hasHeader, delim, fill, fillValue, naStrings);
    }
    return new VariableCPInstruction(VariableOperationCode.CreateVariable, in1, in2, in3, iimd, parts.length, fmtProperties, opcode, str);
}

/**
 * Builds a write instruction from its instruction parts.
 *
 * Expected parts: opcode, input variable, file name, format; for the csv
 * format additionally hasHeader, delimiter and sparse.
 */
private static VariableCPInstruction parseWriteInstruction ( VariableOperationCode voc, String[] parts, int _arity, String opcode, String str )
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    CPOperand in1 = new CPOperand(parts[1]);
    CPOperand in2 = new CPOperand(parts[2]);
    CPOperand in3 = new CPOperand(parts[3]);
    VariableCPInstruction inst = new VariableCPInstruction(voc, in1, in2, in3, (CPOperand)null, _arity, opcode, str);

    if ( in3.getName().equalsIgnoreCase("csv") ) {
        // fail with a descriptive error instead of an ArrayIndexOutOfBoundsException
        if ( parts.length != 7 )
            throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str);
        boolean hasHeader = Boolean.parseBoolean(parts[4]);
        String delim = parts[5];
        boolean sparse = Boolean.parseBoolean(parts[6]);
        inst.setFormatProperties(new CSVFileFormatProperties(hasHeader, delim, sparse));
    }
    return inst;
}
/**
 * Executes this instruction on the given execution context, dispatching on
 * the operation code.
 *
 * @param ec execution context holding the symbol table
 * @throws DMLRuntimeException if a referenced variable is missing, an operand
 *         has an unexpected type, or file access fails
 * @throws DMLUnsupportedOperationException declared for compatibility with the overridden method
 */
@Override
public void processInstruction(ExecutionContext ec)
    throws DMLRuntimeException, DMLUnsupportedOperationException
{
    switch ( opcode )
    {
        case CreateVariable:
            if ( input1.getDataType() == DataType.MATRIX ) {
                //create new variable for symbol table and cache
                //(existing objects gets cleared through rmvar instructions)
                String fname = input2.getName();

                // check if unique filename needs to be generated
                boolean overrideFileName = ((BooleanObject) ec.getScalarInput(input3.getName(), input3.getValueType(), true)).getBooleanValue();
                if ( overrideFileName ) {
                    fname = fname + "_" + _uniqueVarID.getNextID();
                }

                MatrixObject mobj = new MatrixObject(input1.getValueType(), fname );
                mobj.setVarName(input1.getName());
                mobj.setDataType(DataType.MATRIX);
                //clone meta data because it is updated on copy-on-write, otherwise there
                //is potential for hidden side effects between variables.
                mobj.setMetaData((MetaData)metadata.clone());
                mobj.setFileFormatProperties(formatProperties);
                ec.setVariable(input1.getName(), mobj);
            }
            else if ( input1.getDataType() == DataType.SCALAR ){
                ScalarObject sobj = null;
                ec.setScalarOutput(input1.getName(), sobj);
            }
            else {
                throw new DMLRuntimeException("Unexpected data type: " + input1.getDataType());
            }
            break;

        case AssignVariable:
            // assign value of variable to the other
            ec.setScalarOutput(input2.getName(), ec.getScalarInput(input1.getName(), input1.getValueType(), input1.isLiteral()));
            break;

        case CopyVariable:
            processCopyInstruction(ec);
            break;

        case MoveVariable:
            processMoveInstruction(ec);
            break;

        case RemoveVariable:
            processRemoveVariableInstruction(ec, input1.getName());
            break;

        case RemoveVariableAndFile: {
            // Remove the variable from the symbol table, and possibly delete the data on disk.
            boolean del = ( (BooleanObject) ec.getScalarInput(input2.getName(), input2.getValueType(), true) ).getBooleanValue();
            MatrixObject m = (MatrixObject) ec.removeVariable(input1.getName());
            // report a missing variable explicitly (as done for rmvar) instead of a NullPointerException
            if ( m == null )
                throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + input1.getName() + ", while processing rmfilevar instruction.");

            if ( !del ) {
                // HDFS file should be retained after clearData(),
                // therefore data must be exported if dirty flag is set
                if ( m.isDirty() )
                    m.exportData();
            }
            else {
                cleanDataOnHDFS( m );
            }

            // check if in-memory object can be cleaned up
            if ( !ec.getVariables().hasReferences(m) ) {
                // no other variable in the symbol table points to the same Data object as that of input1.getName()
                //remove matrix object from cache
                m.clearData();
            }
            break;
        }

        case CastAsScalarVariable: {
            MatrixBlock mBlock = ec.getMatrixInput(input1.getName());
            if( mBlock.getNumRows()!=1 || mBlock.getNumColumns()!=1 ) {
                String dims = mBlock.getNumRows()+" x "+mBlock.getNumColumns();
                // release the acquired input before failing so it is not left acquired
                ec.releaseMatrixInput(input1.getName());
                throw new DMLRuntimeException("Dimension mismatch - unable to cast matrix '"+input1.getName()+"' of dimension ("+dims+") to scalar.");
            }
            double value = mBlock.getValue(0,0);
            ec.releaseMatrixInput(input1.getName());
            ec.setScalarOutput(output.getName(), new DoubleObject(value));
            break;
        }

        case CastAsMatrixVariable: {
            // wrap the scalar value into a 1x1 matrix block
            ScalarObject scalarInput = ec.getScalarInput(input1.getName(), input1.getValueType(), input1.isLiteral());
            MatrixBlock out = new MatrixBlock(1,1,false);
            out.quickSetValue(0, 0, scalarInput.getDoubleValue());
            ec.setMatrixOutput(output.getName(), out);
            break;
        }

        case CastAsDoubleVariable: {
            ScalarObject scalarInput = ec.getScalarInput(input1.getName(), input1.getValueType(), input1.isLiteral());
            ec.setScalarOutput(output.getName(), new DoubleObject(scalarInput.getDoubleValue()));
            break;
        }

        case CastAsIntegerVariable: {
            ScalarObject scalarInput = ec.getScalarInput(input1.getName(), input1.getValueType(), input1.isLiteral());
            ec.setScalarOutput(output.getName(), new IntObject(scalarInput.getLongValue()));
            break;
        }

        case CastAsBooleanVariable: {
            ScalarObject scalarInput = ec.getScalarInput(input1.getName(), input1.getValueType(), input1.isLiteral());
            ec.setScalarOutput(output.getName(), new BooleanObject(scalarInput.getBooleanValue()));
            break;
        }

        case Read: {
            // read a scalar of the declared value type from the file named by input2
            ScalarObject res = null;
            try {
                switch(input1.getValueType()) {
                    case DOUBLE:
                        double d = MapReduceTool.readDoubleFromHDFSFile(input2.getName());
                        res = (ScalarObject) new DoubleObject(d);
                        break;
                    case INT:
                        long i = MapReduceTool.readIntegerFromHDFSFile(input2.getName());
                        res = (ScalarObject) new IntObject(i);
                        break;
                    case BOOLEAN:
                        boolean b = MapReduceTool.readBooleanFromHDFSFile(input2.getName());
                        res = (ScalarObject) new BooleanObject(b);
                        break;
                    case STRING:
                        String s = MapReduceTool.readStringFromHDFSFile(input2.getName());
                        res = (ScalarObject) new StringObject(s);
                        break;
                    default:
                        throw new DMLRuntimeException("Invalid value type (" + input1.getValueType() + ") while processing readScalar instruction.");
                }
            } catch ( IOException e ) {
                throw new DMLRuntimeException(e);
            }
            ec.setScalarOutput(input1.getName(), res);
            break;
        }

        case Write:
            processWriteInstruction(ec);
            break;

        case SetFileName: {
            Data data = ec.getVariable(input1.getName());
            // report a missing variable explicitly instead of a NullPointerException
            if ( data == null )
                throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + input1.getName() + ", while processing instruction " + instString);
            if ( data.getDataType() == DataType.MATRIX ) {
                if ( input3.getName().equalsIgnoreCase("remote") ) {
                    ((MatrixObject)data).setFileName(input2.getName());
                }
                else {
                    throw new DMLRuntimeException("Invalid location (" + input3.getName() + ") in SetFileName instruction: " + instString);
                }
            } else {
                // report the type of the actual data object, which is what the check above rejected
                throw new DMLRuntimeException("Invalid data type (" + data.getDataType() + ") in SetFileName instruction: " + instString);
            }
            break;
        }

        default:
            throw new DMLRuntimeException("Unknown opcode: " + opcode );
    }
}
/**
 * Handler for mvvar instructions.
 *
 * Two forms are supported:
 * {@code mvvar srcvar destvar} rebinds the data object of srcvar to destvar
 * and removes srcvar; {@code mvvar srcvar destFile format} moves the file
 * pointed to by srcvar to destFile (applicable only when format=binaryblock).
 *
 * @param ec execution context
 * @throws DMLRuntimeException if srcvar is unknown, the format is not
 *         supported, or the file move fails
 */
private void processMoveInstruction(ExecutionContext ec) throws DMLRuntimeException {
    // both forms require the source variable to exist
    Data source = ec.getVariable(input1.getName());
    if ( source == null )
        throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + input1.getName() + ", while processing instruction " +this.toString());

    if ( input3 != null ) {
        // file form: mvvar srcvar destFile format
        MatrixObject mo = (MatrixObject) source;
        if ( !input3.getName().equalsIgnoreCase("binaryblock") )
            throw new DMLRuntimeException("Unexpected formats while copying: from blocks ["
                + mo.getNumRowsPerBlock() + "," + mo.getNumColumnsPerBlock() + "] to " + input3.getName());

        boolean moved = mo.moveData(input2.getName(), input3.getName());
        if ( !moved ) {
            throw new DMLRuntimeException("Failed to move var " + input1.getName() + " to file " + input2.getName() + ".");
        }
        return;
    }

    // variable form: mvvar tempA A
    // remove existing variable bound to target name
    Data previous = ec.removeVariable(input2.getName());

    //cleanup matrix data on fs/hdfs (if necessary)
    if ( previous instanceof MatrixObject ) {
        ec.cleanupMatrixObject((MatrixObject) previous);
    }

    // do the actual move
    ec.setVariable(input2.getName(), source);
    ec.removeVariable(input1.getName());
}
/**
 * Handler for cpvar instructions ({@code cpvar srcvar destvar}).
 *
 * Binds the data object of the source variable to the target name as well.
 * A variable previously bound to the target name is removed and, if it is a
 * matrix object, handed to the execution context for cleanup.
 *
 * @param ec execution context
 * @throws DMLRuntimeException if the source variable is unknown
 */
private void processCopyInstruction(ExecutionContext ec) throws DMLRuntimeException {
    // get source variable
    Data source = ec.getVariable(input1.getName());
    if ( source == null )
        throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + input1.getName() + ", while processing instruction " +this.toString());

    // remove existing variable bound to target name
    Data previous = ec.removeVariable(input2.getName());

    //cleanup matrix data on fs/hdfs (if necessary)
    if ( previous instanceof MatrixObject ) {
        ec.cleanupMatrixObject((MatrixObject) previous);
    }

    // do the actual copy!
    ec.setVariable(input2.getName(), source);
}
/**
 * Handler for write instructions.
 *
 * Scalars are written through writeScalarToHDFS. For matrices, the
 * non-native formats MM and CSV are handled through specialized helper
 * functions; for any other format the matrix object is exported in the
 * format named by the third operand.
 *
 * @param ec execution context
 * @throws DMLRuntimeException if the write fails
 */
private void processWriteInstruction(ExecutionContext ec)
    throws DMLRuntimeException
{
    //get filename (literal or variable expression)
    String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue();

    if ( input1.getDataType() == DataType.SCALAR ) {
        writeScalarToHDFS(ec, fname);
        return;
    }

    String format = input3.getName();
    if ( format.equalsIgnoreCase("matrixmarket") ) {
        writeMMFile(ec, fname);
    }
    else if ( format.equalsIgnoreCase("csv") ) {
        writeCSVFile(ec, fname);
    }
    else {
        // Default behavior
        MatrixObject matrix = (MatrixObject)ec.getVariable(input1.getName());
        matrix.exportData(fname, format);
    }
}
/**
 * Removes a variable from the symbol table. Externalized as a static
 * function so that other cleanup procedures can use the same code path as
 * the actual rmvar instruction.
 *
 * @param ec execution context
 * @param varname name of the variable to remove
 * @throws DMLRuntimeException if no data object is bound to varname
 */
public static void processRemoveVariableInstruction( ExecutionContext ec, String varname )
    throws DMLRuntimeException
{
    // remove variable from symbol table
    Data removed = ec.removeVariable(varname);
    if ( removed == null )
        throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + varname + ", while processing rmvar instruction.");

    //cleanup matrix data on fs/hdfs (if necessary)
    if ( removed instanceof MatrixObject ) {
        ec.cleanupMatrixObject( (MatrixObject) removed );
    }
}
/**
 * Helper function to write CSV files to HDFS.
 *
 * A dirty matrix object is exported directly. Otherwise the existing file
 * is reused: a file already in CSV format only gets the header added,
 * binary block and text cell files are exported as csv, and any other
 * format is rejected. In the non-dirty case a meta data file is written too.
 *
 * @param ec execution context
 * @param fname target file name
 * @throws DMLRuntimeException if the stored format cannot be exported to CSV or an I/O error occurs
 */
private void writeCSVFile(ExecutionContext ec, String fname)
    throws DMLRuntimeException
{
    MatrixObject matrix = (MatrixObject)ec.getVariable(input1.getName());
    String outFmt = "csv";

    if ( matrix.isDirty() ) {
        // there exist data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        matrix.exportData(fname, outFmt, formatProperties);
        return;
    }

    try {
        MatrixFormatMetaData meta = (MatrixFormatMetaData)matrix.getMetaData();
        OutputInfo storedFormat = meta.getOutputInfo();
        MatrixCharacteristics mc = meta.getMatrixCharacteristics();

        if ( storedFormat == OutputInfo.CSVOutputInfo ) {
            WriterTextCSV writer = new WriterTextCSV((CSVFileFormatProperties)formatProperties);
            writer.addHeaderToCSV(matrix.getFileName(), fname, mc.getRows(), mc.getCols());
        }
        else if ( storedFormat == OutputInfo.BinaryBlockOutputInfo || storedFormat == OutputInfo.TextCellOutputInfo ) {
            matrix.exportData(fname, outFmt, formatProperties);
        }
        else {
            throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(storedFormat) + "): can not export into CSV format.");
        }

        // Write Metadata file
        MapReduceTool.writeMetaDataFile (fname + ".mtd", matrix.getValueType(), mc, OutputInfo.CSVOutputInfo, formatProperties);
    } catch (IOException e) {
        throw new DMLRuntimeException(e);
    }
}
/**
 * Helper function to write MM (matrix market) files to HDFS.
 *
 * A dirty matrix object is exported directly. Otherwise a text cell file is
 * merged into a matrix market file, a binary block file is exported, and
 * any other stored format is rejected.
 *
 * @param ec execution context
 * @param fname target file name
 * @throws DMLRuntimeException if the stored format cannot be exported or an I/O error occurs
 */
private void writeMMFile(ExecutionContext ec, String fname)
    throws DMLRuntimeException
{
    MatrixObject matrix = (MatrixObject)ec.getVariable(input1.getName());
    String outFmt = "matrixmarket";

    if ( matrix.isDirty() ) {
        // there exist data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        matrix.exportData(fname, outFmt);
        return;
    }

    OutputInfo storedFormat = ((MatrixFormatMetaData)matrix.getMetaData()).getOutputInfo();
    MatrixCharacteristics mc = matrix.getMatrixCharacteristics();

    if ( storedFormat == OutputInfo.TextCellOutputInfo ) {
        try {
            WriterMatrixMarket writer = new WriterMatrixMarket();
            writer.mergeTextcellToMatrixMarket(matrix.getFileName(), fname, mc.getRows(), mc.getCols(), mc.getNonZeros());
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
    else if ( storedFormat == OutputInfo.BinaryBlockOutputInfo ) {
        matrix.exportData(fname, outFmt);
    }
    else {
        throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(storedFormat) + "): can not export into MatrixMarket format.");
    }
}
/**
 * Helper function to write a scalar to HDFS based on its value type,
 * followed by the scalar meta data file (fname + ".mtd").
 *
 * @param ec execution context
 * @param fname target file name
 * @throws DMLRuntimeException if the value type is not supported or an I/O error occurs
 */
private void writeScalarToHDFS(ExecutionContext ec, String fname)
    throws DMLRuntimeException
{
    ScalarObject value = ec.getScalarInput(input1.getName(), input1.getValueType(), input1.isLiteral());
    ValueType vt = input1.getValueType();

    try {
        switch ( vt ) {
            case DOUBLE:
                MapReduceTool.writeDoubleToHDFS(value.getDoubleValue(), fname);
                break;
            case INT:
                MapReduceTool.writeIntToHDFS(value.getLongValue(), fname);
                break;
            case BOOLEAN:
                MapReduceTool.writeBooleanToHDFS(value.getBooleanValue(), fname);
                break;
            case STRING:
                MapReduceTool.writeStringToHDFS(value.getStringValue(), fname);
                break;
            default:
                throw new DMLRuntimeException("Invalid value type (" + input1.getValueType() + ") in writeScalar instruction: " + instString);
        }

        // write out .mtd file
        MapReduceTool.writeScalarMetaDataFile(fname +".mtd", input1.getValueType());
    } catch ( IOException e ) {
        throw new DMLRuntimeException(e);
    }
}
/**
 * Deletes the file of a matrix object and its meta data file (".mtd")
 * from HDFS, if they exist. Does nothing if the object has no file name.
 *
 * @param mo matrix object whose files are deleted
 * @throws DMLRuntimeException if an I/O error occurs
 */
private static void cleanDataOnHDFS(MatrixObject mo)
    throws DMLRuntimeException
{
    try {
        String path = mo.getFileName();
        if ( path == null ) {
            return;
        }
        MapReduceTool.deleteFileIfExistOnHDFS(path);
        MapReduceTool.deleteFileIfExistOnHDFS(path + ".mtd");
    } catch (IOException e) {
        throw new DMLRuntimeException(e);
    }
}
/**
 * Builds and parses an rmvar instruction for the given variable.
 *
 * @param varName name of the variable to remove
 * @return the parsed instruction
 */
public static Instruction prepareRemoveInstruction(String varName) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = "CP" + Lop.OPERAND_DELIMITOR
        + "rmvar" + Lop.OPERAND_DELIMITOR
        + varName;
    return parseInstruction(str);
}
/**
 * Builds and parses a cpvar instruction.
 *
 * @param srcVar name of the source variable
 * @param destVar name of the target variable
 * @return the parsed instruction
 */
public static Instruction prepareCopyInstruction(String srcVar, String destVar) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = "CP" + Lop.OPERAND_DELIMITOR
        + "cpvar" + Lop.OPERAND_DELIMITOR
        + srcVar + Lop.OPERAND_DELIMITOR
        + destVar;
    return parseInstruction(str);
}
/**
 * Builds and parses an mvvar instruction that moves the file of a variable.
 *
 * @param srcVar name of the source variable
 * @param destFileName name of the target file
 * @param format format operand of the instruction
 * @return the parsed instruction
 */
public static Instruction prepareMoveInstruction(String srcVar, String destFileName, String format) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = "CP" + Lop.OPERAND_DELIMITOR
        + "mvvar" + Lop.OPERAND_DELIMITOR
        + srcVar + Lop.OPERAND_DELIMITOR
        + destFileName + Lop.OPERAND_DELIMITOR
        + format;
    return parseInstruction(str);
}
/**
 * Builds and parses an mvvar instruction that moves one variable to another,
 * for example {@code mvvar tempA A}
 * (instead of two instructions -- cpvar tempA A; rmvar tempA).
 *
 * @param srcVar name of the source variable
 * @param destVar name of the target variable
 * @return the parsed instruction
 */
public static Instruction prepareMoveInstruction(String srcVar, String destVar) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = "CP" + Lop.OPERAND_DELIMITOR
        + "mvvar" + Lop.OPERAND_DELIMITOR
        + srcVar + Lop.OPERAND_DELIMITOR
        + destVar;
    return parseInstruction(str);
}
/**
 * Builds the common prefix of a createvar instruction string:
 * CP, createvar, variable name, file name, file name override flag and format.
 *
 * Constant CREATEVAR_FILE_NAME_VAR_POS is used to find the position of the
 * file name within a string generated through this function. If the
 * position of the file name within this string changes, the constant
 * CREATEVAR_FILE_NAME_VAR_POS has to be updated as well.
 */
private static String getBasicCreateVarString(String varName, String fileName, boolean fNameOverride, String format) {
    return "CP" + Lop.OPERAND_DELIMITOR
        + "createvar" + Lop.OPERAND_DELIMITOR
        + varName + Lop.OPERAND_DELIMITOR
        + fileName + Lop.OPERAND_DELIMITOR
        + fNameOverride + Lop.OPERAND_DELIMITOR
        + format;
}
/**
 * Builds and parses a createvar instruction without matrix characteristics.
 *
 * @param varName name of the variable to create
 * @param fileName file name of the variable
 * @param fNameOverride file name override flag
 * @param format format of the variable
 * @return the parsed instruction
 */
public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, String format) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = getBasicCreateVarString(varName, fileName, fNameOverride, format);
    return parseInstruction(str);
}
/**
 * Builds and parses a createvar instruction that includes the matrix
 * characteristics (rows, columns, rows per block, columns per block, non-zeros).
 *
 * @param varName name of the variable to create
 * @param fileName file name of the variable
 * @param fNameOverride file name override flag
 * @param format format of the variable
 * @param mc matrix characteristics appended to the instruction
 * @return the parsed instruction
 */
public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, String format, MatrixCharacteristics mc) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = getBasicCreateVarString(varName, fileName, fNameOverride, format)
        + Lop.OPERAND_DELIMITOR + mc.getRows()
        + Lop.OPERAND_DELIMITOR + mc.getCols()
        + Lop.OPERAND_DELIMITOR + mc.getRowsPerBlock()
        + Lop.OPERAND_DELIMITOR + mc.getColsPerBlock()
        + Lop.OPERAND_DELIMITOR + mc.getNonZeros();
    return parseInstruction(str);
}
/**
 * Builds and parses a createvar instruction that includes the matrix
 * characteristics and the csv properties hasHeader, delim and sparse.
 *
 * @param varName name of the variable to create
 * @param fileName file name of the variable
 * @param fNameOverride file name override flag
 * @param format format of the variable
 * @param mc matrix characteristics appended to the instruction
 * @param hasHeader csv header flag
 * @param delim csv delimiter
 * @param sparse csv sparse flag
 * @return the parsed instruction
 */
public static Instruction prepareCreateVariableInstruction(String varName, String fileName, boolean fNameOverride, String format, MatrixCharacteristics mc, boolean hasHeader, String delim, boolean sparse) throws DMLRuntimeException, DMLUnsupportedOperationException {
    String str = getBasicCreateVarString(varName, fileName, fNameOverride, format)
        // matrix characteristics
        + Lop.OPERAND_DELIMITOR + mc.getRows()
        + Lop.OPERAND_DELIMITOR + mc.getCols()
        + Lop.OPERAND_DELIMITOR + mc.getRowsPerBlock()
        + Lop.OPERAND_DELIMITOR + mc.getColsPerBlock()
        + Lop.OPERAND_DELIMITOR + mc.getNonZeros()
        // csv properties
        + Lop.OPERAND_DELIMITOR + hasHeader
        + Lop.OPERAND_DELIMITOR + delim
        + Lop.OPERAND_DELIMITOR + sparse;
    return parseInstruction(str);
}
/**
 * Replaces the given pattern in the file name of createvar and setfilename
 * instructions, both in the file name operand and in the instruction
 * string. Other instructions are left unchanged.
 *
 * @param pattern pattern to replace (applied to the operand via String.replaceAll)
 * @param replace replacement string
 * @throws DMLRuntimeException declared for the file name replacement helper
 */
@Override
public void updateInstructionThreadID(String pattern, String replace)
    throws DMLRuntimeException
{
    boolean hasFileName = ( opcode == VariableOperationCode.CreateVariable
        || opcode == VariableOperationCode.SetFileName );
    if ( !hasFileName ) {
        return;
    }

    //replace in-memory instruction
    input2.set_name(input2.getName().replaceAll(pattern, replace));

    // delimiter directly before the file name
    int fnameStart = StringUtils.ordinalIndexOf(instString, Lop.OPERAND_DELIMITOR, CREATEVAR_FILE_NAME_VAR_POS);
    // delimiter directly after the file name
    int fnameEnd = StringUtils.indexOf(instString, Lop.OPERAND_DELIMITOR, fnameStart+1);

    // part before the file name + file name with 'pattern' replaced + part after the file name
    instString = instString.substring(0, fnameStart+1)
        + ProgramConverter.saveReplaceFilenameThreadID(instString.substring(fnameStart+1, fnameEnd+1), pattern, replace)
        + instString.substring(fnameEnd+1);
}
/**
 * Checks if this instruction is one of the cast operations
 * (cast as scalar, matrix, integer, double or boolean).
 *
 * @return true if the operation code is a cast operation
 */
public boolean isVariableCastInstruction()
{
    final VariableOperationCode op = opcode;
    if ( op == VariableOperationCode.CastAsScalarVariable
        || op == VariableOperationCode.CastAsMatrixVariable ) {
        return true;
    }
    if ( op == VariableOperationCode.CastAsIntegerVariable
        || op == VariableOperationCode.CastAsDoubleVariable ) {
        return true;
    }
    return op == VariableOperationCode.CastAsBooleanVariable;
}
}