// org.apache.sysml.parser.ParameterizedBuiltinFunctionExpression Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of systemml Show documentation
// Show all versions of systemml Show documentation
// Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.parser;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.sysml.hops.Hop.ParamBuiltinOp;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.parser.LanguageException.LanguageErrorCodes;
public class ParameterizedBuiltinFunctionExpression extends DataIdentifier
{
private ParameterizedBuiltinFunctionOp _opcode;
private HashMap _varParams;
public static final String TF_FN_PARAM_DATA = "target";
public static final String TF_FN_PARAM_MTD2 = "meta";
public static final String TF_FN_PARAM_SPEC = "spec";
public static final String TF_FN_PARAM_MTD = "transformPath"; //NOTE MB: for backwards compatibility
public static final String TF_FN_PARAM_APPLYMTD = "applyTransformPath";
public static final String TF_FN_PARAM_OUTNAMES = "outputNames";
private static HashMap opcodeMap;
static {
opcodeMap = new HashMap();
opcodeMap.put("aggregate", Expression.ParameterizedBuiltinFunctionOp.GROUPEDAGG);
opcodeMap.put("groupedAggregate", Expression.ParameterizedBuiltinFunctionOp.GROUPEDAGG);
opcodeMap.put("removeEmpty",Expression.ParameterizedBuiltinFunctionOp.RMEMPTY);
opcodeMap.put("replace", Expression.ParameterizedBuiltinFunctionOp.REPLACE);
opcodeMap.put("order", Expression.ParameterizedBuiltinFunctionOp.ORDER);
// Distribution Functions
opcodeMap.put("cdf", Expression.ParameterizedBuiltinFunctionOp.CDF);
opcodeMap.put("pnorm", Expression.ParameterizedBuiltinFunctionOp.PNORM);
opcodeMap.put("pt", Expression.ParameterizedBuiltinFunctionOp.PT);
opcodeMap.put("pf", Expression.ParameterizedBuiltinFunctionOp.PF);
opcodeMap.put("pchisq", Expression.ParameterizedBuiltinFunctionOp.PCHISQ);
opcodeMap.put("pexp", Expression.ParameterizedBuiltinFunctionOp.PEXP);
opcodeMap.put("icdf", Expression.ParameterizedBuiltinFunctionOp.INVCDF);
opcodeMap.put("qnorm", Expression.ParameterizedBuiltinFunctionOp.QNORM);
opcodeMap.put("qt", Expression.ParameterizedBuiltinFunctionOp.QT);
opcodeMap.put("qf", Expression.ParameterizedBuiltinFunctionOp.QF);
opcodeMap.put("qchisq", Expression.ParameterizedBuiltinFunctionOp.QCHISQ);
opcodeMap.put("qexp", Expression.ParameterizedBuiltinFunctionOp.QEXP);
// data transformation functions
opcodeMap.put("transform", Expression.ParameterizedBuiltinFunctionOp.TRANSFORM);
opcodeMap.put("transformapply", Expression.ParameterizedBuiltinFunctionOp.TRANSFORMAPPLY);
opcodeMap.put("transformdecode", Expression.ParameterizedBuiltinFunctionOp.TRANSFORMDECODE);
opcodeMap.put("transformencode", Expression.ParameterizedBuiltinFunctionOp.TRANSFORMENCODE);
opcodeMap.put("transformmeta", Expression.ParameterizedBuiltinFunctionOp.TRANSFORMMETA);
// toString
opcodeMap.put("toString", Expression.ParameterizedBuiltinFunctionOp.TOSTRING);
}
public static HashMap pbHopMap;
static {
pbHopMap = new HashMap();
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.GROUPEDAGG, ParamBuiltinOp.GROUPEDAGG);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.RMEMPTY, ParamBuiltinOp.RMEMPTY);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.REPLACE, ParamBuiltinOp.REPLACE);
// For order, a ReorgOp is constructed with ReorgOp.SORT type
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.ORDER, ParamBuiltinOp.INVALID);
// Distribution Functions
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.CDF, ParamBuiltinOp.CDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.PNORM, ParamBuiltinOp.CDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.PT, ParamBuiltinOp.CDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.PF, ParamBuiltinOp.CDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.PCHISQ, ParamBuiltinOp.CDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.PEXP, ParamBuiltinOp.CDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.INVCDF, ParamBuiltinOp.INVCDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.QNORM, ParamBuiltinOp.INVCDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.QT, ParamBuiltinOp.INVCDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.QF, ParamBuiltinOp.INVCDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.QCHISQ, ParamBuiltinOp.INVCDF);
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.QEXP, ParamBuiltinOp.INVCDF);
// toString
pbHopMap.put(Expression.ParameterizedBuiltinFunctionOp.TOSTRING, ParamBuiltinOp.TOSTRING);
}
public static ParameterizedBuiltinFunctionExpression getParamBuiltinFunctionExpression(String functionName, ArrayList paramExprsPassed,
String fileName, int blp, int bcp, int elp, int ecp){
if (functionName == null || paramExprsPassed == null)
return null;
Expression.ParameterizedBuiltinFunctionOp pbifop = opcodeMap.get(functionName);
if ( pbifop == null )
return null;
HashMap varParams = new HashMap();
for (ParameterExpression pexpr : paramExprsPassed)
varParams.put(pexpr.getName(), pexpr.getExpr());
ParameterizedBuiltinFunctionExpression retVal = new ParameterizedBuiltinFunctionExpression(pbifop,varParams,
fileName, blp, bcp, elp, ecp);
return retVal;
} // end method getBuiltinFunctionExpression
public ParameterizedBuiltinFunctionExpression(ParameterizedBuiltinFunctionOp op, HashMap varParams,
String filename, int blp, int bcp, int elp, int ecp) {
_kind = Kind.ParameterizedBuiltinFunctionOp;
_opcode = op;
_varParams = varParams;
this.setAllPositions(filename, blp, bcp, elp, ecp);
}
public Expression rewriteExpression(String prefix) throws LanguageException {
HashMap newVarParams = new HashMap();
for (String key : _varParams.keySet()){
Expression newExpr = _varParams.get(key).rewriteExpression(prefix);
newVarParams.put(key, newExpr);
}
ParameterizedBuiltinFunctionExpression retVal = new ParameterizedBuiltinFunctionExpression(_opcode, newVarParams,
this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
return retVal;
}
public void setOpcode(ParameterizedBuiltinFunctionOp op) {
_opcode = op;
}
public ParameterizedBuiltinFunctionOp getOpCode() {
return _opcode;
}
public HashMap getVarParams() {
return _varParams;
}
public Expression getVarParam(String name) {
return _varParams.get(name);
}
public void addVarParam(String name, Expression value){
_varParams.put(name, value);
}
/**
* Validate parse tree : Process BuiltinFunction Expression in an assignment
* statement
*/
@Override
public void validateExpression(HashMap ids, HashMap constVars, boolean conditional)
throws LanguageException
{
// validate all input parameters
for ( String s : getVarParams().keySet() ) {
Expression paramExpr = getVarParam(s);
if (paramExpr instanceof FunctionCallIdentifier)
raiseValidateError("UDF function call not supported as parameter to built-in function call", false);
paramExpr.validateExpression(ids, constVars, conditional);
}
String outputName = getTempName();
DataIdentifier output = new DataIdentifier(outputName);
//output.setProperties(this.getFirstExpr().getOutput());
this.setOutput(output);
// IMPORTANT: for each operation, one must handle unnamed parameters
switch (this.getOpCode()) {
case GROUPEDAGG:
validateGroupedAgg(output, conditional);
break;
case CDF:
case INVCDF:
case PNORM:
case QNORM:
case PT:
case QT:
case PF:
case QF:
case PCHISQ:
case QCHISQ:
case PEXP:
case QEXP:
validateDistributionFunctions(output, conditional);
break;
case RMEMPTY:
validateRemoveEmpty(output, conditional);
break;
case REPLACE:
validateReplace(output, conditional);
break;
case ORDER:
validateOrder(output, conditional);
break;
case TRANSFORM:
validateTransform(output, conditional);
break;
case TRANSFORMAPPLY:
validateTransformApply(output, conditional);
break;
case TRANSFORMDECODE:
validateTransformDecode(output, conditional);
break;
case TRANSFORMMETA:
validateTransformMeta(output, conditional);
break;
case TOSTRING:
validateCastAsString(output, conditional);
break;
default: //always unconditional (because unsupported operation)
//handle common issue of transformencode
if( getOpCode()==ParameterizedBuiltinFunctionOp.TRANSFORMENCODE )
raiseValidateError("Parameterized function "+ getOpCode() +" requires a multi-assignment statement "
+ "for data and metadata.", false, LanguageErrorCodes.UNSUPPORTED_EXPRESSION);
else
raiseValidateError("Unsupported parameterized function "+ getOpCode(),
false, LanguageErrorCodes.UNSUPPORTED_EXPRESSION);
}
return;
}
@Override
public void validateExpression(MultiAssignmentStatement stmt, HashMap ids, HashMap constVars, boolean conditional)
throws LanguageException
{
// validate all input parameters
for ( String s : getVarParams().keySet() ) {
Expression paramExpr = getVarParam(s);
if (paramExpr instanceof FunctionCallIdentifier)
raiseValidateError("UDF function call not supported as parameter to built-in function call", false);
paramExpr.validateExpression(ids, constVars, conditional);
}
_outputs = new Identifier[stmt.getTargetList().size()];
int count = 0;
for (DataIdentifier outParam: stmt.getTargetList()){
DataIdentifier tmp = new DataIdentifier(outParam);
tmp.setAllPositions(this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
_outputs[count++] = tmp;
}
switch (this.getOpCode()) {
case TRANSFORMENCODE:
DataIdentifier out1 = (DataIdentifier) getOutputs()[0];
DataIdentifier out2 = (DataIdentifier) getOutputs()[1];
validateTransformEncode(out1, out2, conditional);
break;
default: //always unconditional (because unsupported operation)
raiseValidateError("Unsupported parameterized function "+ getOpCode(), false, LanguageErrorCodes.INVALID_PARAMETERS);
}
return;
}
// example: A = transform(data=D, txmtd="", txspec="")
private void validateTransform(DataIdentifier output, boolean conditional)
throws LanguageException
{
//validate data
checkDataType("transform", TF_FN_PARAM_DATA, DataType.FRAME, conditional);
Expression txmtd = getVarParam(TF_FN_PARAM_MTD);
if( txmtd==null ) {
raiseValidateError("Named parameter '" + TF_FN_PARAM_MTD + "' missing. Please specify the transformation metadata file path.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( txmtd.getOutput().getDataType() != DataType.SCALAR || txmtd.getOutput().getValueType() != ValueType.STRING ){
raiseValidateError("Transformation metadata file '" + TF_FN_PARAM_MTD + "' must be a string value (a scalar).", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression txspec = getVarParam(TF_FN_PARAM_SPEC);
Expression applyMTD = getVarParam(TF_FN_PARAM_APPLYMTD);
if( txspec==null ) {
if ( applyMTD == null )
raiseValidateError("Named parameter '" + TF_FN_PARAM_SPEC + "' missing. Please specify the transformation specification (JSON string).", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( txspec.getOutput().getDataType() != DataType.SCALAR || txspec.getOutput().getValueType() != ValueType.STRING ){
raiseValidateError("Transformation specification '" + TF_FN_PARAM_SPEC + "' must be a string value (a scalar).", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
if ( applyMTD != null ) {
if( applyMTD.getOutput().getDataType() != DataType.SCALAR || applyMTD.getOutput().getValueType() != ValueType.STRING ){
raiseValidateError("Apply transformation metadata file'" + TF_FN_PARAM_APPLYMTD + "' must be a string value (a scalar).", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
//NOTE: txspec can still be optionally specified; if specified it takes precedence over
// specification persisted in txmtd during transform.
}
Expression outNames = getVarParam(TF_FN_PARAM_OUTNAMES);
if ( outNames != null ) {
if( outNames.getOutput().getDataType() != DataType.SCALAR || outNames.getOutput().getValueType() != ValueType.STRING )
raiseValidateError("The parameter specifying column names in the output file '" + TF_FN_PARAM_MTD + "' must be a string value (a scalar).", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
if ( applyMTD != null)
raiseValidateError("Only one of '" + TF_FN_PARAM_APPLYMTD + "' or '" + TF_FN_PARAM_OUTNAMES + "' can be specified in transform().", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
// disable frame csv reblocks as transform operates directly over csv files
// (this is required to support both file-based transform and frame-based
// transform at the same time; hence, transform and frame-based transform
// functions over csv cannot be used in the same script; accordingly we
// give an appropriate warning)
OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK = false;
raiseValidateError("Disable frame csv reblock to support file-based transform.", true);
// Output is a matrix with same dims as input
output.setDataType(DataType.MATRIX);
output.setFormatType(FormatType.CSV);
output.setValueType(ValueType.DOUBLE);
// Output dimensions may not be known at compile time, for example when dummycoding.
output.setDimensions(-1, -1);
}
// example: A = transformapply(target=X, meta=M, spec=s)
private void validateTransformApply(DataIdentifier output, boolean conditional)
throws LanguageException
{
//validate data / metadata (recode maps)
checkDataType("transformapply", TF_FN_PARAM_DATA, DataType.FRAME, conditional);
checkDataType("transformapply", TF_FN_PARAM_MTD2, DataType.FRAME, conditional);
//validate specification
checkDataValueType("transformapply", TF_FN_PARAM_SPEC, DataType.SCALAR, ValueType.STRING, conditional);
//set output dimensions
output.setDataType(DataType.MATRIX);
output.setValueType(ValueType.DOUBLE);
output.setDimensions(-1, -1);
}
private void validateTransformDecode(DataIdentifier output, boolean conditional)
throws LanguageException
{
//validate data / metadata (recode maps)
checkDataType("transformdecode", TF_FN_PARAM_DATA, DataType.MATRIX, conditional);
checkDataType("transformdecode", TF_FN_PARAM_MTD2, DataType.FRAME, conditional);
//validate specification
checkDataValueType("transformdecode", TF_FN_PARAM_SPEC, DataType.SCALAR, ValueType.STRING, conditional);
//set output dimensions
output.setDataType(DataType.FRAME);
output.setValueType(ValueType.STRING);
output.setDimensions(-1, -1);
}
private void validateTransformMeta(DataIdentifier output, boolean conditional)
throws LanguageException
{
//validate specification
checkDataValueType("transformmeta", TF_FN_PARAM_SPEC, DataType.SCALAR, ValueType.STRING, conditional);
//validate meta data path
checkDataValueType("transformmeta", TF_FN_PARAM_MTD, DataType.SCALAR, ValueType.STRING, conditional);
//set output dimensions
output.setDataType(DataType.FRAME);
output.setValueType(ValueType.STRING);
output.setDimensions(-1, -1);
}
private void validateTransformEncode(DataIdentifier output1, DataIdentifier output2, boolean conditional)
throws LanguageException
{
//validate data / metadata (recode maps)
checkDataType("transformencode", TF_FN_PARAM_DATA, DataType.FRAME, conditional);
//validate specification
checkDataValueType("transformencode", TF_FN_PARAM_SPEC, DataType.SCALAR, ValueType.STRING, conditional);
//set output dimensions
output1.setDataType(DataType.MATRIX);
output1.setValueType(ValueType.DOUBLE);
output1.setDimensions(-1, -1);
output2.setDataType(DataType.FRAME);
output2.setValueType(ValueType.STRING);
output2.setDimensions(-1, -1);
}
private void validateReplace(DataIdentifier output, boolean conditional) throws LanguageException {
//check existence and correctness of arguments
Expression target = getVarParam("target");
if( target==null ) {
raiseValidateError("Named parameter 'target' missing. Please specify the input matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( target.getOutput().getDataType() != DataType.MATRIX ){
raiseValidateError("Input matrix 'target' is of type '"+target.getOutput().getDataType()+"'. Please specify the input matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression pattern = getVarParam("pattern");
if( pattern==null ) {
raiseValidateError("Named parameter 'pattern' missing. Please specify the replacement pattern.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( pattern.getOutput().getDataType() != DataType.SCALAR ){
raiseValidateError("Replacement pattern 'pattern' is of type '"+pattern.getOutput().getDataType()+"'. Please, specify a scalar replacement pattern.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression replacement = getVarParam("replacement");
if( replacement==null ) {
raiseValidateError("Named parameter 'replacement' missing. Please specify the replacement value.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( replacement.getOutput().getDataType() != DataType.SCALAR ){
raiseValidateError("Replacement value 'replacement' is of type '"+replacement.getOutput().getDataType()+"'. Please, specify a scalar replacement value.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
// Output is a matrix with same dims as input
output.setDataType(DataType.MATRIX);
output.setValueType(ValueType.DOUBLE);
output.setDimensions(target.getOutput().getDim1(), target.getOutput().getDim2());
}
private void validateOrder(DataIdentifier output, boolean conditional) throws LanguageException {
//check existence and correctness of arguments
Expression target = getVarParam("target"); //[MANDATORY] TARGET
if( target==null ) {
raiseValidateError("Named parameter 'target' missing. Please specify the input matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( target.getOutput().getDataType() != DataType.MATRIX ){
raiseValidateError("Input matrix 'target' is of type '"+target.getOutput().getDataType()+"'. Please specify the input matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
//check for unsupported parameters
for(String param : getVarParams().keySet())
if( !(param.equals("target") || param.equals("by") || param.equals("decreasing") || param.equals("index.return")) )
raiseValidateError("Unsupported order parameter: '"+param+"'", false);
Expression orderby = getVarParam("by"); //[OPTIONAL] BY
if( orderby == null ) { //default first column, good fit for vectors
orderby = new IntIdentifier(1, "1", -1, -1, -1, -1);
addVarParam("by", orderby);
}
else if( orderby !=null && orderby.getOutput().getDataType() != DataType.SCALAR ){
raiseValidateError("Orderby column 'by' is of type '"+orderby.getOutput().getDataType()+"'. Please, specify a scalar order by column index.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression decreasing = getVarParam("decreasing"); //[OPTIONAL] DECREASING
if( decreasing == null ) { //default: ascending
addVarParam("decreasing", new BooleanIdentifier(false, "false", -1, -1, -1, -1));
}
else if( decreasing!=null && decreasing.getOutput().getDataType() != DataType.SCALAR ){
raiseValidateError("Ordering 'decreasing' is of type '"+decreasing.getOutput().getDataType()+"', '"+decreasing.getOutput().getValueType()+"'. Please, specify 'decreasing' as a scalar boolean.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression indexreturn = getVarParam("index.return"); //[OPTIONAL] DECREASING
if( indexreturn == null ) { //default: sorted data
indexreturn = new BooleanIdentifier(false, "false", -1, -1, -1, -1);
addVarParam("index.return", indexreturn);
}
else if( indexreturn!=null && indexreturn.getOutput().getDataType() != DataType.SCALAR ){
raiseValidateError("Return type 'index.return' is of type '"+indexreturn.getOutput().getDataType()+"', '"+indexreturn.getOutput().getValueType()+"'. Please, specify 'indexreturn' as a scalar boolean.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
long dim2 = ( indexreturn instanceof BooleanIdentifier ) ?
((BooleanIdentifier)indexreturn).getValue() ? 1: target.getOutput().getDim2() : -1;
// Output is a matrix with same dims as input
output.setDataType(DataType.MATRIX);
output.setValueType(ValueType.DOUBLE);
output.setDimensions(target.getOutput().getDim1(), dim2 );
}
private void validateRemoveEmpty(DataIdentifier output, boolean conditional) throws LanguageException {
//check existence and correctness of arguments
Expression target = getVarParam("target");
if( target==null ) {
raiseValidateError("Named parameter 'target' missing. Please specify the input matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( target.getOutput().getDataType() != DataType.MATRIX ){
raiseValidateError("Input matrix 'target' is of type '"+target.getOutput().getDataType()+"'. Please specify the input matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression margin = getVarParam("margin");
if( margin==null ){
raiseValidateError("Named parameter 'margin' missing. Please specify 'rows' or 'cols'.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
else if( !(margin instanceof DataIdentifier) && !margin.toString().equals("rows") && !margin.toString().equals("cols") ){
raiseValidateError("Named parameter 'margin' has an invalid value '"+margin.toString()+"'. Please specify 'rows' or 'cols'.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
Expression select = getVarParam("select");
if( select!=null && select.getOutput().getDataType() != DataType.MATRIX ){
raiseValidateError("Index matrix 'select' is of type '"+select.getOutput().getDataType()+"'. Please specify the select matrix.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
// Output is a matrix with unknown dims
output.setDataType(DataType.MATRIX);
output.setValueType(ValueType.DOUBLE);
output.setDimensions(-1, -1);
}
private void validateGroupedAgg(DataIdentifier output, boolean conditional)
throws LanguageException
{
//check existing target and groups
if (getVarParam(Statement.GAGG_TARGET) == null || getVarParam(Statement.GAGG_GROUPS) == null){
raiseValidateError("Must define both target and groups.", conditional);
}
Expression exprTarget = getVarParam(Statement.GAGG_TARGET);
Expression exprGroups = getVarParam(Statement.GAGG_GROUPS);
Expression exprNGroups = getVarParam(Statement.GAGG_NUM_GROUPS);
//check valid input dimensions
boolean colwise = true;
boolean matrix = false;
if( exprGroups.getOutput().dimsKnown() && exprTarget.getOutput().dimsKnown() )
{
//check for valid matrix input
if( exprGroups.getOutput().getDim2()==1 && exprTarget.getOutput().getDim2()>1 )
{
if( getVarParam(Statement.GAGG_WEIGHTS) != null ) {
raiseValidateError("Matrix input not supported with weights.", conditional);
}
if( getVarParam(Statement.GAGG_NUM_GROUPS) == null ) {
raiseValidateError("Matrix input not supported without specified numgroups.", conditional);
}
if( exprGroups.getOutput().getDim1() != exprTarget.getOutput().getDim1() ) {
raiseValidateError("Target and groups must have same dimensions -- " + " target dims: " +
exprTarget.getOutput().getDim1() +" x "+exprTarget.getOutput().getDim2()+", groups dims: " + exprGroups.getOutput().getDim1() + " x 1.", conditional);
}
matrix = true;
}
//check for valid col vector input
else if( exprGroups.getOutput().getDim2()==1 && exprTarget.getOutput().getDim2()==1 )
{
if( exprGroups.getOutput().getDim1() != exprTarget.getOutput().getDim1() ) {
raiseValidateError("Target and groups must have same dimensions -- " + " target dims: " +
exprTarget.getOutput().getDim1() +" x 1, groups dims: " + exprGroups.getOutput().getDim1() + " x 1.", conditional);
}
}
//check for valid row vector input
else if( exprGroups.getOutput().getDim1()==1 && exprTarget.getOutput().getDim1()==1 )
{
if( exprGroups.getOutput().getDim2() != exprTarget.getOutput().getDim2() ) {
raiseValidateError("Target and groups must have same dimensions -- " + " target dims: " +
"1 x " + exprTarget.getOutput().getDim2() +", groups dims: 1 x " + exprGroups.getOutput().getDim2() + ".", conditional);
}
colwise = true;
}
else {
raiseValidateError("Invalid target and groups inputs - dimension mismatch.", conditional);
}
}
//check function parameter
Expression functParam = getVarParam(Statement.GAGG_FN);
if( functParam == null ) {
raiseValidateError("must define function name (fn=) for aggregate()", conditional);
}
else if (functParam instanceof Identifier)
{
// standardize to lowercase and dequote fname
String fnameStr = functParam.toString();
// check that IF fname="centralmoment" THEN order=m is defined, where m=2,3,4
// check ELSE IF fname is allowed
if(fnameStr.equals(Statement.GAGG_FN_CM)){
String orderStr = getVarParam(Statement.GAGG_FN_CM_ORDER) == null ? null : getVarParam(Statement.GAGG_FN_CM_ORDER).toString();
if (orderStr == null || !(orderStr.equals("2") || orderStr.equals("3") || orderStr.equals("4"))){
raiseValidateError("for centralmoment, must define order. Order must be equal to 2,3, or 4", conditional);
}
}
else if (fnameStr.equals(Statement.GAGG_FN_COUNT)
|| fnameStr.equals(Statement.GAGG_FN_SUM)
|| fnameStr.equals(Statement.GAGG_FN_MEAN)
|| fnameStr.equals(Statement.GAGG_FN_VARIANCE)){}
else {
raiseValidateError("fname is " + fnameStr + " but must be either centeralmoment, count, sum, mean, variance", conditional);
}
}
//determine output dimensions
long outputDim1 = -1, outputDim2 = -1;
if( exprNGroups != null && exprNGroups instanceof Identifier )
{
Identifier numGroups = (Identifier) exprNGroups;
if ( numGroups != null && numGroups instanceof ConstIdentifier) {
long ngroups = ((ConstIdentifier)numGroups).getLongValue();
if ( colwise ) {
outputDim1 = ngroups;
outputDim2 = matrix ? exprTarget.getOutput().getDim2() : 1;
}
else {
outputDim1 = 1; //no support for matrix
outputDim2 = ngroups;
}
}
}
//set output meta data
output.setDataType(DataType.MATRIX);
output.setValueType(ValueType.DOUBLE);
output.setDimensions(outputDim1, outputDim2);
}
private void validateDistributionFunctions(DataIdentifier output, boolean conditional) throws LanguageException {
// CDF and INVCDF expects one unnamed parameter, it must be renamed as "quantile"
// (i.e., we must compute P(X <= x) where x is called as "quantile" )
ParameterizedBuiltinFunctionOp op = this.getOpCode();
// check if quantile is of type SCALAR
if ( getVarParam("target") == null || getVarParam("target").getOutput().getDataType() != DataType.SCALAR ) {
raiseValidateError("target must be provided for distribution functions, and it must be a scalar value.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
// Distribution specific checks
switch(op) {
case CDF:
case INVCDF:
if(getVarParam("dist") == null) {
raiseValidateError("For cdf() and icdf(), a distribution function must be specified (as a string).", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
break;
case QF:
case PF:
if(getVarParam("df1") == null || getVarParam("df2") == null ) {
raiseValidateError("Two degrees of freedom df1 and df2 must be provided for F-distribution.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
break;
case QT:
case PT:
if(getVarParam("df") == null ) {
raiseValidateError("Degrees of freedom df must be provided for t-distribution.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
break;
case QCHISQ:
case PCHISQ:
if(getVarParam("df") == null ) {
raiseValidateError("Degrees of freedom df must be provided for chi-squared-distribution.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
break;
default:
break;
// Not checking for QNORM, PNORM: distribution parameters mean and sd are optional with default values 0.0 and 1.0, respectively
// Not checking for QEXP, PEXP: distribution parameter rate is optional with a default values 1.0
// For all cdf functions, additional parameter lower.tail is optional with a default value TRUE
}
// CDF and INVCDF specific checks:
switch(op) {
case INVCDF:
case QNORM:
case QF:
case QT:
case QCHISQ:
case QEXP:
if(getVarParam("lower.tail") != null ) {
raiseValidateError("Lower tail argument is invalid while computing inverse cumulative probabilities.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
break;
case CDF:
case PNORM:
case PF:
case PT:
case PCHISQ:
case PEXP:
// no checks yet
break;
default:
break;
}
// Output is a scalar
output.setDataType(DataType.SCALAR);
output.setValueType(ValueType.DOUBLE);
output.setDimensions(0, 0);
return;
}
private void validateCastAsString(DataIdentifier output, boolean conditional)
throws LanguageException
{
HashMap varParams = getVarParams();
// replace parameter name for matrix argument
if( varParams.containsKey(null) )
varParams.put("target", varParams.remove(null));
// check validate parameter names
String[] validArgsArr = {"target", "rows", "cols", "decimal", "sparse", "sep", "linesep"};
HashSet validArgs = new HashSet(Arrays.asList(validArgsArr));
for( String k : varParams.keySet() ) {
if( !validArgs.contains(k) ) {
raiseValidateError("Invalid parameter " + k + " for toString, valid parameters are " +
Arrays.toString(validArgsArr), conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
}
// set output characteristics
output.setDataType(DataType.SCALAR);
output.setValueType(ValueType.STRING);
output.setDimensions(0, 0);
}
private void checkDataType( String fname, String pname, DataType dt, boolean conditional )
throws LanguageException
{
Expression data = getVarParam(pname);
if( data==null )
raiseValidateError("Named parameter '" + pname + "' missing. Please specify the input.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
else if( data.getOutput().getDataType() != dt )
raiseValidateError("Input to "+fname+"::"+pname+" must be of type '"+dt.toString()+"'. It is of type '"+data.getOutput().getDataType()+"'.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
private void checkDataValueType( String fname, String pname, DataType dt, ValueType vt, boolean conditional )
throws LanguageException
{
Expression data = getVarParam(pname);
if( data==null )
raiseValidateError("Named parameter '" + pname + "' missing. Please specify the input.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
else if( data.getOutput().getDataType() != dt || data.getOutput().getValueType() != vt )
raiseValidateError("Input to "+fname+"::"+pname+" must be of type '"+dt.toString()+"', '"+vt.toString()+"'. "
+ "It is of type '"+data.getOutput().getDataType().toString()+"', '"+data.getOutput().getValueType().toString()+"'.", conditional, LanguageErrorCodes.INVALID_PARAMETERS);
}
public String toString() {
StringBuilder sb = new StringBuilder(_opcode.toString() + "(");
for (String key : _varParams.keySet()){
sb.append("," + key + "=" + _varParams.get(key));
}
sb.append(" )");
return sb.toString();
}
@Override
public VariableSet variablesRead() {
VariableSet result = new VariableSet();
for (String s : _varParams.keySet()) {
result.addVariables ( _varParams.get(s).variablesRead() );
}
return result;
}
@Override
public VariableSet variablesUpdated() {
VariableSet result = new VariableSet();
for (String s : _varParams.keySet()) {
result.addVariables ( _varParams.get(s).variablesUpdated() );
}
result.addVariable(((DataIdentifier)this.getOutput()).getName(), (DataIdentifier)this.getOutput());
return result;
}
@Override
public boolean multipleReturns() {
switch(_opcode) {
case TRANSFORMENCODE:
return true;
default:
return false;
}
}
}