Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
package prerna.ds.r;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import prerna.algorithm.api.SemossDataType;
import prerna.poi.main.HeadersException;
import prerna.sablecc2.om.Join;
import prerna.util.Utility;
public class RSyntaxHelper {
private static Map javaRDatTimeTranslationMap = new HashMap();
static {
javaRDatTimeTranslationMap.put("y2", "%y"); //yr without century (2 digits)
javaRDatTimeTranslationMap.put("Y2", "%y"); //yr without century (2 digits)
javaRDatTimeTranslationMap.put("y4", "%Y"); //full yr (4 digits)
javaRDatTimeTranslationMap.put("Y4", "%Y"); //full yr (4 digits)
// javaRDatTimeTranslationMap.put("G", value); //Era
javaRDatTimeTranslationMap.put("M1", "%m"); //Numerical month
javaRDatTimeTranslationMap.put("M2", "%m"); //Numerical month
javaRDatTimeTranslationMap.put("M3", "%b"); //Abbreviated month name
javaRDatTimeTranslationMap.put("M+", "%B"); //Full month name
javaRDatTimeTranslationMap.put("d", "%d"); //Day in month
javaRDatTimeTranslationMap.put("D", "%j"); //Day in year
// javaRDatTimeTranslationMap.put("w", value); //Week in year
// javaRDatTimeTranslationMap.put("W", value); //Week in month
// javaRDatTimeTranslationMap.put("F", value); //Day of week in month
javaRDatTimeTranslationMap.put("E3", "%a"); //Abbreviate day name in week
javaRDatTimeTranslationMap.put("E", "%A"); //Full day name in week
javaRDatTimeTranslationMap.put("u", "%u"); //Day number of week (1 = Monday, ..., 7 = Sunday)
javaRDatTimeTranslationMap.put("a", "%p"); //AM/PM --- need to be used with %I not %H
javaRDatTimeTranslationMap.put("H", "%H"); //Hour in day (0-23)
// javaRDatTimeTranslationMap.put("k", value); //Hour in day (1-24)
// javaRDatTimeTranslationMap.put("K", value); //Hour in am/pm (0-11)
javaRDatTimeTranslationMap.put("h", "%I"); //Hour in am/pm (1-12)
javaRDatTimeTranslationMap.put("m", "%M"); //Minute in hour
javaRDatTimeTranslationMap.put("s", "%S"); //Second in minute
javaRDatTimeTranslationMap.put("S", "%OS"); //Millisecond
// javaRDatTimeTranslationMap.put("z", "%Z"); //tz (Pacific Standard Time; PST; GMT-08:00) ???? - TODO needs to be handled via the tz param
javaRDatTimeTranslationMap.put("Z", "%z"); //tz (-0800)
javaRDatTimeTranslationMap.put("X", "%z"); //tz (-08; -0800; -08:00)
}
private RSyntaxHelper() {
}
/**
* Convert a java object[] into a r column vector
* @param row The object[] to convert
* @param dataType The data type for each entry in the object[]
* @return String containing the equivalent r column vector
*/
public static String createStringRColVec(Object[] row) {
StringBuilder str = new StringBuilder("c(");
int i = 0;
int size = row.length;
for(; i < size; i++) {
str.append("\"").append(row[i]).append("\"");
// if not the last entry, append a "," to separate entries
if( (i+1) != size) {
str.append(",");
}
}
str.append(")");
return str.toString();
}
/**
* Convert a java object[] into a r column vector
* @param row The object[] to convert
* @param dataType The data type for each entry in the object[]
* @return String containing the equivalent r column vector
*/
public static String createStringRColVec(Integer[] row) {
StringBuilder str = new StringBuilder("c(");
int i = 0;
int size = row.length;
for(; i < size; i++) {
str.append(row[i]);
// if not the last entry, append a "," to separate entries
if( (i+1) != size) {
str.append(",");
}
}
str.append(")");
return str.toString();
}
/**
* Convert a java object[] into a r column vector
* @param row The object[] to convert
* @param dataType The data type for each entry in the object[]
* @return String containing the equivalent r column vector
*/
public static String createStringRColVec(Double[] row) {
StringBuilder str = new StringBuilder("c(");
int i = 0;
int size = row.length;
for(; i < size; i++) {
str.append(row[i]);
// if not the last entry, append a "," to separate entries
if( (i+1) != size) {
str.append(",");
}
}
str.append(")");
return str.toString();
}
/**
* Convert a java object[] into a r ordered factor col
* @param row The object[] to convert
* @param orderedLevels The ordering of the factor
* @return String containing the equivalent r column vector
*/
public static String createOrderedRFactor(Object[] row, String orderedLevels) {
StringBuilder str = new StringBuilder();
String vectorParameter = createStringRColVec(row);
Object[] orderedLevelsSplit = orderedLevels.split("+++");
if (orderedLevelsSplit.length < 2) {
throw new RuntimeException("Ordered levels of a factor must contain 2 or more items.");
} else {
String orderLevelsVector = createStringRColVec(orderedLevelsSplit);
str.append("factor(" + vectorParameter + ", ordered = TRUE, levels = " + orderLevelsVector + ");");
}
return str.toString();
}
/**
* Convert a java object[] into a r ordered factor col
* @param row The object[] to convert
* @param orderedLevels The ordering of the factor
* @param orderedLevelLabels Corresponding labels of the ordered levels of the factor
* @return String containing the equivalent r column vector
*/
public static String createOrderedRFactor(Object[] row, String orderedLevels, String orderedLevelLabels) {
StringBuilder str = new StringBuilder();
String vectorParameter = createStringRColVec(row);
Object[] orderedLevelsSplit = orderedLevels.split("+++");
Object[] orderedLevelLabelsSplit = orderedLevelLabels.split("+++");
if (orderedLevelsSplit.length != orderedLevelLabelsSplit.length) {
throw new RuntimeException("Counts of ordered levels and the corresponding labels must be equal.");
} else if (orderedLevelsSplit.length < 2) {
throw new RuntimeException("Ordered levels/labels of a factor must contain 2 or more items.");
} else {
String orderLevelsVector = createStringRColVec(orderedLevelsSplit);
str.append("factor(" + vectorParameter + ", ordered = TRUE, levels = " + orderLevelsVector + "), labels = " + orderedLevelLabelsSplit +");");
}
return str.toString();
}
public static String getOrderedLevelsFromRFactorCol(String tableName, String colName) {
StringBuilder str = new StringBuilder();
str.append("paste(levels("+ tableName + "$" + colName + "), collapse = '+++');");
return str.toString();
}
public static String alterColumnTypeToCharacter(String tableName, String colName) {
// will generate a string similar to
// "datatable$Revenue_International <- as.numeric(as.character(datatable$Revenue_International))"
StringBuilder builder = new StringBuilder();
builder.append(tableName).append("$").append(colName).append(" <- ").append("as.character(")
.append(tableName).append("$").append(colName).append(")");
return builder.toString();
}
public static String alterColumnTypeToCharacter(String tableName, List colName) {
StringBuilder builder = new StringBuilder();
builder.append(tableName + "[,(c('" + StringUtils.join(colName,"','") + "')) := lapply(.SD, as.character), .SDcols = c('" + StringUtils.join(colName,"','") + "')]");
return builder.toString();
}
/**
* Converts a R column type to numeric
* @param tableName The name of the R table
* @param colName The name of the column
* @return The r script to execute
*/
public static String alterColumnTypeToNumeric(String tableName, String colName) {
// will generate a string similar to
// "datatable$Revenue_International <- as.numeric(as.character(datatable$Revenue_International))"
StringBuilder builder = new StringBuilder();
builder.append(tableName).append("$").append(colName).append(" <- ").append("as.numeric(as.character(")
.append(tableName).append("$").append(colName).append("))");
return builder.toString();
}
public static String alterColumnTypeToNumeric(String tableName, List colName) {
StringBuilder builder = new StringBuilder();
builder.append(tableName + "[,(c('" + StringUtils.join(colName,"','") + "')) := lapply(.SD, as.numeric), .SDcols = c('" + StringUtils.join(colName,"','") + "')]");
return builder.toString();
}
/**
* Converts a R column type to date
* @param tableName The name of the R table
* @param colName The name of the column
* @return The r script to execute
*/
public static String alterColumnTypeToDate(String tableName, String format, String colName) {
if(format == null) {
format = "%Y-%m-%d";
}
String[] parsedFormat = format.split("\\|");
// will generate a string similar to
// "datatable$Birthday <- as.Date(as.character(datatable$Birthday), format = '%m/%d/%Y')"
StringBuilder builder = new StringBuilder();
builder.append(tableName + "$" + colName + "<- as.Date("
+ tableName + "$" + colName + ", format = '" + parsedFormat[0] +"')");
return builder.toString();
}
public static String alterColumnTypeToDate(String tableName, String format, List cols) {
if(format == null) {
format = "%Y-%m-%d";
}
String[] parsedFormat = format.split("\\|");
StringBuilder builder = new StringBuilder();
//parse out the milliseconds options
builder.append(tableName + "[,(c('" + StringUtils.join(cols,"','") + "')) := "
+ "lapply(.SD, function(x) as.Date(x, format='" + parsedFormat[0] + "')), "
+ ".SDcols = c('" + StringUtils.join(cols,"','") + "')]");
return builder.toString();
}
public static List alterColumnTypeToDate_Excel(String tableName, List cols) {
List dateExcelR= new ArrayList();
StringBuilder builder = new StringBuilder();
String convertedDateCols_R = "convertedDateCols" + Utility.getRandomString(6);
String convertedDateColsList_R = "convertedDateColsList" + Utility.getRandomString(6);
//append the function that will handle dates that have been translated into a numerical value during read.xlsx2 upload
builder.append("clean_convertToDate_function <- function(x){ cleanCol <- gsub('^\\\\s*$', 'NA', " + tableName + "[[x]]) "
+ "%>% .[!is.na(.) & . != 'NA' & . != 'null' & . != 'NULL'];"
+ "if (all(!is.na(as.numeric(cleanCol)))) as.Date('1900-01-01') + as.numeric(" + tableName + "[[x]]) - 2 else 'NOTHING' };");
//call the function above and sort columns by whether they were handled by the function
builder.append(convertedDateCols_R + " <- lapply(c('" + StringUtils.join(cols, "','") + "'), clean_convertToDate_function);");
builder.append(convertedDateColsList_R + " <- c('" + StringUtils.join(cols,"','") + "')[-grep('NOTHING'," + convertedDateCols_R + ")];");
//update columns via the outputs of the function
builder.append("if (length(" + convertedDateColsList_R + ") > 0) "
+ tableName + "[, (" + convertedDateColsList_R + ") := lapply("
+ "setdiff(seq(1,length(" + convertedDateCols_R + ")), grep('NOTHING'," + convertedDateCols_R + ")), "
+ "function(x) unlist(" + convertedDateCols_R + "[[x]] ))];");
//clean up variables/functions
builder.append("rm(clean_convertToDate_function," + convertedDateCols_R + "); gc()");
//prep return list
dateExcelR.add(builder.toString());
dateExcelR.add(convertedDateColsList_R);
return dateExcelR;
}
public static String alterColumnTypeToDateTime(String tableName, String format,String colName) {
if(format == null) {
format = "%Y-%m-%d %H:%M:%S|NULL";
}
String[] parsedFormat = format.split("\\|");
StringBuilder builder = new StringBuilder();
builder.append("options(digits.secs=" + parsedFormat[1] + ");");
builder.append(tableName + "$" + colName + "<- as.POSIXct(fast_strptime("
+ tableName + "$" + colName + ", format = '" + parsedFormat[0] +"'))");
return builder.toString();
}
public static String alterColumnTypeToDateTime(String tableName, String format, List cols) {
if(format == null) {
format = "%Y-%m-%d %H:%M:%S|NULL";
}
//parse out the milliseconds options
String[] parsedFormat = format.split("\\|");
StringBuilder builder = new StringBuilder();
builder.append("options(digits.secs=" + parsedFormat[1] + ");");
builder.append(tableName + "[,(c('" + StringUtils.join(cols,"','") + "')) := "
+ "lapply(.SD, function(x) as.POSIXct(fast_strptime(x, format='" + parsedFormat[0] + "'))), "
+ ".SDcols = c('" + StringUtils.join(cols,"','") + "')]");
return builder.toString();
}
public static List alterColumnTypeToDateTime_Excel(String tableName, List cols) {
List dateTimeExcelR= new ArrayList();
StringBuilder builder = new StringBuilder();
String convertedDTCols_R = "convertedDTCols" + Utility.getRandomString(6);
String convertedDTColsList_R = "convertedDTColsList" + Utility.getRandomString(6);
//TODO need to make this based on an input parameter instead of being hardcoded
builder.append("options(digits.secs = 3);");
//append the function that will handle datetimes that have been translated into a numerical value during read.xlsx2 upload
builder.append("clean_convertToPOSIXct_function <- function(x){ cleanCol <- gsub('^\\\\s*$', 'NA', " + tableName + "[[x]]) "
+ "%>% .[!is.na(.) & . != 'NA' & . != 'null' & . != 'NULL'];"
+ "if (all(!is.na(as.numeric(cleanCol)))) as.POSIXct(as.numeric(" + tableName + "[[x]])*86400, origin = '1899-12-30', tz='UTC') else 'NOTHING' };");
//call the function above and sort columns by whether they were handled by the function
builder.append(convertedDTCols_R + " <- lapply(c('" + StringUtils.join(cols, "','") + "'), clean_convertToPOSIXct_function);");
builder.append(convertedDTColsList_R + " <- c('" + StringUtils.join(cols,"','") + "')[-grep('NOTHING'," + convertedDTCols_R + ")];");
//update columns via the outputs of the function
builder.append("if (length(" + convertedDTColsList_R + ") > 0) "
+ tableName + "[, (" + convertedDTColsList_R + ") := lapply("
+ "setdiff(seq(1,length(" + convertedDTCols_R + ")), grep('NOTHING'," + convertedDTCols_R + ")), "
+ "function(x) unlist(" + convertedDTCols_R + "[[x]] ))];");
//clean up variables/functions
builder.append("rm(clean_convertToDate_function," + convertedDTCols_R + "); gc()");
//prep return list
dateTimeExcelR.add(builder.toString());
dateTimeExcelR.add(convertedDTColsList_R);
return dateTimeExcelR;
}
/**
* Generate the syntax to perform a fRead to ingest a file
* @param tableName
* @param absolutePath
* @return
*/
public static String getFReadSyntax(String tableName, String absolutePath) {
return getFReadSyntax(tableName, absolutePath, ",");
}
/**
* Generate the syntax to perform a fRead to ingest a file
* @param tableName
* @param absolutePath
* @param delimiter
* @return
*/
public static String getFReadSyntax(String tableName, String absolutePath, String delimiter) {
StringBuilder builder = new StringBuilder();
builder.append(tableName).append(" <- fread(\"").append(absolutePath.replace("\\", "/"))
.append("\", sep=\"").append(delimiter).append("\")");
return builder.toString();
}
public static String getExcelReadSheetSyntax(String tableName, String absolutePath, int sheetIndex, List colIndices, boolean uploadSubset) {
StringBuilder builder = new StringBuilder();
builder.append(tableName).append(" <- as.data.table(read.xlsx2(\"").append(absolutePath.replace("\\", "/"))
.append("\", sheetIndex=").append(sheetIndex).append(", colClasses = c(rep('character',").append(colIndices.size())
.append(")), stringsAsFactors=FALSE");
if (uploadSubset) {
builder.append(", colIndex = c(").append(StringUtils.join(colIndices,",")).append(")");
}
builder.append("))");
return builder.toString();
}
/**
* Load excel sheet to dataframe
* @param filePath The file path of the excel file
* @param frameName The name of the frame to load excel sheet
* @param sheetName The name of the sheet in the excel workbook
* @param sheetRange The desired range to load
* @return
*/
public static String loadExcelSheet(String filePath, String frameName, String sheetName, String sheetRange) {
StringBuilder rsb = new StringBuilder();
rsb.append("library(readxl);library(cellranger);");
filePath = filePath.replace("\\", "/");
rsb.append(frameName + " <- read_excel(path = \"" + filePath + "\", col_names = TRUE, sheet = \"" + sheetName + "\", range='"
+ sheetRange + "');");
rsb.append(frameName + " <- as.data.table(" + frameName + ");");
return rsb.toString();
}
/**
*
* @param leftTableName
* @param rightTableName
* @param joinType
* @param joinCols
* @return
*/
public static String getMergeSyntax(String returnTable, String leftTableName, String rightTableName, String joinType, List