package prerna.ds.r;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.rosuda.REngine.Rserve.RConnection;
import prerna.algorithm.api.SemossDataType;
import prerna.ds.util.flatfile.CsvFileIterator;
import prerna.ds.util.flatfile.ParquetFileIterator;
import prerna.engine.api.IHeadersDataRow;
import prerna.engine.api.IRawSelectWrapper;
import prerna.om.HeadersException;
import prerna.om.IStringExportProcessor;
import prerna.om.Insight;
import prerna.poi.main.helper.excel.ExcelSheetFileIterator;
import prerna.query.interpreters.RInterpreter;
import prerna.query.querystruct.CsvQueryStruct;
import prerna.query.querystruct.ExcelQueryStruct;
import prerna.query.querystruct.ParquetQueryStruct;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.filters.IQueryFilter;
import prerna.query.querystruct.filters.SimpleQueryFilter;
import prerna.query.querystruct.filters.SimpleQueryFilter.FILTER_TYPE;
import prerna.rdf.engine.wrappers.RawRSelectWrapper;
import prerna.reactor.frame.r.util.AbstractRJavaTranslator;
import prerna.reactor.frame.r.util.RJavaRserveTranslator;
import prerna.reactor.frame.r.util.RJavaUserRserveTranslator;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.sablecc2.om.task.BasicIteratorTask;
import prerna.util.Constants;
import prerna.util.Utility;
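/**
* Builder that creates and manages an R data.table frame through an
* {@link AbstractRJavaTranslator}. It can flush iterators (engine result sets,
* CSV, Excel sheet, or Parquet file iterators) into an R table, coerce column
* data types, build data.table indices, and persist/restore the frame as
* .rda or .fst files.
*/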
public class RFrameBuilder {
protected static final String CLASS_NAME = RFrameBuilder.class.getName();
protected Logger logger = LogManager.getLogger(CLASS_NAME);
// holds the name of the current data table
protected String dataTableName = "datatable";
// keep track of the indices that exist in the table for optimal speed in sorting
protected Set<String> columnIndexSet = new HashSet<>();
// holds the connection object to execute r
protected AbstractRJavaTranslator rJavaTranslator = null;
protected boolean isInMem = true;
public RFrameBuilder(AbstractRJavaTranslator rJavaTranslator) {
this.rJavaTranslator = rJavaTranslator;
this.rJavaTranslator.startR();
}
public RFrameBuilder(AbstractRJavaTranslator rJavaTranslator, String dataTableName) {
this(rJavaTranslator);
this.dataTableName = dataTableName;
}
protected String getTableName() {
return this.dataTableName;
}
protected void setTableName(String dataTableName) {
this.dataTableName = dataTableName;
}
public void setLogger(Logger logger) {
this.logger = logger;
}
/**
* Getter for the translator
* @return
*/
public AbstractRJavaTranslator getRJavaTranslator() {
return this.rJavaTranslator;
}
////////////////////////////////////////////////////////////////////
///////////////////// Abstract Methods /////////////////////////////
/**
* Method to run an R script when we do not need to process the output
* @param r the R script to execute
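* <p>Illustrative usage (hypothetical frame and column names):
* <pre>{@code
* // drop a column from the underlying data.table without reading any output back
* builder.evalR("myFrame[, myCol := NULL]");
* }</pre>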
*/
public void evalR(String r) {
this.rJavaTranslator.executeEmptyR(r);
}
/**
* Creates a new R data table from an iterator
* @param tableName The name of the R variable to create
* @param it The iterator to flush into an R data table
* @param typesMap The data type of each column
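* <p>Illustrative sketch (hypothetical variable names; the wrapper and types
* would come from the calling reactor):
* <pre>{@code
* Map<String, SemossDataType> types = new HashMap<>();
* types.put("Title", SemossDataType.STRING);
* types.put("MovieBudget", SemossDataType.DOUBLE);
* builder.createTableViaIterator("dt_abc123", wrapper, types);
* }</pre>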
*/
public void createTableViaIterator(String tableName, Iterator<IHeadersDataRow> it, Map<String, SemossDataType> typesMap) {
Map<String, String> additionalType = new HashMap<>();
String fileType = "";
/*
* We have an iterator that comes from 4 main sources
* 1) some kind of resultset (i.e. engine/endpoint) -> we flush this out to a csv file and load it
* 2) an iterator for a csv file
* 3) an iterator for a single sheet of an excel file (later we will figure out multi sheet excels...)
* 4) an iterator for a parquet file
*/
boolean loaded = false;
if(it instanceof RawRSelectWrapper) {
// TODO: THIS DOESN'T WORK ON THE SERVER SINCE RSERVE MAY BE RUNNING ON DIFFERENT PORTS
// ADD BACK ONCE HAVE THE PROPER CHECKS
// RawRSelectWrapper rIterator = (RawRSelectWrapper) it;
// SelectQueryStruct qs = rIterator.getOutput().getQs();
// if(qs.getQsType() == AbstractQueryStruct.QUERY_STRUCT_TYPE.ENGINE) {
// // if we have a small limit
// // write to new file
// // in case the variable size is really large and the IO
// // still produces better performance
// // TODO: determine optimal number for this...
// if(qs == null || qs.getLimit() == -1 || qs.getLimit() > 10_000) {
// RNativeEngine engine = (RNativeEngine) rIterator.getEngine();
// engine.directLoad(this.rJavaTranslator, tableName, rIterator.getTempVariableName());
// loaded = true;
// if(qs != null && (qs.getLimit() > 0 || qs.getOffset() > 0)) {
// int numRows = getNumRows(tableName);
// evalR(RSyntaxHelper.determineLimitOffsetSyntax(tableName, numRows, qs.getLimit(), qs.getOffset()));
// }
// }
// }
} else if(it instanceof CsvFileIterator) {
CsvQueryStruct csvQs = ((CsvFileIterator) it).getQs();
if(csvQs.getLimit() == -1 || csvQs.getLimit() > 10_000) {
createTableViaCsvFile(tableName, (CsvFileIterator) it);
additionalType = ((CsvFileIterator) it).getQs().getAdditionalTypes();
fileType = "csv";
loaded = true;
}
} else if(it instanceof ExcelSheetFileIterator ) {
ExcelQueryStruct qs = ((ExcelSheetFileIterator)it).getQs();
String sheetName = qs.getSheetName();
String filePath = qs.getFilePath();
String sheetRange = qs.getSheetRange();
// load sheet
this.rJavaTranslator.runR(RSyntaxHelper.loadExcelSheet(filePath, tableName, sheetName, sheetRange));
// clean headers
String[] colNames = this.rJavaTranslator.getColumns(tableName);
StringBuilder script = new StringBuilder();
script.append(RSyntaxHelper.cleanFrameHeaders(tableName, colNames));
// set new header names for frame
Map<String, String> newHeaders = qs.getNewHeaderNames();
for (String oldHeader : newHeaders.keySet()) {
String newHeader = newHeaders.get(oldHeader);
script.append(RSyntaxHelper.alterColumnName(tableName, oldHeader, newHeader));
}
this.rJavaTranslator.runR(script.toString());
fileType = "excel";
loaded = true;
} else if(it instanceof ParquetFileIterator) {
ParquetQueryStruct qs = ((ParquetFileIterator) it).getQs();
String filePath = qs.getFilePath();
// load parquet file
this.rJavaTranslator.runR(RSyntaxHelper.loadParquetFile(filePath, tableName));
// clean headers
String[] colNames = this.rJavaTranslator.getColumns(tableName);
StringBuilder script = new StringBuilder();
// apply limit for import
long limit = qs.getLimit();
if (limit > -1) {
String rowLimits = String.valueOf(limit);
script.append(tableName + "<-" + tableName + "[1:" + rowLimits + ",];");
}
script.append(RSyntaxHelper.cleanFrameHeaders(tableName, colNames));
// set new header names for frame
Map<String, String> newHeaders = qs.getNewHeaderNames();
for (String newHeader : newHeaders.keySet()) {
String oldHeader = newHeaders.get(newHeader);
script.append(RSyntaxHelper.alterColumnName(tableName, oldHeader, newHeader));
}
this.rJavaTranslator.runR(script.toString());
fileType = "parquet";
loaded = true;
}
if(!loaded) {
// default behavior is to just write this to a csv file
// get the fread() notation for that csv file
// and read it back in
String newFileLoc = Utility.getInsightCacheDir() + "/" + Utility.getRandomString(6) + ".tsv";
// if(Boolean.parseBoolean(Utility.getDIHelperProperty(Constants.CHROOT_ENABLE))) {
// Insight in = this.getRJavaTranslator().getInsight();
//
// String insightFolder = this.getRJavaTranslator().getInsight().getInsightFolder();
// try {
// FileUtils.forceMkdir(new File(insightFolder));
// if(in.getUser() != null) {
// in.getUser().getUserSymlinkHelper().mountFolder(this.getRJavaTranslator().getInsight().getInsightFolder(),this.getRJavaTranslator().getInsight().getInsightFolder(), false);
// }
// newFileLoc = insightFolder + "/" + Utility.getRandomString(6) + ".tsv";
// } catch (IOException e) {
// // TODO Auto-generated catch block
// logger.error(Constants.STACKTRACE, e);
// }
// }
File newFile = Utility.writeResultToFile(newFileLoc, it, typesMap, "\t", new IStringExportProcessor() {
// for fread - we need to replace all inner quotes with ""
@Override
public String processString(String input) {
return input.replace("\"", "\"\"");
}
});
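// the generated load script is roughly of the form below
// (illustrative; the exact syntax comes from RSyntaxHelper.getFReadSyntax):
// <tableName> <- fread("<newFileLoc>", sep="\t")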
String loadFileRScript = RSyntaxHelper.getFReadSyntax(tableName, newFile.getAbsolutePath(), "\\t");
evalR(loadFileRScript);
// // check that the variable exists
// if(isEmpty(tableName)) {
// throw new EmptyIteratorException("No data found to import");
// }
// update the headers to be cleaned
if(it instanceof IRawSelectWrapper) {
String[] headers = ((IRawSelectWrapper) it).getHeaders();
String[] cleanHeaders = HeadersException.getInstance().getCleanHeaders(headers);
String modHeaders = RSyntaxHelper.alterColumnNames(tableName, headers, cleanHeaders);
evalR(modHeaders);
} else if(it instanceof BasicIteratorTask) {
List<Map<String, Object>> taskHeaders = ((BasicIteratorTask) it).getHeaderInfo();
int numHeaders = taskHeaders.size();
String[] headers = new String[numHeaders];
for(int i = 0; i < numHeaders; i++) {
Map<String, Object> headerInfo = taskHeaders.get(i);
String alias = (String) headerInfo.get("alias");
headers[i] = alias;
}
String[] cleanHeaders = HeadersException.getInstance().getCleanHeaders(headers);
String modHeaders = RSyntaxHelper.alterColumnNames(tableName, headers, cleanHeaders);
evalR(modHeaders);
}
}
// alter types
alterColumnTypes(tableName, typesMap, additionalType, fileType);
//add indices
addColumnIndex(tableName, typesMap.keySet().toArray(new String[typesMap.size()]));
}
private void createTableViaCsvFile(String tableName, CsvFileIterator it) {
CsvQueryStruct qs = it.getQs();
String[] newCleanHeaders = it.getHelper().getAllCSVHeaders();
{
long start = System.currentTimeMillis();
logger.info("Loading R table via CSV File");
// get you the fread notation with the csv file within the iterator
String loadFileRScript = RSyntaxHelper.getFReadSyntax(tableName, it.getFileLocation(), qs.getDelimiter() + "");
evalR(loadFileRScript);
// fread will use the original headers, even if there are duplicates
// we need to fix this -> grab the new ones and write it out
evalR("setnames(" + tableName + ", " + RSyntaxHelper.createStringRColVec(newCleanHeaders) + ")");
long end = System.currentTimeMillis();
logger.info("Loading R done in " + (end-start) + "ms");
}
if (qs.getSelectors().size() < newCleanHeaders.length) {
long start = System.currentTimeMillis();
logger.info("Need to filter R table based on selected headers");
RInterpreter interp = new RInterpreter();
interp.setDataTableName(tableName);
interp.setQueryStruct(qs);
Map<String, String> strTypes = qs.getColumnTypes();
Map<String, SemossDataType> enumTypes = new HashMap<>();
for(String key : strTypes.keySet()) {
enumTypes.put(key, SemossDataType.convertStringToDataType(strTypes.get(key)));
}
interp.setColDataTypes(enumTypes);
String query = interp.composeQuery();
evalR(tableName + "<-" + query);
long end = System.currentTimeMillis();
logger.info("Done filter R table based on selected headers in " + (end-start) + "ms");
}
if(!qs.getExplicitFilters().isEmpty()) {
long start = System.currentTimeMillis();
logger.info("Need to filter R table based on QS");
// we need to execute a script to modify the table to only contain the data based on the filters defined
//create a new querystruct object that will have header names in the format used for RInterpreter
SelectQueryStruct modifiedQs = new SelectQueryStruct();
updateFileSelectors(modifiedQs, tableName, newCleanHeaders);
//add filters to the new qs
List<IQueryFilter> gFilters = qs.getExplicitFilters().getFilters();
for (int i = 0; i < gFilters.size(); i++) {
//TODO: examine this update filter logic!
IQueryFilter sFilter = gFilters.get(i);
if(sFilter.getQueryFilterType() == IQueryFilter.QUERY_FILTER_TYPE.SIMPLE) {
SimpleQueryFilter updatedFilter = updateFilter(tableName, (SimpleQueryFilter) sFilter);
modifiedQs.addExplicitFilter(updatedFilter);
}
}
RInterpreter interp = new RInterpreter();
interp.setDataTableName(tableName);
interp.setQueryStruct(modifiedQs);
Map<String, String> strTypes = qs.getColumnTypes();
Map<String, SemossDataType> enumTypes = new HashMap<>();
for(String key : strTypes.keySet()) {
enumTypes.put(key, SemossDataType.convertStringToDataType(strTypes.get(key)));
}
interp.setColDataTypes(enumTypes);
String query = interp.composeQuery();
evalR(tableName + "<-" + query);
long end = System.currentTimeMillis();
logger.info("Done filter R table in " + (end-start) + "ms");
}
}
private SelectQueryStruct updateFileSelectors(SelectQueryStruct qs, String tableName, String[] colNames) {
for (int i = 0; i < colNames.length; i++) {
qs.addSelector(tableName, colNames[i]);
}
return qs;
}
private SimpleQueryFilter updateFilter(String tableName, SimpleQueryFilter filter) {
SimpleQueryFilter newFilter = null;
FILTER_TYPE fType = filter.getSimpleFilterType();
if(fType == FILTER_TYPE.COL_TO_COL) {
//change both left comparator and right comparator
String lHeader = filter.getLComparison().getValue().toString();
NounMetadata lNoun = new NounMetadata(tableName + "__" + lHeader, PixelDataType.COLUMN);
String rHeader = filter.getRComparison().getValue().toString();
NounMetadata rNoun = new NounMetadata(tableName + "__" + rHeader, PixelDataType.COLUMN);
newFilter = new SimpleQueryFilter(lNoun, filter.getComparator() , rNoun);
} else if(fType == FILTER_TYPE.COL_TO_VALUES) {
//change only the left comparator
String lHeader = filter.getLComparison().getValue().toString();
NounMetadata lNoun = new NounMetadata(tableName + "__" + lHeader, PixelDataType.COLUMN);
newFilter = new SimpleQueryFilter(lNoun, filter.getComparator(), filter.getRComparison());
} else if(fType == FILTER_TYPE.VALUES_TO_COL) {
//change only the right comparator
String rHeader = filter.getRComparison().getValue().toString();
NounMetadata rNoun = new NounMetadata(tableName + "__" + rHeader, PixelDataType.COLUMN);
newFilter = new SimpleQueryFilter(filter.getLComparison(), filter.getComparator() , rNoun);
} else if(fType == FILTER_TYPE.VALUE_TO_VALUE) {
// WHY WOULD YOU DO THIS!!!
}
return newFilter;
}
public void genRowId(String dataTableName, String rowIdName) {
// syntax
//id <- rownames(arAmgXk);
//d <- cbind(id=id, arAmgXk)
// generate the row names first
String idName = Utility.getRandomString(6);
String rStatement = idName + "<- rownames(" + dataTableName + ");";
evalR(rStatement);
// now bind it with the name
String newName = Utility.getRandomString(6);
rStatement = newName + " <- cbind(" + rowIdName + "=" + idName + ", " + dataTableName + ");";
evalR(rStatement);
// change the type of row
//evalR( addTryEvalToScript( RSyntaxHelper.alterColumnTypeToNumeric(newName, idName) ) );
// now change the table to this new name
rStatement = dataTableName + " <- " + newName + ";";
evalR(rStatement);
}
/**
* Alters a set of columns together
* Faster than running each conversion separately
* @param tableName
* @param typesMap
* @param javaDateFormatMap
* @param fileType
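* <p>For example (hypothetical columns), a typesMap of
* {@code {"Title" -> STRING, "Budget" -> DOUBLE, "ReleaseDate" -> DATE}} produces a single
* character conversion, a single numeric conversion, and a single date conversion script.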
*/
private void alterColumnTypes(String tableName, Map<String, SemossDataType> typesMap, Map<String, String> javaDateFormatMap, String fileType) {
// go through all the headers
// and collect similar types
// so we can execute with a single r script line
// for performance improvements
List<String> charColumns = new Vector<>();
List<String> intColumns = new Vector<>();
List<String> doubleColumns = new Vector<>();
List<String> booleanColumns = new Vector<>();
Map<String, List<String>> datesMap = new HashMap<>();
Map<String, List<String>> dateTimeMap = new HashMap<>();
for(String header : typesMap.keySet()) {
SemossDataType type = typesMap.get(header);
if(type == SemossDataType.STRING) {
charColumns.add(header);
} else if(type == SemossDataType.INT) {
intColumns.add(header);
} else if(type == SemossDataType.DOUBLE) {
doubleColumns.add(header);
} else if(type == SemossDataType.BOOLEAN) {
booleanColumns.add(header);
} else if(type == SemossDataType.DATE) {
String format = null;
if(javaDateFormatMap != null) {
format = javaDateFormatMap.get(header);
}
if(format == null) {
format = "yyyy-MM-dd";
}
if(datesMap.containsKey(format)) {
// add to existing list
datesMap.get(format).add(header);
} else {
List<String> headerList = new Vector<>();
headerList.add(header);
datesMap.put(format, headerList);
}
} else if( type == SemossDataType.TIMESTAMP) {
String format = null;
if(javaDateFormatMap != null) {
format = javaDateFormatMap.get(header);
}
if(format == null) {
format = "yyyy-MM-dd HH:mm:ss.SSS";
}
if(dateTimeMap.containsKey(format)) {
// add to existing list
dateTimeMap.get(format).add(header);
} else {
List<String> headerList = new Vector<>();
headerList.add(header);
dateTimeMap.put(format, headerList);
}
}
}
// now that we have everything
// execute type modifications
if(!charColumns.isEmpty()) {
evalR( RSyntaxHelper.alterColumnTypeToCharacter(tableName, charColumns) );
evalR( RSyntaxHelper.replaceNAString(tableName, charColumns) );
}
if(!intColumns.isEmpty()) {
evalR( RSyntaxHelper.alterColumnTypeToInteger(tableName, intColumns) );
}
if(!doubleColumns.isEmpty()) {
evalR( RSyntaxHelper.alterColumnTypeToNumeric(tableName, doubleColumns) );
}
if(!booleanColumns.isEmpty()) {
evalR( RSyntaxHelper.alterColumnTypeToBoolean(tableName, booleanColumns) );
}
// seeing dates are now loaded as the proper data type and not as numbers...
// if the original file type is excel, then need to assess if there are date/time cols that have been parsed to numbers first
// and handle those separately
// if (fileType.equals("excel")) {
// //handle date numbers
// if (!datesMap.isEmpty()) {
// List dateHeaders = new ArrayList();
// datesMap.values().forEach(dateHeaders::addAll);
// List dateExcelR = RSyntaxHelper.alterColumnTypeToDate_Excel(tableName, dateHeaders);
// this.rJavaTranslator.runR(dateExcelR.get(0));
// //retrieve cols have been converted to Date type
// if (this.rJavaTranslator.getInt("length(" + dateExcelR.get(1) + ")") > 0) {
// excelDateNumHeaders.addAll(Arrays.asList(this.rJavaTranslator.getStringArray(dateExcelR.get(1))));
// }
// //clean up the handledcol var in R
// this.rJavaTranslator.runR("rm(" + dateExcelR.get(1) + ";gc();");
// }
// //handle datetime numbers
// if (!dateTimeMap.isEmpty()) {
// List dateTimeHeaders = new ArrayList();
// dateTimeMap.values().forEach(dateTimeHeaders::addAll);
// //TODO track millisecond digits
// List dateTimeExcelR = RSyntaxHelper.alterColumnTypeToDateTime_Excel(tableName, dateTimeHeaders);
// for(int i = 0; i < dateTimeExcelR.size(); i++) {
// System.out.println(dateTimeExcelR.get(i));
// }
// this.rJavaTranslator.runR(dateTimeExcelR.get(0));
// //retrieve cols have been converted to Date/Time type
// if (this.rJavaTranslator.getInt("length(" + dateTimeExcelR.get(1) + ")") > 0) {
// excelDTNumHeaders.addAll(Arrays.asList(this.rJavaTranslator.getStringArray(dateTimeExcelR.get(1))));
// }
// //clean up the handledcol var in R
// this.rJavaTranslator.runR("rm(" + dateTimeExcelR.get(1) + ";gc();");
// }
// }
// loop through normal dates
for(String format : datesMap.keySet()) {
List<String> dateHeaders = datesMap.get(format);
if (!dateHeaders.isEmpty()){
String rFormat = RSyntaxHelper.translateJavaRDateTimeFormat(format);
this.rJavaTranslator.runR( RSyntaxHelper.alterColumnTypeToDate(tableName, rFormat, dateHeaders) ) ;
}
}
// excel reading already loads as POSIXct types
// so no need to modify again
// TODO: need to handle strings that we are trying to parse as timestamps?
if(!fileType.equals("excel")) {
// loop through timestamp formats
if(isEmpty(tableName)) {
for(String format : dateTimeMap.keySet()) {
List<String> dateTimeHeaders = dateTimeMap.get(format);
if (!dateTimeHeaders.isEmpty()){
this.rJavaTranslator.runR( RSyntaxHelper.alterEmptyTableColumnTypeToDateTime(tableName, dateTimeHeaders) );
}
}
} else {
for(String format : dateTimeMap.keySet()) {
List<String> dateTimeHeaders = dateTimeMap.get(format);
if (!dateTimeHeaders.isEmpty()){
String rFormat = RSyntaxHelper.translateJavaRDateTimeFormat(format);
this.rJavaTranslator.runR( RSyntaxHelper.alterColumnTypeToDateTime(tableName, rFormat, dateTimeHeaders) );
}
}
}
}
}
protected void addColumnIndex(String tableName, String colName) {
if (!columnIndexSet.contains(tableName + "+++" + colName)) {
long start = System.currentTimeMillis();
String rIndex = null;
logger.info("Generating index on R Data Table on column = " + colName);
logger.debug("CREATING INDEX ON R TABLE = " + tableName + " ON COLUMN = " + colName);
try {
rIndex = "CREATE INDEX ON " + tableName + "(" + colName + ")";
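// note: the SQL-style rIndex string above is only used for error logging in the catch block;
// the actual index is created via data.table's setindex below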
this.rJavaTranslator.executeEmptyR("setindex(" + tableName + "," + colName + ");");
String[] confirmedIndicesArr = this.rJavaTranslator.getStringArray("indices(" + tableName + ");");
if(confirmedIndicesArr != null) {
List<String> confirmedIndices = Arrays.asList(confirmedIndicesArr);
if (confirmedIndices.contains(colName)) {
columnIndexSet.add(tableName + "+++" + colName);
}
long end = System.currentTimeMillis();
logger.debug("TIME FOR R INDEX CREATION = " + (end - start) + " ms");
logger.info("Finished generating indices on R Data Table on column = " + colName);
} else {
logger.info("Encountered issue with generating indices on R Data Table on column = " + colName);
}
} catch (Exception e) {
logger.debug("ERROR WITH R INDEX !!! " + rIndex);
logger.error(Constants.STACKTRACE, e);
}
}
}
protected void addColumnIndex(String tableName, String[] colNames) {
HashSet<String> colNamesSet = new HashSet<>(Arrays.asList(colNames));
colNamesSet.removeAll(columnIndexSet);
if (colNamesSet.size() > 0 ){
long start = System.currentTimeMillis();
String rIndex = null;
logger.info("Generating index on R Data Table on columns = " + Utility.cleanLogString(StringUtils.join(colNamesSet,", ")));
logger.debug("CREATING INDEX ON R TABLE = " + tableName + " ON COLUMN(S) = " + StringUtils.join(colNamesSet,", "));
try {
rIndex = "CREATE INDEX ON " + tableName + "(" + StringUtils.join(colNamesSet,", ") + ")";
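// as above, rIndex is only kept for error logging; the indices themselves are created by
// applying data.table's setindexv over each column name in a single R call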
this.rJavaTranslator.executeEmptyR("invisible(lapply(c('" + StringUtils.join(colNamesSet,"','") + "')" + ", setindexv, x= " + tableName + "));");
// get the current indices
List<String> confirmedIndices = null;
String[] indices = this.rJavaTranslator.getStringArray("indices(" + tableName + ");");
if(indices != null && indices.length > 0) {
confirmedIndices = Arrays.asList(indices);
} else {
confirmedIndices = new Vector<>();
}
// add if not a current index
for (String c : colNamesSet) {
if (confirmedIndices.contains(c)) {
columnIndexSet.add(tableName + "+++" + c);
}
}
long end = System.currentTimeMillis();
logger.debug("TIME FOR R INDEX CREATION = " + (end - start) + " ms");
logger.info("Finished generating indices on R Data Table on columns = " + Utility.cleanLogString(StringUtils.join(colNamesSet,", ")));
} catch (Exception e) {
logger.debug("ERROR WITH R INDEX !!! " + rIndex);
logger.error(Constants.STACKTRACE, e);
}
}
}
public void removeAllColumnIndex() {
this.columnIndexSet.clear();
}
public void dropTable() {
evalR("rm(" + this.dataTableName + ")");
evalR("gc()");
}
/*
* Wrappers around existing methods in rJavaTranslator
*/
public Object[] getDataRow(String rScript, String[] headerOrdering) {
return this.rJavaTranslator.getDataRow(rScript, headerOrdering);
}
public List<Object[]> getBulkDataRow(String rScript, String[] headerOrdering) {
return this.rJavaTranslator.getBulkDataRow(rScript, headerOrdering);
}
public boolean isEmpty() {
return this.rJavaTranslator.isEmpty(this.dataTableName);
}
public boolean isEmpty(String frameName) {
return this.rJavaTranslator.isEmpty(frameName);
}
public int getNumRows() {
return getNumRows(this.dataTableName);
}
public int getNumRows(String varName) {
return this.rJavaTranslator.getNumRows(varName);
}
public int getFrameSize(String varName) {
return this.rJavaTranslator.getInt("nrow(" + varName + ") * ncol(" + varName + ");");
}
public String[] getColumnNames() {
return getColumnNames(this.dataTableName);
}
public String[] getColumnNames(String varName) {
return this.rJavaTranslator.getColumns(varName);
}
public String[] getColumnTypes() {
return getColumnTypes(this.dataTableName);
}
public String[] getColumnTypes(String varName) {
return this.rJavaTranslator.getColumnTypes(varName);
}
protected RConnection getConnection() {
if(this.rJavaTranslator instanceof RJavaRserveTranslator) {
return ((RJavaRserveTranslator) this.rJavaTranslator).getConnection();
} else if(this.rJavaTranslator instanceof RJavaUserRserveTranslator){
return ((RJavaUserRserveTranslator) this.rJavaTranslator).getConnection();
}
return null;
}
protected String getPort() {
if(this.rJavaTranslator instanceof RJavaRserveTranslator) {
return ((RJavaRserveTranslator) this.rJavaTranslator).getPort();
}
return null;
}
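/**
* Serializes the given R frame variable to an .rda file via R's save() and fails fast
* if the resulting file on disk is empty.
* @param frameFileName
* @param frameName
*/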
protected void saveRda(String frameFileName, String frameName) {
this.evalR("save(" + frameName + ", file=\"" + frameFileName.replace("\\", "/") + "\")");
if (new File(frameFileName).length() == 0){
throw new IllegalArgumentException("Attempting to save an empty R frame");
}
}
protected void openRda(String frameFileName){
this.evalR("load(\"" + frameFileName.replace("\\", "/") + "\")");
}
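/**
* Serializes the given R frame variable to a .fst file via write_fst(); this assumes the
* fst package is installed in the connected R environment.
* @param frameFileName
* @param frameName
*/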
protected void saveFst(String frameFileName, String frameName) {
this.evalR("library(\"fst\")");
this.evalR("write_fst(" + frameName + ", \"" + frameFileName.replace("\\", "/") + "\")");
if (new File(Utility.normalizePath(frameFileName)).length() == 0){
throw new IllegalArgumentException("Attempting to save an empty R frame");
}
}
protected void openFst(String frameFileName, String frameName){
this.evalR("library(\"fst\")");
// 2020-01-02
// newer version of fst library shouldn't require the additional "as.data.table" syntax
// https://github.com/fstpackage/fst/milestone/23
this.evalR(frameName + " <- as.data.table(read_fst(\"" + frameFileName.replace("\\", "/") + "\"))");
}
}