![JAR search and dependency download from the Maven repository](/logo.png)
com.github.chen0040.data.frame.DataQuery Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of java-data-frame Show documentation
Show all versions of java-data-frame Show documentation
Some common patterns of data frame in Java
The newest version!
package com.github.chen0040.data.frame;
import com.github.chen0040.data.utils.CsvUtils;
import com.github.chen0040.data.utils.NumberUtils;
import com.github.chen0040.data.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.xml.transform.Source;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
/**
* Created by xschen on 1/5/2017.
*/
public class DataQuery {
/**
 * Builder stage from which columns are selected, rows are skipped and the
 * final {@link DataFrame} is produced.
 */
public interface DataFrameQueryBuilder {
/**
 * Records how many rows should be skipped when the source is read.
 *
 * @param skippedRowCount number of rows to skip (presumably counted from the
 *                        start of the source; confirm against the reader)
 * @return a query builder for further chaining
 */
DataFrameQueryBuilder skipRows(int skippedRowCount);
/**
 * Starts the configuration of one source column.
 *
 * @param columnIndex position of the column in a split source line; the CSV
 *                    path of {@code build()} compares it with a 0-based word index
 * @return a column builder used to set the transformer and the column role
 */
DataColumnBuilder selectColumn(int columnIndex);
/**
 * Produces the data frame described by the calls made so far.
 *
 * @return the resulting data frame
 */
DataFrame build();
}
/**
 * Builder stage that configures a single selected column: how its raw text is
 * converted and whether it becomes an input or an output column.
 */
public interface DataColumnBuilder {
    /**
     * Sets the function that converts the raw text of each cell in the column.
     *
     * <p>The parameter is typed as {@code Function<String, Object>} rather than a
     * raw {@code Function}: the transformer is always applied to a {@code String}
     * and its result is inspected as an {@code Object}. In the CSV path of
     * {@code build()} a {@code String} result is stored as a categorical cell and
     * any other result is converted with {@code NumberUtils.toDouble}.
     *
     * @param columnTransformer maps the raw cell text to the value to store
     * @return this column builder for further chaining
     */
    DataColumnBuilder transform(Function<String, Object> columnTransformer);

    /**
     * Convenience for {@code transform(StringUtils::parseDouble)}.
     *
     * @return this column builder for further chaining
     */
    default DataColumnBuilder asNumeric() {
        return transform(StringUtils::parseDouble);
    }

    /**
     * Convenience for {@code transform(String::trim)}, i.e. the trimmed text is
     * kept as a string value.
     *
     * @return this column builder for further chaining
     */
    default DataColumnBuilder asCategory() {
        return transform(String::trim);
    }

    /**
     * Finishes the column and registers it under the given name as an input
     * column (implementation not visible here; presumably appended to the input
     * column list).
     *
     * @param columnName name of the column in the resulting data frame
     * @return the query builder, so further columns can be selected
     */
    DataFrameQueryBuilder asInput(String columnName);

    /**
     * Finishes the column and registers it under the given name as an output
     * (target) column (implementation not visible here; presumably appended to
     * the output column list).
     *
     * @param columnName name of the column in the resulting data frame
     * @return the query builder, so further columns can be selected
     */
    DataFrameQueryBuilder asOutput(String columnName);
}
/**
 * First builder stage: chooses the format of the data that will be read, or
 * starts a blank table definition.
 */
public interface FormatBuilder {
/**
 * Chooses CSV input with an explicit splitter and a flag for skipping the
 * first line.
 *
 * @param splitter      string used to split each line into words
 * @param skipFirstLine whether the first line should be skipped
 * @return the stage at which the input source is supplied
 * @deprecated presumably superseded by {@link #csv(String)} combined with
 *             {@code skipRows(int)} on the query builder; confirm before removal
 */
@Deprecated
SourceBuilder csv(String splitter, boolean skipFirstLine);
/**
 * Chooses CSV input with an explicit splitter.
 *
 * @param splitter string used to split each line into words
 * @return the stage at which the input source is supplied
 */
SourceBuilder csv(String splitter);
/**
 * Chooses CSV input with the default splitter {@code "\\s"} (a single
 * whitespace character if the splitter is interpreted as a regular
 * expression; confirm against the CSV reader).
 *
 * @return the stage at which the input source is supplied
 */
default SourceBuilder csv() {
return csv("\\s");
}
/**
 * Chooses the libsvm input format (going by the method name; the
 * implementation is not visible here).
 *
 * @return the stage at which the input source is supplied
 */
SourceBuilder libsvm();
/**
 * Starts a table definition that declares columns by name instead of
 * supplying an input source at this stage.
 *
 * @return the stage at which input and output columns are declared
 */
DataTableBuilder blank();
}
/**
 * Builder stage for declaring the columns of a table by name.
 */
public interface DataTableBuilder {
/**
 * Declares an input column.
 *
 * @param columnName name of the new input column
 * @return a table builder for further chaining
 */
DataTableBuilder newInput(String columnName);
/**
 * Declares an output column.
 *
 * @param columnName name of the new output column
 * @return a table builder for further chaining
 */
DataTableBuilder newOutput(String columnName);
/**
 * Ends the column declarations.
 *
 * @return the query builder stage
 */
DataFrameQueryBuilder end();
}
/**
 * Builder stage at which the input source is supplied.
 */
public interface SourceBuilder {
/**
 * Supplies the stream the data will be read from.
 *
 * @param inputStream stream holding the data in the previously chosen format
 *                    (NOTE(review): who closes the stream is not visible here)
 * @return the query builder stage
 */
DataFrameQueryBuilder from(InputStream inputStream);
}
/**
 * Description of one selected source column: its position in a split source
 * line, the function that converts its raw text, and the name it receives in
 * the data frame.
 *
 * <p>The fields are deliberately left non-final because the enclosing builder
 * may adjust them after construction.
 */
private static class DataFrameColumn {
    /** Position of the column within a split source line. */
    private int index;
    /**
     * Converts the raw cell text into the value to store. Typed as
     * {@code Function<String, Object>} instead of a raw {@code Function} so that
     * applying it to a {@code String} is checked by the compiler.
     */
    private Function<String, Object> transformer;
    /** Name of the column in the resulting data frame. */
    private String columnName;

    /**
     * @param columnName  name of the column in the resulting data frame
     * @param index       position of the column within a split source line
     * @param transformer converts the raw cell text into the value to store
     */
    public DataFrameColumn(String columnName, int index, Function<String, Object> transformer) {
        this.columnName = columnName;
        this.index = index;
        this.transformer = transformer;
    }
}
private static class DataFrameBuilderX implements SourceBuilder, DataFrameQueryBuilder, DataColumnBuilder, FormatBuilder, DataTableBuilder {
// Columns registered as inputs. Parameterized (instead of a raw List) so that
// build() can iterate the elements as DataFrameColumn.
private final List<DataFrameColumn> inputColumns = new ArrayList<>();
// Columns registered as outputs (targets); parameterized for the same reason.
private final List<DataFrameColumn> outputColumns = new ArrayList<>();
// Source the data is read from.
private InputStream dataInputStream;
// Splitter handed to the CSV reader; defaults to "\\s".
private String csvSplitter = "\\s";
// Format chosen for the source; build() branches on this value.
private DataFileType fileType;
// Legacy flag: build() treats it as "skip at least one line" by taking the
// maximum of (skipFirstLine ? 1 : 0) and skippedRowCount.
@Deprecated
private boolean skipFirstLine = false;
// Number of rows to skip, set through skipRows(int).
private int skippedRowCount = 0;
private static final Logger logger = LoggerFactory.getLogger(DataFrameBuilderX.class);
// Column currently being configured; replaced on every selectColumn(int) call.
private DataFrameColumn selected = null;
/**
 * Begins configuring the column at the given position. The new column starts
 * with an empty name and a transformer that returns its argument unchanged;
 * it replaces whatever column was previously selected.
 *
 * @param columnIndex position of the column within a split source line
 * @return this builder, viewed as the column-configuration stage
 */
@Override
public DataColumnBuilder selectColumn(int columnIndex) {
    final DataFrameColumn freshColumn = new DataFrameColumn("", columnIndex, raw -> raw);
    this.selected = freshColumn;
    return this;
}
/**
 * Stores the number of rows to skip. In the CSV path of {@code build()} the
 * value is combined with the deprecated skip-first-line flag by taking the
 * larger of the two.
 *
 * @param skippedRowCount number of rows to skip
 * @return this builder, viewed as the query stage
 */
@Override public DataFrameQueryBuilder skipRows(int skippedRowCount) {
this.skippedRowCount = skippedRowCount;
return this;
}
@Override public DataFrame build() {
final BasicDataFrame dataFrame = new BasicDataFrame();
if(fileType == DataFileType.Csv) {
if(inputColumns.isEmpty()){
throw new RuntimeException("data frame should not have empty input columns");
}
int skippedLines = Math.max(this.skipFirstLine ? 1 : 0, this.skippedRowCount);
CsvUtils.csv(dataInputStream, csvSplitter, skippedLines, (words) -> {
DataRow row = dataFrame.newRow();
for (int i = 0; i < words.length; ++i) {
for (DataFrameColumn c : inputColumns) {
if (c.index == i) {
Object data = c.transformer.apply(words[i]);
if(data instanceof String){
row.setCategoricalCell(c.columnName, (String)data);
} else {
row.setCell(c.columnName, NumberUtils.toDouble(data));
}
}
}
for (DataFrameColumn c : outputColumns) {
if (c.index == i) {
Object target = c.transformer.apply(words[i]);
if(target instanceof String) {
row.setCategoricalTargetCell(c.columnName, (String)target);
} else {
row.setTargetCell(c.columnName, NumberUtils.toDouble(target));
}
}
}
}
dataFrame.addRow(row);
return true;
}, (e) -> logger.error("Failed to read csv file", e));
} else if(fileType == DataFileType.HeartScale) {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy