Downloading a project consumes significant server resources, and we have to cover our server costs. Thank you in advance for your understanding. Project price: only $1.
You can buy this project and then download or modify it as often as you want.
package io.github.vmzakharov.ecdataframe.dataframe;
import io.github.vmzakharov.ecdataframe.dataset.HierarchicalDataSet;
import io.github.vmzakharov.ecdataframe.dsl.EvalContext;
import io.github.vmzakharov.ecdataframe.dsl.EvalContextAbstract;
import io.github.vmzakharov.ecdataframe.dsl.Expression;
import io.github.vmzakharov.ecdataframe.dsl.FunctionScript;
import io.github.vmzakharov.ecdataframe.dsl.SimpleEvalContext;
import io.github.vmzakharov.ecdataframe.dsl.value.BooleanValue;
import io.github.vmzakharov.ecdataframe.dsl.value.Value;
import io.github.vmzakharov.ecdataframe.dsl.value.ValueType;
import io.github.vmzakharov.ecdataframe.dsl.visitor.ExpressionEvaluationVisitor;
import io.github.vmzakharov.ecdataframe.dsl.visitor.InMemoryEvaluationVisitor;
import io.github.vmzakharov.ecdataframe.dsl.visitor.TypeInferenceVisitor;
import io.github.vmzakharov.ecdataframe.util.ExpressionParserHelper;
import org.eclipse.collections.api.BooleanIterable;
import org.eclipse.collections.api.DoubleIterable;
import org.eclipse.collections.api.FloatIterable;
import org.eclipse.collections.api.IntIterable;
import org.eclipse.collections.api.LongIterable;
import org.eclipse.collections.api.RichIterable;
import org.eclipse.collections.api.block.function.primitive.IntIntToIntFunction;
import org.eclipse.collections.api.block.predicate.primitive.IntPredicate;
import org.eclipse.collections.api.block.procedure.Procedure;
import org.eclipse.collections.api.list.ImmutableList;
import org.eclipse.collections.api.list.ListIterable;
import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.api.list.primitive.IntList;
import org.eclipse.collections.api.list.primitive.MutableBooleanList;
import org.eclipse.collections.api.list.primitive.MutableIntList;
import org.eclipse.collections.api.map.MapIterable;
import org.eclipse.collections.api.map.MutableMap;
import org.eclipse.collections.api.multimap.list.MutableListMultimap;
import org.eclipse.collections.api.set.MutableSet;
import org.eclipse.collections.api.tuple.Triplet;
import org.eclipse.collections.api.tuple.Twin;
import org.eclipse.collections.impl.factory.Lists;
import org.eclipse.collections.impl.factory.Maps;
import org.eclipse.collections.impl.factory.Multimaps;
import org.eclipse.collections.impl.factory.primitive.IntLists;
import org.eclipse.collections.impl.list.mutable.primitive.BooleanArrayList;
import org.eclipse.collections.impl.set.sorted.mutable.TreeSortedSet;
import org.eclipse.collections.impl.tuple.Tuples;
import org.eclipse.collections.impl.utility.ArrayIterate;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Supplier;
import static io.github.vmzakharov.ecdataframe.dataframe.DfColumnSortOrder.ASC;
import static io.github.vmzakharov.ecdataframe.util.ExceptionFactory.exceptionByKey;
/**
* Data Frame - a tabular data structure
*/
public class DataFrame
implements DfIterate
{
// NOTE(review): the collection and ThreadLocal fields below are declared as raw types although the code reads
// typed elements (e.g. DfColumn) from them; the type arguments look like they were lost - confirm against the
// upstream source.
// the name of this data frame, set once in the constructor
private final String name;
// columns keyed by column name, used for lookups by name
private final MutableMap columnsByName = Maps.mutable.of();
// columns in the order they were attached, used for lookups by position
private final MutableList columns = Lists.mutable.of();
// the current number of rows; incremented when rows are added and recomputed from the stored columns' sizes
private int rowCount = 0;
// per-thread evaluation context, created lazily on first access from each thread
private final ThreadLocal localEvalContext;
// per-thread expression evaluation visitor, built on top of the same thread's evaluation context
private final ThreadLocal localEvalVisitor;
// when not null, row indices passed to the accessors are translated through this list before reaching a column
private IntList virtualRowMap = null;
// whether pooling is currently enabled for this data frame and its columns
private boolean poolingEnabled = false;
// NOTE(review): only touched via resetBitmap(), which is not defined in this part of the file - purpose to confirm
private MutableBooleanList bitmap = null;
// when not null, holds for each row the list of indices returned by getAggregateIndex()
private MutableList aggregateIndex = null;
// NOTE(review): not used in this part of the file; presumably named indices over this data frame - confirm
private final MutableMap indices = Maps.mutable.of();
/**
* Creates an empty data frame with the given name. The evaluation context and the evaluation visitor are set up
* as thread-local values that are created lazily, the first time each thread asks for them.
*
* @param newName the name of the new data frame
*/
public DataFrame(String newName)
{
    this.name = newName;

    // inside a lambda "this" is the enclosing data frame instance
    this.localEvalContext = ThreadLocal.withInitial(() -> new DataFrameEvalContext(this));
    this.localEvalVisitor = ThreadLocal.withInitial(
            () -> new InMemoryEvaluationVisitor(this.localEvalContext.get()));

    this.resetBitmap();
}
/**
* Add a stored column of string type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addStringColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.STRING);
}
/**
* Add a computed column of string type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code STRING}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addStringColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.STRING, expressionAsString);
}
/**
* Add a stored column of string type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addStringColumn(String newColumnName, ListIterable values)
{
    DfStringColumnStored stringColumn = new DfStringColumnStored(this, newColumnName, values);
    this.attachColumn(stringColumn);
    return this;
}
/**
* Add a stored column of long type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addLongColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.LONG);
}
/**
* Add a computed column of long type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code LONG}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addLongColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.LONG, expressionAsString);
}
/**
* Add a stored column of long type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addLongColumn(String newColumnName, LongIterable values)
{
    DfLongColumnStored longColumn = new DfLongColumnStored(this, newColumnName, values);
    this.attachColumn(longColumn);
    return this;
}
/**
* Add a stored column of int type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addIntColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.INT);
}
/**
* Add a computed column of int type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code INT}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addIntColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.INT, expressionAsString);
}
/**
* Add a stored column of int type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addIntColumn(String newColumnName, IntIterable values)
{
    DfIntColumnStored intColumn = new DfIntColumnStored(this, newColumnName, values);
    this.attachColumn(intColumn);
    return this;
}
/**
* Add a stored column of boolean type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addBooleanColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.BOOLEAN);
}
/**
* Add a stored column of boolean type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addBooleanColumn(String newColumnName, BooleanIterable values)
{
    DfBooleanColumnStored booleanColumn = new DfBooleanColumnStored(this, newColumnName, values);
    this.attachColumn(booleanColumn);
    return this;
}
/**
* Add a stored column of float type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addFloatColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.FLOAT);
}
/**
* Add a computed column of float type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code FLOAT}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addFloatColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.FLOAT, expressionAsString);
}
/**
* Add a stored column of float type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addFloatColumn(String newColumnName, FloatIterable values)
{
    DfFloatColumnStored floatColumn = new DfFloatColumnStored(this, newColumnName, values);
    this.attachColumn(floatColumn);
    return this;
}
/**
* Add a stored column of double type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addDoubleColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.DOUBLE);
}
/**
* Add a computed column of double type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code DOUBLE}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addDoubleColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.DOUBLE, expressionAsString);
}
/**
* Add a stored column of double type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addDoubleColumn(String newColumnName, DoubleIterable values)
{
    DfDoubleColumnStored doubleColumn = new DfDoubleColumnStored(this, newColumnName, values);
    this.attachColumn(doubleColumn);
    return this;
}
/**
* Add a stored column of date type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addDateColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.DATE);
}
/**
* Add a stored column of date type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addDateColumn(String newColumnName, ListIterable values)
{
    DfDateColumnStored dateColumn = new DfDateColumnStored(this, newColumnName, values);
    this.attachColumn(dateColumn);
    return this;
}
/**
* Add a computed column of date type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code DATE}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addDateColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.DATE, expressionAsString);
}
/**
* Add a stored column of date/time type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addDateTimeColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.DATE_TIME);
}
/**
* Add a stored column of date/time type, populated with the provided values, to this data frame. Attaching fails
* if a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addDateTimeColumn(String newColumnName, ListIterable values)
{
    DfDateTimeColumnStored dateTimeColumn = new DfDateTimeColumnStored(this, newColumnName, values);
    this.attachColumn(dateTimeColumn);
    return this;
}
/**
* Add a computed column of date/time type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code DATE_TIME}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addDateTimeColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.DATE_TIME, expressionAsString);
}
/**
* Add a stored column of decimal type to this data frame
* @param newColumnName the name of the column to be added
* @return the result of {@link #addColumn(String, ValueType)}, which is this data frame
*/
public DataFrame addDecimalColumn(String newColumnName)
{
return this.addColumn(newColumnName, ValueType.DECIMAL);
}
/**
* Add a computed column of decimal type to this data frame. The type of the expression is inferred first and an
* exception is raised if it is not {@code DECIMAL}.
*
* @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
* the expression
* @param newColumnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
@Deprecated
public DataFrame addDecimalColumn(String newColumnName, String expressionAsString)
{
    return this.addColumnWithTypeValidation(newColumnName, ValueType.DECIMAL, expressionAsString);
}
/**
* Add a stored column of decimal type, populated with the provided values, to this data frame. Attaching fails if
* a column with the same name already exists.
*
* @param newColumnName the name of the column to be added
* @param values the values of the new column
* @return this data frame
*/
public DataFrame addDecimalColumn(String newColumnName, ListIterable values)
{
    DfDecimalColumnStored decimalColumn = new DfDecimalColumnStored(this, newColumnName, values);
    this.attachColumn(decimalColumn);
    return this;
}
/**
* Produces a short, human-readable preview of the data frame: a header line with the data frame's name and its
* row count, followed by the column names and at most the first 10 rows in CSV form. When the data frame has more
* than 10 rows the preview is terminated with an ellipsis line ("...").
*
* @return a string representation of the data frame
*/
@Override
public String toString()
{
    int previewRowLimit = 10;

    StringBuilder preview = new StringBuilder();
    preview.append(this.getName());
    preview.append(" [").append(this.rowCount).append(" rows]");
    preview.append('\n');
    preview.append(this.asCsvString(previewRowLimit));

    boolean truncated = this.rowCount() > previewRowLimit;
    if (truncated)
    {
        preview.append("...\n");
    }

    return preview.toString();
}
/**
* Registers a column with this data frame, both by name and by position. A column whose name is already taken is
* reported as an error. The column inherits the data frame's pooling setting, and if it is a non-empty stored
* column the row count of the data frame is recomputed.
*
* @param newColumn the column to attach
*/
private void attachColumn(DfColumn newColumn)
{
    boolean nameTaken = this.hasColumn(newColumn.getName());
    if (nameTaken)
    {
        exceptionByKey("DF_DUPLICATE_COLUMN")
                .with("columnName", newColumn.getName())
                .with("dataFrameName", this.getName())
                .fire();
    }

    this.columnsByName.put(newColumn.getName(), newColumn);
    this.columns.add(newColumn);

    if (this.isPoolingEnabled())
    {
        newColumn.enablePooling();
    }

    // only stored columns that already carry data can affect the row count
    if (newColumn.isStored())
    {
        if (newColumn.getSize() > 0)
        {
            this.determineRowCount();
        }
    }
}
/**
* Turns pooling on for this data frame and for each of the columns currently attached to it. Columns attached
* while pooling is on get pooling enabled as well.
*/
public void enablePooling()
{
    this.poolingEnabled = true;
    this.columns.forEach(eachColumn -> eachColumn.enablePooling());
}
/**
* @return {@code true} if pooling is currently enabled for this data frame, {@code false} otherwise
*/
public boolean isPoolingEnabled()
{
return this.poolingEnabled;
}
/**
* Looks up a column of this data frame by its name. A name that does not correspond to any column is reported as
* an error.
*
* @param columnName the name of the column to look up
* @return the column with the specified name
*/
public DfColumn getColumnNamed(String columnName)
{
    DfColumn found = this.columnsByName.get(columnName);

    if (found != null)
    {
        return found;
    }

    exceptionByKey("DF_COLUMN_DOES_NOT_EXIST")
            .with("columnName", columnName)
            .with("dataFrameName", this.getName())
            .fire();

    // only reached if fire() returns normally, in which case the (null) lookup result is passed back as before
    return found;
}
/**
* @return an immutable list of the columns of this data frame, in the order they are held by the data frame
*/
public ImmutableList getColumns()
{
return this.columns.toImmutable();
}
/**
* @param columnIndex the zero-based position of the column in this data frame
* @return the column at the specified position
*/
public DfColumn getColumnAt(int columnIndex)
{
return this.columns.get(columnIndex);
}
/**
* Appends a row to this data frame. The element at position {@code i} of the row is added to the column at
* position {@code i}, and the row count is incremented. A row with more elements than there are columns is
* reported as an error, the same way as in {@link #addRow(Object...)}.
*
* @param rowValues the values of the new row, in column order
*/
public void addRow(ListIterable rowValues)
{
    // validate up front: failing half way through would leave the leading columns one value longer than the rest
    if (rowValues.size() > this.columnCount())
    {
        exceptionByKey("DF_ADDING_ROW_TOO_WIDE")
                .with("elementCount", rowValues.size())
                .with("columnCount", this.columnCount())
                .fire();
    }

    rowValues.forEachWithIndex((v, i) -> this.columns.get(i).addValue(v));
    this.rowCount++;
}
/**
* Convert the data frame into a multi-line CSV string. The output will include column headers and all the rows
* of the data frame (this is the same as calling {@link #asCsvString(int)} with a negative limit).
*
* @return a string representation of the data frame.
*/
public String asCsvString()
{
return this.asCsvString(-1);
}
/**
* Renders the data frame as multi-line, comma-separated text. The first line lists the column names; each
* following line holds the string literal form of the values of one row. Every line, including the header, ends
* with a new line character.
*
* @param limit the maximum number of rows to render: a negative value means all rows, zero means the header line
*              only
* @return a CSV string representation of the data frame rows.
*/
public String asCsvString(int limit)
{
    StringBuilder csv = new StringBuilder();

    csv.append(this.columns.makeString(DfColumn::getName, "", ",", "")).append('\n');

    int width = this.columnCount();

    int rowsToRender = this.rowCount();
    if (limit >= 0 && limit < rowsToRender)
    {
        rowsToRender = limit;
    }

    for (int r = 0; r < rowsToRender; r++)
    {
        for (int c = 0; c < width; c++)
        {
            if (c > 0)
            {
                csv.append(",");
            }
            csv.append(this.getValueAsStringLiteral(r, c));
        }
        csv.append('\n');
    }

    return csv.toString();
}
/**
* @return the number of rows currently recorded for this data frame
*/
public int rowCount()
{
return this.rowCount;
}
/**
* Appends a row of empty values: every column of this data frame gets an empty value added to it and the row
* count is incremented.
*
* @return this data frame
*/
public DataFrame addRow()
{
    this.columns.forEach(eachColumn -> eachColumn.addEmptyValue());
    this.rowCount++;
    return this;
}
/**
* Appends a row made up of the provided values. The value at position {@code i} is added to the column at
* position {@code i}, and the row count is incremented. Passing more values than there are columns is reported
* as an error.
*
* @param values the values of the new row, in column order
* @return this data frame
*/
public DataFrame addRow(Object... values)
{
    int valueCount = values.length;

    if (valueCount > this.columnCount())
    {
        exceptionByKey("DF_ADDING_ROW_TOO_WIDE")
                .with("elementCount", values.length)
                .with("columnCount", this.columnCount())
                .fire();
    }

    for (int position = 0; position < valueCount; position++)
    {
        this.columns.get(position).addObject(values[position]);
    }

    this.rowCount++;
    return this;
}
/**
* @return the number of columns in this data frame
*/
public int columnCount()
{
return this.columns.size();
}
/**
* @return the name this data frame was created with
*/
public String getName()
{
return this.name;
}
/**
* Add a computed column to this data frame. The type of the new column is not specified by the caller, it is
* inferred from the expression provided.
*
* @param columnName the name of the column to be added
* @param expressionAsString the expression used to compute the column values
* @return this data frame
*/
public DataFrame addColumn(String columnName, String expressionAsString)
{
    ValueType inferredType = this.inferExpressionType(columnName, expressionAsString);
    return this.addColumn(columnName, inferredType, expressionAsString);
}
/**
* Creates a stored column with the specified name of the specified type and attaches it to this data frame. This
* is the same as {@link #newColumn(String, ValueType)} except that it returns the data frame rather than the
* column.
*
* @param columnName the name of the column to be created
* @param type the type of the new column
* @return this data frame
*/
public DataFrame addColumn(String columnName, ValueType type)
{
this.newColumn(columnName, type);
return this;
}
/**
* Creates a stored column with the specified name of the specified type and attaches it to this data frame.
*
* @param columnName the name of the column to be created
* @param type the type of the new column
* @return the newly created column
*/
public DfColumnStored newColumn(String columnName, ValueType type)
{
DfColumnStored created = this.createStoredColumn(columnName, type);
this.attachColumn(created);
return created;
}
/**
* Instantiates (without attaching) a stored column implementation matching the requested value type. A type that
* has no stored column implementation results in an exception.
*
* @param columnName the name of the column to be created
* @param type the type of the values in the new column
* @return a new stored column that belongs to this data frame
*/
private DfColumnStored createStoredColumn(String columnName, ValueType type)
{
    DfColumnStored storedColumn = switch (type)
    {
        // numeric
        case INT -> new DfIntColumnStored(this, columnName);
        case LONG -> new DfLongColumnStored(this, columnName);
        case FLOAT -> new DfFloatColumnStored(this, columnName);
        case DOUBLE -> new DfDoubleColumnStored(this, columnName);
        case DECIMAL -> new DfDecimalColumnStored(this, columnName);
        // everything else
        case BOOLEAN -> new DfBooleanColumnStored(this, columnName);
        case STRING -> new DfStringColumnStored(this, columnName);
        case DATE -> new DfDateColumnStored(this, columnName);
        case DATE_TIME -> new DfDateTimeColumnStored(this, columnName);
        default -> throw exceptionByKey("DF_ADD_COL_UNKNOWN_TYPE")
                .with("columnName", columnName)
                .with("type", type)
                .get();
    };

    return storedColumn;
}
/**
* Creates a calculated column with the specified name of the specified type and attaches it to this data frame.
* This is the same as {@link #newColumn(String, ValueType, String)} except that it returns the data frame rather
* than the column. The specified type is not checked against the expression by this method.
*
* @param columnName the name of the column to be created
* @param type the type of the new column
* @param expressionAsString the expression used to calculate column values
* @return this data frame
*/
public DataFrame addColumn(String columnName, ValueType type, String expressionAsString)
{
this.newColumn(columnName, type, expressionAsString);
return this;
}
/**
* Adds a calculated column after confirming that the type inferred from its expression is the same as the type
* requested by the caller. A mismatch results in an exception and no column is added.
*
* @param columnName the name of the column to be created
* @param columnType the type the caller expects the column to have
* @param expressionAsString the expression used to calculate column values
* @return this data frame
*/
private DataFrame addColumnWithTypeValidation(String columnName, ValueType columnType, String expressionAsString)
{
    ValueType inferredType = this.inferExpressionType(columnName, expressionAsString);

    if (inferredType == columnType)
    {
        this.newColumn(columnName, columnType, expressionAsString);
        return this;
    }

    throw exceptionByKey("DF_CALC_COL_TYPE_MISMATCH")
            .with("columnName", columnName)
            .with("dataFrameName", this.getName())
            .with("inferredType", inferredType.toString())
            .with("expression", expressionAsString)
            .with("specifiedType", columnType.toString())
            .get();
}
/**
* Creates a calculated column with the specified name of the specified type and attaches it to this data frame.
*
* @param columnName the name of the column to be created
* @param type the type of the new column
* @param expressionAsString the expression used to calculate column values
* @return the newly created column
*/
public DfColumnComputed newColumn(String columnName, ValueType type, String expressionAsString)
{
DfColumnComputed created = this.createComputedColumn(columnName, type, expressionAsString);
this.attachColumn(created);
return created;
}
/**
* Works out the type of the value an expression produces when evaluated against this data frame. The columns of
* the data frame are made known to the type inference as variables of the respective column types. Any errors
* found by the type inference are reported as a single exception listing all of them.
*
* @param columnName the name of the column the expression is for, only used for error reporting
* @param expressionAsString the expression to examine
* @return the inferred type of the expression
*/
private ValueType inferExpressionType(String columnName, String expressionAsString)
{
    TypeInferenceVisitor typeInference = new TypeInferenceVisitor(this.getEvalContext());

    this.getColumns().each(
            column -> typeInference.storeVariableType(column.getName(), column.getType()));

    Expression parsed = ExpressionParserHelper.DEFAULT.toExpressionOrScript(expressionAsString);
    ValueType inferredType = typeInference.inferExpressionType(parsed);

    if (!typeInference.hasErrors())
    {
        return inferredType;
    }

    exceptionByKey("DF_CALC_COL_INFER_TYPE")
            .with("columnName", columnName)
            .with("dataFrameName", this.getName())
            .with("expression", expressionAsString)
            .with("errorList", typeInference.getErrors()
                    .collect(error -> error.getOne() + ": " + error.getTwo())
                    .makeString("\n"))
            .fire();

    // only reached if fire() returns normally
    return inferredType;
}
/**
* Instantiates (without attaching) a computed column implementation matching the requested value type. A type
* that has no computed column implementation results in an exception.
*
* @param columnName the name of the column to be created
* @param type the type of the values in the new column
* @param expressionAsString the expression used to calculate column values
* @return a new computed column that belongs to this data frame
*/
private DfColumnComputed createComputedColumn(String columnName, ValueType type, String expressionAsString)
{
    DfColumnComputed computedColumn = switch (type)
    {
        // numeric
        case INT -> new DfIntColumnComputed(this, columnName, expressionAsString);
        case LONG -> new DfLongColumnComputed(this, columnName, expressionAsString);
        case FLOAT -> new DfFloatColumnComputed(this, columnName, expressionAsString);
        case DOUBLE -> new DfDoubleColumnComputed(this, columnName, expressionAsString);
        case DECIMAL -> new DfDecimalColumnComputed(this, columnName, expressionAsString);
        // everything else
        case BOOLEAN -> new DfBooleanColumnComputed(this, columnName, expressionAsString);
        case STRING -> new DfStringColumnComputed(this, columnName, expressionAsString);
        case DATE -> new DfDateColumnComputed(this, columnName, expressionAsString);
        case DATE_TIME -> new DfDateTimeColumnComputed(this, columnName, expressionAsString);
        default -> throw exceptionByKey("DF_ADD_COL_UNKNOWN_TYPE")
                .with("columnName", columnName)
                .with("type", type)
                .get();
    };

    return computedColumn;
}
/**
* Translates a row index as seen by the users of this data frame into the row index to use with the columns.
* When a virtual row map is present the index is looked up in it, otherwise the index is returned unchanged.
*
* @param virtualRowIndex the row index as seen by the users of this data frame
* @return the row index to pass to the columns
*/
protected int rowIndexMap(int virtualRowIndex)
{
    return this.isIndexed()
            ? this.virtualRowMap.get(virtualRowIndex)
            : virtualRowIndex;
}
/**
* @return {@code true} if this data frame has a virtual row map, that is if row indices need to be translated
* before they are passed to the columns
*/
private boolean isIndexed()
{
return this.virtualRowMap != null;
}
/**
* Returns the aggregate index entry recorded for a row of this data frame, or an empty list if this data frame
* does not have an aggregate index.
*
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the list of indices recorded for the row, or an empty list if there is no aggregate index
*/
public IntList getAggregateIndex(int rowIndex)
{
    if (!this.isAggregateWithIndex())
    {
        return IntLists.immutable.empty();
    }

    return this.aggregateIndex.get(this.rowIndexMap(rowIndex));
}
/**
* @return {@code true} if an aggregate index has been set on this data frame
*/
private boolean isAggregateWithIndex()
{
return this.aggregateIndex != null;
}
/**
* Returns the value of a cell as an object.
*
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @param columnIndex the zero-based position of the column
* @return the object the column returns for the row
*/
public Object getObject(int rowIndex, int columnIndex)
{
return this.columns.get(columnIndex).getObject(this.rowIndexMap(rowIndex));
}
/**
* Returns the value of a cell as an object. A column name that does not exist is reported by
* {@link #getColumnNamed(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the object the column returns for the row
*/
public Object getObject(String columnName, int rowIndex)
{
return this.getColumnNamed(columnName).getObject(this.rowIndexMap(rowIndex));
}
/**
* Asks the named column whether its value in the given row is null. A column name that does not exist is
* reported by {@link #getColumnNamed(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the result of the column's null check for the row
*/
public boolean isNull(String columnName, int rowIndex)
{
return this.getColumnNamed(columnName).isNull(this.rowIndexMap(rowIndex));
}
/**
* Returns the value of a cell as a {@code Value}.
*
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @param columnIndex the zero-based position of the column
* @return the value the column returns for the row
*/
public Value getValue(int rowIndex, int columnIndex)
{
return this.columns.get(columnIndex).getValue(this.rowIndexMap(rowIndex));
}
/**
* Returns the value of a cell as a {@code Value}. The column is resolved with {@link #getColumnNamed(String)},
* so a column name that does not exist is reported the same way as in the other by-name accessors rather than
* surfacing as a {@code NullPointerException}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the value the column returns for the row
*/
public Value getValue(String columnName, int rowIndex)
{
    return this.getColumnNamed(columnName).getValue(this.rowIndexMap(rowIndex));
}
/**
* Returns the string literal form of a cell value, as produced by the column. This is the form used for the
* cells in {@link #asCsvString(int)}.
*
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @param columnIndex the zero-based position of the column
* @return the string literal the column returns for the row
*/
public String getValueAsStringLiteral(int rowIndex, int columnIndex)
{
return this.columns.get(columnIndex).getValueAsStringLiteral(this.rowIndexMap(rowIndex));
}
/**
* Returns the string form of a cell value, as produced by the column.
*
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @param columnIndex the zero-based position of the column
* @return the string the column returns for the row
*/
public String getValueAsString(int rowIndex, int columnIndex)
{
return this.columns.get(columnIndex).getValueAsString(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a long column. The column is obtained with {@link #getLongColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the long value the column returns for the row
*/
public long getLong(String columnName, int rowIndex)
{
return this.getLongColumn(columnName).getLong(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from an int column. The column is obtained with {@link #getIntColumn(String)}.
*
* NOTE(review): the declared return type is long although the value is read with getInt() from an int column;
* this looks unintentional, but changing it would alter the public signature - confirm before touching it.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the value the column returns for the row
*/
public long getInt(String columnName, int rowIndex)
{
return this.getIntColumn(columnName).getInt(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a boolean column. The column is obtained with {@link #getBooleanColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the boolean value the column returns for the row
*/
public boolean getBoolean(String columnName, int rowIndex)
{
return this.getBooleanColumn(columnName).getBoolean(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a string column. The column is obtained with {@link #getStringColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the typed object the column returns for the row
*/
public String getString(String columnName, int rowIndex)
{
return this.getStringColumn(columnName).getTypedObject(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a double column. The column is obtained with {@link #getDoubleColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the double value the column returns for the row
*/
public double getDouble(String columnName, int rowIndex)
{
return this.getDoubleColumn(columnName).getDouble(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a float column. The column is obtained with {@link #getFloatColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the float value the column returns for the row
*/
public float getFloat(String columnName, int rowIndex)
{
return this.getFloatColumn(columnName).getFloat(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a date column. The column is obtained with {@link #getDateColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the typed object the column returns for the row
*/
public LocalDate getDate(String columnName, int rowIndex)
{
return this.getDateColumn(columnName).getTypedObject(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a date/time column. The column is obtained with {@link #getDateTimeColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the typed object the column returns for the row
*/
public LocalDateTime getDateTime(String columnName, int rowIndex)
{
return this.getDateTimeColumn(columnName).getTypedObject(this.rowIndexMap(rowIndex));
}
/**
* Returns a value from a decimal column. The column is obtained with {@link #getDecimalColumn(String)}.
*
* @param columnName the name of the column
* @param rowIndex the index of the row, translated through the virtual row map if there is one
* @return the typed object the column returns for the row
*/
public BigDecimal getDecimal(String columnName, int rowIndex)
{
return this.getDecimalColumn(columnName).getTypedObject(this.rowIndexMap(rowIndex));
}
/**
* Looks up a column by name and casts it to a long column. The cast fails with a {@code ClassCastException} if
* the column is not a {@code DfLongColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a long column
*/
public DfLongColumn getLongColumn(String columnName)
{
return (DfLongColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to an int column. The cast fails with a {@code ClassCastException} if
* the column is not a {@code DfIntColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as an int column
*/
public DfIntColumn getIntColumn(String columnName)
{
return (DfIntColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a boolean column. The cast fails with a {@code ClassCastException}
* if the column is not a {@code DfBooleanColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a boolean column
*/
public DfBooleanColumn getBooleanColumn(String columnName)
{
return (DfBooleanColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a double column. The cast fails with a {@code ClassCastException}
* if the column is not a {@code DfDoubleColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a double column
*/
public DfDoubleColumn getDoubleColumn(String columnName)
{
return (DfDoubleColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a float column. The cast fails with a {@code ClassCastException} if
* the column is not a {@code DfFloatColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a float column
*/
public DfFloatColumn getFloatColumn(String columnName)
{
return (DfFloatColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a date column. The cast fails with a {@code ClassCastException} if
* the column is not a {@code DfDateColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a date column
*/
public DfDateColumn getDateColumn(String columnName)
{
return (DfDateColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a date/time column. The cast fails with a
* {@code ClassCastException} if the column is not a {@code DfDateTimeColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a date/time column
*/
public DfDateTimeColumn getDateTimeColumn(String columnName)
{
return (DfDateTimeColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a decimal column. The cast fails with a {@code ClassCastException}
* if the column is not a {@code DfDecimalColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a decimal column
*/
public DfDecimalColumn getDecimalColumn(String columnName)
{
return (DfDecimalColumn) this.getColumnNamed(columnName);
}
/**
* Looks up a column by name and casts it to a string column. The cast fails with a {@code ClassCastException}
* if the column is not a {@code DfStringColumn}.
*
* @param columnName the name of the column
* @return the column with the specified name as a string column
*/
public DfStringColumn getStringColumn(String columnName)
{
return (DfStringColumn) this.getColumnNamed(columnName);
}
/**
* @param columnName the name of the column to check for
* @return {@code true} if this data frame has a column with the specified name, {@code false} otherwise
*/
public boolean hasColumn(String columnName)
{
return this.columnsByName.containsKey(columnName);
}
/**
* @return the evaluation context of this data frame for the calling thread (each thread gets its own instance)
*/
private DataFrameEvalContext getEvalContext()
{
return this.localEvalContext.get();
}
/**
* Sets the row index on the calling thread's evaluation context of this data frame.
*
* @param rowIndex the row index to set on the evaluation context
*/
public void setEvalContextRowIndex(int rowIndex)
{
this.getEvalContext().setRowIndex(rowIndex);
}
/**
* @return the expression evaluation visitor of this data frame for the calling thread (each thread gets its own
* instance)
*/
public ExpressionEvaluationVisitor getEvalVisitor()
{
return this.localEvalVisitor.get();
}
/**
* Sets the provided context as the nested context of the calling thread's evaluation context of this data
* frame. Evaluation contexts used by other threads are not affected.
*
* @param newEvalContext the context to nest in this data frame's evaluation context
*/
public void setExternalEvalContext(EvalContext newEvalContext)
{
this.getEvalContext().setNestedContext(newEvalContext);
}
/**
* Indicates that no further updates will be made to this data frame and ensures that the data frame is in a
* consistent internal state. This method should be invoked when done populating a data frame with data. Failure to
* do so may result in degraded performance or delayed problem detection. It is usually OK to skip it in the context
* of unit tests.
*
* @return the data frame
*/
public DataFrame seal()
{
// recompute the row count from the stored columns; stored columns of different sizes are reported here
this.determineRowCount();
this.resetBitmap();
// pooling is switched off for the data frame and for each of its columns
this.disablePooling();
return this;
}
/**
* Turns pooling off for this data frame and for each of the columns currently attached to it.
*/
private void disablePooling()
{
    this.poolingEnabled = false;
    this.columns.forEach(eachColumn -> eachColumn.disablePooling());
}
/**
* Recomputes the row count of this data frame from the sizes of its stored columns. With no stored columns the
* row count is zero; otherwise it is the size of the first stored column, and any stored column of a different
* size is reported as an error (the row count has already been updated by then).
*/
private void determineRowCount()
{
    MutableIntList sizes = this.columns.select(DfColumn::isStored).collectInt(DfColumn::getSize);

    if (sizes.isEmpty())
    {
        this.rowCount = 0;
        return;
    }

    int expectedSize = sizes.getFirst();
    this.rowCount = expectedSize;

    boolean sizesDiffer = sizes.anySatisfy(size -> size != expectedSize);
    if (sizesDiffer)
    {
        exceptionByKey("DF_DIFFERENT_COL_SIZES").with("dataFrameName", this.getName()).fire();
    }
}
/**
* Sums up the values in the specified columns. Each column name is turned into a sum aggregate function and the
* resulting list of functions is passed to {@code aggregate}.
*
* @param columnsToAggregateNames - the columns to aggregate
* @return a single row data frame containing the aggregated values in the respective columns
*/
public DataFrame sum(ListIterable columnsToAggregateNames)
{
return this.aggregate(columnsToAggregateNames.collect(AggregateFunction::sum));
}
/**
* Pivot the data frame. This operation produces another data frame, with the columns that correspond to the values
* of the key column, populated with the values from the values columns. The values are aggregated by one or more
* aggregation functions. No ordering of the pivot columns is requested: this method delegates to
* {@link #pivot(ListIterable, String, DfColumnSortOrder, ListIterable)} with a {@code null} pivot column order.
*
* NOTE: If more than one aggregator is provided, the column names for the aggregate values will be made up of
* pairs of all the values of the pivot column and the column names specified in aggregators.
* So if the pivot values are for example "2001" and "2002" and the only aggregator provided is {@code sum("X")} the
* columns with aggregated values will have names "2001" and "2002". If there are two aggregator functions,
* sum("X") and avg("Y"), there will be four columns for aggregated values in the resulting table with the names
* "2001:X", "2001:Y", "2002:X", "2002:Y". It will also respect the column name overrides in the aggregator
* function, that is if in the example above we have sum("X", "Foo") and avg("Y", "Bar") instead, the resulting
* column names will be "2001:Foo", "2001:Bar", "2002:Foo", "2002:Bar".
*
* @param columnsToGroupByNames the columns to group by the resulting pivot table
* @param pivotColumnName the column the values of which will become columns for the pivoted data frame.
* @param aggregators the aggregate functions to aggregate values in the value columns specified in
* their parameters
* @return a new data frame representing a pivot table view of this data frame.
*/
public DataFrame pivot(
ListIterable columnsToGroupByNames,
String pivotColumnName,
ListIterable aggregators
)
{
return this.pivot(columnsToGroupByNames, pivotColumnName, null, aggregators);
}
/**
* Pivot the data frame. This operation produces another data frame, with the columns that correspond to the values
* of the key column, populated with the values from the values columns. THe values are aggregated by one or more
* aggregation function.
*
* NOTE: If more than one aggregator is provided, the column names for the aggregate values will be made up of
* pairs of all the values of the pivot column and the column names specified in aggregators.
* So if a pivot values are for example "2001" and "2002" and the only aggregator provided is {@code sum("X")} the
* columns with aggregated values will have names "2001" and "2002". If there are two aggregator functions,
* sum("X") and avg("Y"), there will be four columns for aggregated values in the resulting table with the names
* "2001:X", "2001:Y", "2002:X", "2002:Y". It will also respect the column name overrides in the aggregator
* function, that is in the example above we have sum("X", "Foo") and avg("Y", "Bar") instead, the resulting column
* names will be "2001:Foo", "2001:Bar", "2002:Foo", "2002:Bar".
*
* @param columnsToGroupByNames the columns to group by the resulting pivot table
* @param pivotColumnName the column the values of which will become columns for the pivoted data frame.
* @param pivotColumnOrder the order in which the pivot columns will appear in the returned data frame (based
* on the ordering of the values of column headers)
* @param aggregators the aggregate functions to aggregate values in the value columns specified in
* their parameters
* @return a new data frame representing a pivot table view of this data frame.
*/
public DataFrame pivot(
ListIterable columnsToGroupByNames,
String pivotColumnName,
DfColumnSortOrder pivotColumnOrder,
ListIterable aggregators
)
{
DataFrame pivoted = new DataFrame(this.getName() + "-pivoted");
// index columns first
ListIterable columnsToGroupBy = this.columnsNamed(columnsToGroupByNames);
columnsToGroupBy.forEach(col -> pivoted.addColumn(col.getName(), col.getType()));
// then columns derived from pivot dimension values
// first, find distinct pivot dimension values
DfColumn columnToPivot = this.getColumnNamed(pivotColumnName);
Set> pivotColumnValues;
if (pivotColumnOrder == null)
{
pivotColumnValues = new LinkedHashSet<>(); // to maintain insertion order
}
else
{
pivotColumnValues = new TreeSortedSet>(
(pivotColumnOrder == ASC) ? Comparator.naturalOrder() : Comparator.reverseOrder()
);
}
for (int i = 0; i < columnToPivot.getSize(); i++)
{
pivotColumnValues.add((Comparable