io.github.vmzakharov.ecdataframe.dataframe.DfIndexKeeper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of dataframe-ec Show documentation
Show all versions of dataframe-ec Show documentation
A tabular data structure based on the Eclipse Collections framework
The newest version!
package io.github.vmzakharov.ecdataframe.dataframe;
import org.eclipse.collections.api.block.procedure.primitive.IntProcedure;
import org.eclipse.collections.api.list.ListIterable;
import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.api.map.primitive.MutableObjectIntMap;
import org.eclipse.collections.impl.factory.Lists;
import org.eclipse.collections.impl.factory.primitive.ObjectIntMaps;
/**
* Maintains an index on a data frame based on the values of one or more columns. If a row with the specified index
* value does not exist, the keeper adds a row with the respective values to the data frame. This can be useful, for
* example, during aggregation to maintain a data frame of accumulator rows.
* For indexed access to a data frame, see the {@link io.github.vmzakharov.ecdataframe.dataframe.DataFrame#createIndex(String, ListIterable)}
* and {@link io.github.vmzakharov.ecdataframe.dataframe.DataFrame#index(String)} methods.
*/
public class DfIndexKeeper
{
private final MutableObjectIntMap> rowIndexByKey = ObjectIntMaps.mutable.of();
private final ListIterable columnsToIndexBy;
private final DataFrame indexedDataFrame;
private final ListIterable sourceColumns;
/**
 * Creates an index keeper for {@code newIndexedDataFrame}. Every name in {@code indexByColumnNames} is resolved
 * to a column in both the indexed data frame and the source data frame via {@code getColumnNamed}; the two
 * resulting column lists are stored for later use.
 *
 * @param newIndexedDataFrame the data frame on which the index is maintained
 * @param indexByColumnNames  the names of the columns whose values make up the index key; each name is looked
 *                            up in both data frames
 * @param newSourceDataFrame  the data frame in which the corresponding source columns are looked up
 */
public DfIndexKeeper(
        DataFrame newIndexedDataFrame,
        ListIterable<String> indexByColumnNames,
        DataFrame newSourceDataFrame)
{
    this.indexedDataFrame = newIndexedDataFrame;

    // The element type must be String (not a raw ListIterable) so that the getColumnNamed method references
    // are valid arguments to collect(); with a raw receiver the function parameter type degrades to Object.
    this.columnsToIndexBy = indexByColumnNames.collect(this.indexedDataFrame::getColumnNamed);
    this.sourceColumns = indexByColumnNames.collect(newSourceDataFrame::getColumnNamed);
}
private int getRowIndexAtKey(ListIterable