io.github.vmzakharov.ecdataframe.dataframe.DfIndexKeeper Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of dataframe-ec Show documentation

A tabular data structure based on the Eclipse Collections framework

The newest version!

package io.github.vmzakharov.ecdataframe.dataframe;

import org.eclipse.collections.api.block.procedure.primitive.IntProcedure;
import org.eclipse.collections.api.list.ListIterable;
import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.api.map.primitive.MutableObjectIntMap;
import org.eclipse.collections.impl.factory.Lists;
import org.eclipse.collections.impl.factory.primitive.ObjectIntMaps;

/**
 * Maintains an index on a data frame based on the values of one or more columns. If no row with the specified index
 * value exists, a row with the respective values is added to the data frame. This can be useful, for example, in
 * aggregation, to maintain a data frame with accumulator rows.
 * For indexed access to a data frame see the {@link io.github.vmzakharov.ecdataframe.dataframe.DataFrame#createIndex(String, ListIterable)}
 * and {@link io.github.vmzakharov.ecdataframe.dataframe.DataFrame#index(String)} methods.
 */
public class DfIndexKeeper
{
    private final MutableObjectIntMap> rowIndexByKey = ObjectIntMaps.mutable.of();
    private final ListIterable columnsToIndexBy;
    private final DataFrame indexedDataFrame;
    private final ListIterable sourceColumns;

    /**
     * Creates an index keeper for the given data frame. Every name in {@code indexByColumnNames} is looked up, via
     * {@code getColumnNamed}, in both the indexed data frame and the source data frame; the two resulting column
     * lists are kept in the same order as the names.
     *
     * @param newIndexedDataFrame the data frame to be indexed; stored by reference
     * @param indexByColumnNames  the names of the columns making up the index; each name is looked up in both data
     *                            frames
     * @param newSourceDataFrame  the data frame in which the same-named source columns are looked up
     */
    public DfIndexKeeper(
            DataFrame newIndexedDataFrame,
            ListIterable<String> indexByColumnNames,
            DataFrame newSourceDataFrame)
    {
        this.indexedDataFrame = newIndexedDataFrame;

        // The element type on indexByColumnNames is required: on a raw ListIterable, collect(...) would take a raw
        // function type, against which the getColumnNamed method references cannot be resolved.
        this.columnsToIndexBy = indexByColumnNames.collect(this.indexedDataFrame::getColumnNamed);
        this.sourceColumns = indexByColumnNames.collect(newSourceDataFrame::getColumnNamed);
    }

    private int getRowIndexAtKey(ListIterable