io.github.vmzakharov.ecdataframe.dataframe.DataFrame Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of dataframe-ec Show documentation
A tabular data structure based on the Eclipse Collections framework
There is a newer version: 1.0.0
package io.github.vmzakharov.ecdataframe.dataframe;

import io.github.vmzakharov.ecdataframe.dataset.HierarchicalDataSet;
import io.github.vmzakharov.ecdataframe.dsl.EvalContext;
import io.github.vmzakharov.ecdataframe.dsl.EvalContextAbstract;
import io.github.vmzakharov.ecdataframe.dsl.Expression;
import io.github.vmzakharov.ecdataframe.dsl.FunctionScript;
import io.github.vmzakharov.ecdataframe.dsl.SimpleEvalContext;
import io.github.vmzakharov.ecdataframe.dsl.value.BooleanValue;
import io.github.vmzakharov.ecdataframe.dsl.value.Value;
import io.github.vmzakharov.ecdataframe.dsl.value.ValueType;
import io.github.vmzakharov.ecdataframe.dsl.visitor.ExpressionEvaluationVisitor;
import io.github.vmzakharov.ecdataframe.dsl.visitor.InMemoryEvaluationVisitor;
import io.github.vmzakharov.ecdataframe.dsl.visitor.TypeInferenceVisitor;
import io.github.vmzakharov.ecdataframe.util.ExpressionParserHelper;
import org.eclipse.collections.api.BooleanIterable;
import org.eclipse.collections.api.DoubleIterable;
import org.eclipse.collections.api.FloatIterable;
import org.eclipse.collections.api.IntIterable;
import org.eclipse.collections.api.LongIterable;
import org.eclipse.collections.api.RichIterable;
import org.eclipse.collections.api.block.function.primitive.IntIntToIntFunction;
import org.eclipse.collections.api.block.predicate.primitive.IntPredicate;
import org.eclipse.collections.api.block.procedure.Procedure;
import org.eclipse.collections.api.list.ImmutableList;
import org.eclipse.collections.api.list.ListIterable;
import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.api.list.primitive.IntList;
import org.eclipse.collections.api.list.primitive.MutableBooleanList;
import org.eclipse.collections.api.list.primitive.MutableIntList;
import org.eclipse.collections.api.map.MapIterable;
import org.eclipse.collections.api.map.MutableMap;
import org.eclipse.collections.api.multimap.list.MutableListMultimap;
import org.eclipse.collections.api.set.MutableSet;
import org.eclipse.collections.api.tuple.Triplet;
import org.eclipse.collections.api.tuple.Twin;
import org.eclipse.collections.impl.factory.Lists;
import org.eclipse.collections.impl.factory.Maps;
import org.eclipse.collections.impl.factory.Multimaps;
import org.eclipse.collections.impl.factory.primitive.IntLists;
import org.eclipse.collections.impl.list.mutable.primitive.BooleanArrayList;
import org.eclipse.collections.impl.set.sorted.mutable.TreeSortedSet;
import org.eclipse.collections.impl.tuple.Tuples;
import org.eclipse.collections.impl.utility.ArrayIterate;

import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.function.Supplier;

import static io.github.vmzakharov.ecdataframe.dataframe.DfColumnSortOrder.ASC;
import static io.github.vmzakharov.ecdataframe.util.ExceptionFactory.exceptionByKey;

/**
 * Data Frame - a tabular data structure
 */
public class DataFrame
implements DfIterate
{
    // Name of this data frame; used by toString() and included in error messages.
    private final String name;
    // Columns keyed by column name; kept in step with the "columns" list by attachColumn().
    private final MutableMap columnsByName = Maps.mutable.of();
    // Columns in the order in which they were attached.
    private final MutableList columns = Lists.mutable.of();
    // Number of rows; incremented by the addRow() methods and recomputed by determineRowCount().
    private int rowCount = 0;

    // Per-thread evaluation context and evaluation visitor; both are set up in the constructor.
    private final ThreadLocal localEvalContext;
    private final ThreadLocal localEvalVisitor;

    // When not null, translates a virtual row index into an actual row index (see rowIndexMap()).
    private IntList virtualRowMap = null;
    // Whether pooling is enabled; propagated to the columns by enablePooling() and attachColumn().
    private boolean poolingEnabled = false;

    // NOTE(review): (re)initialized by resetBitmap(), which is outside of the visible part of the file;
    // presumably holds per-row flags - confirm.
    private MutableBooleanList bitmap = null;

    // When not null, looked up by actual row index in getAggregateIndex(), which returns the element as an IntList.
    private MutableList aggregateIndex = null;

    // NOTE(review): not used in the visible part of the file; presumably named indices over this data frame - confirm.
    private final MutableMap indices = Maps.mutable.of();

    /**
     * Creates a data frame with the given name and no columns.
     *
     * @param newName the name of the data frame
     */
    public DataFrame(String newName)
    {
        this.name = newName;

        // every thread gets its own evaluation context bound to this data frame...
        this.localEvalContext = ThreadLocal.withInitial(() -> new DataFrameEvalContext(this));

        // ...and its own evaluation visitor working off that thread's context
        this.localEvalVisitor = ThreadLocal.withInitial(
                () -> new InMemoryEvaluationVisitor(this.localEvalContext.get()));

        this.resetBitmap();
    }

    /**
     * Add a stored column of string type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addStringColumn(String newColumnName)
    {
        ValueType columnType = ValueType.STRING;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a computed column of string type to this data frame. The type inferred from the expression has to be
     * string, otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addStringColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.STRING, expressionAsString);
    }

    /**
     * Add a stored column of string type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addStringColumn(String newColumnName, ListIterable values)
    {
        DfStringColumnStored column = new DfStringColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Add a stored column of long type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addLongColumn(String newColumnName)
    {
        ValueType columnType = ValueType.LONG;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a computed column of long type to this data frame. The type inferred from the expression has to be long,
     * otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addLongColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.LONG, expressionAsString);
    }

    /**
     * Add a stored column of long type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addLongColumn(String newColumnName, LongIterable values)
    {
        DfLongColumnStored column = new DfLongColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Attaches a new stored column of int type to this data frame.
     *
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addIntColumn(String newColumnName)
    {
        ValueType columnType = ValueType.INT;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a computed column of int type to this data frame. The type inferred from the expression has to be int,
     * otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addIntColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.INT, expressionAsString);
    }

    /**
     * Add a stored column of int type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addIntColumn(String newColumnName, IntIterable values)
    {
        DfIntColumnStored column = new DfIntColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Attaches a new stored column of boolean type to this data frame.
     *
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addBooleanColumn(String newColumnName)
    {
        ValueType columnType = ValueType.BOOLEAN;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a stored column of boolean type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addBooleanColumn(String newColumnName, BooleanIterable values)
    {
        DfBooleanColumnStored column = new DfBooleanColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Add a stored column of float type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addFloatColumn(String newColumnName)
    {
        ValueType columnType = ValueType.FLOAT;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a computed column of float type to this data frame. The type inferred from the expression has to be float,
     * otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addFloatColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.FLOAT, expressionAsString);
    }

    /**
     * Add a stored column of float type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addFloatColumn(String newColumnName, FloatIterable values)
    {
        DfFloatColumnStored column = new DfFloatColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Add a stored column of double type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addDoubleColumn(String newColumnName)
    {
        ValueType columnType = ValueType.DOUBLE;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a computed column of double type to this data frame. The type inferred from the expression has to be
     * double, otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addDoubleColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.DOUBLE, expressionAsString);
    }

    /**
     * Add a stored column of double type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addDoubleColumn(String newColumnName, DoubleIterable values)
    {
        DfDoubleColumnStored column = new DfDoubleColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Add a stored column of date type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addDateColumn(String newColumnName)
    {
        ValueType columnType = ValueType.DATE;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a stored column of date type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addDateColumn(String newColumnName, ListIterable values)
    {
        DfDateColumnStored column = new DfDateColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Add a computed column of date type to this data frame. The type inferred from the expression has to be date,
     * otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addDateColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.DATE, expressionAsString);
    }

    /**
     * Add a stored column of date/time type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addDateTimeColumn(String newColumnName)
    {
        ValueType columnType = ValueType.DATE_TIME;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a stored column of date/time type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addDateTimeColumn(String newColumnName, ListIterable values)
    {
        DfDateTimeColumnStored column = new DfDateTimeColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Add a computed column of date/time type to this data frame. The type inferred from the expression has to be
     * date/time, otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addDateTimeColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.DATE_TIME, expressionAsString);
    }

    /**
     * Add a stored column of decimal type to this data frame
     * @param newColumnName the name of the column to be added
     * @return this data frame
     */
    public DataFrame addDecimalColumn(String newColumnName)
    {
        ValueType columnType = ValueType.DECIMAL;
        return this.addColumn(newColumnName, columnType);
    }

    /**
     * Add a computed column of decimal type to this data frame. The type inferred from the expression has to be
     * decimal, otherwise an exception is thrown and no column is added.
     *
     * @param newColumnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     * @deprecated use {@link #addColumn(String, String)} instead. The type of the column to add will be inferred from
     * the expression
     */
    @Deprecated
    public DataFrame addDecimalColumn(String newColumnName, String expressionAsString)
    {
        return this.addColumnWithTypeValidation(newColumnName, ValueType.DECIMAL, expressionAsString);
    }

    /**
     * Add a stored column of decimal type, created from the provided values, to this data frame
     * @param newColumnName the name of the column to be added
     * @param values the values passed to the new column
     * @return this data frame
     */
    public DataFrame addDecimalColumn(String newColumnName, ListIterable values)
    {
        DfDecimalColumnStored column = new DfDecimalColumnStored(this, newColumnName, values);
        this.attachColumn(column);
        return this;
    }

    /**
     * Builds a short textual summary of this data frame: its name, its row count, and a CSV rendering (column headers
     * included) of at most the first 10 rows. When there are more than 10 rows, a line with the ellipsis ("...") is
     * added after the last rendered row.
     *
     * @return a string representation of the data frame
     */
    @Override
    public String toString()
    {
        StringBuilder text = new StringBuilder();

        // title line: <name> [<n> rows]
        text.append(this.getName());
        text.append(" [").append(this.rowCount).append(" rows]");
        text.append('\n');

        // header and up to ten data rows
        text.append(this.asCsvString(10));

        boolean hasMoreRows = this.rowCount() > 10;
        if (hasMoreRows)
        {
            text.append("...\n");
        }

        return text.toString();
    }

    /**
     * Registers the column with this data frame, both by name and by position. A column with a name that is already
     * taken is reported as a "DF_DUPLICATE_COLUMN" error. If pooling is enabled for the data frame it gets enabled for
     * the new column as well, and the row count is re-determined if the column is stored and is not empty.
     *
     * @param newColumn the column to attach
     */
    private void attachColumn(DfColumn newColumn)
    {
        String columnName = newColumn.getName();

        if (this.hasColumn(columnName))
        {
            exceptionByKey("DF_DUPLICATE_COLUMN")
                    .with("columnName", columnName)
                    .with("dataFrameName", this.getName())
                    .fire();
        }

        this.columnsByName.put(columnName, newColumn);
        this.columns.add(newColumn);

        if (this.isPoolingEnabled())
        {
            newColumn.enablePooling();
        }

        boolean bringsStoredData = newColumn.isStored() && newColumn.getSize() > 0;
        if (bringsStoredData)
        {
            this.determineRowCount();
        }
    }

    /**
     * Turns pooling on for this data frame and for each of the columns currently attached to it. Columns attached
     * later have pooling enabled when they are attached.
     */
    public void enablePooling()
    {
        this.poolingEnabled = true;

        for (DfColumn column : this.columns)
        {
            column.enablePooling();
        }
    }

    /**
     * @return {@code true} if pooling is currently enabled for this data frame, {@code false} otherwise
     */
    public boolean isPoolingEnabled()
    {
        boolean enabled = this.poolingEnabled;
        return enabled;
    }

    /**
     * Looks up a column of this data frame by its name. A name that does not match any column is reported as a
     * "DF_COLUMN_DOES_NOT_EXIST" error.
     *
     * @param columnName the name of the column to look up
     * @return the column with the specified name
     */
    public DfColumn getColumnNamed(String columnName)
    {
        DfColumn found = this.columnsByName.get(columnName);

        if (null == found)
        {
            exceptionByKey("DF_COLUMN_DOES_NOT_EXIST")
                    .with("columnName", columnName)
                    .with("dataFrameName", this.getName())
                    .fire();
        }

        return found;
    }

    /**
     * @return an immutable list of the columns of this data frame, in the order they were attached
     */
    public ImmutableList getColumns()
    {
        ImmutableList<DfColumn> snapshot = this.columns.toImmutable();
        return snapshot;
    }

    /**
     * @param columnIndex the zero-based position of the column in this data frame
     * @return the column at the specified position
     */
    public DfColumn getColumnAt(int columnIndex)
    {
        DfColumn column = this.columns.get(columnIndex);
        return column;
    }

    /**
     * Adds a row made up of the provided values; the value at position {@code i} is added to the column at position
     * {@code i}. A row with more values than there are columns is reported as a "DF_ADDING_ROW_TOO_WIDE" error, the
     * same way as in {@link #addRow(Object...)}.
     *
     * @param rowValues the values to add, one per column, in the column order
     */
    public void addRow(ListIterable rowValues)
    {
        // validate up front so that a row that is too wide does not leave the leading columns already updated
        if (rowValues.size() > this.columnCount())
        {
            exceptionByKey("DF_ADDING_ROW_TOO_WIDE")
                    .with("elementCount", rowValues.size())
                    .with("columnCount", this.columnCount())
                    .fire();
        }

        rowValues.forEachWithIndex((v, i) -> this.columns.get(i).addValue(v));
        this.rowCount++;
    }

    /**
     * Renders all the rows of the data frame as a multi-line CSV string, with the column headers on the first line.
     *
     * @return a string representation of the data frame.
     */
    public String asCsvString()
    {
        int noLimit = -1; // a negative limit means "all rows"
        return this.asCsvString(noLimit);
    }

    /**
     * Renders the data frame as a multi-line CSV string: a line with the comma separated column names followed by one
     * line per row, each line terminated by the new line character.
     *
     * @param limit the maximum number of rows to render; all the rows are rendered if the value is negative, and only
     *              the line with the column names is produced if the value is zero.
     * @return a CSV string representation of the data frame rows.
     */
    public String asCsvString(int limit)
    {
        StringBuilder csv = new StringBuilder();

        // header line
        csv.append(this.columns.makeString(DfColumn::getName, "", ",", "")).append('\n');

        int columnCount = this.columnCount();

        int rowsToRender;
        if (limit < 0)
        {
            rowsToRender = this.rowCount();
        }
        else
        {
            rowsToRender = Math.min(limit, this.rowCount());
        }

        for (int rowIndex = 0; rowIndex < rowsToRender; rowIndex++)
        {
            for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
            {
                if (columnIndex > 0)
                {
                    csv.append(',');
                }
                csv.append(this.getValueAsStringLiteral(rowIndex, columnIndex));
            }
            csv.append('\n');
        }

        return csv.toString();
    }

    /**
     * @return the number of rows currently recorded for this data frame
     */
    public int rowCount()
    {
        int currentRowCount = this.rowCount;
        return currentRowCount;
    }

    /**
     * Adds a row to this data frame by adding an empty value to each of its columns.
     *
     * @return this data frame
     */
    public DataFrame addRow()
    {
        for (DfColumn column : this.columns)
        {
            column.addEmptyValue();
        }

        this.rowCount++;
        return this;
    }

    /**
     * Adds a row made up of the provided values; the value at position {@code i} is added to the column at position
     * {@code i}. Passing more values than there are columns is reported as a "DF_ADDING_ROW_TOO_WIDE" error. If fewer
     * values than columns are passed, nothing is added to the remaining columns.
     *
     * @param values the values to add, in the column order
     * @return this data frame
     */
    public DataFrame addRow(Object... values)
    {
        int elementCount = values.length;

        if (elementCount > this.columnCount())
        {
            exceptionByKey("DF_ADDING_ROW_TOO_WIDE")
                    .with("elementCount", elementCount)
                    .with("columnCount", this.columnCount())
                    .fire();
        }

        for (int position = 0; position < elementCount; position++)
        {
            this.columns.get(position).addObject(values[position]);
        }

        this.rowCount++;
        return this;
    }

    /**
     * @return the number of columns attached to this data frame
     */
    public int columnCount()
    {
        int attachedColumns = this.columns.size();
        return attachedColumns;
    }

    /**
     * @return the name this data frame was created with
     */
    public String getName()
    {
        String frameName = this.name;
        return frameName;
    }

    /**
     * Add a computed column to this data frame. The type of the column is not specified by the caller, it is inferred
     * from the expression provided.
     *
     * @param columnName the name of the column to be added
     * @param expressionAsString the expression used to compute the column values
     * @return this data frame
     */
    public DataFrame addColumn(String columnName, String expressionAsString)
    {
        ValueType inferredType = this.inferExpressionType(columnName, expressionAsString);

        return this.addColumn(columnName, inferredType, expressionAsString);
    }

    /**
     * Creates a stored column of the given type under the given name and attaches it to this data frame. Same as
     * {@link #newColumn(String, ValueType)}, except that the data frame rather than the column is returned, so that
     * the calls can be chained.
     *
     * @param columnName the name of the column to be created
     * @param type       the type of the new column
     * @return this data frame
     */
    public DataFrame addColumn(String columnName, ValueType type)
    {
        // the created column itself is not needed here
        this.newColumn(columnName, type);

        return this;
    }

    /**
     * Creates a stored column of the given type under the given name, attaches it to this data frame, and hands the
     * column back to the caller.
     *
     * @param columnName the name of the column to be created
     * @param type       the type of the new column
     * @return the newly created column
     */
    public DfColumnStored newColumn(String columnName, ValueType type)
    {
        DfColumnStored storedColumn = this.createStoredColumn(columnName, type);

        this.attachColumn(storedColumn);

        return storedColumn;
    }

    /**
     * Instantiates, without attaching it, a stored column of the implementation class matching the requested type.
     * A type with no matching stored column implementation results in a "DF_ADD_COL_UNKNOWN_TYPE" exception.
     *
     * @param columnName the name of the column to be created
     * @param type       the type of the new column
     * @return the newly created column
     */
    private DfColumnStored createStoredColumn(String columnName, ValueType type)
    {
        DfColumnStored column = switch (type)
        {
            case STRING -> new DfStringColumnStored(this, columnName);
            case BOOLEAN -> new DfBooleanColumnStored(this, columnName);
            case INT -> new DfIntColumnStored(this, columnName);
            case LONG -> new DfLongColumnStored(this, columnName);
            case FLOAT -> new DfFloatColumnStored(this, columnName);
            case DOUBLE -> new DfDoubleColumnStored(this, columnName);
            case DECIMAL -> new DfDecimalColumnStored(this, columnName);
            case DATE -> new DfDateColumnStored(this, columnName);
            case DATE_TIME -> new DfDateTimeColumnStored(this, columnName);
            default -> throw exceptionByKey("DF_ADD_COL_UNKNOWN_TYPE")
                    .with("columnName", columnName)
                    .with("type", type)
                    .get();
        };

        return column;
    }

    /**
     * Creates a calculated column of the given type under the given name and attaches it to this data frame. Same as
     * {@link #newColumn(String, ValueType, String)}, except that the data frame rather than the column is returned, so
     * that the calls can be chained.
     *
     * @param columnName         the name of the column to be created
     * @param type               the type of the new column
     * @param expressionAsString the expression used to calculate column values
     * @return this data frame
     */
    public DataFrame addColumn(String columnName, ValueType type, String expressionAsString)
    {
        // the created column itself is not needed here
        this.newColumn(columnName, type, expressionAsString);

        return this;
    }

    /**
     * Adds a calculated column after checking that the type inferred from its expression is the same as the type
     * the caller asked for. A mismatch results in a "DF_CALC_COL_TYPE_MISMATCH" exception and no column is added.
     *
     * @param columnName         the name of the column to be created
     * @param columnType         the type the caller expects the column to have
     * @param expressionAsString the expression used to calculate column values
     * @return this data frame
     */
    private DataFrame addColumnWithTypeValidation(String columnName, ValueType columnType, String expressionAsString)
    {
        ValueType inferredType = this.inferExpressionType(columnName, expressionAsString);

        if (inferredType == columnType)
        {
            this.newColumn(columnName, columnType, expressionAsString);
            return this;
        }

        throw exceptionByKey("DF_CALC_COL_TYPE_MISMATCH")
                .with("columnName", columnName)
                .with("dataFrameName", this.getName())
                .with("inferredType", inferredType.toString())
                .with("expression", expressionAsString)
                .with("specifiedType", columnType.toString())
                .get();
    }

    /**
     * Creates a calculated column of the given type under the given name, attaches it to this data frame, and hands
     * the column back to the caller.
     *
     * @param columnName         the name of the column to be created
     * @param type               the type of the new column
     * @param expressionAsString the expression used to calculate column values
     * @return the newly created column
     */
    public DfColumnComputed newColumn(String columnName, ValueType type, String expressionAsString)
    {
        DfColumnComputed computedColumn = this.createComputedColumn(columnName, type, expressionAsString);

        this.attachColumn(computedColumn);

        return computedColumn;
    }

    /**
     * Works out the type of the value produced by the expression. The names and the types of the columns of this data
     * frame are made known to the type inference visitor before the expression is analyzed. Errors detected by the
     * visitor are reported as a "DF_CALC_COL_INFER_TYPE" error listing all of them.
     *
     * @param columnName         the name of the column the expression is for, used for error reporting
     * @param expressionAsString the expression to analyze
     * @return the inferred type of the expression
     */
    private ValueType inferExpressionType(String columnName, String expressionAsString)
    {
        TypeInferenceVisitor typeVisitor = new TypeInferenceVisitor(this.getEvalContext());

        for (DfColumn column : this.getColumns())
        {
            typeVisitor.storeVariableType(column.getName(), column.getType());
        }

        Expression parsed = ExpressionParserHelper.DEFAULT.toExpressionOrScript(expressionAsString);
        ValueType inferredType = typeVisitor.inferExpressionType(parsed);

        if (!typeVisitor.hasErrors())
        {
            return inferredType;
        }

        // one line per error
        String errorList = typeVisitor.getErrors()
                                      .collect(err -> err.getOne() + ": " + err.getTwo())
                                      .makeString("\n");

        exceptionByKey("DF_CALC_COL_INFER_TYPE")
                .with("columnName", columnName)
                .with("dataFrameName", this.getName())
                .with("expression", expressionAsString)
                .with("errorList", errorList)
                .fire();

        return inferredType;
    }

    /**
     * Instantiates, without attaching it, a computed column of the implementation class matching the requested type.
     * A type with no matching computed column implementation results in a "DF_ADD_COL_UNKNOWN_TYPE" exception.
     *
     * @param columnName         the name of the column to be created
     * @param type               the type of the new column
     * @param expressionAsString the expression used to calculate column values
     * @return the newly created column
     */
    private DfColumnComputed createComputedColumn(String columnName, ValueType type, String expressionAsString)
    {
        DfColumnComputed column = switch (type)
        {
            case STRING -> new DfStringColumnComputed(this, columnName, expressionAsString);
            case BOOLEAN -> new DfBooleanColumnComputed(this, columnName, expressionAsString);
            case INT -> new DfIntColumnComputed(this, columnName, expressionAsString);
            case LONG -> new DfLongColumnComputed(this, columnName, expressionAsString);
            case FLOAT -> new DfFloatColumnComputed(this, columnName, expressionAsString);
            case DOUBLE -> new DfDoubleColumnComputed(this, columnName, expressionAsString);
            case DECIMAL -> new DfDecimalColumnComputed(this, columnName, expressionAsString);
            case DATE -> new DfDateColumnComputed(this, columnName, expressionAsString);
            case DATE_TIME -> new DfDateTimeColumnComputed(this, columnName, expressionAsString);
            default -> throw exceptionByKey("DF_ADD_COL_UNKNOWN_TYPE")
                    .with("columnName", columnName)
                    .with("type", type)
                    .get();
        };

        return column;
    }

    /**
     * Translates a row index as seen by the users of this data frame into the row index used to access the columns.
     * When no virtual row map is set the index is returned unchanged.
     *
     * @param virtualRowIndex the row index to translate
     * @return the row index to use with the columns
     */
    protected int rowIndexMap(int virtualRowIndex)
    {
        return this.isIndexed()
                ? this.virtualRowMap.get(virtualRowIndex)
                : virtualRowIndex;
    }

    /**
     * @return {@code true} if a virtual row map is set for this data frame, {@code false} otherwise
     */
    private boolean isIndexed()
    {
        boolean hasVirtualRowMap = this.virtualRowMap != null;
        return hasVirtualRowMap;
    }

    /**
     * Returns the aggregate index entry recorded for the row, or an empty list if this data frame has no aggregate
     * index.
     *
     * @param rowIndex the row index, subject to the virtual row mapping
     * @return the aggregate index entry for the row, or an empty list
     */
    public IntList getAggregateIndex(int rowIndex)
    {
        if (!this.isAggregateWithIndex())
        {
            return IntLists.immutable.empty();
        }

        int actualRowIndex = this.rowIndexMap(rowIndex);
        return this.aggregateIndex.get(actualRowIndex);
    }

    /**
     * @return {@code true} if an aggregate index is set for this data frame, {@code false} otherwise
     */
    private boolean isAggregateWithIndex()
    {
        boolean hasAggregateIndex = this.aggregateIndex != null;
        return hasAggregateIndex;
    }

    /**
     * Returns, as an object, the value of the column at the given position for the given row.
     *
     * @param rowIndex    the row index, subject to the virtual row mapping
     * @param columnIndex the zero-based position of the column
     * @return the value as returned by the column's {@code getObject()}
     */
    public Object getObject(int rowIndex, int columnIndex)
    {
        DfColumn column = this.columns.get(columnIndex);
        return column.getObject(this.rowIndexMap(rowIndex));
    }

    /**
     * Returns, as an object, the value of the named column for the given row.
     *
     * @param columnName the name of the column, looked up with {@link #getColumnNamed(String)}
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value as returned by the column's {@code getObject()}
     */
    public Object getObject(String columnName, int rowIndex)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return column.getObject(this.rowIndexMap(rowIndex));
    }

    /**
     * Asks the named column whether its value for the given row is null.
     *
     * @param columnName the name of the column, looked up with {@link #getColumnNamed(String)}
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the result of the column's {@code isNull()} for the row
     */
    public boolean isNull(String columnName, int rowIndex)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return column.isNull(this.rowIndexMap(rowIndex));
    }

    /**
     * Returns, as a {@code Value}, the value of the column at the given position for the given row.
     *
     * @param rowIndex    the row index, subject to the virtual row mapping
     * @param columnIndex the zero-based position of the column
     * @return the value as returned by the column's {@code getValue()}
     */
    public Value getValue(int rowIndex, int columnIndex)
    {
        DfColumn column = this.columns.get(columnIndex);
        return column.getValue(this.rowIndexMap(rowIndex));
    }

    /**
     * Returns, as a {@code Value}, the value of the named column for the given row. A column name that does not
     * match any column is reported as a "DF_COLUMN_DOES_NOT_EXIST" error, the same way as in
     * {@link #getObject(String, int)}.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value as returned by the column's {@code getValue()}
     */
    public Value getValue(String columnName, int rowIndex)
    {
        // go through getColumnNamed() rather than the map so that a missing column is reported with the column and
        // the data frame names instead of surfacing as a NullPointerException
        return this.getColumnNamed(columnName).getValue(this.rowIndexMap(rowIndex));
    }

    /**
     * Returns the string literal form of the value of the column at the given position for the given row. This is the
     * form used by {@link #asCsvString(int)}.
     *
     * @param rowIndex    the row index, subject to the virtual row mapping
     * @param columnIndex the zero-based position of the column
     * @return the value as returned by the column's {@code getValueAsStringLiteral()}
     */
    public String getValueAsStringLiteral(int rowIndex, int columnIndex)
    {
        DfColumn column = this.columns.get(columnIndex);
        return column.getValueAsStringLiteral(this.rowIndexMap(rowIndex));
    }

    /**
     * Returns the string form of the value of the column at the given position for the given row.
     *
     * @param rowIndex    the row index, subject to the virtual row mapping
     * @param columnIndex the zero-based position of the column
     * @return the value as returned by the column's {@code getValueAsString()}
     */
    public String getValueAsString(int rowIndex, int columnIndex)
    {
        DfColumn column = this.columns.get(columnIndex);
        return column.getValueAsString(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a long value from the named column, which has to be a long column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public long getLong(String columnName, int rowIndex)
    {
        DfLongColumn column = this.getLongColumn(columnName);
        return column.getLong(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads an int value from the named column, which has to be an int column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    // NOTE(review): the declared return type is long although the value is read with getInt() - presumably it was
    // meant to be int; left as is to keep the method signature unchanged.
    public long getInt(String columnName, int rowIndex)
    {
        DfIntColumn column = this.getIntColumn(columnName);
        return column.getInt(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a boolean value from the named column, which has to be a boolean column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public boolean getBoolean(String columnName, int rowIndex)
    {
        DfBooleanColumn column = this.getBooleanColumn(columnName);
        return column.getBoolean(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a string value from the named column, which has to be a string column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public String getString(String columnName, int rowIndex)
    {
        DfStringColumn column = this.getStringColumn(columnName);
        return column.getTypedObject(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a double value from the named column, which has to be a double column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public double getDouble(String columnName, int rowIndex)
    {
        DfDoubleColumn column = this.getDoubleColumn(columnName);
        return column.getDouble(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a float value from the named column, which has to be a float column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public float getFloat(String columnName, int rowIndex)
    {
        DfFloatColumn column = this.getFloatColumn(columnName);
        return column.getFloat(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a date value from the named column, which has to be a date column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public LocalDate getDate(String columnName, int rowIndex)
    {
        DfDateColumn column = this.getDateColumn(columnName);
        return column.getTypedObject(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a date/time value from the named column, which has to be a date/time column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public LocalDateTime getDateTime(String columnName, int rowIndex)
    {
        DfDateTimeColumn column = this.getDateTimeColumn(columnName);
        return column.getTypedObject(this.rowIndexMap(rowIndex));
    }

    /**
     * Reads a decimal value from the named column, which has to be a decimal column.
     *
     * @param columnName the name of the column
     * @param rowIndex   the row index, subject to the virtual row mapping
     * @return the value of the column for the row
     */
    public BigDecimal getDecimal(String columnName, int rowIndex)
    {
        DfDecimalColumn column = this.getDecimalColumn(columnName);
        return column.getTypedObject(this.rowIndexMap(rowIndex));
    }

    /**
     * Looks up a column by name and casts it to a long column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfLongColumn}
     */
    public DfLongColumn getLongColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfLongColumn) column;
    }

    /**
     * Looks up a column by name and casts it to an int column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfIntColumn}
     */
    public DfIntColumn getIntColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfIntColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a boolean column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfBooleanColumn}
     */
    public DfBooleanColumn getBooleanColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfBooleanColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a double column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfDoubleColumn}
     */
    public DfDoubleColumn getDoubleColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfDoubleColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a float column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfFloatColumn}
     */
    public DfFloatColumn getFloatColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfFloatColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a date column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfDateColumn}
     */
    public DfDateColumn getDateColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfDateColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a date/time column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfDateTimeColumn}
     */
    public DfDateTimeColumn getDateTimeColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfDateTimeColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a decimal column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfDecimalColumn}
     */
    public DfDecimalColumn getDecimalColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfDecimalColumn) column;
    }

    /**
     * Looks up a column by name and casts it to a string column; the cast fails if the column is of another kind.
     *
     * @param columnName the name of the column
     * @return the column with the specified name as a {@code DfStringColumn}
     */
    public DfStringColumn getStringColumn(String columnName)
    {
        DfColumn column = this.getColumnNamed(columnName);
        return (DfStringColumn) column;
    }

    /**
     * @param columnName the column name to check
     * @return {@code true} if a column with this name is attached to this data frame, {@code false} otherwise
     */
    public boolean hasColumn(String columnName)
    {
        boolean attached = this.columnsByName.containsKey(columnName);
        return attached;
    }

    /**
     * @return the evaluation context of this data frame for the calling thread
     */
    private DataFrameEvalContext getEvalContext()
    {
        DataFrameEvalContext threadContext = this.localEvalContext.get();
        return threadContext;
    }

    /**
     * Sets the row index on the calling thread's evaluation context of this data frame.
     *
     * @param rowIndex the row index to pass to the evaluation context
     */
    public void setEvalContextRowIndex(int rowIndex)
    {
        DataFrameEvalContext threadContext = this.getEvalContext();
        threadContext.setRowIndex(rowIndex);
    }

    /**
     * @return the expression evaluation visitor of this data frame for the calling thread
     */
    public ExpressionEvaluationVisitor getEvalVisitor()
    {
        ExpressionEvaluationVisitor threadVisitor = this.localEvalVisitor.get();
        return threadVisitor;
    }

    /**
     * Sets the provided context as the nested context of the calling thread's evaluation context of this data frame.
     *
     * @param newEvalContext the context to nest
     */
    public void setExternalEvalContext(EvalContext newEvalContext)
    {
        DataFrameEvalContext threadContext = this.getEvalContext();
        threadContext.setNestedContext(newEvalContext);
    }

    /**
     * Signals that this data frame is done being populated and brings it into a consistent internal state. Call this
     * method once all the data has been loaded into the data frame: skipping the call may lead to degraded performance
     * or to problems being detected later than they could have been. Skipping it is usually fine in unit tests.
     *
     * @return the data frame
     */
    public DataFrame seal()
    {
        // re-derive the row count from the stored columns, this also verifies that they are all of the same size
        this.determineRowCount();

        this.resetBitmap();

        // pooling is switched off for the data frame and for all its columns
        this.disablePooling();

        return this;
    }

    /**
     * Turns pooling off for this data frame and for each of the columns currently attached to it.
     */
    private void disablePooling()
    {
        this.poolingEnabled = false;

        for (DfColumn column : this.columns)
        {
            column.disablePooling();
        }
    }

    /**
     * Derives the row count from the sizes of the stored columns: zero if there are no stored columns, otherwise the
     * size of the first stored column. Stored columns of differing sizes are reported as a "DF_DIFFERENT_COL_SIZES"
     * error (the row count has already been set to the size of the first stored column at that point).
     */
    private void determineRowCount()
    {
        MutableIntList storedSizes = this.columns
                .select(DfColumn::isStored)
                .collectInt(DfColumn::getSize);

        if (storedSizes.isEmpty())
        {
            this.rowCount = 0;
            return;
        }

        int firstSize = storedSizes.getFirst();
        this.rowCount = firstSize;

        boolean sizesDiffer = storedSizes.anySatisfy(size -> size != firstSize);
        if (sizesDiffer)
        {
            exceptionByKey("DF_DIFFERENT_COL_SIZES").with("dataFrameName", this.getName()).fire();
        }
    }

    /**
     * Aggregates this data frame with a sum aggregation function for each of the specified columns
     *
     * @param columnsToAggregateNames the names of the columns to sum up
     * @return a single row data frame containing the aggregated values in the respective columns
     */
    public DataFrame sum(ListIterable columnsToAggregateNames)
    {
        return this.aggregate(
                columnsToAggregateNames.collect(AggregateFunction::sum)
        );
    }

    /**
     * Pivot the data frame, without specifying an ordering for the pivot columns. This operation produces another
     * data frame, with the columns that correspond to the values of the key column, populated with the values from
     * the values columns. The values are aggregated by one or more aggregation functions.
     * <p>
     * NOTE: If more than one aggregator is provided, the column names for the aggregate values will be made up of
     * pairs of all the values of the pivot column and the column names specified in aggregators.
     * So if the pivot values are, for example, "2001" and "2002" and the only aggregator provided is
     * {@code sum("X")}, the columns with aggregated values will have names "2001" and "2002". If there are two
     * aggregator functions, {@code sum("X")} and {@code avg("Y")}, there will be four columns for aggregated values in
     * the resulting table with the names "2001:X", "2001:Y", "2002:X", "2002:Y". The column name overrides in the
     * aggregator functions are respected too, that is if in the example above we have {@code sum("X", "Foo")} and
     * {@code avg("Y", "Bar")} instead, the resulting column names will be "2001:Foo", "2001:Bar", "2002:Foo",
     * "2002:Bar".
     *
     * @param columnsToGroupByNames the columns to group by the resulting pivot table
     * @param pivotColumnName       the column the values of which will become columns for the pivoted data frame.
     * @param aggregators           the aggregate functions to aggregate values in the value columns specified in
     *                              their parameters
     * @return a new data frame representing a pivot table view of this data frame.
     */
    public DataFrame pivot(
            ListIterable columnsToGroupByNames,
            String pivotColumnName,
            ListIterable aggregators
    )
    {
        // no explicit ordering of the pivot columns is requested
        DfColumnSortOrder unspecifiedOrder = null;

        return this.pivot(columnsToGroupByNames, pivotColumnName, unspecifiedOrder, aggregators);
    }

    /**
     * Pivot the data frame. This operation produces another data frame, with the columns that correspond to the values
     * of the key column, populated with the values from the values columns. The values are aggregated by one or more
     * aggregation functions.
     * <p>
     * NOTE: If more than one aggregator is provided, the column names for the aggregate values will be made up of
     * pairs of all the values of the pivot column and the column names specified in aggregators.
     * So if a pivot values are for example "2001" and "2002" and the only aggregator provided is {@code sum("X")} the
     * columns with aggregated values will have names "2001" and "2002". If there are two aggregator functions,
     * sum("X") and avg("Y"), there will be four columns for aggregated values in the resulting table with the names
     * "2001:X", "2001:Y", "2002:X", "2002:Y". It will also respect the column name overrides in the aggregator
     * function, that is in the example above we have sum("X", "Foo") and avg("Y", "Bar") instead, the resulting column
     * names will be "2001:Foo", "2001:Bar", "2002:Foo", "2002:Bar".
     *
     * @param columnsToGroupByNames the columns to group by the resulting pivot table
     * @param pivotColumnName       the column the values of which will become columns for the pivoted data frame.
     * @param pivotColumnOrder      the order in which the pivot columns will appear in the returned data frame (based
     *                              on the ordering of the values of column headers)
     * @param aggregators           the aggregate functions to aggregate values in the value columns specified in
     *                              their parameters
     * @return a new data frame representing a pivot table view of this data frame.
     */
    public DataFrame pivot(
            ListIterable columnsToGroupByNames,
            String pivotColumnName,
            DfColumnSortOrder pivotColumnOrder,
            ListIterable aggregators
    )
    {
        DataFrame pivoted = new DataFrame(this.getName() + "-pivoted");

        // index columns first
        ListIterable columnsToGroupBy = this.columnsNamed(columnsToGroupByNames);

        columnsToGroupBy.forEach(col -> pivoted.addColumn(col.getName(), col.getType()));

        // then columns derived from pivot dimension values
        // first, find distinct pivot dimension values
        DfColumn columnToPivot = this.getColumnNamed(pivotColumnName);

        Set> pivotColumnValues;

        if (pivotColumnOrder == null)
        {
            pivotColumnValues = new LinkedHashSet<>(); // to maintain insertion order
        }
        else
        {
            pivotColumnValues = new TreeSortedSet>(
                    (pivotColumnOrder == ASC) ? Comparator.naturalOrder() : Comparator.reverseOrder()
            );
        }

        for (int i = 0; i < columnToPivot.getSize(); i++)
        {
            pivotColumnValues.add((Comparable