All downloads are free. The search and download functionality uses the official Maven repository.

io.github.vmzakharov.ecdataframe.dataframe.AggregateFunction Maven / Gradle / Ivy

package io.github.vmzakharov.ecdataframe.dataframe;

import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Avg;
import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Avg2d;
import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Count;
import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Max;
import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Min;
import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Same;
import io.github.vmzakharov.ecdataframe.dataframe.aggregation.Sum;
import io.github.vmzakharov.ecdataframe.dsl.value.ValueType;
import org.eclipse.collections.api.list.ListIterable;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Locale;

import static io.github.vmzakharov.ecdataframe.util.ExceptionFactory.exceptionByKey;

/**
 * Base class for aggregation functions. An aggregation function is bound to a source column (the column the values
 * of which will be aggregated) and a target column name, and exposes a set of type-specific hooks (initial values,
 * accumulators, whole-column application) that by default throw an "unsupported" exception. Concrete functions
 * override the hooks for the value types they support.
 * <p>
 * Static factory methods are provided for the aggregations imported by this class: sum, min, max, avg, avg2d, count
 * and same.
 */
public abstract class AggregateFunction
{
    private final String sourceColumnName;
    private final String targetColumnName;

    /**
     * Creates an aggregation whose target column has the same name as its source column.
     *
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     */
    public AggregateFunction(String newSourceColumnName)
    {
        this(newSourceColumnName, newSourceColumnName);
    }

    /**
     * Creates an aggregation with explicitly specified source and target column names.
     *
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     */
    public AggregateFunction(String newSourceColumnName, String newTargetColumnName)
    {
        this.sourceColumnName = newSourceColumnName;
        this.targetColumnName = newTargetColumnName;
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Sum} aggregation
     */
    public static AggregateFunction sum(String newSourceColumnName)
    {
        return new Sum(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Sum} aggregation
     */
    public static AggregateFunction sum(String newSourceColumnName, String newTargetColumnName)
    {
        return new Sum(newSourceColumnName, newTargetColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Min} aggregation
     */
    public static AggregateFunction min(String newSourceColumnName)
    {
        return new Min(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Min} aggregation
     */
    public static AggregateFunction min(String newSourceColumnName, String newTargetColumnName)
    {
        return new Min(newSourceColumnName, newTargetColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Max} aggregation
     */
    public static AggregateFunction max(String newSourceColumnName)
    {
        return new Max(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Max} aggregation
     */
    public static AggregateFunction max(String newSourceColumnName, String newTargetColumnName)
    {
        return new Max(newSourceColumnName, newTargetColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Avg} aggregation
     */
    public static AggregateFunction avg(String newSourceColumnName)
    {
        return new Avg(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Avg} aggregation
     */
    public static AggregateFunction avg(String newSourceColumnName, String newTargetColumnName)
    {
        return new Avg(newSourceColumnName, newTargetColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Avg2d} aggregation
     */
    public static AggregateFunction avg2d(String newSourceColumnName)
    {
        return new Avg2d(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Avg2d} aggregation
     */
    public static AggregateFunction avg2d(String newSourceColumnName, String newTargetColumnName)
    {
        return new Avg2d(newSourceColumnName, newTargetColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Count} aggregation
     */
    public static AggregateFunction count(String newSourceColumnName)
    {
        return new Count(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Count} aggregation
     */
    public static AggregateFunction count(String newSourceColumnName, String newTargetColumnName)
    {
        return new Count(newSourceColumnName, newTargetColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate, also used as the target column name
     * @return a new {@code Same} aggregation
     */
    public static AggregateFunction same(String newSourceColumnName)
    {
        return new Same(newSourceColumnName);
    }

    /**
     * @param newSourceColumnName the name of the column to aggregate
     * @param newTargetColumnName the name of the column holding the aggregated values
     * @return a new {@code Same} aggregation
     */
    public static AggregateFunction same(String newSourceColumnName, String newTargetColumnName)
    {
        return new Same(newSourceColumnName, newTargetColumnName);
    }

    /**
     * Creates a new instance of this function's runtime class bound to the given column names. The instance is
     * created reflectively, so the concrete class must expose a public {@code (String, String)} constructor.
     *
     * @param newSourceColumnName the source column name for the new instance
     * @param newTargetColumnName the target column name for the new instance
     * @return a new aggregation function of the same class as this one
     * @throws RuntimeException (keyed {@code AGG_CANNOT_CLONE}, wrapping the reflective failure) if the constructor
     *         is missing, inaccessible, or fails
     */
    public AggregateFunction cloneWith(String newSourceColumnName, String newTargetColumnName)
    {
        try
        {
            // getClass() is typed Class<? extends AggregateFunction>, so no raw type or cast is needed
            Constructor<? extends AggregateFunction> ctor =
                    this.getClass().getConstructor(String.class, String.class);
            return ctor.newInstance(newSourceColumnName, newTargetColumnName);
        }
        catch (NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e)
        {
            throw exceptionByKey("AGG_CANNOT_CLONE")
                    .with("operation", this.getName())
                    .get(e);
        }
    }

    /**
     * @deprecated use getSourceColumnName() instead
     * @return the name of the aggregation source column, i.e. the column the values of which will be aggregated
     */
    @Deprecated
    public String getColumnName()
    {
        return this.sourceColumnName;
    }

    /**
     * @return the name of the aggregation source column, i.e. the column the values of which will be aggregated
     */
    public String getSourceColumnName()
    {
        return this.sourceColumnName;
    }

    /**
     * @return the name of the column that will hold the aggregated values
     */
    public String getTargetColumnName()
    {
        return this.targetColumnName;
    }

    /**
     * Determines the type of the target column for a given source column type. By default the target column has
     * the same type as the source column.
     *
     * @param sourceColumnType the type of the column being aggregated
     * @return the type of the column holding the aggregated values
     */
    public ValueType targetColumnType(ValueType sourceColumnType)
    {
        return sourceColumnType;
    }

    /**
     * @return the value types of the source columns this aggregation function can be applied to
     */
    public abstract ListIterable<ValueType> supportedSourceTypes();

    /**
     * @param type the value type to check
     * @return true if {@code type} is among the types returned by {@link #supportedSourceTypes()}
     */
    public boolean supportsSourceType(ValueType type)
    {
        return this.supportedSourceTypes().contains(type);
    }

    /**
     * Applies this aggregation to an entire column by dispatching on the column type to the matching
     * {@code applyTo...Column} method. Any type other than LONG, DOUBLE, INT or FLOAT is handled as an object
     * column.
     *
     * @param column the column to aggregate
     * @return the result of the type-specific {@code applyTo...Column} method
     */
    public Object applyToColumn(DfColumn column)
    {
        switch (column.getType())
        {
            case LONG:
                return this.applyToLongColumn((DfLongColumn) column);
            case DOUBLE:
                return this.applyToDoubleColumn((DfDoubleColumn) column);
            case INT:
                return this.applyToIntColumn((DfIntColumn) column);
            case FLOAT:
                return this.applyToFloatColumn((DfFloatColumn) column);
            default:
                return this.applyToObjectColumn((DfObjectColumn) column);
        }
    }

    /**
     * Aggregates a column of doubles. The default implementation always throws the exception produced by
     * {@link #notApplicable(DfColumn)}.
     *
     * @param doubleColumn the column to aggregate
     * @return the aggregated value
     */
    public Object applyToDoubleColumn(DfDoubleColumn doubleColumn)
    {
        throw this.notApplicable(doubleColumn);
    }

    /**
     * Aggregates a column of floats. The default implementation always throws the exception produced by
     * {@link #notApplicable(DfColumn)}.
     *
     * @param floatColumn the column to aggregate
     * @return the aggregated value
     */
    public Object applyToFloatColumn(DfFloatColumn floatColumn)
    {
        throw this.notApplicable(floatColumn);
    }

    /**
     * Aggregates a column of longs. The default implementation always throws the exception produced by
     * {@link #notApplicable(DfColumn)}.
     *
     * @param longColumn the column to aggregate
     * @return the aggregated value
     */
    public Object applyToLongColumn(DfLongColumn longColumn)
    {
        throw this.notApplicable(longColumn);
    }

    /**
     * Aggregates a column of ints. The default implementation always throws the exception produced by
     * {@link #notApplicable(DfColumn)}.
     *
     * @param intColumn the column to aggregate
     * @return the aggregated value
     */
    public Object applyToIntColumn(DfIntColumn intColumn)
    {
        throw this.notApplicable(intColumn);
    }

    /**
     * Aggregates an object column. The default implementation always throws the exception produced by
     * {@link #notApplicable(DfColumn)}.
     *
     * @param objectColumn the column to aggregate
     * @return the aggregated value
     */
    public Object applyToObjectColumn(DfObjectColumn objectColumn)
    {
        throw this.notApplicable(objectColumn);
    }

    /**
     * Builds (but does not throw) the exception reporting that this aggregation cannot be applied to a column
     * of the given column's type.
     *
     * @param column the column this aggregation was attempted on
     * @return the exception for the caller to throw
     */
    protected RuntimeException notApplicable(DfColumn column)
    {
        return exceptionByKey("AGG_COL_TYPE_UNSUPPORTED")
                .with("operation", this.getName())
                .with("operationDescription", this.getDescription())
                .with("columnName", column.getName())
                // Locale.ROOT keeps the type name stable regardless of the default locale (e.g. Turkish dotless i)
                .with("columnType", column.getType().toString().toLowerCase(Locale.ROOT))
                .get();
    }

    /**
     * @return the value an int accumulator starts with; the default implementation always throws the exception
     *         produced by {@link #noInitialValueException(String)}
     */
    public int intInitialValue()
    {
        throw this.noInitialValueException(ValueType.INT.toString());
    }

    /**
     * @return the value a float accumulator starts with; the default implementation always throws the exception
     *         produced by {@link #noInitialValueException(String)}
     */
    public float floatInitialValue()
    {
        throw this.noInitialValueException(ValueType.FLOAT.toString());
    }

    /**
     * @return the value a long accumulator starts with; the default implementation always throws the exception
     *         produced by {@link #noInitialValueException(String)}
     */
    public long longInitialValue()
    {
        throw this.noInitialValueException(ValueType.LONG.toString());
    }

    /**
     * @return the value a double accumulator starts with; the default implementation always throws the exception
     *         produced by {@link #noInitialValueException(String)}
     */
    public double doubleInitialValue()
    {
        throw this.noInitialValueException(ValueType.DOUBLE.toString());
    }

    /**
     * @return the value an object accumulator starts with; the default implementation always throws the exception
     *         produced by {@link #noInitialValueException(String)}
     */
    public Object objectInitialValue()
    {
        throw this.noInitialValueException("non-numeric");
    }

    /**
     * Combines the current long aggregate with a new value. The default implementation always throws the exception
     * produced by {@link #unsupportedAccumulatorException(String)}.
     *
     * @param currentAggregate the value aggregated so far
     * @param newValue the value to aggregate
     * @return the new aggregate
     */
    protected long longAccumulator(long currentAggregate, long newValue)
    {
        throw this.unsupportedAccumulatorException(ValueType.LONG.toString());
    }

    /**
     * Combines the current int aggregate with a new value. The default implementation always throws the exception
     * produced by {@link #unsupportedAccumulatorException(String)}.
     *
     * @param currentAggregate the value aggregated so far
     * @param newValue the value to aggregate
     * @return the new aggregate
     */
    protected int intAccumulator(int currentAggregate, int newValue)
    {
        throw this.unsupportedAccumulatorException(ValueType.INT.toString());
    }

    /**
     * Combines the current double aggregate with a new value. The default implementation always throws the
     * exception produced by {@link #unsupportedAccumulatorException(String)}.
     *
     * @param currentAggregate the value aggregated so far
     * @param newValue the value to aggregate
     * @return the new aggregate
     */
    protected double doubleAccumulator(double currentAggregate, double newValue)
    {
        throw this.unsupportedAccumulatorException(ValueType.DOUBLE.toString());
    }

    /**
     * Combines the current float aggregate with a new value. The default implementation always throws the
     * exception produced by {@link #unsupportedAccumulatorException(String)}.
     *
     * @param currentAggregate the value aggregated so far
     * @param newValue the value to aggregate
     * @return the new aggregate
     */
    protected float floatAccumulator(float currentAggregate, float newValue)
    {
        throw this.unsupportedAccumulatorException(ValueType.FLOAT.toString());
    }

    /**
     * Combines the current object aggregate with a new value. The default implementation always throws the
     * exception produced by {@link #unsupportedAccumulatorException(String)}.
     *
     * @param currentAggregate the value aggregated so far
     * @param newValue the value to aggregate
     * @return the new aggregate
     */
    protected Object objectAccumulator(Object currentAggregate, Object newValue)
    {
        throw this.unsupportedAccumulatorException("object");
    }

    /**
     * Builds (but does not throw) the exception reporting that this aggregation has no accumulator for the
     * given type.
     *
     * @param typeName the name of the value type, used in the error message
     * @return the exception for the caller to throw
     */
    protected RuntimeException unsupportedAccumulatorException(String typeName)
    {
        return exceptionByKey("AGG_NO_ACCUMULATOR")
                .with("operation", this.getName())
                .with("type", typeName)
                .getUnsupported();
    }

    /**
     * Builds (but does not throw) the exception reporting that this aggregation has no initial value for the
     * given type. Like the other exception factory methods of this class, it returns the exception so that the
     * caller can {@code throw} it.
     *
     * @param typeName the name of the value type, used in the error message
     * @return the exception for the caller to throw
     */
    protected RuntimeException noInitialValueException(String typeName)
    {
        return exceptionByKey("AGG_NO_INITIAL_VALUE")
                .with("operation", this.getName())
                .with("type", typeName)
                .getUnsupported();
    }

    /**
     * A short name used to identify this aggregation function (such as 'Sum', 'Min', 'Avg', etc.). It is primarily
     * meant for internal use (as opposed to the value returned by the getDescription() method). By default this is
     * the simple name of the runtime class.
     * @return the name of this aggregation function
     */
    public String getName()
    {
        return this.getClass().getSimpleName();
    }

    /**
     * A brief description of the aggregation operation supported by this function (primarily for display purposes).
     * By default this is the same as the value returned by getName().
     * @return the description of this aggregation function
     */
    public String getDescription()
    {
        return this.getName();
    }

    /**
     * Produces the aggregation result for a column with no values. The default implementation always throws the
     * exception produced by {@link #notApplicable(DfColumn)}.
     *
     * @param column the empty column being aggregated
     * @return the value of this aggregation for an empty column
     */
    public Object valueForEmptyColumn(DfColumn column)
    {
        throw this.notApplicable(column);
    }

    /**
     * Reads the long value to aggregate from the source column, which is cast to {@code DfLongColumn}.
     *
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the row to read
     * @return the long value at the given row
     */
    public long getLongValue(DfColumn sourceColumn, int sourceRowIndex)
    {
        return ((DfLongColumn) sourceColumn).getLong(sourceRowIndex);
    }

    /**
     * Reads the int value to aggregate from the source column, which is cast to {@code DfIntColumn}.
     *
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the row to read
     * @return the int value at the given row
     */
    public int getIntValue(DfColumn sourceColumn, int sourceRowIndex)
    {
        return ((DfIntColumn) sourceColumn).getInt(sourceRowIndex);
    }

    /**
     * Reads the float value to aggregate from the source column, which is cast to {@code DfFloatColumn}.
     *
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the row to read
     * @return the float value at the given row
     */
    public float getFloatValue(DfColumn sourceColumn, int sourceRowIndex)
    {
        return ((DfFloatColumn) sourceColumn).getFloat(sourceRowIndex);
    }

    /**
     * Reads the double value to aggregate from the source column, which is cast to {@code DfDoubleColumn}.
     *
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the row to read
     * @return the double value at the given row
     */
    public double getDoubleValue(DfColumn sourceColumn, int sourceRowIndex)
    {
        return ((DfDoubleColumn) sourceColumn).getDouble(sourceRowIndex);
    }

    /**
     * Reads the object value to aggregate from the source column.
     *
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the row to read
     * @return the object value at the given row
     */
    public Object getObjectValue(DfColumn sourceColumn, int sourceRowIndex)
    {
        return sourceColumn.getObject(sourceRowIndex);
    }

    /**
     * A hook for post-processing the aggregated data frame; does nothing by default.
     * NOTE(review): presumably invoked once after all the rows have been aggregated, with countsByRow holding
     * the number of source rows aggregated into each result row - confirm against the caller.
     *
     * @param aggregatedDataFrame the data frame holding the aggregated values
     * @param countsByRow per-row counts supplied by the caller
     */
    public void finishAggregating(DataFrame aggregatedDataFrame, int[] countsByRow)
    {
    }

    /**
     * Stores this function's initial value for the accumulator column's type into the given row of the
     * accumulator column. Columns that are not double, long, int or float get the object initial value.
     *
     * @param accumulatorColumn the column holding the aggregated values
     * @param accumulatorRowIndex the index of the row to initialize
     */
    public void initializeValue(DfColumn accumulatorColumn, int accumulatorRowIndex)
    {
        if (accumulatorColumn.getType().isDouble())
        {
            ((DfDoubleColumnStored) accumulatorColumn).setDouble(accumulatorRowIndex, this.doubleInitialValue());
        }
        else if (accumulatorColumn.getType().isLong())
        {
            ((DfLongColumnStored) accumulatorColumn).setLong(accumulatorRowIndex, this.longInitialValue());
        }
        else if (accumulatorColumn.getType().isInt())
        {
            ((DfIntColumnStored) accumulatorColumn).setInt(accumulatorRowIndex, this.intInitialValue());
        }
        else if (accumulatorColumn.getType().isFloat())
        {
            ((DfFloatColumnStored) accumulatorColumn).setFloat(accumulatorRowIndex, this.floatInitialValue());
        }
        else
        {
            accumulatorColumn.setObject(accumulatorRowIndex, this.objectInitialValue());
        }
    }

    /**
     * Folds one source value into a long aggregate: reads the current aggregate from the target row, combines it
     * with the source value using {@link #longAccumulator(long, long)} and stores the result back.
     *
     * @param targetColumn the column holding the aggregated values
     * @param targetRowIndex the index of the aggregate row to update
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the source row to read
     */
    public void aggregateValueIntoLong(
            DfLongColumnStored targetColumn, int targetRowIndex,
            DfColumn sourceColumn, int sourceRowIndex)
    {
        long currentAggregatedValue = targetColumn.getLong(targetRowIndex);
        targetColumn.setLong(
                targetRowIndex,
                this.longAccumulator(currentAggregatedValue, this.getLongValue(sourceColumn, sourceRowIndex)));
    }

    /**
     * Folds one source value into an int aggregate: reads the current aggregate from the target row, combines it
     * with the source value using {@link #intAccumulator(int, int)} and stores the result back.
     *
     * @param targetColumn the column holding the aggregated values
     * @param targetRowIndex the index of the aggregate row to update
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the source row to read
     */
    public void aggregateValueIntoInt(
            DfIntColumnStored targetColumn, int targetRowIndex,
            DfColumn sourceColumn, int sourceRowIndex)
    {
        int currentAggregatedValue = targetColumn.getInt(targetRowIndex);
        targetColumn.setInt(
                targetRowIndex,
                this.intAccumulator(currentAggregatedValue, this.getIntValue(sourceColumn, sourceRowIndex)));
    }

    /**
     * Folds one source value into a double aggregate: reads the current aggregate from the target row, combines it
     * with the source value using {@link #doubleAccumulator(double, double)} and stores the result back.
     *
     * @param targetColumn the column holding the aggregated values
     * @param targetRowIndex the index of the aggregate row to update
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the source row to read
     */
    public void aggregateValueIntoDouble(
            DfDoubleColumnStored targetColumn, int targetRowIndex,
            DfColumn sourceColumn, int sourceRowIndex)
    {
        double currentAggregatedValue = targetColumn.getDouble(targetRowIndex);
        targetColumn.setDouble(
                targetRowIndex,
                this.doubleAccumulator(currentAggregatedValue, this.getDoubleValue(sourceColumn, sourceRowIndex)));
    }

    /**
     * Folds one source value into a float aggregate: reads the current aggregate from the target row, combines it
     * with the source value using {@link #floatAccumulator(float, float)} and stores the result back.
     *
     * @param targetColumn the column holding the aggregated values
     * @param targetRowIndex the index of the aggregate row to update
     * @param sourceColumn the column being aggregated
     * @param sourceRowIndex the index of the source row to read
     */
    public void aggregateValueIntoFloat(
            DfFloatColumnStored targetColumn, int targetRowIndex,
            DfColumn sourceColumn, int sourceRowIndex)
    {
        float currentAggregatedValue = targetColumn.getFloat(targetRowIndex);
        targetColumn.setFloat(
                targetRowIndex,
                this.floatAccumulator(currentAggregatedValue, this.getFloatValue(sourceColumn, sourceRowIndex)));
    }

    /**
     * By default aggregators treat null values as "poisonous" - that is any null value passed in the aggregator will
     * cause the result of the entire aggregation to be null, which is a sensible behavior for most aggregation
     * functions.
     * Override this method to return false if this aggregation function can handle a null value.
     *
     * @return true if nulls are poisonous, false if nulls can be handled
     */
    public boolean nullsArePoisonous()
    {
        return true;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy