All downloads are free. Search and download functionality uses the official Maven repository.

io.deephaven.engine.table.impl.by.ssmpercentile.SsmChunkedPercentileOperator Maven / Gradle / Ivy

There is a newer version: 0.37.1
Show newest version
/**
 * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending
 */
package io.deephaven.engine.table.impl.by.ssmpercentile;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.configuration.Configuration;
import io.deephaven.engine.table.ColumnSource;
import io.deephaven.engine.rowset.chunkattributes.RowKeys;
import io.deephaven.engine.table.WritableColumnSource;
import io.deephaven.engine.table.impl.by.IterativeChunkedAggregationOperator;
import io.deephaven.engine.table.impl.sources.*;
import io.deephaven.chunk.*;
import io.deephaven.engine.table.impl.ssms.SegmentedSortedMultiSet;
import io.deephaven.engine.table.impl.util.compact.CompactKernel;
import org.apache.commons.lang3.mutable.MutableInt;
import org.jetbrains.annotations.NotNull;

import java.time.Instant;
import java.util.Collections;
import java.util.Map;
import java.util.function.Supplier;

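/**
 * Iterative percentile (and median) operator. Each destination keeps its values in a pair of segmented sorted
 * multisets, one for the low values and one for the high values.
 */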
public class SsmChunkedPercentileOperator implements IterativeChunkedAggregationOperator {
    /**
     * Node size handed to the multiset factory and to the remove contexts.
     *
     * NOTE(review): the property key is named after SsmChunkedMinMaxOperator; it is kept as-is because changing the
     * key would change which configuration value is read.
     */
    private static final int NODE_SIZE =
            Configuration.getInstance().getIntegerWithDefault("SsmChunkedMinMaxOperator.nodeSize", 4096);
    /** Result column that is handed to the type helper and whose capacity/prev-tracking this operator manages. */
    private final WritableColumnSource internalResult;
    /** Column published from getResultColumns(); either internalResult itself or an Instant view of it. */
    private final ColumnSource externalResult;
    /**
     * Even slots hold the low values, odd slots hold the high values.
     */
    private final ObjectArraySource<SegmentedSortedMultiSet> ssms;
    /** Name of the result column. */
    private final String name;
    /** Kernel used to compact each run of values and produce per-value counts. */
    private final CompactKernel compactAndCountKernel;
    /** Creates a new, empty multiset for a slot that does not have one yet. */
    private final Supplier<SegmentedSortedMultiSet> ssmFactory;
    /** Creates remove contexts built with NODE_SIZE. */
    private final Supplier<SegmentedSortedMultiSet.RemoveContext> removeContextFactory;
    /** Chunk type of the values being aggregated (Long for Instant inputs). */
    private final ChunkType chunkType;
    /** Type-specific strategy that pivots values between the two multisets and records results. */
    private final PercentileTypeHelper percentileTypeHelper;

    /**
     * Builds a percentile operator for one input column.
     *
     * @param type the element type of the input; {@link Instant} inputs are handled as long chunks and exposed
     *        through a {@code LongAsInstantColumnSource}
     * @param percentile the percentile passed on to the type helper
     * @param averageEvenlyDivided whether int, long, float and double inputs use the median helpers that average
     *        adjacent values; forced to {@code false} for {@link Instant}, and other types fall back to the plain
     *        percentile helpers
     * @param name the name of the result column
     */
    public SsmChunkedPercentileOperator(Class type, double percentile, boolean averageEvenlyDivided, String name) {
        this.name = name;
        this.ssms = new ObjectArraySource<>(SegmentedSortedMultiSet.class);

        if (type == Instant.class) {
            // Instants are aggregated as longs and only re-wrapped for the published column.
            chunkType = ChunkType.Long;
            internalResult = new LongArraySource();
            // noinspection unchecked
            externalResult = new LongAsInstantColumnSource(internalResult);
            averageEvenlyDivided = false;
        } else {
            chunkType = ChunkType.fromElementType(type);
            if (!averageEvenlyDivided) {
                internalResult = ArrayBackedColumnSource.getMemoryColumnSource(0, type);
            } else {
                switch (chunkType) {
                    case Int:
                    case Long:
                    case Double:
                        internalResult = new DoubleArraySource();
                        break;
                    case Float:
                        internalResult = new FloatArraySource();
                        break;
                    default:
                        // Only int, long, double and float are actually averaged; every other type gets the
                        // ordinary percentile, so the result keeps the input type.
                        internalResult = ArrayBackedColumnSource.getMemoryColumnSource(0, type);
                }
            }
            externalResult = internalResult;
        }

        compactAndCountKernel = CompactKernel.makeCompact(chunkType);
        ssmFactory = SegmentedSortedMultiSet.makeFactory(chunkType, NODE_SIZE, type);
        removeContextFactory = SegmentedSortedMultiSet.makeRemoveContextFactory(NODE_SIZE);
        percentileTypeHelper = makeTypeHelper(chunkType, type, percentile, averageEvenlyDivided, internalResult);
    }

    /**
     * Selects the type helper for the given chunk type.
     *
     * Char, byte, short and object inputs always use the plain percentile helpers. Int, long, float and double
     * inputs use the median helpers (which average adjacent values) when {@code averageEvenlyDivided} is set, and
     * the plain percentile helpers otherwise.
     *
     * @param chunkType the chunk type of the aggregated values
     * @param type the element type, consulted only for object chunks
     * @param percentile the percentile passed to the helper
     * @param averageEvenlyDivided whether the median helpers should be used where available
     * @param resultColumn the column passed to the helper
     * @return the helper for {@code chunkType}
     * @throws UnsupportedOperationException for boolean chunks or any chunk type not listed above
     */
    private static PercentileTypeHelper makeTypeHelper(ChunkType chunkType, Class type, double percentile,
            boolean averageEvenlyDivided, WritableColumnSource resultColumn) {
        switch (chunkType) {
            // These types are never averaged, so the flag does not matter for them.
            case Char:
                return new CharPercentileTypeHelper(percentile, resultColumn);
            case Byte:
                return new BytePercentileTypeHelper(percentile, resultColumn);
            case Short:
                return new ShortPercentileTypeHelper(percentile, resultColumn);
            case Object:
                return makeObjectHelper(type, percentile, resultColumn);

            // These types have a dedicated median helper that averages the adjacent values.
            case Int:
                if (averageEvenlyDivided) {
                    return new IntPercentileTypeMedianHelper(percentile, resultColumn);
                }
                return new IntPercentileTypeHelper(percentile, resultColumn);
            case Long:
                if (averageEvenlyDivided) {
                    return new LongPercentileTypeMedianHelper(percentile, resultColumn);
                }
                return new LongPercentileTypeHelper(percentile, resultColumn);
            case Float:
                if (averageEvenlyDivided) {
                    return new FloatPercentileTypeMedianHelper(percentile, resultColumn);
                }
                return new FloatPercentileTypeHelper(percentile, resultColumn);
            case Double:
                if (averageEvenlyDivided) {
                    return new DoublePercentileTypeMedianHelper(percentile, resultColumn);
                }
                return new DoublePercentileTypeHelper(percentile, resultColumn);

            case Boolean:
            default:
                throw new UnsupportedOperationException();
        }
    }

    /**
     * Selects the helper for object chunks based on the element type.
     *
     * @param type the element type of the column
     * @param percentile the percentile passed to the helper
     * @param resultColumn the column passed to the helper
     * @return a Boolean helper for {@code Boolean}, an Instant helper for {@code Instant}, otherwise the generic
     *         object helper
     */
    @NotNull
    private static PercentileTypeHelper makeObjectHelper(
            Class type,
            double percentile,
            WritableColumnSource resultColumn) {
        if (type == Boolean.class) {
            return new BooleanPercentileTypeHelper(percentile, resultColumn);
        }
        if (type == Instant.class) {
            return new InstantPercentileTypeHelper(percentile, resultColumn);
        }
        return new ObjectPercentileTypeHelper(percentile, resultColumn);
    }

    /**
     * Type-specific strategy used by the operator: it decides how a run of values is split between the low and high
     * multisets and records the result for a destination. Implementations are not visible in this file.
     */
    interface PercentileTypeHelper {
        /**
         * Records the result for {@code destination} from the two multisets.
         *
         * @param ssmLo the multiset holding the low values of the destination
         * @param ssmHi the multiset holding the high values of the destination
         * @param destination the destination slot
         * @return the flag the operator reports as "state modified" for the destination
         */
        boolean setResult(SegmentedSortedMultiSet ssmLo, SegmentedSortedMultiSet ssmHi, long destination);

        /**
         * Called by the operator after both multisets of {@code destination} have been emptied and cleared.
         *
         * @param destination the destination slot
         * @return the flag the operator reports as "state modified" for the destination
         */
        boolean setResultNull(long destination);

        /**
         * Pivot used when removing values.
         *
         * @param ssmLo the (non-empty) low multiset of the destination
         * @param valueCopy the compacted values
         * @param counts the count for each compacted value
         * @param startPosition the first position of the run within {@code valueCopy} and {@code counts}
         * @param runLength the number of entries in the run
         * @param leftOvers output; the operator reads it as the part of the last low entry's count that must be
         *        removed from the high multiset instead of the low one
         * @return the number of leading entries of the run that the operator removes from {@code ssmLo}
         */
        int pivot(SegmentedSortedMultiSet ssmLo, Chunk valueCopy, IntChunk counts,
                int startPosition, int runLength, MutableInt leftOvers);

        /**
         * Pivot used when inserting values.
         *
         * @param segmentedSortedMultiSet the (non-empty) low multiset of the destination
         * @param valueCopy the compacted values
         * @param counts the count for each compacted value
         * @param startPosition the first position of the run within {@code valueCopy} and {@code counts}
         * @param runLength the number of entries in the run
         * @return the number of leading entries of the run that the operator inserts into the low multiset; the
         *         remainder goes to the high multiset
         */
        int pivot(SegmentedSortedMultiSet segmentedSortedMultiSet, Chunk valueCopy,
                IntChunk counts, int startPosition, int runLength);
    }

    /**
     * Bucketed add: inserts each run of values into the multisets of its destination and records the new result.
     *
     * The values and run lengths are copied into the context before being compacted, so the compaction works on
     * the context's chunks rather than on the arguments. Runs whose compacted length is zero are skipped and leave
     * their {@code stateModified} entry untouched.
     */
    @Override
    public void addChunk(BucketedContext bucketedContext, Chunk values,
            LongChunk inputRowKeys, IntChunk destinations,
            IntChunk startPositions, IntChunk length,
            WritableBooleanChunk stateModified) {
        final BucketSsmMinMaxContext ctx = (BucketSsmMinMaxContext) bucketedContext;

        final int valueCount = values.size();
        ctx.valueCopy.setSize(valueCount);
        // noinspection unchecked
        ctx.valueCopy.copyFromChunk((Chunk) values, 0, 0, valueCount);

        final int lengthCount = length.size();
        ctx.lengthCopy.setSize(lengthCount);
        ctx.lengthCopy.copyFromChunk(length, 0, 0, lengthCount);

        compactAndCountKernel.compactAndCount(ctx.valueCopy, ctx.counts, startPositions, ctx.lengthCopy);

        final int runCount = startPositions.size();
        for (int run = 0; run < runCount; ++run) {
            final int compactedLength = ctx.lengthCopy.get(run);
            if (compactedLength != 0) {
                final int runStart = startPositions.get(run);
                final long destination = destinations.get(runStart);

                final SegmentedSortedMultiSet lo = ssmLoForSlot(destination);
                final SegmentedSortedMultiSet hi = ssmHiForSlot(destination);

                pivotedInsertion(ctx, lo, hi, runStart, compactedLength, ctx.valueCopy, ctx.counts);

                final boolean modified = percentileTypeHelper.setResult(lo, hi, destination);
                stateModified.set(run, modified);
            }
        }
    }

    /**
     * Bucketed remove: removes each run of values from the multisets of its destination, records the new result,
     * and drops any multiset that became empty.
     *
     * The values and run lengths are copied into the context before being compacted. Runs whose compacted length
     * is zero are skipped and leave their {@code stateModified} entry untouched.
     */
    @Override
    public void removeChunk(BucketedContext bucketedContext, Chunk values,
            LongChunk inputRowKeys, IntChunk destinations,
            IntChunk startPositions, IntChunk length,
            WritableBooleanChunk stateModified) {
        final BucketSsmMinMaxContext context = (BucketSsmMinMaxContext) bucketedContext;

        context.valueCopy.setSize(values.size());
        // noinspection unchecked
        context.valueCopy.copyFromChunk((Chunk) values, 0, 0, values.size());

        context.lengthCopy.setSize(length.size());
        context.lengthCopy.copyFromChunk(length, 0, 0, length.size());

        compactAndCountKernel.compactAndCount(context.valueCopy, context.counts, startPositions, context.lengthCopy);

        // Reuse the remove context owned by the operator context, exactly as the singleton removeChunk does,
        // instead of allocating a fresh one on every call.
        final SegmentedSortedMultiSet.RemoveContext removeContext = context.removeContext;
        for (int ii = 0; ii < startPositions.size(); ++ii) {
            final int runLength = context.lengthCopy.get(ii);
            if (runLength == 0) {
                continue;
            }
            final int startPosition = startPositions.get(ii);
            final long destination = destinations.get(startPosition);

            final SegmentedSortedMultiSet ssmLo = ssmLoForSlot(destination);
            final SegmentedSortedMultiSet ssmHi = ssmHiForSlot(destination);
            pivotedRemoval(context, removeContext, startPosition, runLength, ssmLo, ssmHi, context.valueCopy,
                    context.counts);

            // Record the result before releasing empty multisets; the helper still needs both references.
            final boolean modified = percentileTypeHelper.setResult(ssmLo, ssmHi, destination);
            if (ssmLo.size() == 0) {
                clearSsm(destination, 0);
            }
            if (ssmHi.size() == 0) {
                clearSsm(destination, 1);
            }
            stateModified.set(ii, modified);
        }
    }

    /**
     * Removes one compacted run of values from a destination's pair of multisets.
     *
     * The helper's pivot decides how many leading entries of the run come out of {@code ssmLo}; the rest come out of
     * {@code ssmHi}. When the pivot reports left-overs, the last low entry is split: its count minus the left-overs
     * is removed from {@code ssmLo} and the left-overs are removed from {@code ssmHi}.
     *
     * @param context supplies the resettable chunks used to slice {@code valueCopy} and {@code counts}
     * @param removeContext the remove context passed to the multisets
     * @param startPosition the first position of the run within {@code valueCopy} and {@code counts}
     * @param runLength the number of entries in the run
     * @param ssmLo the low multiset of the destination
     * @param ssmHi the high multiset of the destination
     * @param valueCopy the compacted values
     * @param counts the count for each compacted value; one entry may be rewritten in place by this method
     */
    private void pivotedRemoval(SsmMinMaxContext context, SegmentedSortedMultiSet.RemoveContext removeContext,
            int startPosition, int runLength, SegmentedSortedMultiSet ssmLo, SegmentedSortedMultiSet ssmHi,
            WritableChunk valueCopy, WritableIntChunk counts) {
        // We have no choice but to split this chunk, and furthermore to make sure that we do not remove more
        // of the maximum lo value than actually exist within ssmLo.
        final MutableInt leftOvers = new MutableInt();
        int loPivot;
        if (ssmLo.size() > 0) {
            loPivot = percentileTypeHelper.pivot(ssmLo, valueCopy, counts, startPosition, runLength, leftOvers);
            Assert.leq(leftOvers.intValue(), "leftOvers.intValue()", ssmHi.totalSize(), "ssmHi.totalSize()");
        } else {
            // Nothing is stored on the low side, so the whole run is removed from the high side.
            loPivot = 0;
        }

        if (loPivot > 0) {
            final WritableChunk loValueSlice =
                    context.valueResettable.resetFromChunk(valueCopy, startPosition, loPivot);
            final WritableIntChunk loCountSlice =
                    context.countResettable.resetFromChunk(counts, startPosition, loPivot);
            if (leftOvers.intValue() > 0) {
                // Only remove from the low side what is not left over; the count is rewritten in the backing chunk
                // that the slice was reset from.
                counts.set(startPosition + loPivot - 1, counts.get(startPosition + loPivot - 1) - leftOvers.intValue());
            }
            ssmLo.remove(removeContext, loValueSlice, loCountSlice);
        }

        if (leftOvers.intValue() > 0) {
            // Hand the left-over count of the last low entry to the high side by making it the first entry of the
            // high slice.
            // NOTE(review): this assumes the pivot never reports left-overs together with a zero pivot; otherwise
            // the index below would fall before startPosition -- confirm against the helper implementations.
            counts.set(startPosition + loPivot - 1, leftOvers.intValue());
            loPivot--;
        }

        if (loPivot < runLength) {
            // The same resettable chunks are reused here; the low slices are no longer needed at this point.
            final WritableChunk hiValueSlice =
                    context.valueResettable.resetFromChunk(valueCopy, startPosition + loPivot, runLength - loPivot);
            final WritableIntChunk hiCountSlice =
                    context.countResettable.resetFromChunk(counts, startPosition + loPivot, runLength - loPivot);
            ssmHi.remove(removeContext, hiValueSlice, hiCountSlice);
        }
    }

    /**
     * Inserts one compacted run of values into a destination's pair of multisets.
     *
     * When the low multiset already has content, the helper's pivot decides how many leading entries of the run go
     * into it; otherwise the entire run goes into the high multiset.
     *
     * @param context supplies the resettable chunks used to slice {@code valueCopy} and {@code counts}
     * @param ssmLo the low multiset of the destination
     * @param ssmHi the high multiset of the destination
     * @param startPosition the first position of the run within {@code valueCopy} and {@code counts}
     * @param runLength the number of entries in the run
     * @param valueCopy the compacted values
     * @param counts the count for each compacted value
     */
    private void pivotedInsertion(SsmMinMaxContext context, SegmentedSortedMultiSet ssmLo,
            SegmentedSortedMultiSet ssmHi, int startPosition, int runLength, WritableChunk valueCopy,
            WritableIntChunk counts) {
        final int loCount = ssmLo.size() > 0
                ? percentileTypeHelper.pivot(ssmLo, valueCopy, counts, startPosition, runLength)
                : 0;

        if (loCount > 0) {
            final WritableChunk loValues = context.valueResettable.resetFromChunk(valueCopy, startPosition, loCount);
            final WritableIntChunk loCounts = context.countResettable.resetFromChunk(counts, startPosition, loCount);
            ssmLo.insert(loValues, loCounts);
        }

        // The resettable chunks are reused for the high side, so the low insert must already be finished.
        final int hiCount = runLength - loCount;
        if (hiCount > 0) {
            final int hiStart = startPosition + loCount;
            final WritableChunk hiValues = context.valueResettable.resetFromChunk(valueCopy, hiStart, hiCount);
            final WritableIntChunk hiCounts = context.countResettable.resetFromChunk(counts, hiStart, hiCount);
            ssmHi.insert(hiValues, hiCounts);
        }
    }


    /**
     * Bucketed modify: removes the previous values of each run from its destination and then inserts the new
     * values, recording the result once per run.
     *
     * The work is done in two passes because the context's value, count and length chunks are reused: the first
     * pass compacts and removes {@code preValues}, the second compacts and inserts {@code postValues}. A run that
     * left both multisets empty in the first pass is remembered, so that if nothing is inserted for it in the
     * second pass the multisets are cleared and the result is set to null.
     */
    @Override
    public void modifyChunk(BucketedContext bucketedContext, Chunk preValues,
            Chunk postValues, LongChunk postShiftRowKeys,
            IntChunk destinations, IntChunk startPositions, IntChunk length,
            WritableBooleanChunk stateModified) {
        final BucketSsmMinMaxContext context = (BucketSsmMinMaxContext) bucketedContext;

        // Pass 1: compact the previous values and remove them.
        context.valueCopy.setSize(preValues.size());
        // noinspection unchecked
        context.valueCopy.copyFromChunk((Chunk) preValues, 0, 0, preValues.size());

        context.lengthCopy.setSize(length.size());
        context.lengthCopy.copyFromChunk(length, 0, 0, length.size());

        compactAndCountKernel.compactAndCount(context.valueCopy, context.counts, startPositions, context.lengthCopy);

        // Reuse the remove context owned by the operator context, exactly as the singleton modifyChunk does,
        // instead of allocating a fresh one on every call.
        final SegmentedSortedMultiSet.RemoveContext removeContext = context.removeContext;
        context.ssmsToMaybeClear.fillWithValue(0, destinations.size(), false);
        for (int ii = 0; ii < startPositions.size(); ++ii) {
            final int runLength = context.lengthCopy.get(ii);
            if (runLength == 0) {
                continue;
            }
            final int startPosition = startPositions.get(ii);
            final long destination = destinations.get(startPosition);

            final SegmentedSortedMultiSet ssmLo = ssmLoForSlot(destination);
            final SegmentedSortedMultiSet ssmHi = ssmHiForSlot(destination);

            pivotedRemoval(context, removeContext, startPosition, runLength, ssmLo, ssmHi, context.valueCopy,
                    context.counts);
            if (ssmLo.size() == 0 && ssmHi.size() == 0) {
                // Do not clear yet; the second pass may put values back into this destination.
                context.ssmsToMaybeClear.set(ii, true);
            }
        }

        // Pass 2: compact the new values and insert them. The length copy is refreshed because the first
        // compaction overwrote it.
        context.valueCopy.setSize(postValues.size());
        // noinspection unchecked
        context.valueCopy.copyFromChunk((Chunk) postValues, 0, 0, postValues.size());

        context.lengthCopy.setSize(length.size());
        context.lengthCopy.copyFromChunk(length, 0, 0, length.size());

        compactAndCountKernel.compactAndCount(context.valueCopy, context.counts, startPositions, context.lengthCopy);
        for (int ii = 0; ii < startPositions.size(); ++ii) {
            final int runLength = context.lengthCopy.get(ii);
            final int startPosition = startPositions.get(ii);
            final long destination = destinations.get(startPosition);
            if (runLength == 0) {
                if (context.ssmsToMaybeClear.get(ii)) {
                    // we may have deleted this position on the last round, really get rid of it
                    clearSsm(destination, 0);
                    clearSsm(destination, 1);
                    stateModified.set(ii, percentileTypeHelper.setResultNull(destination));
                } else {
                    // Nothing to insert, but the removal pass may still have changed the result.
                    stateModified.set(ii, percentileTypeHelper.setResult(ssmLoForSlot(destination),
                            ssmHiForSlot(destination), destination));
                }
                continue;
            }

            final SegmentedSortedMultiSet ssmLo = ssmLoForSlot(destination);
            final SegmentedSortedMultiSet ssmHi = ssmHiForSlot(destination);

            pivotedInsertion(context, ssmLo, ssmHi, startPosition, runLength, context.valueCopy, context.counts);

            stateModified.set(ii, percentileTypeHelper.setResult(ssmLo, ssmHi, destination));
        }
    }

    /**
     * Singleton add: inserts a chunk of values into the multisets of a single destination.
     *
     * @return the flag reported by the type helper after recording the result
     */
    @Override
    public boolean addChunk(SingletonContext singletonContext, int chunkSize, Chunk values,
            LongChunk inputRowKeys, long destination) {
        final SsmMinMaxContext ctx = (SsmMinMaxContext) singletonContext;

        final int valueCount = values.size();
        ctx.valueCopy.setSize(valueCount);
        // noinspection unchecked
        ctx.valueCopy.copyFromChunk((Chunk) values, 0, 0, valueCount);
        compactAndCountKernel.compactAndCount(ctx.valueCopy, ctx.counts);

        // The multisets are looked up (and created if absent) even when nothing remains after compaction.
        final SegmentedSortedMultiSet lo = ssmLoForSlot(destination);
        final SegmentedSortedMultiSet hi = ssmHiForSlot(destination);
        final int compactedSize = ctx.valueCopy.size();
        if (compactedSize > 0) {
            pivotedInsertion(ctx, lo, hi, 0, compactedSize, ctx.valueCopy, ctx.counts);
        }
        return percentileTypeHelper.setResult(lo, hi, destination);
    }

    /**
     * Singleton remove: removes a chunk of values from the multisets of a single destination and drops any
     * multiset that became empty.
     *
     * @return {@code false} when nothing remains after compaction; otherwise the flag reported by the type helper
     */
    @Override
    public boolean removeChunk(SingletonContext singletonContext, int chunkSize, Chunk values,
            LongChunk inputRowKeys, long destination) {
        final SsmMinMaxContext ctx = (SsmMinMaxContext) singletonContext;

        final int valueCount = values.size();
        ctx.valueCopy.setSize(valueCount);
        // noinspection unchecked
        ctx.valueCopy.copyFromChunk((Chunk) values, 0, 0, valueCount);
        compactAndCountKernel.compactAndCount(ctx.valueCopy, ctx.counts);

        final int compactedSize = ctx.valueCopy.size();
        if (compactedSize == 0) {
            return false;
        }

        final SegmentedSortedMultiSet lo = ssmLoForSlot(destination);
        final SegmentedSortedMultiSet hi = ssmHiForSlot(destination);
        pivotedRemoval(ctx, ctx.removeContext, 0, compactedSize, lo, hi, ctx.valueCopy, ctx.counts);

        // Record the result first; the empty multisets are released only afterwards.
        final boolean changed = percentileTypeHelper.setResult(lo, hi, destination);
        if (lo.size() == 0) {
            clearSsm(destination, 0);
        }
        if (hi.size() == 0) {
            clearSsm(destination, 1);
        }
        return changed;
    }

    /**
     * Singleton modify: removes the previous values from a single destination and inserts the new ones.
     *
     * @return {@code false} when both the previous and the new values are empty after compaction; the helper's
     *         null-result flag when the destination ended up empty; otherwise the helper's result flag
     */
    @Override
    public boolean modifyChunk(SingletonContext singletonContext, int chunkSize, Chunk preValues,
            Chunk postValues, LongChunk postShiftRowKeys, long destination) {
        final SsmMinMaxContext ctx = (SsmMinMaxContext) singletonContext;

        // Removal phase: the multisets are only looked up if there is something to remove.
        ctx.valueCopy.setSize(preValues.size());
        // noinspection unchecked
        ctx.valueCopy.copyFromChunk((Chunk) preValues, 0, 0, preValues.size());
        compactAndCountKernel.compactAndCount(ctx.valueCopy, ctx.counts);

        SegmentedSortedMultiSet lo = null;
        SegmentedSortedMultiSet hi = null;
        final int removedSize = ctx.valueCopy.size();
        if (removedSize > 0) {
            lo = ssmLoForSlot(destination);
            hi = ssmHiForSlot(destination);
            pivotedRemoval(ctx, ctx.removeContext, 0, removedSize, lo, hi, ctx.valueCopy, ctx.counts);
        }

        // Insertion phase: the value and count chunks are reused for the new values.
        ctx.valueCopy.setSize(postValues.size());
        // noinspection unchecked
        ctx.valueCopy.copyFromChunk((Chunk) postValues, 0, 0, postValues.size());
        compactAndCountKernel.compactAndCount(ctx.valueCopy, ctx.counts);

        final int insertedSize = ctx.valueCopy.size();
        if (insertedSize > 0) {
            if (lo == null) {
                lo = ssmLoForSlot(destination);
                hi = ssmHiForSlot(destination);
            }
            pivotedInsertion(ctx, lo, hi, 0, insertedSize, ctx.valueCopy, ctx.counts);
            return percentileTypeHelper.setResult(lo, hi, destination);
        }

        if (lo == null) {
            // Neither phase touched the destination.
            return false;
        }

        if (lo.size() == 0 && hi.size() == 0) {
            // The removal emptied the destination and nothing was added back.
            clearSsm(destination, 0);
            clearSsm(destination, 1);
            return percentileTypeHelper.setResultNull(destination);
        }

        return percentileTypeHelper.setResult(lo, hi, destination);
    }

    /** Returns the multiset holding the low values of {@code destination} (the even slot), creating it if absent. */
    private SegmentedSortedMultiSet ssmLoForSlot(long destination) {
        final int lowOffset = 0;
        return ssmForSlot(destination, lowOffset);
    }

    /** Returns the multiset holding the high values of {@code destination} (the odd slot), creating it if absent. */
    private SegmentedSortedMultiSet ssmHiForSlot(long destination) {
        final int highOffset = 1;
        return ssmForSlot(destination, highOffset);
    }

    /**
     * Looks up the multiset stored for a destination, creating and storing a new one when the slot is empty.
     *
     * @param destination the destination slot
     * @param hi 0 for the low multiset, 1 for the high multiset
     * @return the existing or newly created multiset
     */
    private SegmentedSortedMultiSet ssmForSlot(long destination, int hi) {
        final long slot = destination * 2 + hi;
        final SegmentedSortedMultiSet existing = ssms.getUnsafe(slot);
        if (existing != null) {
            return existing;
        }
        final SegmentedSortedMultiSet created = ssmFactory.get();
        ssms.set(slot, created);
        return created;
    }

    /**
     * Drops the multiset stored for a destination by nulling its slot.
     *
     * @param destination the destination slot
     * @param hi 0 for the low multiset, 1 for the high multiset
     */
    private void clearSsm(long destination, int hi) {
        ssms.set(destination * 2 + hi, null);
    }

    /**
     * Grows the result column to {@code tableSize} and the multiset storage to twice that, since every destination
     * owns two slots.
     */
    @Override
    public void ensureCapacity(long tableSize) {
        final long ssmSlots = tableSize * 2;
        internalResult.ensureCapacity(tableSize);
        ssms.ensureCapacity(ssmSlots);
    }

    @Override
    public Map> getResultColumns() {
        return Collections.>singletonMap(name, externalResult);
    }

    /** Turns on previous-value tracking for the result column by delegating to it. */
    @Override
    public void startTrackingPrevValues() {
        this.internalResult.startTrackingPrevValues();
    }

    /**
     * Creates the context used by the bucketed add/remove/modify methods.
     *
     * @param size the size used for the context's chunks
     */
    @Override
    public BucketedContext makeBucketedContext(int size) {
        final BucketSsmMinMaxContext bucketedContext = new BucketSsmMinMaxContext(chunkType, size);
        return bucketedContext;
    }

    /**
     * Creates the context used by the singleton add/remove/modify methods.
     *
     * @param size the size used for the context's chunks
     */
    @Override
    public SingletonContext makeSingletonContext(int size) {
        final SsmMinMaxContext singletonContext = new SsmMinMaxContext(chunkType, size);
        return singletonContext;
    }

    /**
     * Scratch state for the singleton operations: a copy of the incoming values, their counts, a reusable remove
     * context, and resettable chunks used to slice the copies.
     */
    private static class SsmMinMaxContext implements SingletonContext {
        /** Remove context reused across removals performed with this context. */
        final SegmentedSortedMultiSet.RemoveContext removeContext;
        /** Writable copy of the incoming values; compacted in place. */
        final WritableChunk valueCopy;
        /** Count for each compacted value. */
        final WritableIntChunk counts;
        /** Resettable view used to slice {@code valueCopy}. */
        final ResettableWritableChunk valueResettable;
        /** Resettable view used to slice {@code counts}. */
        final ResettableWritableIntChunk countResettable;

        private SsmMinMaxContext(ChunkType chunkType, int size) {
            this.removeContext = SegmentedSortedMultiSet.makeRemoveContext(NODE_SIZE);
            this.valueCopy = chunkType.makeWritableChunk(size);
            this.counts = WritableIntChunk.makeWritableChunk(size);
            this.valueResettable = chunkType.makeResettableWritableChunk();
            this.countResettable = ResettableWritableIntChunk.makeResettableChunk();
        }

        /** Closes every chunk owned by this context. */
        @Override
        public void close() {
            this.valueCopy.close();
            this.counts.close();
            this.valueResettable.close();
            this.countResettable.close();
        }
    }

    /**
     * Scratch state for the bucketed operations; adds a copy of the run lengths and a per-run flag used by the
     * bucketed modify to remember destinations that may need clearing.
     */
    private static class BucketSsmMinMaxContext extends SsmMinMaxContext implements BucketedContext {
        /** Writable copy of the run lengths; rewritten by compaction. */
        final WritableIntChunk lengthCopy;
        /** Per-run flag: both multisets were empty after the removal pass of a modify. */
        final WritableBooleanChunk ssmsToMaybeClear;

        private BucketSsmMinMaxContext(ChunkType chunkType, int size) {
            super(chunkType, size);
            this.lengthCopy = WritableIntChunk.makeWritableChunk(size);
            this.ssmsToMaybeClear = WritableBooleanChunk.makeWritableChunk(size);
        }

        /** Closes the inherited chunks first, then the chunks added by this class. */
        @Override
        public void close() {
            super.close();
            this.lengthCopy.close();
            this.ssmsToMaybeClear.close();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy