All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.deephaven.server.table.stats.CharacterChunkedStats Maven / Gradle / Ivy

The newest version!
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.server.table.stats;

import gnu.trove.map.TCharLongMap;
import gnu.trove.map.hash.TCharLongHashMap;
import gnu.trove.set.TCharSet;
import gnu.trove.set.hash.TCharHashSet;
import io.deephaven.engine.rowset.RowSet;
import io.deephaven.engine.table.ColumnSource;
import io.deephaven.engine.table.Table;
import io.deephaven.engine.table.impl.util.ColumnHolder;
import io.deephaven.engine.table.iterators.CharacterColumnIterator;
import io.deephaven.engine.table.iterators.ChunkedCharacterColumnIterator;
import io.deephaven.engine.util.TableTools;
import io.deephaven.util.QueryConstants;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * {@link ChunkedStatsKernel} for {@code char} columns.
 *
 * <p>
 * Iterates a column once, counting non-null values and tracking distinct values. Per-value occurrence counts are kept
 * only while the number of distinct values stays at or below {@code maxUniqueToCollect}; beyond that only the number
 * of distinct values is reported.
 */
public class CharacterChunkedStats implements ChunkedStatsKernel {
    /** Largest number of distinct values for which per-value occurrence counts are still tracked. */
    private final int maxUniqueToCollect;
    /** Largest number of distinct values emitted in the UNIQUE_KEYS / UNIQUE_COUNTS columns. */
    private final int maxUniqueToDisplay;

    /**
     * @param maxUniqueToCollect once more than this many distinct values are seen, per-value counts are discarded and
     *        only the distinct-value total is reported
     * @param maxUniqueToDisplay upper bound on how many of the most frequent values are included in the result
     */
    public CharacterChunkedStats(int maxUniqueToCollect, int maxUniqueToDisplay) {
        this.maxUniqueToCollect = maxUniqueToCollect;
        this.maxUniqueToDisplay = maxUniqueToDisplay;
    }

    /**
     * Computes statistics over the {@code char} values of {@code columnSource} at the rows in {@code rowSet}.
     *
     * <p>
     * The result always has the columns {@code COUNT} (number of non-null values), {@code SIZE}
     * ({@code rowSet.size()}) and {@code UNIQUE_VALUES} (number of distinct non-null values). If the number of distinct
     * values never exceeded {@code maxUniqueToCollect}, it additionally has {@code UNIQUE_KEYS} and
     * {@code UNIQUE_COUNTS}, holding up to {@code maxUniqueToDisplay} values and their occurrence counts, ordered by
     * descending count.
     *
     * @param rowSet the rows to read
     * @param columnSource the column to read; values are read as {@code char}
     * @param usePrev when {@code true}, values are read from {@code columnSource.getPrevSource()}
     * @return a new table holding the statistics described above
     */
    @Override
    public Table processChunks(final RowSet rowSet, final ColumnSource columnSource, boolean usePrev) {
        long count = 0;
        int uniqueCount = 0;

        final TCharLongMap countValues = new TCharLongHashMap();
        boolean useSet = false;
        final TCharSet uniqueValues = new TCharHashSet();

        try (CharacterColumnIterator iterator =
                new ChunkedCharacterColumnIterator(usePrev ? columnSource.getPrevSource() : columnSource, rowSet)) {
            // Phase 1: track an occurrence count per distinct value.
            while (iterator.hasNext()) {
                final char val = iterator.nextChar();
                if (val == QueryConstants.NULL_CHAR) {
                    continue;
                }
                count++;

                // adjustOrPutValue yields the updated count, so a result of 1 means this value was just inserted
                if (countValues.adjustOrPutValue(val, 1, 1) == 1 && ++uniqueCount > maxUniqueToCollect) {
                    // we no longer want to track counts for these items; fall back to a Set to get at least a count
                    uniqueValues.addAll(countValues.keySet());
                    countValues.clear();
                    useSet = true;
                    break;
                }
            }
            // Phase 2: only entered with remaining items after the fallback above; the first loop otherwise drains
            // the iterator.
            while (iterator.hasNext()) {
                // items still remain, count non-nulls and uniques
                final char val = iterator.nextChar();
                if (val == QueryConstants.NULL_CHAR) {
                    continue;
                }
                count++;

                uniqueValues.add(val);
            }
        }

        if (useSet) {
            // Too many distinct values: report the totals only, without per-value detail.
            return TableTools.newTable(
                    TableTools.longCol("COUNT", count),
                    TableTools.longCol("SIZE", rowSet.size()),
                    TableTools.intCol("UNIQUE_VALUES", uniqueValues.size()));
        }

        // Copy the primitive map into boxed entries so it can be ordered by occurrence count.
        final List<Map.Entry<String, Long>> sorted = new ArrayList<>(countValues.size());
        countValues.forEachEntry((o, c) -> {
            sorted.add(Map.entry(Objects.toString(o), c));
            return true; // keep iterating
        });
        // Most frequent first.
        sorted.sort(Map.Entry.<String, Long>comparingByValue().reversed());

        final int resultCount = Math.min(maxUniqueToDisplay, sorted.size());
        final String[] uniqueKeys = new String[resultCount];
        final long[] uniqueCounts = new long[resultCount];
        for (int i = 0; i < resultCount; i++) {
            final Map.Entry<String, Long> entry = sorted.get(i);
            uniqueKeys[i] = entry.getKey();
            uniqueCounts[i] = entry.getValue();
        }
        return TableTools.newTable(
                TableTools.longCol("COUNT", count),
                TableTools.longCol("SIZE", rowSet.size()),
                TableTools.intCol("UNIQUE_VALUES", countValues.size()),
                new ColumnHolder<>("UNIQUE_KEYS", String[].class, String.class, false, uniqueKeys),
                new ColumnHolder<>("UNIQUE_COUNTS", long[].class, long.class, false, uniqueCounts));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy