All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netflix.hollow.core.index.HollowSparseIntegerSet Maven / Gradle / Ivy

There is a newer version: 7.13.0
Show newest version
/*
 *  Copyright 2016-2019 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */
package com.netflix.hollow.core.index;

import com.netflix.hollow.core.read.engine.HollowReadStateEngine;
import com.netflix.hollow.core.read.engine.HollowTypeStateListener;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReferenceArray;

/**
 * Create hollow integer set for sparse non-negative {@literal &} unique integer values referenced by fieldPath in a type based on a predicate.
 */
public class HollowSparseIntegerSet implements HollowTypeStateListener {

    private final HollowReadStateEngine readStateEngine;
    private final String type;
    private final FieldPath fieldPath;
    private final IndexPredicate predicate;

    protected volatile SparseBitSet sparseBitSetVolatile;

    private Set valuesToSet;
    private Set valuesToClear;
    private int maxValueToSet;

    public interface IndexPredicate {
        boolean shouldIndex(int ordinal);
    }

    private final static IndexPredicate DEFAULT_PREDICATE = new IndexPredicate() {
        @Override
        public boolean shouldIndex(int ordinal) {
            return true;
        }
    };

    /**
     * Create a index for integer values pointed by the given field path.
     *
     * @param readStateEngine the read state
     * @param type the type name
     * @param fieldPath the field path
     */
    public HollowSparseIntegerSet(HollowReadStateEngine readStateEngine, String type, String fieldPath) {
        this(readStateEngine, type, fieldPath, DEFAULT_PREDICATE);
    }

    /**
     * Create a index for integer values based on the given IndexPredicate.
     *
     * @param readStateEngine HollowReadStateEngine to read data set.
     * @param type            type which contains the path to integer values for indexing.
     * @param fieldPath       path to the integer values
     * @param predicate       implementation of IndexPredicate, indicating if the record passes the condition for indexing.
     */
    public HollowSparseIntegerSet(HollowReadStateEngine readStateEngine, String type, String fieldPath, IndexPredicate predicate) {

        // check arguments
        if (readStateEngine == null) throw new IllegalArgumentException("Read state engine cannot be null");
        if (type == null) throw new IllegalArgumentException("type cannot be null");
        if (fieldPath == null || fieldPath.isEmpty())
            throw new IllegalArgumentException("fieldPath cannot be null or empty");

        this.readStateEngine = readStateEngine;
        this.type = type;
        this.fieldPath = new FieldPath(readStateEngine, type, fieldPath);
        this.predicate = predicate;
        this.valuesToSet = new HashSet<>();
        this.valuesToClear = new HashSet<>();
        build();
    }

    protected void build() {

        // initialize an instance of SparseBitSet
        initSet(Integer.MAX_VALUE);

        // iterate through all populated ordinals for the type to set the values based on predicate
        BitSet typeBitSet = readStateEngine.getTypeState(type).getPopulatedOrdinals();
        int ordinal = typeBitSet.nextSetBit(0);
        while (ordinal != -1) {
            set(ordinal);
            ordinal = typeBitSet.nextSetBit(ordinal + 1);
        }

        // run compaction
        compact();
    }

    protected void initSet(int maxValue) {
        sparseBitSetVolatile = new SparseBitSet(maxValue);
    }

    protected void set(int ordinal) {
        if (predicate.shouldIndex(ordinal)) {
            Object[] values = fieldPath.findValues(ordinal);
            if (values != null && values.length > 0) {
                SparseBitSet bitSet = sparseBitSetVolatile;
                for (Object value : values) {
                    bitSet.set((int) value);
                }
            }
        }
    }

    protected void compact() {
        SparseBitSet current = sparseBitSetVolatile;
        SparseBitSet compactedSet = SparseBitSet.compact(current);
        sparseBitSetVolatile = compactedSet;
    }


    /**
     * Check if the given value is contained in the set (or if the given value satisfies the predicate condition.)
     *
     * @param i the integer value
     * @return {@code true} if the value is present
     */
    public boolean get(int i) {
        SparseBitSet current;
        boolean result;
        do {
            current = sparseBitSetVolatile;
            result = current.get(i);
        } while (current != sparseBitSetVolatile);
        return result;
    }

    /**
     * Estimate the total number of bits used to represent the integer set.
     *
     * @return Calculates the total number of bits used by longs in underlying data structure.
     */
    public long size() {
        SparseBitSet current;
        long size;
        do {
            current = sparseBitSetVolatile;
            size = current.estimateBitsUsed();
        } while (current != sparseBitSetVolatile);
        return size;
    }

    /**
     * @return the total number of integers added to the set.
     */
    public int cardinality() {
        SparseBitSet current;
        int cardinality;
        do {
            current = sparseBitSetVolatile;
            cardinality = current.cardinality();
        } while (current != sparseBitSetVolatile);
        return cardinality;
    }

    /**
     * Use this method to keep the index updated with delta changes on the read state engine.
     * Remember to call detachFromDeltaUpdates to stop the delta changes.
     * NOTE: Each delta updates creates a new prefix index and swaps the new with current.
     */
    public void listenForDeltaUpdates() {
        readStateEngine.getTypeState(type).addListener(this);
    }

    /**
     * Stop delta updates for this index.
     */
    public void detachFromDeltaUpdates() {
        readStateEngine.getTypeState(type).removeListener(this);
    }

    @Override
    public void beginUpdate() {
        valuesToSet.clear();
        valuesToClear.clear();
        maxValueToSet = -1;
    }

    @Override
    public void addedOrdinal(int ordinal) {
        if (predicate.shouldIndex(ordinal)) {
            Object[] values = fieldPath.findValues(ordinal);
            for (Object value : values) {
                valuesToSet.add((int) value);
                if (maxValueToSet < (int) value) maxValueToSet = (int) value;
            }
        }
    }

    @Override
    public void removedOrdinal(int ordinal) {
        Object[] values = fieldPath.findValues(ordinal);
        for (Object value : values)
            valuesToClear.add((int) value);
    }

    @Override
    public void endUpdate() {
        boolean didSomeWork = false;
        SparseBitSet updated = sparseBitSetVolatile;
        // first check if the max value among the new values to be added is more than the max value of the existing sparse bit set.
        if (valuesToSet.size() > 0 && maxValueToSet > updated.findMaxValue()) {
            updated = SparseBitSet.resize(updated, maxValueToSet);
            didSomeWork = true;
        }

        // when applying delta, check for duplicates, increment counts if duplicate values are found else set them
        for (int value : valuesToSet) {
            updated.set(value);
        }

        // first clear all the values that are meant to be cleared
        for (int value : valuesToClear) {
            updated.clear(value);
        }

        if (didSomeWork) {
            sparseBitSetVolatile = updated;
        }
    }

    /**
     * This implementation is motivated from several ideas to get a compact sparse set.
     * When using a a bucket of BitSet, problems
     * - smaller sizes of BitSet are not useful, since null references are themselves 64/32 bit references.
     * - larger sizes of BitSet for truly sparse integers, has overhead of too many zeroes in one BitSet.
     * 

* The idea is to only store longs in bb that have non-zero values where bucket sizes are longs. Bucket size of 64 longs are convenient when using mod operations. *

* Each bit in long value in indices array, indicates if a long value is initialized. 64 bits would point to 64 long values ( 1 bucket ). * Each bucket could contain 1-64 longs, we only hold non-zero long values in bucket. */ static class SparseBitSet { // shift used to determine which bucket private static final int BUCKET_SHIFT = 12; // shift used to determine which Long value to use in bucket. private static final int LONG_SHIFT = 6; private final int maxValue; private final AtomicReferenceArray buckets; private static class Bucket { private long idx; private long[] longs; private Bucket(long idx, long[] longs) { this.idx = idx; this.longs = longs; } } SparseBitSet(int maxValue) { int totalBuckets = maxValue >>> BUCKET_SHIFT; this.maxValue = maxValue; this.buckets = new AtomicReferenceArray<>(totalBuckets + 1); } private SparseBitSet(int maxValue, AtomicReferenceArray buckets) { this.maxValue = maxValue; this.buckets = buckets; } private static int getIndex(int i) { // logical right shift return i >>> BUCKET_SHIFT; } /** * This method returns the number of Longs initialized from LSB to the given bitInIndex. * For example longAtIndex (64 bits) = 00...1001 and bitInIndex (3rd bit is set) 000...100 * then this method will return 1 since only one bit is set in longAtIndex to the right of bitInIndex * * @param longAtIndex * @param bitInIndex * @return */ private static int getOffset(long longAtIndex, long bitInIndex) { // set all bits to one before the bit that is set in bitInIndex // example : 000...0100 will become 000...011 long setAllOnesBeforeBitInIndex = bitInIndex - 1; long offset = (longAtIndex & setAllOnesBeforeBitInIndex); return Long.bitCount(offset); } boolean get(int i) { if (i > maxValue || i < 0) return false; int index = getIndex(i); Bucket currentBucket = buckets.get(index); if (currentBucket == null) return false; long currentLongAtIndex = currentBucket.idx; long[] longs = currentBucket.longs; // find which bit in index will point to the long in bb long whichLong = i >>> LONG_SHIFT; long bitInIndex = 1L << whichLong;// whichLong % 64 long isLongInitialized = (currentLongAtIndex & bitInIndex); if (isLongInitialized == 0) return false; int offset = getOffset(currentLongAtIndex, bitInIndex); long value = longs[offset]; long whichBitInLong = 1L << i; return (value & whichBitInLong) != 0; } // thread-safe void set(int i) { if (i > maxValue) throw new IllegalArgumentException("Max value initialized is " + maxValue + " given value is " + i); if (i < 0) throw new IllegalArgumentException("Cannot index negative numbers"); // find which bucket int index = getIndex(i); // find which bit in index will point to the long in bb long whichLong = i >>> LONG_SHIFT; long bitInIndex = 1L << whichLong;// whichLong % 64 long whichBitInLong = 1L << i;// i % 64 while (true) { long longAtIndex = 0; long[] longs = null; Bucket currentBucket = buckets.get(index); if (currentBucket != null) { longAtIndex = currentBucket.idx; longs = currentBucket.longs.clone(); } boolean isLongInitialized = (longAtIndex & bitInIndex) != 0; if (isLongInitialized) { // if a long value is set, the find the correct offset to determine which long in longs to use. int offset = getOffset(longAtIndex, bitInIndex); longs[offset] |= whichBitInLong;// or preserves previous set operations in this long. } else if (longAtIndex == 0) { // first set that bit in idx for that bucket, and assign a new long[] longAtIndex = bitInIndex; longs = new long[]{whichBitInLong}; } else { // update long value at index longAtIndex |= bitInIndex; // find offset int offset = getOffset(longAtIndex, bitInIndex); int oldLongsLen = longs.length; long[] newLongs = new long[oldLongsLen + 1]; // if offset is 2 means 3 longs are needed starting from 0 // if current longs length is 2 (0,1) then append third long at end // if current longs length is greater than offset, then insert long 0 -> (offset - 1), new long, offset to (length -1) if (offset >= oldLongsLen) { // append new long at end int it; for (it = 0; it < oldLongsLen; it++) newLongs[it] = longs[it]; newLongs[it] = whichBitInLong; } else { // insert new long in between int it; for (it = 0; it < offset; it++) newLongs[it] = longs[it]; newLongs[offset] = whichBitInLong; for (it = offset; it < oldLongsLen; it++) newLongs[it + 1] = longs[it]; } longs = newLongs; } Bucket newBucket = new Bucket(longAtIndex, longs); if (buckets.compareAndSet(index, currentBucket, newBucket)) break; } } // thread-safe void clear(int i) { if (i > maxValue || i < 0) return; int index = getIndex(i); while (true) { Bucket currentBucket = buckets.get(index); if (currentBucket == null) return; long longAtIndex = currentBucket.idx; long[] longs = currentBucket.longs.clone(); // find which bit in index will point to the long in bb long whichLong = i >>> LONG_SHIFT; long bitInIndex = 1L << whichLong;// whichLong % 64 long whichBitInLong = 1L << i;// i % 64 long isLongInitialized = (longAtIndex & bitInIndex); if (isLongInitialized == 0) return; int offset = getOffset(longAtIndex, bitInIndex); long value = longs[offset]; // unset whichBitInIndex in value // to clear 3rd bit (00100 whichBitInLong) in 00101(value), & with 11011 to get 00001 long updatedValue = value & ~whichBitInLong; boolean isBucketEmpty = false; if (updatedValue != 0) { longs[offset] = updatedValue; } else { // if updatedValue is 0, then update the bucket removing that long int oldLongsLen = longs.length; // if only one long was initialized in the bucket, then make the reference null, indexAtLong 0 if (oldLongsLen == 1) { longs = null; longAtIndex = 0; isBucketEmpty = true; } else { // copy everything over, except the long at the given offset, long[] newLongs = new long[oldLongsLen - 1]; int it; for (it = 0; it < offset; it++) newLongs[it] = longs[it]; it++; while (it < oldLongsLen) { newLongs[it - 1] = longs[it]; it++; } longs = newLongs; longAtIndex &= ~bitInIndex; } } Bucket updatedBucket = null; if (!isBucketEmpty) updatedBucket = new Bucket(longAtIndex, longs); if (buckets.compareAndSet(index, currentBucket, updatedBucket)) break; } } int findMaxValue() { // find the last index that is initialized int index = buckets.length() - 1; while (index >= 0) { if (buckets.get(index) != null) break; index--; } // if no buckets are initialized, then return -1 ( meaning set is empty) if (index < 0) return -1; // find the highest bit in indexAtLong to see which is last long init in bucket int highestBitSetInIndexAtLong = 63 - Long.numberOfLeadingZeros(Long.highestOneBit(buckets.get(index).idx)); long[] longs = buckets.get(index).longs; long value = longs[longs.length - 1]; long highestBitSetInLong = 63 - Long.numberOfLeadingZeros(Long.highestOneBit(value)); return (int) ((index << BUCKET_SHIFT) + (highestBitSetInIndexAtLong << 6) + highestBitSetInLong); } int cardinality() { int cardinality = 0; int index = 0; while (index < buckets.length()) { if (buckets.get(index) != null) { long[] longs = buckets.get(index).longs; for (long value : longs) cardinality += Long.bitCount(value); } index++; } return cardinality; } long estimateBitsUsed() { long longsUsed = 0; long idxCounts = 0; int index = 0; while (index < buckets.length()) { if (buckets.get(index) != null) { idxCounts++; longsUsed += buckets.get(index).longs.length; } index++; } // total bits used long bitsUsedByArrayPointers = buckets.length() * 64; long bitsUsedByIdx = idxCounts * 64; long bitsUsedByLongs = longsUsed * 64; return bitsUsedByArrayPointers + bitsUsedByIdx + bitsUsedByLongs; } /** * * Use this method to compact an existing SparseBitSet. Note any attempts to add a new value greater than the max value will result in exception. * * @param sparseBitSet * @return new SparseBitSet that is compact, does not hold null references beyond the max int value added in the given input. */ static SparseBitSet compact(SparseBitSet sparseBitSet) { int maxValueAdded = sparseBitSet.findMaxValue(); // if the given set is empty then compact the sparseBitSet to have only 1 bucket i.e. 64 longs if (maxValueAdded < 0) { maxValueAdded = (1 << BUCKET_SHIFT) - 1; } int indexForMaxValueAdded = getIndex(maxValueAdded); int newLength = indexForMaxValueAdded + 1; return copyWithNewLength(sparseBitSet, newLength, newLength, maxValueAdded); } static SparseBitSet resize(SparseBitSet sparseBitSet, int newMaxValue) { if (sparseBitSet.findMaxValue() < newMaxValue) { int indexForNewMaxValue = getIndex(newMaxValue); int newLength = indexForNewMaxValue + 1; return copyWithNewLength(sparseBitSet, newLength, sparseBitSet.buckets.length(), newMaxValue); } return sparseBitSet; } private static SparseBitSet copyWithNewLength(SparseBitSet sparseBitSet, int newLength, int lengthToClone, int newMaxValue) { AtomicReferenceArray compactBuckets = new AtomicReferenceArray(newLength); for (int i = 0; i < lengthToClone; i++) { if (sparseBitSet.buckets.get(i) != null) compactBuckets.set(i, sparseBitSet.buckets.get(i)); } return new SparseBitSet(newMaxValue, compactBuckets); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy