All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.questdb.std.LongLongHashSet Maven / Gradle / Ivy

/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2024 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.std;

import io.questdb.std.str.CharSink;
import io.questdb.std.str.Sinkable;
import org.jetbrains.annotations.NotNull;

import java.util.Arrays;

/**
 * Open addressing linear probing hash set specialized in storing long tuples.
 *
 * 

* Currently, it implements only limited amount of methods. Feel free to add more as you need them. *

* Note: * Semantic of capacity differs from collections in the JDK. It says how many elements you * can store in the set without triggering rehashing. This is consistent with other collections * in the QuestDB project. *

* Implementation notes: * The whole set is backed by a single long array. It uses a concept of slots: Each slot owns a tuple of two longs * stored along each other for spatial locality. Hence, when you want to get an index into a backing array you need to * multiply slot by two to get the index of the first long and add one to get the index of the second long. *

* This class is not thread safe. */ public final class LongLongHashSet implements Mutable, Sinkable { public static final SinkStrategy LONG_LONG_STRATEGY = (key1, key2, sink) -> { sink.putAscii('['); Numbers.append(sink, key1, false); sink.putAscii(','); Numbers.append(sink, key2, false); sink.putAscii(']'); }; public static final SinkStrategy UUID_STRATEGY = (key1, key2, sink) -> { sink.putAscii('\''); Numbers.appendUuid(key1, key2, sink); sink.putAscii('\''); }; private static final int MIN_INITIAL_CAPACITY = 16; private final double loadFactor; private final long noEntryKeyValue; private final SinkStrategy sinkStrategy; private int capacity; private int mask; private int size; private long[] values; private boolean hasNull = false; /** * Creates a new set with a given capacity, load factor and no entry sentinel value. *

* No entry sentinel value is used to indicate that a slot is empty. It means that you cannot store a tuple * where both longs are equal to no entry sentinel value. * * @param initialCapacity initial capacity of the set. * @param loadFactor load factor of the set. * @param noEntryValue no entry sentinel value. * @param sinkStrategy sink strategy */ public LongLongHashSet(int initialCapacity, double loadFactor, long noEntryValue, SinkStrategy sinkStrategy) { if (loadFactor <= 0d || loadFactor >= 1d) { throw new IllegalArgumentException("0 < load factor < 1"); } this.noEntryKeyValue = noEntryValue; this.loadFactor = loadFactor; this.capacity = Math.max(initialCapacity, MIN_INITIAL_CAPACITY); int slots = Numbers.ceilPow2((int) (this.capacity / loadFactor)); this.values = new long[2 * slots]; this.mask = slots - 1; this.sinkStrategy = sinkStrategy; Arrays.fill(values, noEntryKeyValue); } /** * Adds a tuple to the set. * * @param key1 first long of the tuple * @param key2 second long of the tuple * @return true if the tuple was added, false if it was already in the set */ public boolean add(long key1, long key2) { if (key1 == noEntryKeyValue && key2 == noEntryKeyValue) { throw new IllegalArgumentException("keys cannot be NO_ENTRY_KEY (" + noEntryKeyValue + ")"); } int slot = keyIndex(key1, key2); if (slot < 0) { return false; } addAt(slot, key1, key2); return true; } /** * Store key1 and key2 at slot. This method does not check if slot is occupied. * * @param slot slot to store key1 and key2 at * @param key1 first key * @param key2 second key */ public void addAt(int slot, long key1, long key2) { set(slot, key1, key2); if (++size == capacity) { rehash(); } } /** * Clears the set. */ @Override public void clear() { Arrays.fill(values, noEntryKeyValue); size = 0; hasNull = false; } public boolean hasNull() { return hasNull; } public void addNull() { this.hasNull = true; } /** * Check if the set contains a tuple with the given keys. * * @return true if the set contains the tuple, false otherwise */ public boolean contains(long key1, long key2) { return keyIndex(key1, key2) < 0; } /** * Get a slot where a give tuple would be stored. *

* If the tuple is already in the set, the slot is negative and the value is the index of the tuple. * If the tuple is not in the set, the slot is positive and the value is the index of the first empty slot. * * @param key1 first key * @param key2 second key * @return slot index */ public int keyIndex(long key1, long key2) { int hash = Hash.hashLong128_32(key1, key2); int index = hash & mask; return probe(key1, key2, index); } /** * Returns the number of elements in the set. * * @return number of elements in the set */ public int size() { return size; } @Override public void toSink(@NotNull CharSink sink) { sink.putAscii('['); boolean pastFirst = false; for (int i = 0, n = values.length; i < n; i += 2) { long key1 = values[i]; long key2 = values[i + 1]; if (key1 != noEntryKeyValue || key2 != noEntryKeyValue) { if (pastFirst) { sink.putAscii(','); } sinkStrategy.put(key1, key2, sink); pastFirst = true; } } sink.putAscii(']'); } private static long firstValue(long[] val, int slot) { return val[slot * 2]; } private static long secondValue(long[] val, int slot) { return val[slot * 2 + 1]; } private int probe(long key1, long key2, int slot) { do { if (firstValue(values, slot) == noEntryKeyValue && secondValue(values, slot) == noEntryKeyValue) { return slot; } if (firstValue(values, slot) == key1 && secondValue(values, slot) == key2) { return -slot - 1; } slot = (slot + 1) & mask; } while (true); } private void rehash() { int newCapacity = capacity * 2; int slots = Numbers.ceilPow2((int) (newCapacity / loadFactor)); if (slots < 0 || slots * 2 < 0) { throw new IllegalStateException("cannot rehash, required capacity is too large. [current-capacity=" + capacity + ", load-factor=" + loadFactor + "]"); } long[] newValues = new long[2 * slots]; Arrays.fill(newValues, noEntryKeyValue); mask = slots - 1; long[] oldKeys = this.values; this.values = newValues; int oldSlots = oldKeys.length / 2; for (int i = 0; i < oldSlots; i++) { long key1 = firstValue(oldKeys, i); long key2 = secondValue(oldKeys, i); if (key1 != noEntryKeyValue || key2 != noEntryKeyValue) { int slot = keyIndex(key1, key2); set(slot, key1, key2); } } capacity = newCapacity; } private void set(int slot, long key1, long key2) { values[slot * 2] = key1; values[slot * 2 + 1] = key2; } public interface SinkStrategy { void put(long key1, long key2, CharSink sink); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy