All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.datasketches.theta.DirectQuickSelectSketchR Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 7.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.theta;

import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_RESIZE_FACTOR_BIT;
import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.P_FLOAT;
import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT;
import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG;
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs;
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;

import org.apache.datasketches.Family;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesReadOnlyException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

/**
 * The default Theta Sketch using the QuickSelect algorithm.
 * This is the read-only implementation with non-functional methods, which affect the state.
 *
 * 

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

* * @author Lee Rhodes * @author Kevin Lang */ class DirectQuickSelectSketchR extends UpdateSketch { static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space final long seed_; //provided, kept only on heap, never serialized. int hashTableThreshold_; //computed, kept only on heap, never serialized. WritableMemory mem_; //A WritableMemory for child class, but no write methods here //only called by DirectQuickSelectSketch and below DirectQuickSelectSketchR(final long seed, final WritableMemory wmem) { seed_ = seed; mem_ = wmem; } /** * Wrap a sketch around the given source Memory containing sketch data that originated from * this sketch. * @param srcMem See Memory * The given Memory object must be in hash table form and not read only. * @param seed See Update Hash Seed * @return instance of this sketch */ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long seed) { final int preambleLongs = extractPreLongs(srcMem); //byte 0 final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 UpdateSketch.checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); final DirectQuickSelectSketchR dqssr = new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); return dqssr; } /** * Fast-wrap a sketch around the given source Memory containing sketch data that originated from * this sketch. This does NO validity checking of the given Memory. * @param srcMem See Memory * The given Memory object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long seed) { final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF; final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF; final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } //Sketch @Override public int getCurrentBytes(final boolean compact) { if (!compact) { final byte lgArrLongs = mem_.getByte(LG_ARR_LONGS_BYTE); final int preambleLongs = mem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; final int lengthBytes = (preambleLongs + (1 << lgArrLongs)) << 3; return lengthBytes; } final int preLongs = getCurrentPreambleLongs(true); final int curCount = getRetainedEntries(true); return (preLongs + curCount) << 3; } @Override public Family getFamily() { final int familyID = mem_.getByte(FAMILY_BYTE) & 0XFF; return Family.idToFamily(familyID); } @Override public int getRetainedEntries(final boolean valid) { //always valid return mem_.getInt(RETAINED_ENTRIES_INT); } @Override public long getThetaLong() { return mem_.getLong(THETA_LONG); } @Override public boolean hasMemory() { return true; } @Override public boolean isDirect() { return mem_.isDirect(); } @Override public boolean isEmpty() { return PreambleUtil.isEmpty(mem_); } @Override public boolean isSameResource(final Memory that) { return mem_.isSameResource(that); } @Override public HashIterator iterator() { return new MemoryHashIterator(mem_, 1 << getLgArrLongs(), getThetaLong()); } @Override public byte[] toByteArray() { //MY_FAMILY is stored in mem_ final byte lgArrLongs = mem_.getByte(LG_ARR_LONGS_BYTE); final int preambleLongs = mem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; final int lengthBytes = (preambleLongs + (1 << lgArrLongs)) << 3; final byte[] byteArray = new byte[lengthBytes]; final WritableMemory mem = 
WritableMemory.wrap(byteArray); mem_.copyTo(0, mem, 0, lengthBytes); return byteArray; } //UpdateSketch @Override public int getLgNomLongs() { return PreambleUtil.extractLgNomLongs(mem_); } @Override public ResizeFactor getResizeFactor() { return ResizeFactor.getRF(getLgRF()); } @Override public UpdateSketch rebuild() { throw new SketchesReadOnlyException(); } @Override public void reset() { throw new SketchesReadOnlyException(); } //restricted methods @Override long[] getCache() { final long lgArrLongs = mem_.getByte(LG_ARR_LONGS_BYTE) & 0XFF; final int preambleLongs = mem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; final long[] cacheArr = new long[1 << lgArrLongs]; final WritableMemory mem = WritableMemory.wrap(cacheArr); mem_.copyTo(preambleLongs << 3, mem, 0, 8 << lgArrLongs); return cacheArr; } @Override int getCurrentPreambleLongs(final boolean compact) { if (!compact) { return PreambleUtil.extractPreLongs(mem_); } return computeCompactPreLongs(getThetaLong(), isEmpty(), getRetainedEntries(true)); } @Override WritableMemory getMemory() { return mem_; } @Override float getP() { return mem_.getFloat(P_FLOAT); } @Override long getSeed() { return seed_; } @Override short getSeedHash() { return (short) PreambleUtil.extractSeedHash(mem_); } @Override boolean isDirty() { return false; //Always false for QuickSelectSketch } @Override boolean isOutOfSpace(final int numEntries) { return numEntries > hashTableThreshold_; } @Override int getLgArrLongs() { return mem_.getByte(LG_ARR_LONGS_BYTE) & 0XFF; } int getLgRF() { //only Direct needs this return (mem_.getByte(PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; } @Override UpdateReturnState hashUpdate(final long hash) { throw new SketchesReadOnlyException(); } /** * Returns the cardinality limit given the current size of the hash table array. * * @param lgNomLongs See lgNomLongs. * @param lgArrLongs See lgArrLongs. 
* @return the hash table threshold */ static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { //FindBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, //but this allows us to tune these constants for different sketches. final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : REBUILD_THRESHOLD; return (int) Math.floor(fraction * (1 << lgArrLongs)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy