All downloads are free. The search and download features use the official Maven repository.

org.apache.datasketches.theta.IntersectionImplR Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.theta;

import static org.apache.datasketches.Util.MIN_LG_ARR_LONGS;
import static org.apache.datasketches.Util.floorPowerOf2;
import static org.apache.datasketches.theta.CompactSketch.compactCachePart;
import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.P_FLOAT;
import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT;
import static org.apache.datasketches.theta.PreambleUtil.SEED_HASH_SHORT;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG;

import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.SketchesReadOnlyException;
import org.apache.datasketches.SketchesStateException;
import org.apache.datasketches.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

/**
 * Intersection operation for Theta Sketches.
 *
 * 

This implementation uses data either on-heap or off-heap in a given Memory * that is owned and managed by the caller. * The off-heap Memory, which if managed properly, will greatly reduce the need for * the JVM to perform garbage collection.

* * @author Lee Rhodes * @author Kevin Lang */ class IntersectionImplR extends Intersection { protected final short seedHash_; protected final WritableMemory mem_; //Note: Intersection does not use lgNomLongs or k, per se. protected int lgArrLongs_; //current size of hash table protected int curCount_; //curCount of HT, if < 0 means Universal Set (US) is true protected long thetaLong_; protected boolean empty_; protected long[] hashTable_ = null; //HT => Data. Only used On Heap protected int maxLgArrLongs_ = 0; //max size of hash table. Only used Off Heap IntersectionImplR(final WritableMemory mem, final long seed, final boolean newMem) { mem_ = mem; if (mem != null) { if (newMem) { seedHash_ = computeSeedHash(seed); mem_.putShort(SEED_HASH_SHORT, seedHash_); } else { seedHash_ = mem_.getShort(SEED_HASH_SHORT); Util.checkSeedHashes(seedHash_, computeSeedHash(seed)); //check for seed hash conflict } } else { seedHash_ = computeSeedHash(seed); } } IntersectionImplR(final short seedHash) { seedHash_ = seedHash; mem_ = null; lgArrLongs_ = 0; curCount_ = -1; thetaLong_ = Long.MAX_VALUE; empty_ = false; hashTable_ = null; } /** * Wrap an Intersection target around the given source Memory containing intersection data. * @param srcMem The source Memory image. * See Memory * @param seed See seed * @return an IntersectionImplR that wraps a read-only Intersection image referenced by srcMem */ static IntersectionImplR wrapInstance(final Memory srcMem, final long seed) { final IntersectionImplR impl = new IntersectionImplR((WritableMemory) srcMem, seed, false); return internalWrapInstance(srcMem, impl); } static IntersectionImplR internalWrapInstance(final Memory srcMem, final IntersectionImplR impl) { //Get Preamble //Note: Intersection does not use lgNomLongs (or k), per se. 
//seedHash loaded and checked in constructor final int preLongsMem = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; final int famID = srcMem.getByte(FAMILY_BYTE) & 0XFF; final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF; final int flags = srcMem.getByte(FLAGS_BYTE) & 0XFF; final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; //Checks if (preLongsMem != CONST_PREAMBLE_LONGS) { throw new SketchesArgumentException( "Memory PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongsMem); } if (serVer != SER_VER) { throw new SketchesArgumentException("Serialization Version must equal " + SER_VER); } Family.INTERSECTION.checkFamilyID(famID); final int curCount = srcMem.getInt(RETAINED_ENTRIES_INT); final long thetaLong = srcMem.getLong(THETA_LONG); if (empty) { if (curCount != 0) { throw new SketchesArgumentException( "srcMem empty state inconsistent with curCount: " + empty + "," + curCount); } //empty = true AND curCount_ = 0: OK } //else empty = false, curCount could be anything //Initialize impl.lgArrLongs_ = lgArrLongs; impl.curCount_ = curCount; impl.thetaLong_ = thetaLong; impl.empty_ = empty; impl.maxLgArrLongs_ = checkMaxLgArrLongs(srcMem); //Only Off Heap, check for min size return impl; } @Override public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) { if (curCount_ < 0) { throw new SketchesStateException( "Calling getResult() with no intervening intersections would represent the infinite set, " + "which is not a legal result."); } long[] compactCacheR; if (curCount_ == 0) { compactCacheR = new long[0]; return createCompactSketch( compactCacheR, empty_, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem); } //else curCount > 0 final long[] hashTable; if (mem_ != null) { final int htLen = 1 << lgArrLongs_; hashTable = new long[htLen]; mem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); } else { hashTable = hashTable_; } compactCacheR = 
compactCachePart(hashTable, lgArrLongs_, curCount_, thetaLong_, dstOrdered); //Create the CompactSketch return createCompactSketch( compactCacheR, empty_, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem); } @Override public CompactSketch getResult() { return getResult(true, null); } /** * Gets the number of retained entries from this operation. If negative, it is interpreted * as the infinite Universal Set. */ @Override int getRetainedEntries(final boolean valid) { return curCount_; } @Override public boolean hasResult() { return (mem_ != null) ? mem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; } @Override boolean isEmpty() { return empty_; } @Override public boolean isSameResource(final Memory that) { return (mem_ != null) ? mem_.isSameResource(that) : false; } @Override public void reset() { throw new SketchesReadOnlyException(); } @Override public byte[] toByteArray() { final int preBytes = CONST_PREAMBLE_LONGS << 3; final int dataBytes = (curCount_ > 0) ? 8 << lgArrLongs_ : 0; final byte[] byteArrOut = new byte[preBytes + dataBytes]; if (mem_ != null) { mem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes); } else { final WritableMemory memOut = WritableMemory.wrap(byteArrOut); //preamble memOut.putByte(PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 memOut.putByte(SER_VER_BYTE, (byte) SER_VER); memOut.putByte(FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); memOut.putByte(LG_NOM_LONGS_BYTE, (byte) 0); //not used memOut.putByte(LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); if (empty_) { memOut.setBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } else { memOut.clearBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } memOut.putShort(SEED_HASH_SHORT, seedHash_); memOut.putInt(RETAINED_ENTRIES_INT, curCount_); memOut.putFloat(P_FLOAT, (float) 1.0); memOut.putLong(THETA_LONG, thetaLong_); //data if (curCount_ > 0) { memOut.putLongArray(preBytes, hashTable_, 0, 1 << lgArrLongs_); } } return byteArrOut; } @Override public void update(final Sketch 
sketchIn) { throw new SketchesReadOnlyException(); } @Override public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, final WritableMemory dstMem) { throw new SketchesReadOnlyException(); } //restricted @Override long[] getCache() { if (mem_ == null) { return (hashTable_ != null) ? hashTable_ : new long[0]; } //Direct final int arrLongs = 1 << lgArrLongs_; final long[] outArr = new long[arrLongs]; mem_.getLongArray(CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); return outArr; } @Override short getSeedHash() { return seedHash_; } @Override long getThetaLong() { return thetaLong_; } /** * Returns the correct maximum lgArrLongs given the capacity of the Memory. Checks that the * capacity is large enough for the minimum sized hash table. * @param dstMem the given Memory * @return the correct maximum lgArrLongs given the capacity of the Memory */ static final int checkMaxLgArrLongs(final Memory dstMem) { final int preBytes = CONST_PREAMBLE_LONGS << 3; final long cap = dstMem.getCapacity(); final int maxLgArrLongs = Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3); if (maxLgArrLongs < MIN_LG_ARR_LONGS) { throw new SketchesArgumentException( "dstMem not large enough for minimum sized hash table: " + cap); } return maxLgArrLongs; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy