// org.apache.datasketches.theta.SetOperation Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import static java.lang.Math.max;
import static org.apache.datasketches.Family.idToFamily;
import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import static org.apache.datasketches.Util.MIN_LG_ARR_LONGS;
import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
import static org.apache.datasketches.Util.ceilingPowerOf2;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
import static org.apache.datasketches.theta.Sketch.emptyFromCountAndTheta;
import static org.apache.datasketches.theta.Sketch.thetaOnCompact;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
* The parent API for all Set Operations
*
* @author Lee Rhodes
*/
public abstract class SetOperation {
  // Preamble length constant, in longs. It is not referenced inside this class;
  // presumably consumed by the concrete set-operation implementations (TODO confirm).
  static final int CONST_PREAMBLE_LONGS = 3;

  /** Package-private constructor: subclasses live in this package. */
  SetOperation() {}

  /**
   * Makes a new builder.
   *
   * @return a new {@link SetOperationBuilder}
   */
  public static final SetOperationBuilder builder() {
    return new SetOperationBuilder();
  }

  /**
   * Heapifies the SetOperation image in the given Memory into an on-heap SetOperation,
   * using the Default Update Seed. Delegates to {@link #heapify(Memory, long)}.
   * The resulting SetOperation will not retain any link to the source Memory.
   *
   * @param srcMem an image of a SetOperation where the image seed hash matches the default
   *     seed hash.
   * @return a Heap-based SetOperation from the given Memory
   * @throws SketchesArgumentException if the image's family is neither UNION nor INTERSECTION
   */
  public static SetOperation heapify(final Memory srcMem) {
    return heapify(srcMem, DEFAULT_UPDATE_SEED);
  }

  /**
   * Heapifies the SetOperation image in the given Memory into an on-heap SetOperation,
   * using the given seed.
   * The resulting SetOperation will not retain any link to the source Memory.
   *
   * @param srcMem an image of a SetOperation where the hash of the given seed matches the
   *     image seed hash.
   * @param seed the update hash seed.
   * @return a Heap-based SetOperation from the given Memory
   * @throws SketchesArgumentException if the image's family is neither UNION nor INTERSECTION
   */
  public static SetOperation heapify(final Memory srcMem, final long seed) {
    // The family byte of the image selects which implementation rebuilds it.
    final Family family = idToFamily(srcMem.getByte(FAMILY_BYTE));
    switch (family) {
      case UNION: {
        return UnionImpl.heapifyInstance(srcMem, seed);
      }
      case INTERSECTION: {
        return IntersectionImpl.heapifyInstance(srcMem, seed);
      }
      default: {
        throw new SketchesArgumentException("SetOperation cannot heapify family: " + family);
      }
    }
  }

  /**
   * Wraps the SetOperation image in the given Memory and refers to it directly.
   * There is no data copying onto the java heap.
   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
   * This method assumes the Default Update Seed and delegates to {@link #wrap(Memory, long)}.
   *
   * @param srcMem an image of a SetOperation where the image seed hash matches the default
   *     seed hash.
   * @return a SetOperation backed by the given Memory
   * @throws SketchesArgumentException if the serialization version is not 3 or the family is
   *     neither UNION nor INTERSECTION
   */
  public static SetOperation wrap(final Memory srcMem) {
    return wrap(srcMem, DEFAULT_UPDATE_SEED);
  }

  /**
   * Wraps the SetOperation image in the given Memory and refers to it directly.
   * There is no data copying onto the java heap.
   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
   *
   * @param srcMem an image of a SetOperation where the hash of the given seed matches the
   *     image seed hash.
   * @param seed the update hash seed.
   * @return a SetOperation backed by the given Memory
   * @throws SketchesArgumentException if the serialization version is not 3 or the family is
   *     neither UNION nor INTERSECTION
   */
  public static SetOperation wrap(final Memory srcMem, final long seed) {
    final Family family = familyForWrap(srcMem);
    switch (family) {
      case UNION: {
        return UnionImpl.wrapInstance(srcMem, seed);
      }
      case INTERSECTION: {
        // NOTE(review): IntersectionImplR is used for the non-writable Memory overload,
        // presumably the read-only intersection variant - confirm against that class.
        return IntersectionImplR.wrapInstance(srcMem, seed);
      }
      default: {
        throw new SketchesArgumentException("SetOperation cannot wrap family: " + family);
      }
    }
  }

  /**
   * Wraps the SetOperation image in the given WritableMemory and refers to it directly.
   * There is no data copying onto the java heap.
   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
   * This method assumes the Default Update Seed and delegates to
   * {@link #wrap(WritableMemory, long)}.
   *
   * @param srcMem an image of a SetOperation where the image seed hash matches the default
   *     seed hash.
   * @return a SetOperation backed by the given Memory
   * @throws SketchesArgumentException if the serialization version is not 3 or the family is
   *     neither UNION nor INTERSECTION
   */
  public static SetOperation wrap(final WritableMemory srcMem) {
    return wrap(srcMem, DEFAULT_UPDATE_SEED);
  }

  /**
   * Wraps the SetOperation image in the given WritableMemory and refers to it directly.
   * There is no data copying onto the java heap.
   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
   *
   * @param srcMem an image of a SetOperation where the hash of the given seed matches the
   *     image seed hash.
   * @param seed the update hash seed.
   * @return a SetOperation backed by the given Memory
   * @throws SketchesArgumentException if the serialization version is not 3 or the family is
   *     neither UNION nor INTERSECTION
   */
  public static SetOperation wrap(final WritableMemory srcMem, final long seed) {
    final Family family = familyForWrap(srcMem);
    switch (family) {
      case UNION: {
        return UnionImpl.wrapInstance(srcMem, seed);
      }
      case INTERSECTION: {
        return IntersectionImpl.wrapInstance(srcMem, seed);
      }
      default: {
        throw new SketchesArgumentException("SetOperation cannot wrap family: " + family);
      }
    }
  }

  /**
   * Returns the maximum required storage bytes given a nomEntries parameter for Union
   * operations.
   *
   * @param nomEntries Nominal Entries.
   *     This will become the ceiling power of 2 if it is not.
   * @return the maximum required storage bytes given a nomEntries parameter
   * @throws SketchesArgumentException if the required size does not fit in an int
   */
  public static int getMaxUnionBytes(final int nomEntries) {
    return maxBytesFor(nomEntries, Family.UNION);
  }

  /**
   * Returns the maximum required storage bytes given a nomEntries parameter for Intersection
   * operations.
   *
   * @param nomEntries Nominal Entries.
   *     This will become the ceiling power of 2 if it is not.
   * @return the maximum required storage bytes given a nomEntries parameter
   * @throws SketchesArgumentException if the required size does not fit in an int
   */
  public static int getMaxIntersectionBytes(final int nomEntries) {
    return maxBytesFor(nomEntries, Family.INTERSECTION);
  }

  /**
   * Gets the Family of this SetOperation.
   *
   * @return the Family of this SetOperation
   */
  public abstract Family getFamily();

  /**
   * Returns true if the backing resource of <i>this</i> is identical with the backing resource
   * of <i>that</i>. The capacities must be the same. If <i>this</i> is a region,
   * the region offset must also be the same.
   *
   * @param that A different non-null object
   * @return true if the backing resource of <i>this</i> is the same as the backing resource
   *     of <i>that</i>.
   */
  public abstract boolean isSameResource(Memory that);

  //restricted

  // Returns the implementation's internal array of longs; exact contents are defined by
  // the subclasses (presumably the hash-table cache - TODO confirm).
  abstract long[] getCache();

  //intentionally not made public because behavior will be confusing to end user.
  abstract int getRetainedEntries(boolean valid);

  // Seed hash held by the implementation.
  abstract short getSeedHash();

  // Theta, expressed as a long, held by the implementation.
  abstract long getThetaLong();

  /**
   * Computes the seed hash of the given seed by delegating to {@code Util.computeSeedHash}.
   *
   * @param seed the update hash seed
   * @return the seed hash
   */
  static short computeSeedHash(final long seed) {
    return Util.computeSeedHash(seed);
  }

  //intentionally not made public because behavior will be confusing to end user.
  abstract boolean isEmpty();

  /**
   * Builds the CompactSketch result of a set operation. Used only by the set operations.
   *
   * <p>Theta and the empty flag are first normalized via {@code thetaOnCompact} and
   * {@code emptyFromCountAndTheta}. Then, in order:</p>
   * <ul>
   * <li>an empty result returns the shared EmptyCompactSketch, and if dstMem is non-null the
   * first 8 bytes of its serialized image are written at offset 0;</li>
   * <li>a result with exactly one entry and theta == Long.MAX_VALUE returns a SingleItemSketch,
   * and its 16-byte image is written to dstMem only when dstMem is non-null and has a capacity
   * of at least 16 bytes;</li>
   * <li>otherwise a heap (dstMem == null) or direct compact sketch is produced, ordered or
   * unordered according to dstOrdered.</li>
   * </ul>
   *
   * @param compactCache the compact array of retained hash values
   * @param empty the incoming empty state; recomputed from curCount and thetaLong
   * @param seedHash the seed hash to store in the result
   * @param curCount the number of retained entries
   * @param thetaLong the incoming theta as a long; normalized before use
   * @param dstOrdered true if the result should be an ordered compact sketch
   * @param dstMem optional destination memory; may be null for an on-heap result
   * @return the resulting CompactSketch
   */
  //used only by the set operations
  static final CompactSketch createCompactSketch(final long[] compactCache, boolean empty,
      final short seedHash, final int curCount, long thetaLong, final boolean dstOrdered,
      final WritableMemory dstMem) {
    thetaLong = thetaOnCompact(empty, curCount, thetaLong);
    empty = emptyFromCountAndTheta(curCount, thetaLong);
    final boolean haveDst = dstMem != null;

    if (empty) {
      final EmptyCompactSketch emptySk = EmptyCompactSketch.getInstance();
      if (haveDst) {
        dstMem.putByteArray(0, emptySk.toByteArray(), 0, 8);
      }
      return emptySk;
    }

    // Not empty: exact mode with a single retained entry is a single-item sketch.
    if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) {
      final SingleItemSketch single = new SingleItemSketch(compactCache[0], seedHash);
      // A destination smaller than 16 bytes is left untouched (original behavior).
      if (haveDst && (dstMem.getCapacity() >= 16)) {
        dstMem.putByteArray(0, single.toByteArray(), 0, 16);
      }
      return single;
    }

    // The compact(...) factories below convert to SingleItem format if curCount == 1.
    if (!haveDst) {
      if (dstOrdered) {
        return HeapCompactOrderedSketch.compact(compactCache, empty, seedHash, curCount,
            thetaLong);
      }
      return HeapCompactUnorderedSketch.compact(compactCache, empty, seedHash, curCount,
          thetaLong);
    }
    if (dstOrdered) {
      return DirectCompactOrderedSketch.compact(compactCache, empty, seedHash, curCount,
          thetaLong, dstMem);
    }
    return DirectCompactUnorderedSketch.compact(compactCache, empty, seedHash, curCount,
        thetaLong, dstMem);
  }

  /**
   * Computes minimum lgArrLongs from a current count.
   *
   * <p>The count is divided by REBUILD_THRESHOLD and rounded up, raised to its ceiling power
   * of 2, and floored at {@code 1 << MIN_LG_ARR_LONGS}. The returned value is the number of
   * trailing zero bits of that array size, i.e. its log-base-2 when the size is a power
   * of 2.</p>
   *
   * @param count the given current count
   * @return the minimum lgArrLongs from a current count.
   */
  //Used by intersection and AnotB
  static final int computeMinLgArrLongsFromCount(final int count) {
    final int minEntries = (int) Math.ceil(count / REBUILD_THRESHOLD);
    final int floorArrLongs = 1 << MIN_LG_ARR_LONGS;
    final int arrLongs = max(ceilingPowerOf2(minEntries), floorArrLongs);
    return Integer.numberOfTrailingZeros(arrLongs);
  }

  /**
   * Returns true if given Family id is one of the set operations.
   *
   * @param id the given Family id
   * @return true if given Family id is one of the set operations
   */
  static boolean isValidSetOpID(final int id) {
    final Family family = Family.idToFamily(id);
    return (family == Family.UNION)
        || (family == Family.INTERSECTION)
        || (family == Family.A_NOT_B);
  }

  // Reads the family and validates the serialization version of an image that is about to be
  // wrapped. The family byte is decoded first, then SerVer is checked, matching the order in
  // which the two wrap(..., seed) overloads originally performed these steps.
  private static Family familyForWrap(final Memory srcMem) {
    final Family family = idToFamily(srcMem.getByte(FAMILY_BYTE));
    final int serVer = srcMem.getByte(SER_VER_BYTE);
    if (serVer != 3) {
      throw new SketchesArgumentException("SerVer must be 3: " + serVer);
    }
    return family;
  }

  // Computes 16 bytes per (power-of-2) nominal entry plus 8 bytes per maximum preamble long of
  // the given family. The arithmetic is done in long so that a very large nomEntries raises an
  // exception instead of silently wrapping to a negative or truncated int.
  private static int maxBytesFor(final int nomEntries, final Family family) {
    final long nomEnt = ceilingPowerOf2(nomEntries);
    final long preambleBytes = ((long) family.getMaxPreLongs()) << 3;
    final long totalBytes = (nomEnt << 4) + preambleBytes;
    if (totalBytes > Integer.MAX_VALUE) {
      throw new SketchesArgumentException(
          "Required storage of " + totalBytes + " bytes exceeds Integer.MAX_VALUE for "
          + "nomEntries: " + nomEntries);
    }
    return (int) totalBytes;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy