// org.apache.datasketches.theta.HeapAnotB Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import static java.lang.Math.min;
import static org.apache.datasketches.HashOperations.hashArrayInsert;
import static org.apache.datasketches.HashOperations.hashSearch;
import static org.apache.datasketches.theta.CompactSketch.compactCache;
import java.util.Arrays;
import org.apache.datasketches.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
 * Java-heap implementation of the A-and-not-B set operation on Theta sketches.
 *
 * <p>The operation is stateful: {@link #update(Sketch, Sketch)} stores the two input sketches
 * and immediately computes the retained hashes of A that are not found in B, while
 * {@link #getResult(boolean, WritableMemory)} packages that state into a CompactSketch and then
 * returns this object to its initial state.</p>
 *
 * @author Lee Rhodes
 * @author Kevin Lang
 */
final class HeapAnotB extends AnotB {
  private final short seedHash_;  // every input sketch is checked against this seed hash
  private Sketch a_;
  private Sketch b_;
  private long thetaLong_;
  private boolean empty_;
  private long[] cache_;          // hashes of A that had no match in B
  private int curCount_;          // number of valid hashes at the front of cache_
  private int lgArrLongsHT_;      // log2 size of bHashTable_; only meaningful with a hash table
  private long[] bHashTable_;     // the values of B in hash-table form, used for look-ups

  /**
   * Construct a new AnotB SetOperation on the java heap. Called by SetOperation.Builder.
   *
   * @param seed the update seed; it is reduced to a seed hash via Util.computeSeedHash.
   */
  HeapAnotB(final long seed) {
    this(Util.computeSeedHash(seed));
  }

  /**
   * Construct a new AnotB SetOperation on the java heap. Called by PairwiseSetOperation.
   *
   * @param seedHash 16 bit hash of the chosen update seed.
   */
  HeapAnotB(final short seedHash) {
    seedHash_ = seedHash;
    reset();
  }

  /**
   * Loads the two input sketches, discarding any previous state, and computes A-not-B.
   *
   * @param a the sketch whose hashes are retained unless they are found in b; may be null.
   * @param b the sketch whose hashes are removed from a; may be null.
   */
  @Override
  public void update(final Sketch a, final Sketch b) {
    reset(); //start from the initial state, then install the new inputs
    a_ = a;
    b_ = b;
    compute();
  }

  /**
   * Convenience form: performs update(a, b) followed by getResult(dstOrdered, dstMem).
   *
   * @param a the first input sketch; may be null.
   * @param b the second input sketch; may be null.
   * @param dstOrdered if true, the hashes of the result are sorted.
   * @param dstMem passed through to the creation of the result sketch; may be null.
   * @return the result as a CompactSketch.
   */
  @Override
  public CompactSketch aNotB(final Sketch a, final Sketch b, final boolean dstOrdered,
      final WritableMemory dstMem) {
    update(a, b);
    return getResult(dstOrdered, dstMem);
  }

  /**
   * Returns the result as an ordered CompactSketch created without a destination Memory.
   *
   * @return the result as a CompactSketch.
   */
  @Override
  public CompactSketch getResult() {
    return getResult(true, null);
  }

  /**
   * Packages the current state into a CompactSketch and then resets this operation.
   *
   * @param dstOrdered if true, the hashes of the result are sorted.
   * @param dstMem passed through to the creation of the result sketch; may be null.
   * @return the result as a CompactSketch.
   */
  @Override
  public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) {
    final long[] hashes;
    if (curCount_ > 0) {
      hashes = Arrays.copyOf(cache_, curCount_); //trim to the valid entries only
    } else {
      hashes = new long[0];
    }
    final boolean needsSort = dstOrdered && (curCount_ > 1);
    if (needsSort) {
      Arrays.sort(hashes);
    }

    //The result is reported as empty only when it holds no entries AND theta is still 1.0.
    final boolean resultEmpty = (thetaLong_ == Long.MAX_VALUE) && (curCount_ == 0);
    final CompactSketch result = createCompactSketch(
        hashes, resultEmpty, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem);
    reset();
    return result;
  }

  /**
   * Returns the current count of retained hashes.
   *
   * @param valid not used by this implementation.
   */
  @Override
  int getRetainedEntries(final boolean valid) {
    return curCount_;
  }

  @Override
  boolean isEmpty() {
    return empty_;
  }

  /** This implementation always returns false. */
  @Override
  public boolean isSameResource(final Memory that) {
    return false;
  }

  //restricted

  /**
   * Computes A-not-B from the currently loaded sketches.
   *
   * <p>Each input is classified into one of five states and the pair (stateA, stateB) is
   * combined into a single case number: <i>stateA * 8 | stateB</i>.</p>
   *
   * <pre>
   * State  Meaning
   *   0    Null (also used for an EmptyCompactSketch);
   *        interpreted as {Theta = 1.0, count = 0, empty = true}
   *   1    Empty; its Theta may be &lt; 1.0 but is ignored, count must be zero
   *   2    Compact, not ordered
   *   3    Compact, ordered
   *   4    Hash-Table (an UpdateSketch)
   * </pre>
   *
   * <p>Rules for the result triple (Theta, Count, EmptyFlag):</p>
   * <ul>
   * <li>Theta rule: min(ThetaA, ThetaB).</li>
   * <li>Empty rule: whatever the empty state of A is.</li>
   * <li>A in state 0 or 1: result is (1.0, 0, true).</li>
   * <li>A in state 2, 3 or 4 and B in state 0 or 1: result is (ThetaA, |A|, false),
   * a compacted copy of A.</li>
   * <li>A and B both in state 2, 3 or 4: B is used as a hash table (converted first if it is
   * compact) and A is scanned against it. An ordered A (state 3) permits the early-stop scan;
   * otherwise all of A is scanned.</li>
   * </ul>
   *
   * <p>The seed hash of every input that is not in state 0 is checked. The order of the two
   * checks within each case group below is kept exactly as listed, since it determines which
   * mismatch is reported first.</p>
   */
  void compute() {
    final int stateA = sketchState(a_);
    final int stateB = sketchState(b_);
    final int sw = (stateA * 8) | stateB;

    switch (sw) {
      //---- A is Null or Empty: Return (1.0, 0, T) ----
      case 0: //A Null, B Null; nothing to check
        markEmptyResult();
        break;

      case 1: //A Null, B Empty
      case 2: //A Null, B Compact
      case 3: //A Null, B Ordered
      case 4: //A Null, B HashTbl
        verifySeedHash(b_);
        markEmptyResult();
        break;

      case 8: //A Empty, B Null
        verifySeedHash(a_);
        markEmptyResult();
        break;

      case 9: //A Empty, B Empty; B is checked first, then A
        verifySeedHash(b_);
        verifySeedHash(a_);
        markEmptyResult();
        break;

      case 10: //A Empty, B Compact
      case 11: //A Empty, B Ordered
      case 12: //A Empty, B HashTbl; A is checked first, then B
        verifySeedHash(a_);
        verifySeedHash(b_);
        markEmptyResult();
        break;

      //---- A has data, B is Null or Empty: Return (ThA, |A|, F), copyA ----
      case 16: //A Compact, B Null
      case 24: //A Ordered, B Null
      case 32: //A HashTbl, B Null
        verifySeedHash(a_);
        copyAIntoResult();
        break;

      case 17: //A Compact, B Empty
      case 25: //A Ordered, B Empty
      case 33: //A HashTbl, B Empty; B is checked first, then A
        verifySeedHash(b_);
        verifySeedHash(a_);
        copyAIntoResult();
        break;

      //---- A and B both have data: scan A, searching B ----
      case 18: //A Compact, B Compact; B -> H; => C,H
      case 19: //A Compact, B Ordered; B -> H; => C,H
      case 34: //A HashTbl, B Compact; B -> H; => H,H
      case 35: //A HashTbl, B Ordered; B -> H; => H,H
        verifyBothAndSetTheta();
        convertBtoHT();
        scanAllAsearchB();
        break;

      case 26: //A Ordered, B Compact; B -> H; => O,H
      case 27: //A Ordered, B Ordered; B -> H; => O,H
        verifyBothAndSetTheta();
        convertBtoHT();
        scanEarlyStopAsearchB();
        break;

      case 20: //A Compact, B HashTbl
      case 36: //A HashTbl, B HashTbl
        verifyBothAndSetTheta();
        adoptHashTableOfB();
        scanAllAsearchB();
        break;

      case 28: //A Ordered, B HashTbl
        verifyBothAndSetTheta();
        adoptHashTableOfB();
        scanEarlyStopAsearchB();
        break;

      //default: //This cannot happen and cannot be tested
    }
  }

  /**
   * Classifies a sketch into one of the five states documented on compute().
   *
   * @param sk the sketch to classify; may be null.
   * @return 0 (Null), 1 (Empty), 2 (Compact, not ordered), 3 (Compact Ordered) or 4 (Hash-Table).
   */
  private static int sketchState(final Sketch sk) {
    if ((sk == null) || (sk instanceof EmptyCompactSketch)) { return 0; }
    if (sk.isEmpty()) { return 1; }
    if (sk instanceof UpdateSketch) { return 4; }
    if (sk.isOrdered()) { return 3; }
    return 2;
  }

  //Checks the seed hash of the given input sketch against the seed hash of this operation.
  private void verifySeedHash(final Sketch sk) {
    Util.checkSeedHashes(seedHash_, sk.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
  }

  //Sets the result to (1.0, 0, T); the count is already zero after reset().
  private void markEmptyResult() {
    thetaLong_ = Long.MAX_VALUE;
    empty_ = true;
  }

  //Sets the result to (ThA, |A|, F) holding a compacted copy of the hashes of A.
  private void copyAIntoResult() {
    thetaLong_ = a_.getThetaLong();
    empty_ = false;
    curCount_ = a_.getRetainedEntries(true);
    cache_ = compactCache(a_.getCache(), curCount_, thetaLong_, false);
  }

  //Common preamble of all scan cases: check A, then B, then apply the Theta and Empty rules.
  private void verifyBothAndSetTheta() {
    verifySeedHash(a_);
    verifySeedHash(b_);
    thetaLong_ = min(a_.getThetaLong(), b_.getThetaLong());
    empty_ = false;
  }

  //B is already a hash table (an UpdateSketch): use its table and size directly.
  private void adoptHashTableOfB() {
    lgArrLongsHT_ = ((UpdateSketch)b_).getLgArrLongs();
    bHashTable_ = b_.getCache();
  }

  //B is compact: build a temporary hash table from its cache using the current thetaLong_.
  private void convertBtoHT() {
    lgArrLongsHT_ = computeMinLgArrLongsFromCount(b_.getRetainedEntries(true));
    bHashTable_ = new long[1 << lgArrLongsHT_];
    hashArrayInsert(b_.getCache(), bHashTable_, lgArrLongsHT_, thetaLong_);
  }

  //Sketch A is either unordered compact or hash table, so every slot must be examined.
  private void scanAllAsearchB() {
    final long[] hashesA = a_.getCache();
    cache_ = new long[hashesA.length];
    for (final long hash : hashesA) {
      final boolean candidate = (hash > 0L) && (hash < thetaLong_);
      //keep only candidates that are absent from B
      if (candidate && (hashSearch(bHashTable_, lgArrLongsHT_, hash) < 0)) {
        cache_[curCount_++] = hash;
      }
    }
  }

  //Sketch A is ordered compact, which enables early stop
  private void scanEarlyStopAsearchB() {
    final long[] hashesA = a_.getCache();
    cache_ = new long[hashesA.length]; //maybe 2x what is needed, but getRetainedEntries can be slow.
    for (final long hash : hashesA) {
      if (hash <= 0L) { continue; }
      if (hash >= thetaLong_) {
        break; //early stop assumes that hashes in input sketch are ordered!
      }
      if (hashSearch(bHashTable_, lgArrLongsHT_, hash) < 0) { //not found in B, so keep it
        cache_[curCount_++] = hash;
      }
    }
  }

  //Returns all mutable state to its initial values: no inputs, Theta = 1.0, empty, no entries.
  private void reset() {
    a_ = null;
    b_ = null;
    bHashTable_ = null;
    cache_ = null;
    lgArrLongsHT_ = 5;
    curCount_ = 0;
    thetaLong_ = Long.MAX_VALUE;
    empty_ = true;
  }

  /** This implementation always returns null. */
  @Override
  long[] getCache() {
    return null;
  }

  @Override
  short getSeedHash() {
    return seedHash_;
  }

  @Override
  long getThetaLong() {
    return thetaLong_;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy