org.apache.datasketches.tuple.HeapArrayOfDoublesAnotB Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.tuple;
import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
import static org.apache.datasketches.Util.ceilingPowerOf2;
import java.util.Arrays;
import org.apache.datasketches.HashOperations;
import org.apache.datasketches.memory.WritableMemory;
/**
* The on-heap implementation of the set difference operation A and not B for
* tuple sketches of type ArrayOfDoubles.
*/
final class HeapArrayOfDoublesAnotB extends ArrayOfDoublesAnotB {
private boolean isEmpty_ = true;
private long theta_ = Long.MAX_VALUE;
private long[] keys_;
private double[] values_;
private int count_;
private final short seedHash_;
private final int numValues_;
/**
* Creates an instance of HeapArrayOfDoublesAnotB given a custom seed
* @param numValues Number of double values to keep for each key.
* @param seed See seed
*/
HeapArrayOfDoublesAnotB(final int numValues, final long seed) {
numValues_ = numValues;
seedHash_ = Util.computeSeedHash(seed);
}
@Override
public void update(final ArrayOfDoublesSketch a, final ArrayOfDoublesSketch b) {
if (a != null) { Util.checkSeedHashes(seedHash_, a.getSeedHash()); }
if (b != null) { Util.checkSeedHashes(seedHash_, b.getSeedHash()); }
if (a != null) { //stays this way even if we end up with no result entries
isEmpty_ = a.isEmpty();
}
final long thetaA = a == null ? Long.MAX_VALUE : a.getThetaLong();
final long thetaB = b == null ? Long.MAX_VALUE : b.getThetaLong();
theta_ = Math.min(thetaA, thetaB);
if ((a == null) || (a.getRetainedEntries() == 0)) { return; }
if ((b == null) || (b.getRetainedEntries() == 0)) {
getNoMatchSetFromSketch(a);
} else {
final long[] hashTable;
hashTable = convertToHashTable(b, theta_);
final int lgHashTableSize = Integer.numberOfTrailingZeros(hashTable.length);
final int noMatchSize = a.getRetainedEntries();
keys_ = new long[noMatchSize];
values_ = new double[noMatchSize * numValues_];
final ArrayOfDoublesSketchIterator it = a.iterator();
while (it.next()) {
if (it.getKey() < theta_) {
final int index = HashOperations.hashSearch(hashTable, lgHashTableSize, it.getKey());
if (index == -1) {
keys_[count_] = it.getKey();
System.arraycopy(it.getValues(), 0, values_, count_ * numValues_, numValues_);
count_++;
}
}
}
}
}
@Override
public ArrayOfDoublesCompactSketch getResult() {
if (count_ == 0) {
return new
HeapArrayOfDoublesCompactSketch(null, null, Long.MAX_VALUE, true, numValues_, seedHash_);
}
final ArrayOfDoublesCompactSketch result = new HeapArrayOfDoublesCompactSketch(
Arrays.copyOfRange(keys_, 0, count_),
Arrays.copyOfRange(values_, 0, count_ * numValues_),
theta_,
isEmpty_,
numValues_,
seedHash_
);
reset();
return result;
}
@Override
public ArrayOfDoublesCompactSketch getResult(final WritableMemory mem) {
if ((mem == null) || (count_ == 0)) { return getResult(); }
final ArrayOfDoublesCompactSketch result = new DirectArrayOfDoublesCompactSketch(
Arrays.copyOfRange(keys_, 0, count_),
Arrays.copyOfRange(values_, 0, count_ * numValues_),
theta_,
isEmpty_,
numValues_,
seedHash_,
mem
);
reset();
return result;
}
private static long[] convertToHashTable(final ArrayOfDoublesSketch sketch, final long theta) {
final int size = Math.max(
ceilingPowerOf2((int) Math.ceil(sketch.getRetainedEntries() / REBUILD_THRESHOLD)),
1 << MIN_LG_NOM_LONGS
);
final long[] hashTable = new long[size];
final ArrayOfDoublesSketchIterator it = sketch.iterator();
final int lgSize = Integer.numberOfTrailingZeros(size);
while (it.next()) {
if (it.getKey() < theta) {
HashOperations.hashInsertOnly(hashTable, lgSize, it.getKey());
}
}
return hashTable;
}
private void reset() {
isEmpty_ = true;
theta_ = Long.MAX_VALUE;
keys_ = null;
values_ = null;
count_ = 0;
}
private void getNoMatchSetFromSketch(final ArrayOfDoublesSketch sketch) {
count_ = sketch.getRetainedEntries();
keys_ = new long[count_];
values_ = new double[count_ * numValues_];
final ArrayOfDoublesSketchIterator it = sketch.iterator();
int i = 0;
while (it.next()) {
keys_[i] = it.getKey();
System.arraycopy(it.getValues(), 0, values_, i * numValues_, numValues_);
i++;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy