org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesAnotBImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datasketches-java Show documentation
Show all versions of datasketches-java Show documentation
Core sketch algorithms used alone and by other Java repositories in the DataSketches library.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.tuple.arrayofdoubles;
import static java.lang.Math.min;
import static org.apache.datasketches.common.Util.exactLog2OfLong;
import static org.apache.datasketches.thetacommon.HashOperations.continueCondition;
import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable;
import static org.apache.datasketches.thetacommon.HashOperations.count;
import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;
import java.util.Arrays;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SuppressFBWarnings;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.thetacommon.SetOperationCornerCases;
import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction;
import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.Util;
/**
* Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches.
*
* This class includes a stateless operation as follows:
*
*
* CompactSketch csk = anotb.aNotB(ArrayOfDoublesSketch skA, ArrayOfDoublesSketch skB);
*
*
* @author Lee Rhodes
*/
public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB {
private int numValues_;
private short seedHash_;
private long thetaLong_ = Long.MAX_VALUE;
private boolean empty_ = true;
private long[] keys_;
private double[] values_;
private int count_;
ArrayOfDoublesAnotBImpl(final int numValues, final long seed) {
numValues_ = numValues;
seedHash_ = Util.computeSeedHash(seed);
}
@Override
@SuppressFBWarnings(value = "EI_EXPOSE_REP2", justification = "This is OK here")
public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) {
if (skA == null || skB == null) {
throw new SketchesArgumentException("Neither argument may be null.");
}
numValues_ = skA.getNumValues();
seedHash_ = skA.getSeedHash();
if (numValues_ != skB.getNumValues()) {
throw new SketchesArgumentException("Inputs cannot have different numValues");
}
if (seedHash_ != skB.getSeedHash()) {
throw new SketchesArgumentException("Inputs cannot have different seedHashes");
}
final long thetaLongA = skA.getThetaLong();
final int countA = skA.getRetainedEntries();
final boolean emptyA = skA.isEmpty();
final long thetaLongB = skB.getThetaLong();
final int countB = skB.getRetainedEntries();
final boolean emptyB = skB.isEmpty();
final int id =
SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB);
final CornerCase cCase = CornerCase.caseIdToCornerCase(id);
final AnotbAction anotbAction = cCase.getAnotbAction();
final long minThetaLong = min(thetaLongA, thetaLongB);
switch (anotbAction) {
case EMPTY_1_0_T: {
reset();
break;
}
case DEGEN_MIN_0_F: {
keys_ = null;
values_ = null;
thetaLong_ = minThetaLong;
empty_ = false;
count_ = 0;
break;
}
case DEGEN_THA_0_F: {
keys_ = null;
values_ = null;
thetaLong_ = thetaLongA;
empty_ = false;
count_ = 0;
break;
}
case TRIM_A: {
final DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA);
final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_);
keys_ = da.hashArr;
values_ = da.valuesArr;
thetaLong_ = minThetaLong;
empty_ = skA.isEmpty();
count_ = da.count;
break;
}
case SKETCH_A: {
final ArrayOfDoublesCompactSketch csk = skA.compact();
keys_ = csk.getKeys();
values_ = csk.getValuesAsOneDimension();
thetaLong_ = csk.thetaLong_;
empty_ = csk.isEmpty();
count_ = csk.getRetainedEntries();
break;
}
case FULL_ANOTB: { //both A and B should have valid entries.
final long[] keysA = skA.getKeys();
final double[] valuesA = skA.getValuesAsOneDimension();
final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB);
count_ = daR.count;
keys_ = (count_ == 0) ? null : daR.hashArr;
values_ = (count_ == 0) ? null : daR.valuesArr;
thetaLong_ = minThetaLong;
empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0);
break;
}
//default: not possible
}
}
@Override
public ArrayOfDoublesCompactSketch getResult() {
return new HeapArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_);
}
@Override
public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) {
return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstMem);
}
private static DataArrays getResultArrays(
final long minThetaLong,
final int countA,
final long[] hashArrA,
final double[] valuesArrA,
final ArrayOfDoublesSketch skB) {
final int numValues = skB.numValues_;
//create hashtable of skB
final long[] hashTableB = convertToHashTable(skB.getKeys(), skB.getRetainedEntries(), minThetaLong,
ThetaUtil.REBUILD_THRESHOLD);
//build temporary arrays of skA
long[] tmpHashArrA = new long[countA];
double[] tmpValuesArrA = new double[countA * numValues];
//search for non matches and build temp arrays
final int lgHTBLen = exactLog2OfLong(hashTableB.length);
int nonMatches = 0;
for (int i = 0; i < countA; i++) {
final long hash = hashArrA[i];
if (continueCondition(minThetaLong, hash)) { continue; }
final int index = hashSearch(hashTableB, lgHTBLen, hash);
if (index == -1) {
tmpHashArrA[nonMatches] = hash;
System.arraycopy(valuesArrA, i * numValues, tmpValuesArrA, nonMatches * numValues, numValues);
nonMatches++;
}
}
tmpHashArrA = Arrays.copyOf(tmpHashArrA, nonMatches);
tmpValuesArrA = Arrays.copyOf(tmpValuesArrA, nonMatches * numValues);
final DataArrays daR = new DataArrays(tmpHashArrA, tmpValuesArrA, nonMatches);
return daR;
}
private static class DataArrays {
long[] hashArr;
double[] valuesArr;
int count;
DataArrays(final long[] hashArr, final double[] valuesArr, final int count) {
this.hashArr = hashArr;
this.valuesArr = valuesArr;
this.count = count;
}
}
private static DataArrays trimDataArrays(final DataArrays da, final long thetaLong, final int numValues) {
final long[] hashArrIn = da.hashArr;
final double[] valuesArrIn = da.valuesArr;
final int count = count(hashArrIn, thetaLong);
final long[] hashArrOut = new long[count];
final double[] valuesArrOut = new double[count * numValues];
int haInIdx;
int vaInIdx = 0;
int haOutIdx = 0;
int vaOutIdx = 0;
for (haInIdx = 0; haInIdx < count; haInIdx++, vaInIdx += numValues) {
final long hash = hashArrIn[haInIdx];
if (continueCondition(thetaLong, hash)) { continue; }
hashArrOut[haOutIdx] = hashArrIn[haInIdx];
System.arraycopy(valuesArrIn, vaInIdx, valuesArrOut, vaOutIdx, numValues);
haOutIdx++;
vaOutIdx += numValues;
}
return new DataArrays(hashArrOut, valuesArrOut, count);
}
private void reset() {
empty_ = true;
thetaLong_ = Long.MAX_VALUE;
keys_ = null;
values_ = null;
count_ = 0;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy