All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesAnotBImpl Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 6.1.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.tuple.arrayofdoubles;

import static java.lang.Math.min;
import static org.apache.datasketches.common.Util.exactLog2OfLong;
import static org.apache.datasketches.thetacommon.HashOperations.continueCondition;
import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable;
import static org.apache.datasketches.thetacommon.HashOperations.count;
import static org.apache.datasketches.thetacommon.HashOperations.hashSearch;

import java.util.Arrays;

import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SuppressFBWarnings;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.thetacommon.SetOperationCornerCases;
import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction;
import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.Util;

/**
 * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches.
 *
 * 

This class includes a stateless operation as follows:

* *

 * CompactSketch csk = anotb.aNotB(ArrayOfDoublesSketch skA, ArrayOfDoublesSketch skB);
 * 
* * @author Lee Rhodes */ public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { private int numValues_; private short seedHash_; private long thetaLong_ = Long.MAX_VALUE; private boolean empty_ = true; private long[] keys_; private double[] values_; private int count_; ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { numValues_ = numValues; seedHash_ = Util.computeSeedHash(seed); } @Override @SuppressFBWarnings(value = "EI_EXPOSE_REP2", justification = "This is OK here") public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) { if (skA == null || skB == null) { throw new SketchesArgumentException("Neither argument may be null."); } numValues_ = skA.getNumValues(); seedHash_ = skA.getSeedHash(); if (numValues_ != skB.getNumValues()) { throw new SketchesArgumentException("Inputs cannot have different numValues"); } if (seedHash_ != skB.getSeedHash()) { throw new SketchesArgumentException("Inputs cannot have different seedHashes"); } final long thetaLongA = skA.getThetaLong(); final int countA = skA.getRetainedEntries(); final boolean emptyA = skA.isEmpty(); final long thetaLongB = skB.getThetaLong(); final int countB = skB.getRetainedEntries(); final boolean emptyB = skB.isEmpty(); final int id = SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); final CornerCase cCase = CornerCase.caseIdToCornerCase(id); final AnotbAction anotbAction = cCase.getAnotbAction(); final long minThetaLong = min(thetaLongA, thetaLongB); switch (anotbAction) { case EMPTY_1_0_T: { reset(); break; } case DEGEN_MIN_0_F: { keys_ = null; values_ = null; thetaLong_ = minThetaLong; empty_ = false; count_ = 0; break; } case DEGEN_THA_0_F: { keys_ = null; values_ = null; thetaLong_ = thetaLongA; empty_ = false; count_ = 0; break; } case TRIM_A: { final DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA); final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_); keys_ = da.hashArr; values_ = da.valuesArr; thetaLong_ = minThetaLong; empty_ = skA.isEmpty(); count_ = da.count; break; } case SKETCH_A: { final ArrayOfDoublesCompactSketch csk = skA.compact(); keys_ = csk.getKeys(); values_ = csk.getValuesAsOneDimension(); thetaLong_ = csk.thetaLong_; empty_ = csk.isEmpty(); count_ = csk.getRetainedEntries(); break; } case FULL_ANOTB: { //both A and B should have valid entries. final long[] keysA = skA.getKeys(); final double[] valuesA = skA.getValuesAsOneDimension(); final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB); count_ = daR.count; keys_ = (count_ == 0) ? null : daR.hashArr; values_ = (count_ == 0) ? null : daR.valuesArr; thetaLong_ = minThetaLong; empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0); break; } //default: not possible } } @Override public ArrayOfDoublesCompactSketch getResult() { return new HeapArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_); } @Override public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) { return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstMem); } private static DataArrays getResultArrays( final long minThetaLong, final int countA, final long[] hashArrA, final double[] valuesArrA, final ArrayOfDoublesSketch skB) { final int numValues = skB.numValues_; //create hashtable of skB final long[] hashTableB = convertToHashTable(skB.getKeys(), skB.getRetainedEntries(), minThetaLong, ThetaUtil.REBUILD_THRESHOLD); //build temporary arrays of skA long[] tmpHashArrA = new long[countA]; double[] tmpValuesArrA = new double[countA * numValues]; //search for non matches and build temp arrays final int lgHTBLen = exactLog2OfLong(hashTableB.length); int nonMatches = 0; for (int i = 0; i < countA; i++) { final long hash = hashArrA[i]; if (continueCondition(minThetaLong, hash)) { continue; } final int index = hashSearch(hashTableB, lgHTBLen, hash); if (index == -1) { tmpHashArrA[nonMatches] = hash; System.arraycopy(valuesArrA, i * numValues, tmpValuesArrA, nonMatches * numValues, numValues); nonMatches++; } } tmpHashArrA = Arrays.copyOf(tmpHashArrA, nonMatches); tmpValuesArrA = Arrays.copyOf(tmpValuesArrA, nonMatches * numValues); final DataArrays daR = new DataArrays(tmpHashArrA, tmpValuesArrA, nonMatches); return daR; } private static class DataArrays { long[] hashArr; double[] valuesArr; int count; DataArrays(final long[] hashArr, final double[] valuesArr, final int count) { this.hashArr = hashArr; this.valuesArr = valuesArr; this.count = count; } } private static DataArrays trimDataArrays(final DataArrays da, final long thetaLong, final int numValues) { final long[] hashArrIn = da.hashArr; final double[] valuesArrIn = da.valuesArr; final int count = count(hashArrIn, thetaLong); final long[] hashArrOut = new long[count]; final double[] valuesArrOut = new double[count * numValues]; int haInIdx; int vaInIdx = 0; int haOutIdx = 0; int vaOutIdx = 0; for (haInIdx = 0; haInIdx < count; haInIdx++, vaInIdx += numValues) { final long hash = hashArrIn[haInIdx]; if (continueCondition(thetaLong, hash)) { continue; } hashArrOut[haOutIdx] = hashArrIn[haInIdx]; System.arraycopy(valuesArrIn, vaInIdx, valuesArrOut, vaOutIdx, numValues); haOutIdx++; vaOutIdx += numValues; } return new DataArrays(hashArrOut, valuesArrOut, count); } private void reset() { empty_ = true; thetaLong_ = Long.MAX_VALUE; keys_ = null; values_ = null; count_ = 0; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy