All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.datasketches.tuple.HeapArrayOfDoublesAnotB Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.tuple;

import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
import static org.apache.datasketches.Util.ceilingPowerOf2;

import java.util.Arrays;

import org.apache.datasketches.HashOperations;
import org.apache.datasketches.memory.WritableMemory;

/**
 * The on-heap implementation of the set difference operation A and not B for
 * tuple sketches of type ArrayOfDoubles.
 */
final class HeapArrayOfDoublesAnotB extends ArrayOfDoublesAnotB {

  // Result state produced by the most recent update() and consumed by getResult().
  // Invariant maintained by update() and reset(): isEmpty_ == true implies
  // theta_ == Long.MAX_VALUE and count_ == 0.
  private boolean isEmpty_ = true;
  private long theta_ = Long.MAX_VALUE;
  private long[] keys_;     // first count_ slots hold the surviving keys of A
  private double[] values_; // numValues_ doubles per surviving key, same order as keys_
  private int count_;
  private final short seedHash_;
  private final int numValues_;

  /**
   * Creates an instance of HeapArrayOfDoublesAnotB given a custom seed
   * @param numValues Number of double values to keep for each key.
   * @param seed the seed from which the seed hash is computed; the seed hash of every
   * sketch passed to {@link #update} is checked against it.
   */
  HeapArrayOfDoublesAnotB(final int numValues, final long seed) {
    numValues_ = numValues;
    seedHash_ = Util.computeSeedHash(seed);
  }

  /**
   * Computes the set difference "A and not B" and stores it as the pending result,
   * replacing whatever an earlier call may have left behind.
   *
   * <p>The pending result keeps every entry of A whose key is below
   * min(theta of A, theta of B) and is not found among the keys of B. The result is
   * empty only when A is null or A reports itself empty; a non-empty A that loses all
   * of its entries still yields a non-empty result with zero entries.</p>
   *
   * @param a sketch A, may be null (treated as empty)
   * @param b sketch B, may be null (nothing is subtracted)
   */
  @Override
  public void update(final ArrayOfDoublesSketch a, final ArrayOfDoublesSketch b) {
    if (a != null) { Util.checkSeedHashes(seedHash_, a.getSeedHash()); }
    if (b != null) { Util.checkSeedHashes(seedHash_, b.getSeedHash()); }

    // Each update is independent: drop any state from a previous update that was never
    // consumed, otherwise a stale count_ would be used as the write index below.
    reset();

    if ((a == null) || a.isEmpty()) { return; } // empty result, theta stays at MAX_VALUE

    isEmpty_ = false; //stays this way even if we end up with no result entries
    final long thetaB = (b == null) ? Long.MAX_VALUE : b.getThetaLong();
    theta_ = Math.min(a.getThetaLong(), thetaB);

    final int numEntriesA = a.getRetainedEntries();
    if (numEntriesA == 0) { return; }

    // Only build a lookup table when B actually has keys to subtract.
    final boolean bHasEntries = (b != null) && (b.getRetainedEntries() > 0);
    final long[] hashTable = bHasEntries ? convertToHashTable(b, theta_) : null;
    final int lgHashTableSize =
        bHasEntries ? Integer.numberOfTrailingZeros(hashTable.length) : 0;

    // Sized for the worst case (nothing removed); getResult() trims to count_.
    keys_ = new long[numEntriesA];
    values_ = new double[numEntriesA * numValues_];

    final ArrayOfDoublesSketchIterator it = a.iterator();
    while (it.next()) {
      final long key = it.getKey();
      // B may lower theta even when it retains no entries, so the theta filter
      // must be applied on every path, not only when B has keys.
      if (key >= theta_) { continue; }
      if (bHasEntries
          && (HashOperations.hashSearch(hashTable, lgHashTableSize, key) != -1)) {
        continue; // key is present in B
      }
      keys_[count_] = key;
      System.arraycopy(it.getValues(), 0, values_, count_ * numValues_, numValues_);
      count_++;
    }
  }

  /**
   * Returns the pending result as an on-heap compact sketch and resets this operator.
   * When there are no surviving entries the sketch is created with null key and value
   * arrays but still carries the theta and empty flag computed by {@link #update}.
   *
   * @return the result of the last update, or an empty sketch if there was none
   */
  @Override
  public ArrayOfDoublesCompactSketch getResult() {
    final ArrayOfDoublesCompactSketch result;
    if (count_ == 0) {
      result = new
        HeapArrayOfDoublesCompactSketch(null, null, theta_, isEmpty_, numValues_, seedHash_);
    } else {
      result = new HeapArrayOfDoublesCompactSketch(
        Arrays.copyOfRange(keys_, 0, count_),
        Arrays.copyOfRange(values_, 0, count_ * numValues_),
        theta_,
        isEmpty_,
        numValues_,
        seedHash_
      );
    }
    reset();
    return result;
  }

  /**
   * Returns the pending result as a compact sketch constructed with the given memory
   * and resets this operator. Falls back to the on-heap {@link #getResult()} when
   * {@code mem} is null or when there are no surviving entries.
   *
   * @param mem destination memory handed to the compact sketch, may be null
   * @return the result of the last update, or an empty sketch if there was none
   */
  @Override
  public ArrayOfDoublesCompactSketch getResult(final WritableMemory mem) {
    if ((mem == null) || (count_ == 0)) { return getResult(); }
    final ArrayOfDoublesCompactSketch result = new DirectArrayOfDoublesCompactSketch(
      Arrays.copyOfRange(keys_, 0, count_),
      Arrays.copyOfRange(values_, 0, count_ * numValues_),
      theta_,
      isEmpty_,
      numValues_,
      seedHash_,
      mem
    );
    reset();
    return result;
  }

  /**
   * Builds a hash table holding the keys of the given sketch that are below theta.
   * The table length is a power of two: the retained-entry count divided by
   * REBUILD_THRESHOLD, rounded up, but never less than 2^MIN_LG_NOM_LONGS.
   *
   * @param sketch source of the keys
   * @param theta exclusive upper bound for the keys to insert
   * @return the populated hash table
   */
  private static long[] convertToHashTable(final ArrayOfDoublesSketch sketch, final long theta) {
    final int size = Math.max(
      ceilingPowerOf2((int) Math.ceil(sketch.getRetainedEntries() / REBUILD_THRESHOLD)),
      1 << MIN_LG_NOM_LONGS
    );
    final long[] hashTable = new long[size];
    final ArrayOfDoublesSketchIterator it = sketch.iterator();
    final int lgSize = Integer.numberOfTrailingZeros(size);
    while (it.next()) {
      if (it.getKey() < theta) {
        HashOperations.hashInsertOnly(hashTable, lgSize, it.getKey());
      }
    }
    return hashTable;
  }

  /** Restores the initial state: an empty result with theta at Long.MAX_VALUE. */
  private void reset() {
    isEmpty_ = true;
    theta_ = Long.MAX_VALUE;
    keys_ = null;
    values_ = null;
    count_ = 0;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy