All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.cpc.PairTable Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.cpc;

import static org.apache.datasketches.cpc.RuntimeAsserts.rtAssert;
import static org.apache.datasketches.cpc.RuntimeAsserts.rtAssertEquals;

import java.util.Arrays;

import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.SketchesStateException;

/**
 * Note: Definition of
 * Snow Plow Effect.
 * @author Lee Rhodes
 * @author Kevin Lang
 */
final class PairTable {
  private static final String LS = System.getProperty("line.separator");
  private static final int upsizeNumer = 3;
  private static final int upsizeDenom = 4;
  private static final int downsizeNumer = 1;
  private static final int downsizeDenom = 4;

  private int lgSizeInts;
  private final int validBits;
  private int numPairs;
  private int[] slotsArr;

  PairTable(final int lgSizeInts, final int numValidBits) {
    checkLgSizeInts(lgSizeInts);
    this.lgSizeInts = lgSizeInts;
    final int numSlots = 1 << lgSizeInts;
    validBits = numValidBits;
    numPairs = 0;
    slotsArr = new int[numSlots];
    for (int i = 0; i < numSlots; i++) { slotsArr[i] = -1; }
  }

  //Factory
  static PairTable newInstanceFromPairsArray(final int[] pairs, final int numPairs, final int lgK) {
    int lgNumSlots = 2;
    while ((upsizeDenom * numPairs) > (upsizeNumer * (1 << lgNumSlots))) {
      lgNumSlots++;
    }
    final PairTable table = new PairTable(lgNumSlots, 6 + lgK);

    /* Note: there is a possible
     * Snow Plow Effect here because
     * the caller is passing in a sorted pairs array. However, we are starting out with the correct
     * final table size, so the problem is not likely to occur.
    */

    for (int i = 0; i < numPairs; i++) {
      mustInsert(table, pairs[i]);
    }
    table.numPairs = numPairs;
    return table;
  }

  PairTable clear() {
    Arrays.fill(slotsArr, -1);
    numPairs = 0;
    return this;
  }

  PairTable copy() {
    final PairTable copy = new PairTable(lgSizeInts, validBits);
    copy.numPairs = numPairs;
    copy.slotsArr = slotsArr.clone(); //slotsArr can never be null
    return copy;
  }

  int getLgSizeInts() {
    return lgSizeInts;
  }

  int getNumPairs() {
    return numPairs;
  }

  int[] getSlotsArr() {
    return slotsArr;
  }

  int getValidBits() {
    return validBits;
  }

  /**
   * Rebuilds to a larger size. NumItems and validBits remain unchanged.
   * @param newLgSizeInts the new size
   * @return a larger PairTable
   */
  PairTable rebuild(final int newLgSizeInts) {
    checkLgSizeInts(newLgSizeInts);
    final int newSize = 1 << newLgSizeInts;
    final int oldSize = 1 << lgSizeInts;
    rtAssert(newSize > numPairs);
    final int[] oldSlotsArr = slotsArr;
    slotsArr = new int[newSize];
    Arrays.fill(slotsArr, -1);
    lgSizeInts = newLgSizeInts;
    for (int i = 0; i < oldSize; i++) {
      final int item = oldSlotsArr[i];
      if (item != -1) { mustInsert(this, item); }
    }
    return this;
  }

  @Override
  public String toString() {
    return toString(false);
  }

  private static void mustInsert(final PairTable table, final int item) {
    //SHARED CODE (implemented as a macro in C and expanded here)
    final int lgSizeInts = table.lgSizeInts;
    final int sizeInts = 1 << lgSizeInts;
    final int mask = sizeInts - 1;
    final int shift = table.validBits - lgSizeInts;
    rtAssert(shift > 0);
    int probe = item >>> shift; //extract high tablesize bits
    rtAssert((probe >= 0) && (probe <= mask));
    final int[] arr = table.slotsArr;
    int fetched = arr[probe];
    while ((fetched != item) && (fetched != -1)) {
      probe = (probe + 1) & mask;
      fetched = arr[probe];
    }
    //END SHARED CODE
    if (fetched == item) { throw new SketchesStateException("PairTable mustInsert() failed"); }
    else {
      assert (fetched == -1);
      arr[probe] = item;
      // counts and resizing must be handled by the caller.
    }
  }

  static boolean maybeInsert(final PairTable table, final int item) {
    //SHARED CODE (implemented as a macro in C and expanded here)
    final int lgSizeInts = table.lgSizeInts;
    final int sizeInts = 1 << lgSizeInts;
    final int mask = sizeInts - 1;
    final int shift = table.validBits - lgSizeInts;
    rtAssert(shift > 0);
    int probe = item >>> shift;
    rtAssert((probe >= 0) && (probe <= mask));
    final int[] arr = table.slotsArr;
    int fetched = arr[probe];
    while ((fetched != item) && (fetched != -1)) {
      probe = (probe + 1) & mask;
      fetched = arr[probe];
    }
    //END SHARED CODE
    if (fetched == item) { return false; }
    else {
      assert (fetched == -1);
      arr[probe] = item;
      table.numPairs += 1;
      while ((upsizeDenom * table.numPairs) > (upsizeNumer * (1 << table.lgSizeInts))) {
        table.rebuild(table.lgSizeInts + 1);
      }
      return true;
    }
  }

  static boolean maybeDelete(final PairTable table, final int item) {
    //SHARED CODE (implemented as a macro in C and expanded here)
    final int lgSizeInts = table.lgSizeInts;
    final int sizeInts = 1 << lgSizeInts;
    final int mask = sizeInts - 1;
    final int shift = table.validBits - lgSizeInts;
    rtAssert(shift > 0);
    int probe = item >>> shift;
    rtAssert((probe >= 0) && (probe <= mask));
    final int[] arr = table.slotsArr;
    int fetched = arr[probe];
    while ((fetched != item) && (fetched != -1)) {
      probe = (probe + 1) & mask;
      fetched = arr[probe];
    }
    //END SHARED CODE
    if (fetched == -1) { return false; }
    else {
      assert (fetched == item);
      // delete the item
      arr[probe] = -1;
      table.numPairs -= 1; assert (table.numPairs >= 0);

      // re-insert all items between the freed slot and the next empty slot
      probe = (probe + 1) & mask; fetched = arr[probe];
      while (fetched != -1) {
        arr[probe] = -1;
        mustInsert(table, fetched);
        probe = (probe + 1) & mask; fetched = arr[probe];
      }

      // shrink if necessary
      while (((downsizeDenom * table.numPairs)
              < (downsizeNumer * (1 << table.lgSizeInts))) && (table.lgSizeInts > 2)) {
        table.rebuild(table.lgSizeInts - 1);
      }
      return true;
    }
  }

  /**
   * While extracting the items from a linear probing hashtable,
   * this will usually undo the wrap-around provided that the table
   * isn't too full. Experiments suggest that for sufficiently large tables
   * the load factor would have to be over 90 percent before this would fail frequently,
   * and even then the subsequent sort would fix things up.
   * @param table the given table to unwrap
   * @param numPairs the number of valid pairs in the table
   * @return the unwrapped table
   */
  static int[] unwrappingGetItems(final PairTable table, final int numPairs) {
    if (numPairs < 1) { return null; }
    final int[] slotsArr = table.slotsArr;
    final int tableSize = 1 << table.lgSizeInts;
    final int[] result = new int[numPairs];
    int i = 0;
    int l = 0;
    int r = numPairs - 1;

    // Special rules for the region before the first empty slot.
    final int hiBit = 1 << (table.validBits - 1);
    while ((i < tableSize) && (slotsArr[i] != -1)) {
      final int item = slotsArr[i++];
      if ((item & hiBit) != 0) { result[r--] = item; } // This item was probably wrapped, so move to end.
      else                     { result[l++] = item; }
    }

    // The rest of the table is processed normally.
    while (i < tableSize) {
      final int look = slotsArr[i++];
      if (look != -1) { result[l++] = look; }
    }
    assert l == (r + 1);
    return result;
  }

  /**
   * In applications where the input array is already nearly sorted,
   * insertion sort runs in linear time with a very small constant.
   * This introspective version of insertion sort protects against
   * the quadratic cost of sorting bad input arrays.
   * It keeps track of how much work has been done, and if that exceeds a
   * constant times the array length, it switches to a different sorting algorithm.
   * @param a the array to sort
   * @param l points AT the leftmost element, i.e., inclusive.
   * @param r points AT the rightmost element, i.e., inclusive.
   */
  static void introspectiveInsertionSort(final int[] a, final int l, final int r) {
    final int length = (r - l) + 1;
    long cost = 0;
    final long costLimit = 8L * length;
    for (int i = l + 1; i <= r; i++) {
      int j = i;
      final long v = a[i] & 0XFFFF_FFFFL; //v must be long
      while ((j >= (l + 1)) && (v < ((a[j - 1]) & 0XFFFF_FFFFL))) {
        a[j] = a[j - 1];
        j -= 1;
      }
      a[j] = (int) v;
      cost += (i - j); // distance moved is a measure of work

      if (cost > costLimit) {
        //we need an unsigned sort, but it is linear time and very rarely occurs
        final long[] b = new long[a.length];
        for (int m = 0; m < a.length; m++) { b[m] = a[m] & 0XFFFF_FFFFL; }
        Arrays.sort(b, l, r + 1);
        for (int m = 0; m < a.length; m++) { a[m] = (int) b[m]; }
        // The following sanity check can be used during development
        //int bad = 0;
        //for (int m = l; m < (r - 1); m++) {
        //  final long b1 = a[m] & 0XFFFF_FFFFL;
        //  final long b2 = a[m + 1] & 0XFFFF_FFFFL;
        //  if (b1 > b2) { bad++; }
        //}
        //assert (bad == 0);
        return;
      }
    }
    // The following sanity check can be used during development
    //    int bad = 0;
    //    for (int m = l; m < (r - 1); m++) {
    //      final long b1 = a[m] & 0XFFFF_FFFFL;
    //      final long b2 = a[m + 1] & 0XFFFF_FFFFL;
    //      if (b1 > b2) { bad++; }
    //    }
    //    assert (bad == 0);
  }

  static void merge(
      final int[] arrA, final int startA, final int lengthA, //input
      final int[] arrB, final int startB, final int lengthB, //input
      final int[] arrC, final int startC) { //output

    final int lengthC = lengthA + lengthB;
    final int limA = startA + lengthA;
    final int limB = startB + lengthB;
    final int limC = startC + lengthC;
    int a = startA;
    int b = startB;
    int c = startC;
    for ( ; c < limC ; c++) {
      if      (b >= limB)         { arrC[c] = arrA[a++]; }
      else if (a >= limA)         { arrC[c] = arrB[b++]; }
      else {
        final long aa = arrA[a] & 0XFFFF_FFFFL;
        final long bb = arrB[b] & 0XFFFF_FFFFL;
        if (aa < bb)    { arrC[c] = arrA[a++]; }
        else            { arrC[c] = arrB[b++]; }
      }
    }
    assert (a == limA);
    assert (b == limB);
  }

  static boolean equals(final PairTable a, final PairTable b) {
    if ((a == null) && (b == null)) { return true; }
    if ((a == null) || (b == null)) {
      throw new SketchesArgumentException("PairTable " + ((a == null) ? "a" : "b") + " is null");
    }
    //Note: lgSize array sizes may not be equal, but contents still can be equal
    rtAssertEquals(a.validBits, b.validBits);
    final int numPairsA = a.numPairs;
    final int numPairsB = b.numPairs;
    rtAssertEquals(numPairsA, numPairsB);
    final int[] pairsA = PairTable.unwrappingGetItems(a, numPairsA);
    final int[] pairsB = PairTable.unwrappingGetItems(b, numPairsB);
    PairTable.introspectiveInsertionSort(pairsA, 0, numPairsA - 1);
    PairTable.introspectiveInsertionSort(pairsB, 0, numPairsB - 1);
    rtAssertEquals(pairsA, pairsB);
    return true;
  }

  String toString(final boolean detail) {
    final StringBuilder sb = new StringBuilder();
    final int sizeInts = 1 << lgSizeInts;
    sb.append("PairTable").append(LS);
    sb.append("  Lg Size Ints  : ").append(lgSizeInts).append(LS);
    sb.append("  Size Ints     : ").append(sizeInts).append(LS);
    sb.append("  Valid Bits    : ").append(validBits).append(LS);
    sb.append("  Num Pairs     : ").append(numPairs).append(LS);
    if (detail) {
      sb.append("  DATA (hex) : ").append(LS);
      final String hdr = String.format("%9s %9s %9s %4s", "Index","Word","Row","Col");
      sb.append(hdr).append(LS);
      for (int i = 0; i < sizeInts; i++) {
        final int word = slotsArr[i];
        if (word == -1) { //empty
          final String h = String.format("%9d %9s", i, "--");
          sb.append(h).append(LS);
        } else { //data
          final int row = word >>> 6;
          final int col = word & 0X3F;
          final String wordStr = Long.toHexString(word & 0XFFFF_FFFFL);
          final String data = String.format("%9d %9s %9d %4d", i, wordStr, row, col);
          sb.append(data).append(LS);
        }
      }
    }
    return sb.toString();
  }

  private static void checkLgSizeInts(final int lgSizeInts) {
    if ((lgSizeInts < 2) || (lgSizeInts > 26)) {
      throw new SketchesArgumentException("Illegal LgSizeInts: " + lgSizeInts);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy