All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.util.FrequencyTrackingRingBuffer Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;


import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/**
 * A ring buffer that tracks the frequency of the integers that it contains.
 * This is typically useful to track the hash codes of popular recently-used
 * items.
 *
 * This data-structure requires 22 bytes per entry on average (between 16 and
 * 28).
 *
 * @lucene.internal
 */
public final class FrequencyTrackingRingBuffer implements Accountable {

  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FrequencyTrackingRingBuffer.class);

  private final int maxSize;
  private final int[] buffer;
  private int position;
  private final IntBag frequencies;

  /** Create a new ring buffer that will contain at most maxSize items.
   *  This buffer will initially contain maxSize times the
   *  sentinel value. */
  public FrequencyTrackingRingBuffer(int maxSize, int sentinel) {
    if (maxSize < 2) {
      throw new IllegalArgumentException("maxSize must be at least 2");
    }
    this.maxSize = maxSize;
    buffer = new int[maxSize];
    position = 0;
    frequencies = new IntBag(maxSize);

    Arrays.fill(buffer, sentinel);
    for (int i = 0; i < maxSize; ++i) {
      frequencies.add(sentinel);
    }
    assert frequencies.frequency(sentinel) == maxSize;
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED
        + frequencies.ramBytesUsed()
        + RamUsageEstimator.sizeOf(buffer);
  }

  /**
   * Add a new item to this ring buffer, potentially removing the oldest
   * entry from this buffer if it is already full.
   */
  public void add(int i) {
    // remove the previous value
    final int removed = buffer[position];
    final boolean removedFromBag = frequencies.remove(removed);
    assert removedFromBag;
    // add the new value
    buffer[position] = i;
    frequencies.add(i);
    // increment the position
    position += 1;
    if (position == maxSize) {
      position = 0;
    }
  }

  /**
   * Returns the frequency of the provided key in the ring buffer.
   */
  public int frequency(int key) {
    return frequencies.frequency(key);
  }

  // pkg-private for testing
  Map asFrequencyMap() {
    return frequencies.asMap();
  }

  /**
   * A bag of integers.
   * Since in the context of the ring buffer the maximum size is known up-front
   * there is no need to worry about resizing the underlying storage.
   */
  private static class IntBag implements Accountable {

    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntBag.class);

    private final int[] keys;
    private final int[] freqs;
    private final int mask;

    IntBag(int maxSize) {
      // load factor of 2/3
      int capacity = Math.max(2, maxSize * 3 / 2);
      // round up to the next power of two
      capacity = Integer.highestOneBit(capacity - 1) << 1;
      assert capacity > maxSize;
      keys = new int[capacity];
      freqs = new int[capacity];
      mask = capacity - 1;
    }

    @Override
    public long ramBytesUsed() {
      return BASE_RAM_BYTES_USED
          + RamUsageEstimator.sizeOf(keys)
          + RamUsageEstimator.sizeOf(freqs);
    }

    /** Return the frequency of the give key in the bag. */
    int frequency(int key) {
      for (int slot = key & mask; ; slot = (slot + 1) & mask) {
        if (keys[slot] == key) {
          return freqs[slot];
        } else if (freqs[slot] == 0) {
          return 0;
        }
      }
    }

    /** Increment the frequency of the given key by 1 and return its new frequency. */
    int add(int key) {
      for (int slot = key & mask; ; slot = (slot + 1) & mask) {
        if (freqs[slot] == 0) {
          keys[slot] = key;
          return freqs[slot] = 1;
        } else if (keys[slot] == key) {
          return ++freqs[slot];
        }
      }
    }

    /** Decrement the frequency of the given key by one, or do nothing if the
     *  key is not present in the bag. Returns true iff the key was contained
     *  in the bag. */
    boolean remove(int key) {
      for (int slot = key & mask; ; slot = (slot + 1) & mask) {
        if (freqs[slot] == 0) {
          // no such key in the bag
          return false;
        } else if (keys[slot] == key) {
          final int newFreq = --freqs[slot];
          if (newFreq == 0) { // removed
            relocateAdjacentKeys(slot);
          }
          return true;
        }
      }
    }

    private void relocateAdjacentKeys(int freeSlot) {
      for (int slot = (freeSlot + 1) & mask; ; slot = (slot + 1) & mask) {
        final int freq = freqs[slot];
        if (freq == 0) {
          // end of the collision chain, we're done
          break;
        }
        final int key = keys[slot];
        // the slot where key should be if there were no collisions
        final int expectedSlot = key & mask;
        // if the free slot is between the expected slot and the slot where the
        // key is, then we can relocate there
        if (between(expectedSlot, slot, freeSlot)) {
          keys[freeSlot] = key;
          freqs[freeSlot] = freq;
          // slot is the new free slot
          freqs[slot] = 0;
          freeSlot = slot;
        }
      }
    }

    /** Given a chain of occupied slots between chainStart
     *  and chainEnd, return whether slot is
     *  between the start and end of the chain. */
    private static boolean between(int chainStart, int chainEnd, int slot) {
      if (chainStart <= chainEnd) {
        return chainStart <= slot && slot <= chainEnd;
      } else {
        // the chain is across the end of the array
        return slot >= chainStart || slot <= chainEnd;
      }
    }

    Map asMap() {
      Map map = new HashMap<>();
      for (int i = 0; i < keys.length; ++i) {
        if (freqs[i] > 0) {
          map.put(keys[i], freqs[i]);
        }
      }
      return map;
    }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy