All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.roaringbitmap.buffer.MutableRoaringArray Maven / Gradle / Ivy

The newest version!
/*
 * (c) the authors Licensed under the Apache License, Version 2.0.
 */

package org.roaringbitmap.buffer;


import java.io.*;
import java.nio.LongBuffer;
import java.nio.ShortBuffer;
import java.util.Arrays;
import java.util.NoSuchElementException;

import org.roaringbitmap.AppendableStorage;
import org.roaringbitmap.InvalidRoaringFormat;
import org.roaringbitmap.Util;

import static org.roaringbitmap.buffer.BufferUtil.compareUnsigned;
import static org.roaringbitmap.buffer.BufferUtil.toIntUnsigned;


/**
 * Specialized array to store the containers used by a RoaringBitmap. This class is similar to
 * org.roaringbitmap.RoaringArray but meant to be used with memory mapping. This is not meant to be
 * used by end users.
 *
 * Objects of this class reside in RAM.
 */
public final class MutableRoaringArray implements Cloneable, Externalizable, PointableRoaringArray,
        AppendableStorage {

  // Capacity used by the no-argument constructor.
  protected static final int INITIAL_CAPACITY = 4;

  // Cookie written by serialize() when no container is a run container; it is followed by the
  // container count as a separate 32-bit value.
  protected static final short SERIAL_COOKIE_NO_RUNCONTAINER = 12346;
  // Cookie written by serialize() when at least one run container is present; (size - 1) is
  // packed into the upper 16 bits of the same 32-bit word.
  protected static final short SERIAL_COOKIE = 12347;

  // With run containers present, the per-container offset table is only written/read when the
  // number of containers is at least this threshold.
  protected static final int NO_OFFSET_THRESHOLD = 4;

  private static final long serialVersionUID = 5L; // TODO: OFK was 4L, not sure


  // 16-bit keys, one per container; only the first `size` entries are live. Comparisons in this
  // class treat them as unsigned.
  short[] keys = null;
  // Containers, parallel to `keys`; only the first `size` entries are live.
  MappeableContainer[] values = null;

  // Number of live key/container pairs.
  int size = 0;

  /**
   * Creates an empty array with room for {@link #INITIAL_CAPACITY} containers.
   */
  protected MutableRoaringArray() {
    this(INITIAL_CAPACITY);
  }

  /**
   * Creates an empty array with freshly allocated backing storage.
   *
   * @param initialCapacity number of key/container slots to allocate up front
   */
  public MutableRoaringArray(int initialCapacity) {
    this(new short[initialCapacity], new MappeableContainer[initialCapacity], 0);
  }

  /**
   * Wraps the supplied arrays directly; no defensive copy is made.
   *
   * @param keys backing key array
   * @param values backing container array (presumably the same length as {@code keys};
   *        not checked here)
   * @param size number of live entries at the front of both arrays
   */
  MutableRoaringArray(short[] keys, MappeableContainer[] values, int size) {
    this.keys = keys;
    this.values = values;
    this.size = size;
  }


  /**
   * Finds the first index strictly after {@code pos} whose key is greater than or equal to
   * {@code x}, comparing keys as unsigned 16-bit values. The search first probes
   * {@code pos + 1}, then gallops forward with a doubling span, and finally runs a binary
   * search inside the bracket found by the gallop.
   *
   * @param x the key to search for
   * @param pos index after which the search starts
   * @return {@code pos + 1} when that index is past the end or already qualifies; otherwise the
   *         index of the first key {@code >= x}, or {@code size} when no such key exists
   */
  @Override
  public int advanceUntil(short x, int pos) {
    int lower = pos + 1;

    // special handling for a possibly common sequential case
    if (lower >= size || toIntUnsigned(keys[lower]) >= toIntUnsigned(x)) {
      return lower;
    }

    int spansize = 1; // could set larger
    // bootstrap an upper limit: double the span until the probe leaves the array or reaches a
    // key that is not smaller than x

    while (lower + spansize < size
        && toIntUnsigned(keys[lower + spansize]) < toIntUnsigned(x)) {
      spansize *= 2; // hoping for compiler will reduce to shift
    }
    // clamp the probe to the last live index when the gallop ran off the end
    int upper = (lower + spansize < size) ? lower + spansize : size - 1;

    // maybe we are lucky (could be common case when the seek ahead
    // expected to be small and sequential will otherwise make us look bad)
    if (keys[upper] == x) {
      return upper;
    }

    // means array has no item key >= x
    if (toIntUnsigned(keys[upper]) < toIntUnsigned(x)) {
      return size;
    }

    // we know that the next-smallest span was too small
    lower += (spansize / 2);

    // else begin binary search
    // invariant: keys[lower] < x && keys[upper] > x (unsigned comparison)
    while (lower + 1 != upper) {
      int mid = (lower + upper) / 2;
      if (keys[mid] == x) {
        return mid;
      } else if (toIntUnsigned(keys[mid]) < toIntUnsigned(x)) {
        lower = mid;
      } else {
        upper = mid;
      }
    }
    return upper;
  }

  /**
   * Appends a key/container pair at the end of the array. The container is stored as-is, not
   * copied.
   *
   * @param key the key to append; must not be smaller (unsigned) than the current last key
   * @param value the container to associate with {@code key}
   * @throws IllegalArgumentException if {@code key} is smaller than the current last key
   */
  @Override
  public void append(short key, MappeableContainer value) {
    if (size > 0) {
      final short lastKey = keys[size - 1];
      if (compareUnsigned(key, lastKey) < 0) {
        throw new IllegalArgumentException(
            "append only: " + toIntUnsigned(key) + " < " + toIntUnsigned(lastKey));
      }
    }
    extendArray(1);
    // read the arrays only after extendArray, which may have replaced them
    final int slot = this.size;
    this.keys[slot] = key;
    this.values[slot] = value;
    this.size = slot + 1;
  }

  /**
   * Appends every entry of another array to the end of this one. The containers are shared
   * with {@code appendage}, not cloned. When assertions are enabled, the last key of this
   * array must be strictly smaller (unsigned) than the first key of {@code appendage}.
   *
   * @param appendage the array whose entries are appended; an empty one is a no-op
   */
  void append(MutableRoaringArray appendage) {
    assert size == 0 || appendage.size == 0
            || compareUnsigned(keys[size - 1], appendage.keys[0]) < 0;
    final int incoming = appendage.size;
    if (incoming == 0) {
      return;
    }
    if (size == 0) {
      // nothing of ours to keep: adopt copies of the other array's storage at its full capacity
      keys = Arrays.copyOf(appendage.keys, appendage.keys.length);
      values = Arrays.copyOf(appendage.values, appendage.values.length);
      size = incoming;
      return;
    }
    // grow to exactly the combined size, then splice the new entries behind ours
    final int combined = size + incoming;
    keys = Arrays.copyOf(keys, combined);
    values = Arrays.copyOf(values, combined);
    System.arraycopy(appendage.keys, 0, keys, size, incoming);
    System.arraycopy(appendage.values, 0, values, size, incoming);
    size = combined;
  }

  /**
   * Appends clones of all containers of another array whose position comes after a given key
   * (which may or may not be present in that array).
   *
   * @param highLowContainer the array to copy from
   * @param beforeStart the largest key that is NOT copied
   */
  protected void appendCopiesAfter(PointableRoaringArray highLowContainer, short beforeStart) {
    final int located = highLowContainer.getIndex(beforeStart);
    // a non-negative result is the position of the key itself, so start one past it; a negative
    // result is decoded as -(insertion point) - 1
    int cursor = (located >= 0) ? located + 1 : -located - 1;
    extendArray(highLowContainer.size() - cursor);

    while (cursor < highLowContainer.size()) {
      this.keys[this.size] = highLowContainer.getKeyAtIndex(cursor);
      this.values[this.size] = highLowContainer.getContainerAtIndex(cursor).clone();
      this.size++;
      ++cursor;
    }
  }

  /**
   * Appends clones of the leading containers of another array, walking it from the start and
   * stopping at the first key that is equal to or larger than the stopping key (unsigned).
   *
   * @param highLowContainer the array to copy from
   * @param stoppingKey first key that is NOT copied; copying ends there
   */
  protected void appendCopiesUntil(PointableRoaringArray highLowContainer, short stoppingKey) {
    final int limit = toIntUnsigned(stoppingKey);
    for (MappeableContainerPointer it = highLowContainer.getContainerPointer();
        it.hasContainer() && toIntUnsigned(it.key()) < limit;
        it.advance()) {
      extendArray(1);
      this.keys[this.size] = it.key();
      this.values[this.size] = it.getContainer().clone();
      this.size++;
    }
  }

  /**
   * Appends clones of a contiguous index range of another array.
   *
   * @param highLowContainer the array to copy from
   * @param startingIndex first index (inclusive) in the other array
   * @param end index in the other array at which copying stops (exclusive)
   */
  protected void appendCopy(PointableRoaringArray highLowContainer, int startingIndex, int end) {
    final int count = end - startingIndex;
    extendArray(count);
    int src = startingIndex;
    while (src < end) {
      this.keys[this.size] = highLowContainer.getKeyAtIndex(src);
      this.values[this.size] = highLowContainer.getContainerAtIndex(src).clone();
      this.size++;
      src++;
    }
  }

  /**
   * Appends a key together with a clone of the given container. Unlike
   * {@code append(short, MappeableContainer)}, no ordering check is performed here.
   *
   * @param key the key to append
   * @param value the container to clone and store
   */
  protected void appendCopy(short key, MappeableContainer value) {
    extendArray(1);
    final int slot = this.size;
    this.keys[slot] = key;
    this.values[slot] = value.clone();
    this.size = slot + 1;
  }

  /**
   * Searches {@code keys} for {@code key} by delegating to
   * {@code Util.unsignedBinarySearch}; the result follows that helper's convention
   * (callers in this class treat a negative result as "not found").
   *
   * @param begin start of the searched range, passed through unchanged
   * @param end end of the searched range, passed through unchanged
   * @param key the key to look for
   * @return whatever {@code Util.unsignedBinarySearch} returns
   */
  private int binarySearch(int begin, int end, short key) {
    return Util.unsignedBinarySearch(keys, begin, end, key);
  }

  /**
   * Empties the array and drops both backing arrays. Note that {@code keys} and
   * {@code values} are {@code null} afterwards, not zero-length arrays.
   */
  protected void clear() {
    this.size = 0;
    this.values = null;
    this.keys = null;
  }

  /**
   * If possible, recover wasted memory: shrinks both backing arrays to exactly {@code size}
   * entries and asks every container to trim itself.
   */
  public void trim() {
    final int live = size;
    keys = Arrays.copyOf(keys, live);
    final MappeableContainer[] compacted = Arrays.copyOf(values, live);
    values = compacted;
    for (int i = 0; i < compacted.length; ++i) {
      compacted[i].trim();
    }
  }

  /**
   * Creates a deep copy: the key array is copied and every live container is cloned. The
   * copy's backing arrays are sized to exactly {@code size} entries.
   *
   * @return the copy, or {@code null} if {@code super.clone()} throws
   *         {@link CloneNotSupportedException}
   */
  @Override
  public MutableRoaringArray clone() {
    final MutableRoaringArray copy;
    try {
      copy = (MutableRoaringArray) super.clone();
    } catch (CloneNotSupportedException e) {
      return null;
    }
    // OFK: do we need runcontainer bitmap? Guess not, this is just a directory
    // and each container knows what kind it is.
    final int live = this.size;
    copy.keys = Arrays.copyOf(this.keys, live);
    final MappeableContainer[] duplicated = Arrays.copyOf(this.values, live);
    for (int i = 0; i < live; ++i) {
      duplicated[i] = duplicated[i].clone();
    }
    copy.values = duplicated;
    copy.size = live;
    return copy;
  }

  /**
   * Moves the entries in {@code [begin, end)} so that they start at {@code newBegin}, in both
   * the key and the container arrays. {@code size} is not touched. The caller is expected to
   * pass {@code begin <= end} and {@code newBegin < begin}.
   *
   * @param begin first index of the source range (inclusive)
   * @param end end of the source range (exclusive)
   * @param newBegin destination index of the first moved entry
   */
  protected void copyRange(int begin, int end, int newBegin) {
    final int count = end - begin;
    System.arraycopy(keys, begin, keys, newBegin, count);
    System.arraycopy(values, begin, values, newBegin, count);
  }

  /**
   * Deserialize.
   *
   * <p>The current content is discarded first. The new content is assembled in local arrays
   * and only installed once the whole stream has been read, so a failure part-way through
   * leaves this array in its cleared state rather than half-populated.
   *
   * @param in the DataInput stream
   * @throws IOException Signals that an I/O exception has occurred (including
   *         {@link EOFException} on a truncated stream).
   * @throws InvalidRoaringFormat if the cookie is not recognised or the container count is
   *         negative or larger than {@code 1 << 16}
   */
  public void deserialize(DataInput in) throws IOException {
    this.clear();
    // little endian
    final int cookie = Integer.reverseBytes(in.readInt());
    final boolean hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
    if (!hasrun && cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
      throw new InvalidRoaringFormat("I failed to find a valid cookie.");
    }
    // with run containers, (count - 1) is packed into the upper half of the cookie; otherwise
    // the count follows as its own 32-bit value
    final int count = hasrun ? (cookie >>> 16) + 1 : Integer.reverseBytes(in.readInt());
    // a negative count can only come from a corrupted stream and must not reach the array
    // allocations below
    if (count < 0) {
      throw new InvalidRoaringFormat("Negative size");
    }
    // logically we cannot have more than (1<<16) containers.
    if (count > (1 << 16)) {
      throw new InvalidRoaringFormat("Size too large");
    }

    byte[] bitmapOfRunContainers = null;
    if (hasrun) {
      bitmapOfRunContainers = new byte[(count + 7) / 8];
      in.readFully(bitmapOfRunContainers);
    }

    // descriptive header: (key, cardinality - 1) per container
    final short[] readKeys = new short[count];
    final int[] cardinalities = new int[count];
    final boolean[] isRun = new boolean[count];
    final boolean[] isBitmap = new boolean[count];
    for (int k = 0; k < count; ++k) {
      readKeys[k] = Short.reverseBytes(in.readShort());
      cardinalities[k] = 1 + (0xFFFF & Short.reverseBytes(in.readShort()));
      isRun[k] = bitmapOfRunContainers != null
          && (bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0;
      // a run container is never a bitmap container, whatever its cardinality
      isBitmap[k] = !isRun[k] && cardinalities[k] > MappeableArrayContainer.DEFAULT_MAX_SIZE;
    }
    if ((!hasrun) || (count >= NO_OFFSET_THRESHOLD)) {
      // The offsets are not needed when reading sequentially. DataInput.skipBytes may skip
      // fewer bytes than requested, which would misalign every following read, so the
      // offsets are consumed with readFully, which reads everything or throws EOFException.
      in.readFully(new byte[count * 4]);
    }
    // Reading the containers
    final MappeableContainer[] containers = new MappeableContainer[count];
    for (int k = 0; k < count; ++k) {
      final MappeableContainer val;
      if (isRun[k]) {
        final int nbrruns = toIntUnsigned(Short.reverseBytes(in.readShort()));
        final ShortBuffer shortArray = ShortBuffer.allocate(2 * nbrruns);
        for (int l = 0; l < shortArray.limit(); ++l) {
          shortArray.put(l, Short.reverseBytes(in.readShort()));
        }
        val = new MappeableRunContainer(shortArray, nbrruns);
      } else if (isBitmap[k]) {
        final LongBuffer bitmapArray =
            LongBuffer.allocate(MappeableBitmapContainer.MAX_CAPACITY / 64);
        // little endian
        for (int l = 0; l < bitmapArray.limit(); ++l) {
          bitmapArray.put(l, Long.reverseBytes(in.readLong()));
        }
        val = new MappeableBitmapContainer(bitmapArray, cardinalities[k]);
      } else {
        final ShortBuffer shortArray = ShortBuffer.allocate(cardinalities[k]);
        for (int l = 0; l < shortArray.limit(); ++l) {
          shortArray.put(l, Short.reverseBytes(in.readShort()));
        }
        val = new MappeableArrayContainer(shortArray, cardinalities[k]);
      }
      containers[k] = val;
    }
    // install the fully read content in one step
    this.keys = readKeys;
    this.values = containers;
    this.size = count;
  }

  /**
   * Makes sure there is capacity for at least {@code k} more elements, reallocating both
   * backing arrays when {@code size + k} reaches the current capacity. Arrays shorter than 1024
   * slots grow to twice the required size, larger ones to 5/4 of it.
   *
   * @param k number of additional entries the caller is about to store
   */
  protected void extendArray(int k) {
    // size + 1 could overflow
    final int required = this.size + k;
    final int capacity = this.keys.length;
    if (required < capacity) {
      return;
    }
    final int newCapacity = (capacity < 1024) ? 2 * required : 5 * required / 4;
    this.keys = Arrays.copyOf(this.keys, newCapacity);
    this.values = Arrays.copyOf(this.values, newCapacity);
  }

  /**
   * Returns the cardinality reported by the container stored at index {@code i}.
   *
   * @param i index of the container (not bounds-checked against {@code size})
   * @return that container's cardinality
   */
  @Override
  public int getCardinality(int i) {
    return getContainerAtIndex(i).getCardinality();
  }

  // retired method (inefficient)
  // involves a binary search
  /*@Override
  public MappeableContainer getContainer(short x) {
    final int i = this.binarySearch(0, size, x);
    if (i < 0) {
      return null;
    }
    return this.values[i];
  }*/
  
  /**
   * Looks up the position of key {@code x} among the live keys with a binary search. Unlike
   * {@code getIndex}, no shortcut for the last key is attempted.
   *
   * @param x the key to look for
   * @return the result of the binary search over {@code [0, size)}; presumably negative when
   *         the key is absent (convention of {@code Util.unsignedBinarySearch})
   */
  @Override
  public int getContainerIndex(short x) {
    return this.binarySearch(0, size, x);
  }
  

  /**
   * Returns the container stored at index {@code i} of the backing array, without copying.
   *
   * @param i index into the container array (not checked against {@code size})
   * @return the stored container reference
   */
  @Override
  public MappeableContainer getContainerAtIndex(int i) {
    return this.values[i];
  }

  /**
   * Returns a container pointer positioned at index 0.
   *
   * @return a new pointer over this array
   */
  @Override
  public MappeableContainerPointer getContainerPointer() {
    return getContainerPointer(0);
  }

  /**
   * Returns a pointer over this array's containers, initially positioned at
   * {@code startIndex}. The pointer reads this array's fields live on every call, so it
   * reflects later modifications of the array.
   *
   * @param startIndex index the pointer starts at
   * @return a new pointer
   */
  @Override
  public MappeableContainerPointer getContainerPointer(final int startIndex) {
    return new MappeableContainerPointer() {
      // current position; advance()/previous() may move it outside [0, size)
      int cursor = startIndex;

      @Override
      public void advance() {
        cursor++;
      }

      @Override
      public MappeableContainerPointer clone() {
        try {
          return (MappeableContainerPointer) super.clone();
        } catch (CloneNotSupportedException e) {
          return null;// will not happen
        }
      }

      /**
       * Orders by unsigned key first; for equal keys the pointer with the LARGER cardinality
       * sorts first.
       */
      @Override
      public int compareTo(MappeableContainerPointer o) {
        final int keyDelta = toIntUnsigned(key()) - toIntUnsigned(o.key());
        if (keyDelta != 0) {
          return keyDelta;
        }
        return o.getCardinality() - this.getCardinality();
      }

      @Override
      public int getCardinality() {
        return getContainer().getCardinality();
      }

      /** Returns the current container, or null once the pointer is at or past the end. */
      @Override
      public MappeableContainer getContainer() {
        return (cursor < MutableRoaringArray.this.size)
            ? MutableRoaringArray.this.values[cursor]
            : null;
      }

      @Override
      public int getSizeInBytes() {
        return getContainer().getArraySizeInBytes();
      }

      @Override
      public boolean hasContainer() {
        return cursor >= 0 && cursor < MutableRoaringArray.this.size;
      }

      @Override
      public boolean isBitmapContainer() {
        return getContainer() instanceof MappeableBitmapContainer;
      }

      @Override
      public boolean isRunContainer() {
        return getContainer() instanceof MappeableRunContainer;
      }

      /** Returns the key at the current position; the position is not range-checked here. */
      @Override
      public short key() {
        return MutableRoaringArray.this.keys[cursor];
      }

      @Override
      public void previous() {
        cursor--;
      }
    };
  }

  /**
   * Finds the position of key {@code x}. Two frequent cases are answered without searching:
   * an empty array (result {@code -1}) and a match on the last key. Everything else goes
   * through a binary search over the live keys.
   *
   * @param x the key to look for
   * @return the index of {@code x}, or the binary search's (negative) result when absent
   */
  @Override
  public int getIndex(short x) {
    if (size == 0) {
      return -1;
    }
    final int lastIndex = size - 1;
    if (keys[lastIndex] == x) {
      return lastIndex;
    }
    // no luck, we have to search
    return this.binarySearch(0, size, x);
  }

  /**
   * Returns the key stored at index {@code i} of the backing array.
   *
   * @param i index into the key array (not checked against {@code size})
   * @return the stored key
   */
  @Override
  public short getKeyAtIndex(int i) {
    return this.keys[i];
  }

  /**
   * Combines, in index order, every live key and the hash code of its container into a
   * single polynomial hash with multiplier 31.
   *
   * @return the hash of the live entries; 0 for an empty array
   */
  @Override
  public int hashCode() {
    int acc = 0;
    int idx = 0;
    while (idx < this.size) {
      final int entryHash = keys[idx] * 0xF0F0F0 + values[idx].hashCode();
      acc = 31 * acc + entryHash;
      idx++;
    }
    return acc;
  }

  /**
   * Tells whether at least one live container is a {@code MappeableRunContainer}.
   *
   * @return true if a run container is present among the first {@code size} containers
   */
  @Override
  public boolean hasRunCompression() {
    int idx = 0;
    while (idx < size) {
      if (values[idx] instanceof MappeableRunContainer) {
        return true;
      }
      idx++;
    }
    return false;
  }

  /**
   * Computes the number of bytes the serialized form uses before the container payloads.
   *
   * <p>Without run containers: 4 bytes cookie, 4 bytes size, 8 bytes per container. With run
   * containers: 4 bytes cookie (the size is packed into it), the run-flag bitmap, and per
   * container either 4 bytes (fewer than {@code NO_OFFSET_THRESHOLD} containers, offsets
   * omitted) or 8 bytes.
   *
   * @return the header size in bytes
   */
  protected int headerSize() {
    if (!hasRunCompression()) {
      return 4 + 4 + 8 * size;
    }
    final int runBitmapBytes = (size + 7) / 8;
    // for small bitmaps, we omit the offsets
    final int bytesPerContainer = (size < NO_OFFSET_THRESHOLD) ? 4 : 8;
    return 4 + runBitmapBytes + bytesPerContainer * size;
  }

  /**
   * Inserts a new key/container pair at index {@code i}, shifting the entries from {@code i}
   * onwards one slot to the right. It is assumed that the key does not exist yet; this is
   * not checked.
   *
   * @param i index at which the pair is inserted
   * @param key the new key
   * @param value the container to store (not copied)
   */
  protected void insertNewKeyValueAt(int i, short key, MappeableContainer value) {
    extendArray(1);
    final int tail = size - i;
    System.arraycopy(keys, i, keys, i + 1, tail);
    System.arraycopy(values, i, values, i + 1, tail);
    keys[i] = key;
    values[i] = value;
    size++;
  }

  /**
   * {@link Externalizable} hook; delegates to {@link #deserialize(DataInput)}.
   *
   * @param in the stream to read from
   * @throws IOException if reading fails
   * @throws ClassNotFoundException declared by the interface
   */
  @Override
  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    deserialize(in);
  }

  /**
   * Removes the entry at index {@code i}, shifting later entries one slot to the left and
   * clearing the slot that becomes free at the end.
   *
   * @param i index of the entry to remove
   */
  protected void removeAtIndex(int i) {
    final int last = size - 1;
    final int shifted = last - i;
    System.arraycopy(keys, i + 1, keys, i, shifted);
    System.arraycopy(values, i + 1, values, i, shifted);
    // wipe the vacated tail slot so no stale container reference is retained
    keys[last] = 0;
    values[last] = null;
    size = last;
  }


  /**
   * Removes the entries in {@code [begin, end)}, shifting the entries after the range to the
   * left and clearing the slots freed at the end. Does nothing when {@code end <= begin}.
   *
   * @param begin first index to remove (inclusive)
   * @param end end of the removed range (exclusive)
   */
  protected void removeIndexRange(int begin, int end) {
    if (end <= begin) {
      return;
    }
    final int tail = size - end;
    System.arraycopy(keys, end, keys, begin, tail);
    System.arraycopy(values, end, values, begin, tail);
    // the last (end - begin) live slots are now duplicates of shifted entries: wipe them
    final int newSize = size - (end - begin);
    Arrays.fill(keys, newSize, size, (short) 0);
    Arrays.fill(values, newSize, size, null);
    size = newSize;
  }

  /**
   * Overwrites both the key and the container at index {@code i}. Neither the index nor the
   * resulting key order is checked.
   *
   * @param i index of the entry to overwrite
   * @param key the new key
   * @param c the new container (stored as-is, not copied)
   */
  protected void replaceKeyAndContainerAtIndex(int i, short key, MappeableContainer c) {
    this.keys[i] = key;
    this.values[i] = c;
  }


  /**
   * Shrinks the logical size to {@code newLength}, clearing the slots between the new and the
   * old size. The backing arrays keep their capacity. A {@code newLength} larger than the
   * current size is rejected by {@code Arrays.fill} (fromIndex &gt; toIndex).
   *
   * @param newLength the new number of live entries
   */
  protected void resize(int newLength) {
    final int oldSize = this.size;
    Arrays.fill(keys, newLength, oldSize, (short) 0);
    Arrays.fill(values, newLength, oldSize, null);
    this.size = newLength;
  }

  /**
   * Serialize.
   *
   * <p>Layout, all values little endian: cookie (plus either the packed size and a run-flag
   * bitmap, or a separate size word), then one (key, cardinality - 1) pair per container, then
   * optionally one 32-bit offset per container, then the container payloads.
   *
   * The current bitmap is not modified.
   *
   * @param out the DataOutput stream
   * @throws IOException Signals that an I/O exception has occurred.
   */
  @Override
  public void serialize(DataOutput out) throws IOException {
    final boolean hasrun = hasRunCompression();
    // running offset written into the offset table; starts at the header length
    int nextOffset;
    if (!hasrun) { // backwards compatibilility
      out.writeInt(Integer.reverseBytes(SERIAL_COOKIE_NO_RUNCONTAINER));
      out.writeInt(Integer.reverseBytes(size));
      nextOffset = 4 + 4 + this.size * 4 + this.size * 4;
    } else {
      out.writeInt(Integer.reverseBytes(SERIAL_COOKIE | ((this.size - 1) << 16)));
      // one bit per container, set when that container is a run container
      final byte[] runFlags = new byte[(size + 7) / 8];
      for (int i = 0; i < size; ++i) {
        if (this.values[i] instanceof MappeableRunContainer) {
          runFlags[i / 8] |= (1 << (i % 8));
        }
      }
      out.write(runFlags);
      // small bitmaps carry no offset table, so only 4 descriptive bytes per container
      final int bytesPerContainer = (this.size < NO_OFFSET_THRESHOLD) ? 4 : 8;
      nextOffset = 4 + bytesPerContainer * this.size + runFlags.length;
    }

    for (int k = 0; k < size; ++k) {
      final int cardinalityMinusOne = this.values[k].getCardinality() - 1;
      out.writeShort(Short.reverseBytes(this.keys[k]));
      out.writeShort(Short.reverseBytes((short) cardinalityMinusOne));
    }

    final boolean writeOffsets = (!hasrun) || (this.size >= NO_OFFSET_THRESHOLD);
    if (writeOffsets) {
      for (int k = 0; k < this.size; k++) {
        out.writeInt(Integer.reverseBytes(nextOffset));
        nextOffset += values[k].getArraySizeInBytes();
      }
    }

    for (int k = 0; k < size; ++k) {
      values[k].writeArray(out);
    }
  }

  /**
   * Report the number of bytes required for serialization: the header size plus the array
   * size in bytes of every live container.
   *
   * @return the size in bytes
   */
  @Override
  public int serializedSizeInBytes() {
    // the header already accounts for cardinality (16 bits), key (16 bits) and, where present,
    // location offset (32 bits) of each container
    int total = headerSize();
    int idx = 0;
    while (idx < this.size) {
      total += values[idx].getArraySizeInBytes();
      idx++;
    }
    return total;
  }

  /**
   * Replaces the container at index {@code i}, leaving its key untouched.
   *
   * @param i index of the entry (not checked against {@code size})
   * @param c the new container (stored as-is, not copied)
   */
  protected void setContainerAtIndex(int i, MappeableContainer c) {
    this.values[i] = c;
  }

  /**
   * Returns the number of live key/container pairs.
   *
   * @return the current size
   */
  @Override
  public int size() {
    return this.size;
  }

  /**
   * {@link Externalizable} hook; delegates to {@link #serialize(DataOutput)}.
   *
   * @param out the stream to write to
   * @throws IOException if writing fails
   */
  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    serialize(out);
  }


  /**
   * Asks the container at index {@code i} whether it contains the value {@code x}.
   *
   * @param i index of the container
   * @param x the value to test, passed to the container's {@code contains}
   * @return the container's answer
   */
  @Override
  public boolean containsForContainerAtIndex(int i, short x) {
    return getContainerAtIndex(i).contains(x);// no faster way
  }

  /**
   * Returns the value formed by the first key (placed in the upper 16 bits) combined with
   * the first value of the first container.
   *
   * @return the combined 32-bit value
   * @throws NoSuchElementException if the array is empty
   */
  @Override
  public int first() {
    assertNonEmpty();
    final int highBits = keys[0] << 16;
    final int lowBits = values[0].first();
    return highBits | lowBits;
  }

  /**
   * Returns the value formed by the last key (placed in the upper 16 bits) combined with the
   * last value of the last container.
   *
   * @return the combined 32-bit value
   * @throws NoSuchElementException if the array is empty
   */
  @Override
  public int last() {
    assertNonEmpty();
    final int tail = size - 1;
    final int highBits = keys[tail] << 16;
    final int lowBits = values[tail].last();
    return highBits | lowBits;
  }

  /**
   * Guard used by {@code first()} and {@code last()}.
   *
   * @throws NoSuchElementException if the array holds no containers
   */
  private void assertNonEmpty() {
    if (size != 0) {
      return;
    }
    throw new NoSuchElementException("Empty MutableRoaringArray");
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy