All downloads are FREE. Search and download functionality uses the official Maven repository.

org.roaringbitmap.art.Node48 Maven / Gradle / Ivy

Go to download

Roaring bitmaps are compressed bitmaps (also called bitsets) which tend to outperform conventional compressed bitmaps such as WAH or Concise.

There is a newer version: 1.3.0
Show newest version
package org.roaringbitmap.art;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.LongBuffer;
import java.util.Arrays;
import org.roaringbitmap.longlong.LongUtils;

public class Node48 extends Node {

  //the actual byte value of childIndex content won't be beyond 48
  // 256 bytes packed into longs
  static final int BYTES_PER_LONG = 8;
  static final int LONGS_USED = 256 / BYTES_PER_LONG;
  static final int INDEX_SHIFT = 3; // 2^3 == BYTES_PER_LONG
  static final int POS_MASK = 0x7; // the mask to access the pos in the long for the byte
  long[] childIndex = new long[LONGS_USED];
  Node[] children = new Node[48];
  static final byte EMPTY_VALUE = (byte)0xFF;
  static final long INIT_LONG_VALUE = 0xFFffFFffFFffFFffL;

  public Node48(int compressedPrefixSize) {
    super(NodeType.NODE48, compressedPrefixSize);
    Arrays.fill(childIndex, INIT_LONG_VALUE);
  }

  @Override
  public int getChildPos(byte k) {
    int unsignedIdx = Byte.toUnsignedInt(k);
    int childIdx = childrenIdx(unsignedIdx, childIndex);
    if (childIdx != EMPTY_VALUE) {
      return unsignedIdx;
    }
    return ILLEGAL_IDX;
  }

  @Override
  public SearchResult getNearestChildPos(byte k) {
    int unsignedIdx = Byte.toUnsignedInt(k);
    int childIdx = childrenIdx(unsignedIdx, childIndex);
    if (childIdx != EMPTY_VALUE) {
      return SearchResult.found(unsignedIdx);
    }
    return SearchResult.notFound(getNextSmallerPos(unsignedIdx), getNextLargerPos(unsignedIdx));
  }

  @Override
  public byte getChildKey(int pos) {

    return (byte) pos;
  }

  @Override
  public Node getChild(int pos) {
    byte idx = childrenIdx(pos, childIndex);
    return children[(int) idx];
  }

  @Override
  public void replaceNode(int pos, Node freshOne) {
    byte idx = childrenIdx(pos, childIndex);
    children[(int) idx] = freshOne;
  }

  @Override
  public int getMinPos() {
    int pos = 0;
    for (int i = 0; i < LONGS_USED; i++) {
      long longv = childIndex[i];
      if (longv == INIT_LONG_VALUE) {
        //skip over empty bytes
        pos += BYTES_PER_LONG;
        continue;
      } else {
        for (int j = 0; j < BYTES_PER_LONG; j++) {
          byte v = (byte) (longv >>> ((BYTES_PER_LONG - 1 - j) << INDEX_SHIFT));
          if (v != EMPTY_VALUE) {
            return pos;
          }
          pos++;
        }
      }
    }
    return ILLEGAL_IDX;
  }

  @Override
  public int getNextLargerPos(int pos) {
    if (pos == ILLEGAL_IDX) {
      pos = -1;
    }
    pos++;
    int i = pos >>> INDEX_SHIFT;
    for (; i < LONGS_USED; i++) {
      long longv = childIndex[i];
      if (longv == INIT_LONG_VALUE) {
        //skip over empty bytes
        pos = (pos + BYTES_PER_LONG) & 0xF8;
        continue;
      }

      for (int j = pos & POS_MASK; j < BYTES_PER_LONG; j++) {
        int shiftNum = (BYTES_PER_LONG - 1 - j) << INDEX_SHIFT;
        byte v = (byte) (longv >>> shiftNum);
        if (v != EMPTY_VALUE) {
          return pos;
        }
        pos++;
      }
    }
    return ILLEGAL_IDX;
  }

  @Override
  public int getMaxPos() {
    int pos = 255;
    for (int i = (LONGS_USED - 1); i >= 0; i--) {
      long longv = childIndex[i];
      if (longv == INIT_LONG_VALUE) {
        pos -= BYTES_PER_LONG;
        continue;
      } else {
        // the zeroth value is stored in the MSB, but because we are searching from high to low
        // across all bytes, we can avoid the "double negative" of starting at 7 and j-- to 0
        // and then shifting by (7-j)*8
        for (int j = 0; j < BYTES_PER_LONG; j++) {
          byte v = (byte) (longv >>> (j << INDEX_SHIFT));
          if (v != EMPTY_VALUE) {
            return pos;
          }
          pos--;
        }
      }
    }
    return ILLEGAL_IDX;
  }

  @Override
  public int getNextSmallerPos(int pos) {
    if (pos == ILLEGAL_IDX) {
      pos = 256;
    }
    pos--;
    int i = pos >>> INDEX_SHIFT;
    for (; i >= 0 && i < LONGS_USED; i--) {
      long longv = childIndex[i];
      if (longv == INIT_LONG_VALUE) {
        //skip over empty bytes
        pos -= Math.min(BYTES_PER_LONG,(pos & POS_MASK) + 1);
        continue;
      }
      // because we are starting potentially at non aligned location, we need to start at 7
      // (or less) and decrement to zero, and then unpack the long correctly.
      for (int j = pos & POS_MASK; j >= 0; j--) {
        int shiftNum = (BYTES_PER_LONG - 1 - j) << INDEX_SHIFT;
        byte v = (byte) (longv >>> shiftNum);
        if (v != EMPTY_VALUE) {
          return pos;
        }
        pos--;
      }
    }
    return ILLEGAL_IDX;
  }

  /**
   * insert a child node into the node48 node with the key byte
   *
   * @param currentNode the node4
   * @param child the child node
   * @param key the key byte
   * @return the node48 or an adaptive generated node256
   */
  public static Node insert(Node currentNode, Node child, byte key) {
    Node48 node48 = (Node48) currentNode;
    if (node48.count < 48) {
      //insert leaf node into current node
      int pos = node48.count;
      if (node48.children[pos] != null) {
        pos = 0;
        while (node48.children[pos] != null) {
          pos++;
        }
      }
      node48.children[pos] = child;
      int unsignedByte = Byte.toUnsignedInt(key);
      setOneByte(unsignedByte, (byte)pos, node48.childIndex);
      node48.count++;
      return node48;
    } else {
      //grow to Node256
      Node256 node256 = new Node256(node48.prefixLength);
      int currentPos = ILLEGAL_IDX;
      while ((currentPos = node48.getNextLargerPos(currentPos)) != ILLEGAL_IDX) {
        Node childNode = node48.getChild(currentPos);
        node256.children[currentPos] = childNode;
        Node256.setBit((byte) currentPos, node256.bitmapMask);
      }
      node256.count = node48.count;
      copyPrefix(node48, node256);
      Node freshOne = Node256.insert(node256, child, key);
      return freshOne;
    }
  }

  @Override
  public Node remove(int pos) {
    byte idx = childrenIdx(pos, childIndex);
    setOneByte(pos, EMPTY_VALUE, childIndex);
    children[idx] = null;
    count--;
    if (count <= 12) {
      //shrink to node16
      Node16 node16 = new Node16(this.prefixLength);
      int j = 0;
      ByteBuffer byteBuffer = ByteBuffer.allocate(16).order(ByteOrder.BIG_ENDIAN);
      int currentPos = ILLEGAL_IDX;
      while ((currentPos = getNextLargerPos(currentPos)) != ILLEGAL_IDX) {
        Node child = getChild(currentPos);
        byteBuffer.put(j, (byte) currentPos);
        node16.children[j] = child;
        j++;
      }
      node16.firstV = byteBuffer.getLong(0);
      node16.secondV = byteBuffer.getLong(8);
      node16.count = (short) j;
      copyPrefix(this, node16);
      return node16;
    }
    return this;
  }

  @Override
  public void serializeNodeBody(DataOutput dataOutput) throws IOException {
    for (int i = 0; i < LONGS_USED; i++) {
      long longv = childIndex[i];
      dataOutput.writeLong(Long.reverseBytes(longv));
    }
  }

  @Override
  public void serializeNodeBody(ByteBuffer byteBuffer) throws IOException {
    LongBuffer longBuffer = byteBuffer.asLongBuffer();
    longBuffer.put(childIndex);
    byteBuffer.position(byteBuffer.position() + LONGS_USED * BYTES_PER_LONG);
  }

  @Override
  public void deserializeNodeBody(DataInput dataInput) throws IOException {
    for (int i = 0; i < LONGS_USED; i++) {
      childIndex[i] = Long.reverseBytes(dataInput.readLong());
    }
  }

  @Override
  public void deserializeNodeBody(ByteBuffer byteBuffer) throws IOException {
    LongBuffer longBuffer = byteBuffer.asLongBuffer();
    longBuffer.get(childIndex);
    byteBuffer.position(byteBuffer.position() + LONGS_USED * BYTES_PER_LONG);
  }

  @Override
  public int serializeNodeBodySizeInBytes() {
    return LONGS_USED * BYTES_PER_LONG;
  }

  @Override
  void replaceChildren(Node[] children) {
    int step = 0;
    for (int i = 0; i < LONGS_USED; i++) {
      long longv = Long.reverseBytes(childIndex[i]);
      if (longv != INIT_LONG_VALUE) {
        for (int j = 0; j < BYTES_PER_LONG; j++) {
          long currentByte = longv & 0xFF;
          if (currentByte != 0xFF) {
            this.children[(int) currentByte] = children[step];
            step++;
          }
          longv >>>= 8;
        }
      }
    }
  }

  private static byte childrenIdx(int pos, long[] childIndex) {
    int longPos = pos >>> INDEX_SHIFT;
    int bytePos = pos & POS_MASK;
    long longV = childIndex[longPos];
    byte idx = (byte) ((longV) >>> ((BYTES_PER_LONG - 1 - bytePos) << INDEX_SHIFT));
    return idx;
  }

  static void setOneByte(int pos, byte v, long[] childIndex) {
    final int longPos = pos >>> INDEX_SHIFT;
    final int bytePos = pos & POS_MASK;
    final int shift = (BYTES_PER_LONG - 1 - bytePos) << INDEX_SHIFT;
    final long preVal = childIndex[longPos];
    final long newVal = (preVal & ~(0xFFL << shift)) | (Byte.toUnsignedLong(v) << shift);
    childIndex[longPos] = newVal;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy