All downloads are free. Search and download functionality uses the official Maven repository.

org.roaringbitmap.art.Art Maven / Gradle / Ivy

Go to download

Roaring bitmaps are compressed bitmaps (also called bitsets) which tend to outperform conventional compressed bitmaps such as WAH or Concise.

There is a newer version: 1.3.0
Show newest version
package org.roaringbitmap.art;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.roaringbitmap.ArraysShim;

/**
 * An Adaptive Radix Tree (ART), see https://db.in.tum.de/~leis/papers/ART.pdf: a CPU-cache-friendly
 * main-memory data structure. In this usage every leaf key is the high 48 bits of a long value,
 * and keys are compared byte by byte (dictionary order over the key bytes).
 *
 * <p>The tree maps each key to a {@code long} container index and additionally tracks the number
 * of stored keys.
 */
public class Art {

  // Shared zero-length prefix; safe to share because an empty array cannot be modified.
  private static final byte[] EMPTY_BYTES = new byte[0];

  private Node root;
  // Number of keys currently stored; maintained by insert/remove and (de)serialized with the tree.
  private long keySize = 0;

  /**
   * Creates an empty tree.
   */
  public Art() {
    root = null;
  }

  /**
   * @return true if the tree holds no node at all
   */
  public boolean isEmpty() {
    return root == null;
  }

  /**
   * insert the 48 bit key and the corresponding containerIdx
   *
   * @param key the high 48 bit of the long data
   * @param containerIdx the container index
   * @throws IllegalArgumentException if the key is already present; the tree and the key count
   *     are left unchanged in that case
   */
  public void insert(byte[] key, long containerIdx) {
    // The recursive insert returns the (possibly replaced) subtree root.
    root = insert(root, key, 0, containerIdx);
    keySize++;
  }

  /**
   * @param key the high 48 bit of the long data
   * @return the key's corresponding containerIdx, or {@code Node.ILLEGAL_IDX} if absent
   */
  public long findByKey(byte[] key) {
    Node node = findByKey(root, key, 0);
    if (node == null) {
      return Node.ILLEGAL_IDX;
    }
    return ((LeafNode) node).containerIdx;
  }

  /**
   * Walks down from {@code node}, consuming key bytes from {@code depth} on, until a leaf is
   * reached or the path diverges from the key.
   *
   * @return the matching leaf node, or null if the key is not present below {@code node}
   */
  private Node findByKey(Node node, byte[] key, int depth) {
    while (node != null) {
      if (node.nodeType == NodeType.LEAF_NODE) {
        LeafNode leafNode = (LeafNode) node;
        // every key byte was already consumed on the way down: nothing left to compare
        if (depth == LeafNode.LEAF_NODE_KEY_LENGTH_IN_BYTES) {
          return leafNode;
        }
        return leafMatch(leafNode, key, depth) ? leafNode : null;
      }
      if (node.prefixLength > 0) {
        int commonLength = commonPrefixLength(key, depth, key.length, node.prefix, 0,
            node.prefixLength);
        if (commonLength != node.prefixLength) {
          return null;
        }
        //the whole compressed prefix matched, so skip over it
        depth += node.prefixLength;
      }
      int pos = node.getChildPos(key[depth]);
      if (pos == Node.ILLEGAL_IDX) {
        return null;
      }
      node = node.getChild(pos);
      depth++;
    }
    return null;
  }

  /**
   * a convenient method to traverse the key space in ascending order.
   * @param containers input containers
   * @return the key iterator
   */
  public KeyIterator iterator(Containers containers) {
    return new KeyIterator(this, containers);
  }

  /**
   * remove the key from the art if it's there.
   * @param key the high 48 bit key
   * @return the corresponding containerIdx or {@code Node.ILLEGAL_IDX} indicating not exist
   */
  public long remove(byte[] key) {
    Toolkit toolkit = removeSpecifyKey(root, key, 0);
    if (toolkit == null) {
      return Node.ILLEGAL_IDX;
    }
    return toolkit.matchedContainerId;
  }

  /**
   * Removes {@code key} from the subtree rooted at {@code node}.
   *
   * @param node the subtree root, may be null
   * @param key the high 48 bit key
   * @param dep index of the first key byte not yet consumed by the ancestors of {@code node}
   * @return a {@link Toolkit} describing the removal, or null if the key was not found
   */
  protected Toolkit removeSpecifyKey(Node node, byte[] key, int dep) {
    if (node == null) {
      return null;
    }
    if (node.nodeType == NodeType.LEAF_NODE) {
      // A matching non-root leaf is removed by its parent further below, so a successful match
      // here is expected only when the leaf itself is the root.
      LeafNode leafNode = (LeafNode) node;
      if (!leafMatch(leafNode, key, dep)) {
        return null;
      }
      if (node == this.root) {
        this.root = null;
      }
      keySize--;
      return new Toolkit(null, leafNode.getContainerIdx(), null);
    }
    if (node.prefixLength > 0) {
      int commonLength = commonPrefixLength(
          key, dep, key.length,
          node.prefix, 0, node.prefixLength
      );
      if (commonLength != node.prefixLength) {
        return null;
      }
      dep += node.prefixLength;
    }
    int pos = node.getChildPos(key[dep]);
    if (pos == Node.ILLEGAL_IDX) {
      return null;
    }
    Node child = node.getChild(pos);
    if (child.nodeType == NodeType.LEAF_NODE && leafMatch((LeafNode) child, key, dep)) {
      //found matched leaf node from the current node.
      Node freshNode = node.remove(pos);
      keySize--;
      if (node == this.root && freshNode != node) {
        this.root = freshNode;
      }
      long matchedContainerIdx = ((LeafNode) child).getContainerIdx();
      Toolkit toolkit = new Toolkit(freshNode, matchedContainerIdx, node);
      // the caller one level up still has to swap in freshNode if it differs from node
      toolkit.needToVerifyReplacing = true;
      return toolkit;
    }
    Toolkit toolkit = removeSpecifyKey(child, key, dep + 1);
    if (toolkit != null && toolkit.needToVerifyReplacing
        && toolkit.freshMatchedParentNode != null
        && toolkit.freshMatchedParentNode != toolkit.originalMatchedParentNode) {
      //meaning find the matched key and the shrinking happened
      node.replaceNode(pos, toolkit.freshMatchedParentNode);
      toolkit.needToVerifyReplacing = false;
    }
    return toolkit;
  }

  /**
   * Result of a removal: the matched container index plus the bookkeeping an ancestor needs to
   * re-link a parent node that was replaced while removing the leaf.
   */
  static class Toolkit {

    Node freshMatchedParentNode;//indicating a fresh parent node while the original
    // parent node shrunk and changed
    long matchedContainerId; //holding the matched key's corresponding container index id
    Node originalMatchedParentNode; //holding the matched key's leaf node's original old parent node
    boolean needToVerifyReplacing = false; //indicate whether the shrinking node's parent
    // node has replaced its corresponding child node

    Toolkit(Node freshMatchedParentNode, long matchedContainerId, Node originalMatchedParentNode) {
      this.freshMatchedParentNode = freshMatchedParentNode;
      this.matchedContainerId = matchedContainerId;
      this.originalMatchedParentNode = originalMatchedParentNode;
    }
  }

  /**
   * @return true if the leaf's key bytes equal {@code key} on the range
   *     [dep, LEAF_NODE_KEY_LENGTH_IN_BYTES)
   */
  private boolean leafMatch(LeafNode leafNode, byte[] key, int dep) {
    byte[] leafNodeKeyBytes = leafNode.getKeyBytes();
    int mismatchIndex = ArraysShim
        .mismatch(leafNodeKeyBytes, dep, LeafNode.LEAF_NODE_KEY_LENGTH_IN_BYTES,
            key, dep, LeafNode.LEAF_NODE_KEY_LENGTH_IN_BYTES);
    return mismatchIndex == -1;
  }

  /**
   * Inserts the key below {@code node} and returns the node that must take {@code node}'s place
   * in its parent (the same instance when no replacement is needed).
   *
   * @param node the subtree root, null for an empty subtree
   * @param key the high 48 bit key
   * @param depth index of the first key byte not yet consumed by the ancestors of {@code node}
   * @param containerIdx the container index to store with the key
   */
  private Node insert(Node node, byte[] key, int depth, long containerIdx) {
    if (node == null) {
      return new LeafNode(key, containerIdx);
    }
    if (node.nodeType == NodeType.LEAF_NODE) {
      LeafNode leafNode = (LeafNode) node;
      byte[] leafKey = leafNode.getKeyBytes();
      int commonPrefix =
          commonPrefixLength(leafKey, depth, leafKey.length, key, depth, key.length);
      if (commonPrefix < 0) {
        // Identical remaining bytes: the key is already stored. Without this guard the negative
        // length would be used to size and copy the new node's prefix and fail obscurely.
        throw new IllegalArgumentException("key already exists in the ART");
      }
      //The leaf node maybe was shrunk from some other node type before and
      // contained an old prefixLength,so we reset it to 0 here.
      leafNode.prefixLength = 0;
      leafNode.prefix = EMPTY_BYTES;
      Node4 node4 = new Node4(commonPrefix);
      //copy common prefix
      node4.prefixLength = (byte) commonPrefix;
      System.arraycopy(key, depth, node4.prefix, 0, commonPrefix);
      //generate two leaf nodes as the children of the fresh node4
      Node4.insert(node4, leafNode, leafKey[depth + commonPrefix]);
      LeafNode anotherLeaf = new LeafNode(key, containerIdx);
      Node4.insert(node4, anotherLeaf, key[depth + commonPrefix]);
      //replace the current node with this internal node4
      return node4;
    }
    //to a inner node case
    if (node.prefixLength > 0) {
      //find the mismatch position
      int mismatchPos = ArraysShim.mismatch(node.prefix, 0, node.prefixLength,
          key, depth, key.length);
      if (mismatchPos != node.prefixLength) {
        Node4 node4 = new Node4(mismatchPos);
        //copy prefix
        node4.prefixLength = (byte) mismatchPos;
        System.arraycopy(node.prefix, 0, node4.prefix, 0, mismatchPos);
        //split the current internal node, spawn a fresh node4 and let the
        //current internal node as its children.
        Node4.insert(node4, node, node.prefix[mismatchPos]);
        int nodeOriginalPrefixLength = node.prefixLength;
        // the byte at mismatchPos became the child key in node4, so it leaves the prefix too
        node.prefixLength = (byte) (nodeOriginalPrefixLength - mismatchPos - 1);
        //move the remained common prefix of the initial internal node
        if (node.prefixLength > 0) {
          System.arraycopy(node.prefix, mismatchPos + 1, node.prefix, 0, node.prefixLength);
        } else {
          //TODO:to reduce the 0 prefix memory space,we could mark the prefix as null
          node.prefix = EMPTY_BYTES;
        }
        LeafNode leafNode = new LeafNode(key, containerIdx);
        Node4.insert(node4, leafNode, key[mismatchPos + depth]);
        return node4;
      }
      depth += node.prefixLength;
    }
    int pos = node.getChildPos(key[depth]);
    if (pos != Node.ILLEGAL_IDX) {
      //insert the key as current internal node's children's child node.
      Node child = node.getChild(pos);
      Node freshOne = insert(child, key, depth + 1, containerIdx);
      if (freshOne != child) {
        node.replaceNode(pos, freshOne);
      }
      return node;
    }
    //insert the key as a child leaf node of the current internal node
    LeafNode leafNode = new LeafNode(key, containerIdx);
    return Node.insertLeaf(node, leafNode, key[depth]);
  }

  /**
   * Finds the common prefix length of two array ranges.
   *
   * @return the mismatch index reported by {@code ArraysShim.mismatch}; when the two ranges have
   *     different lengths and that index is not below the shorter length, the shorter length.
   *     For equal-length ranges the raw mismatch result is returned unchanged (callers in this
   *     class treat -1 as "ranges are identical").
   */
  static int commonPrefixLength(byte[] key1, int aFromIndex, int aToIndex,
      byte[] key2, int bFromIndex, int bToIndex) {
    int aLength = aToIndex - aFromIndex;
    int bLength = bToIndex - bFromIndex;
    int minLength = Math.min(aLength, bLength);
    int mismatchIndex = ArraysShim.mismatch(key1, aFromIndex, aToIndex, key2, bFromIndex, bToIndex);

    if (aLength != bLength && mismatchIndex >= minLength) {
      return minLength;
    }
    return mismatchIndex;
  }

  /**
   * @return the root node, or null if the tree is empty
   */
  public Node getRoot() {
    return root;
  }


  /**
   * Descends from the root always taking the smallest (or, if {@code reverse}, the largest)
   * child position, for at most {@code AbstractShuttle.MAX_DEPTH} levels.
   *
   * @return the leaf reached, or null if the tree is empty
   */
  private LeafNode getExtremeLeaf(boolean reverse) {
    Node current = getRoot();
    if (current == null) {
      // empty tree: there is no extreme leaf to return
      return null;
    }
    for (int depth = 0; depth < AbstractShuttle.MAX_DEPTH; depth++) {
      if (current.nodeType == NodeType.LEAF_NODE) {
        break;
      }

      int childIndex = reverse ? current.getMaxPos() : current.getMinPos();
      current = current.getChild(childIndex);
    }
    return (LeafNode) current;
  }

  /**
   * @return the leaf reached by always following the minimum child position, or null if the
   *     tree is empty
   */
  public LeafNode first() {
    return getExtremeLeaf(false);
  }

  /**
   * @return the leaf reached by always following the maximum child position, or null if the
   *     tree is empty
   */
  public LeafNode last() {
    return getExtremeLeaf(true);
  }

  /**
   * Writes the key count (byte-reversed, so it lands little-endian on a {@link DataOutput})
   * followed by the nodes in pre-order.
   *
   * <p>NOTE: the root is dereferenced unconditionally, so this requires a non-empty tree.
   *
   * @param dataOutput the destination
   * @throws IOException if writing fails
   */
  public void serializeArt(DataOutput dataOutput) throws IOException {
    dataOutput.writeLong(Long.reverseBytes(keySize));
    serialize(root, dataOutput);
  }

  /**
   * Replaces this tree's content with the one read from {@code dataInput}; the inverse of
   * {@link #serializeArt(DataOutput)}.
   *
   * @param dataInput the source
   * @throws IOException if reading fails
   */
  public void deserializeArt(DataInput dataInput) throws IOException {
    keySize = Long.reverseBytes(dataInput.readLong());
    root = deserialize(dataInput);
  }

  /**
   * Writes the key count followed by the nodes in pre-order into the buffer. The key count is
   * written in the buffer's current byte order.
   * NOTE(review): presumably callers set LITTLE_ENDIAN to match the stream format - confirm.
   *
   * <p>NOTE: the root is dereferenced unconditionally, so this requires a non-empty tree.
   *
   * @param byteBuffer the destination
   * @throws IOException declared by the node serializers
   */
  public void serializeArt(ByteBuffer byteBuffer) throws IOException {
    byteBuffer.putLong(keySize);
    serialize(root, byteBuffer);
  }

  /**
   * Replaces this tree's content with the one read from {@code byteBuffer}; the inverse of
   * {@link #serializeArt(ByteBuffer)}.
   *
   * @param byteBuffer the source
   * @throws IOException declared by the node deserializers
   */
  public void deserializeArt(ByteBuffer byteBuffer) throws IOException {
    keySize = byteBuffer.getLong();
    root = deserialize(byteBuffer);
  }

  /**
   * @param reverse passed through to the iterator to select the traversal direction
   * @param containers input containers
   * @return a leaf node iterator over this tree
   */
  public LeafNodeIterator leafNodeIterator(boolean reverse, Containers containers) {
    return new LeafNodeIterator(this, reverse, containers);
  }

  /**
   * @param bound passed through to the iterator as its starting bound
   * @param reverse passed through to the iterator to select the traversal direction
   * @param containers input containers
   * @return a leaf node iterator over this tree, positioned according to {@code bound}
   */
  public LeafNodeIterator leafNodeIteratorFrom(long bound, boolean reverse, Containers containers) {
    return new LeafNodeIterator(this, reverse, containers, bound);
  }

  /**
   * Pre-order serialization: the node itself first, then every non-null child in ascending
   * position order.
   */
  private void serialize(Node node, DataOutput dataOutput) throws IOException {
    node.serialize(dataOutput);
    if (node.nodeType == NodeType.LEAF_NODE) {
      return;
    }
    int nexPos = node.getNextLargerPos(Node.ILLEGAL_IDX);
    while (nexPos != Node.ILLEGAL_IDX) {
      serialize(node.getChild(nexPos), dataOutput);
      nexPos = node.getNextLargerPos(nexPos);
    }
  }

  /**
   * Pre-order serialization: the node itself first, then every non-null child in ascending
   * position order.
   */
  private void serialize(Node node, ByteBuffer byteBuffer) throws IOException {
    node.serialize(byteBuffer);
    if (node.nodeType == NodeType.LEAF_NODE) {
      return;
    }
    int nexPos = node.getNextLargerPos(Node.ILLEGAL_IDX);
    while (nexPos != Node.ILLEGAL_IDX) {
      serialize(node.getChild(nexPos), byteBuffer);
      nexPos = node.getNextLargerPos(nexPos);
    }
  }

  /**
   * Reads one node and, for an internal node, recursively its {@code count} children in the
   * order they were written.
   *
   * @return the node read, or null if {@code Node.deserialize} produced none
   */
  private Node deserialize(DataInput dataInput) throws IOException {
    Node oneNode = Node.deserialize(dataInput);
    if (oneNode == null || oneNode.nodeType == NodeType.LEAF_NODE) {
      return oneNode;
    }
    //internal node: all the not null child nodes follow
    int count = oneNode.count;
    Node[] children = new Node[count];
    for (int i = 0; i < count; i++) {
      children[i] = deserialize(dataInput);
    }
    oneNode.replaceChildren(children);
    return oneNode;
  }

  /**
   * Reads one node and, for an internal node, recursively its {@code count} children in the
   * order they were written.
   *
   * @return the node read, or null if {@code Node.deserialize} produced none
   */
  private Node deserialize(ByteBuffer byteBuffer) throws IOException {
    Node oneNode = Node.deserialize(byteBuffer);
    if (oneNode == null || oneNode.nodeType == NodeType.LEAF_NODE) {
      return oneNode;
    }
    //internal node: all the not null child nodes follow
    int count = oneNode.count;
    Node[] children = new Node[count];
    for (int i = 0; i < count; i++) {
      children[i] = deserialize(byteBuffer);
    }
    oneNode.replaceChildren(children);
    return oneNode;
  }

  /**
   * @return the serialized size of all nodes plus 8 bytes for the key count. Like the
   *     serializers, this dereferences the root and therefore requires a non-empty tree.
   */
  public long serializeSizeInBytes() {
    return serializeSizeInBytes(this.root) + 8;
  }

  /**
   * @return the number of keys currently stored
   */
  public long getKeySize() {
    return keySize;
  }

  /**
   * @return the serialized size of {@code node} plus, for an internal node, of all its non-null
   *     descendants
   */
  private long serializeSizeInBytes(Node node) {
    long totalSize = node.serializeSizeInBytes();
    if (node.nodeType == NodeType.LEAF_NODE) {
      return totalSize;
    }
    int nexPos = node.getNextLargerPos(Node.ILLEGAL_IDX);
    while (nexPos != Node.ILLEGAL_IDX) {
      totalSize += serializeSizeInBytes(node.getChild(nexPos));
      nexPos = node.getNextLargerPos(nexPos);
    }
    return totalSize;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy