org.apache.hudi.org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import java.io.DataInput;
import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Hash;

/**
 * A Bloom filter implementation built on top of
 * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating
 * a set of fixed-size Bloom filters written out at the time of
 * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
 * block stream, and loaded on demand at query time. Each chunk covers a
 * contiguous range of keys, and a root-level block index maps a query key
 * to the single chunk that may contain it. This class only provides reading
 * capabilities.
 */
@InterfaceAudience.Private
public class CompoundBloomFilter extends CompoundBloomFilterBase
    implements BloomFilter {

  /** Used to load chunks on demand */
  private HFile.Reader reader;

  private HFileBlockIndex.BlockIndexReader index;

  /** Number of hash functions and the hash implementation shared by all chunks */
  private int hashCount;
  private Hash hash;

  /** Per-chunk statistics, allocated only when {@link #enableTestingStats()} is called */
  private long[] numQueriesPerChunk;
  private long[] numPositivesPerChunk;

  /**
   * De-serialization for compound Bloom filter metadata. Must be consistent
   * with what {@link CompoundBloomFilterWriter} does.
   *
   * @param meta serialized Bloom filter metadata without any magic blocks
   * @param reader the {@link HFile} reader used to load Bloom chunks on demand
   * @throws IOException if the metadata cannot be read
   */
  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
      throws IOException {
    this.reader = reader;

    // Fixed-width metadata fields; the order must match what
    // CompoundBloomFilterWriter writes.
    totalByteSize = meta.readLong();
    hashCount = meta.readInt();
    hashType = meta.readInt();
    totalKeyCount = meta.readLong();
    totalMaxKeys = meta.readLong();
    numChunks = meta.readInt();
    byte[] comparatorClassName = Bytes.readByteArray(meta);
    // The writer will have written a zero vint length for
    // Bytes.BYTES_RAWCOMPARATOR; in that case the comparator stays null.
    if (comparatorClassName.length != 0) {
      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
    }

    hash = Hash.getInstance(hashType);
    if (hash == null) {
      throw new IllegalArgumentException("Invalid hash type: " + hashType);
    }
    // A null comparator means ROW Bloom keys, which are plain byte arrays;
    // otherwise keys are cells and need the comparator-aware index reader.
    // The argument 1 requests a single-level (root-only) index.
    if (comparator == null) {
      index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    } else {
      index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
    }
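    // Read the root-level index: one (offset, on-disk size, first key) entry
    // per Bloom chunk.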
    index.readRootIndex(meta, numChunks);
  }

  @Override
  public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) {
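    // Locate the chunk whose key range could cover this key; a negative
    // result means the key sorts before every chunk. The ByteBuff parameter
    // is unused because this filter auto-loads its chunks (see
    // supportsAutoLoading()).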
    int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
    if (block < 0) {
      return false; // This key is not in the file.
    }
    boolean result;
    HFileBlock bloomBlock = getBloomBlock(block);
    try {
      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
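      // Test the hashCount bit positions for this key within the chunk's bit
      // array, skipping past the on-disk block header.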
      result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf,
          bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
    } finally {
      // After use, release the block so its byte buffers can be deallocated.
      bloomBlock.release();
    }
    if (numPositivesPerChunk != null && result) {
      // Update statistics. Only used in unit tests.
      ++numPositivesPerChunk[block];
    }
    return result;
  }

  private HFileBlock getBloomBlock(int block) {
    HFileBlock bloomBlock;
    try {
      // We cache the block and use a positional read.
      bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
          index.getRootBlockDataSize(block), true, true, false, true, BlockType.BLOOM_CHUNK, null);
    } catch (IOException ex) {
      // The Bloom filter is broken; propagate the failure so callers can
      // disable it.
      throw new IllegalArgumentException("Failed to load Bloom block", ex);
    }
    }

    if (numQueriesPerChunk != null) {
      // Update statistics. Only used in unit tests.
      ++numQueriesPerChunk[block];
    }
    return bloomBlock;
  }

  @Override
  public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) {
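    // The BloomType selects what is hashed: ROW uses only the row key,
    // ROWCOL combines the row with the column qualifier.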
    int block = index.rootBlockContainingKey(keyCell);
    if (block < 0) {
      return false; // This key is not in the file.
    }
    boolean result;
    HFileBlock bloomBlock = getBloomBlock(block);
    try {
      ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
      result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(),
        bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type);
    } finally {
      // After use, release the block so its byte buffers can be deallocated.
      bloomBlock.release();
    }
    if (numPositivesPerChunk != null && result) {
      // Update statistics. Only used in unit tests.
      ++numPositivesPerChunk[block];
    }
    return result;
  }

  @Override
  public boolean supportsAutoLoading() {
    // Chunks are loaded on demand through the reader, so callers never need
    // to supply the Bloom bits themselves.
    return true;
  }

  public int getNumChunks() {
    return numChunks;
  }

  public void enableTestingStats() {
    numQueriesPerChunk = new long[numChunks];
    numPositivesPerChunk = new long[numChunks];
  }

  public String formatTestingStats() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < numChunks; ++i) {
      sb.append("chunk #");
      sb.append(i);
      sb.append(": queries=");
      sb.append(numQueriesPerChunk[i]);
      sb.append(", positives=");
      sb.append(numPositivesPerChunk[i]);
      sb.append(", positiveRatio=");
      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
      sb.append(";\n");
    }
    return sb.toString();
  }

  public long getNumQueriesForTesting(int chunk) {
    return numQueriesPerChunk[chunk];
  }

  public long getNumPositivesForTesting(int chunk) {
    return numPositivesPerChunk[chunk];
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(BloomFilterUtil.formatStats(this));
    sb.append(BloomFilterUtil.STATS_RECORD_SEP);
    sb.append("Number of chunks: ").append(numChunks);
    sb.append(BloomFilterUtil.STATS_RECORD_SEP);
    sb.append("Comparator: ").append(comparator != null
        ? comparator.getClass().getSimpleName()
        : Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName());
    return sb.toString();
  }

}
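
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only; not part of the original source). It
// assumes an already-open HFile.Reader named "hfileReader" for a file written
// with a general (ROW) Bloom filter; exception handling is omitted. In HBase
// this wiring is normally done by BloomFilterFactory.createFromMeta().
//
//   DataInput bloomMeta = hfileReader.getGeneralBloomFilterMetadata();
//   if (bloomMeta != null) {
//     BloomFilter bloom = new CompoundBloomFilter(bloomMeta, hfileReader);
//     byte[] rowKey = Bytes.toBytes("row-0001");
//     // Pass null for the ByteBuff: the filter auto-loads its chunks.
//     boolean mayContain = bloom.contains(rowKey, 0, rowKey.length, null);
//     // false => the row is definitely absent; true => possibly present.
//   }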



