/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;

/**
 * A Bloom filter implementation built on top of {@link ByteBloomFilter},
 * encapsulating a set of fixed-size Bloom filters written out at the time of
 * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
 * block stream, and loaded on demand at query time. This class only provides
 * reading capabilities.
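 *
 * <p>A minimal usage sketch; the {@code metaInput} and {@code hfileReader}
 * names are illustrative, and the metadata is normally read from the HFile's
 * load-on-open section:
 *
 * <pre>{@code
 * CompoundBloomFilter bf = new CompoundBloomFilter(metaInput, hfileReader);
 * // The ByteBuffer argument is ignored; chunks are loaded on demand.
 * boolean mightContain = bf.contains(rowKey, 0, rowKey.length, null);
 * }</pre>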
 */
@InterfaceAudience.Private
public class CompoundBloomFilter extends CompoundBloomFilterBase
    implements BloomFilter {

  /** Used to load chunks on demand */
  private HFile.Reader reader;

  private HFileBlockIndex.BlockIndexReader index;

  private int hashCount;
  private Hash hash;

  private long[] numQueriesPerChunk;
  private long[] numPositivesPerChunk;

  /**
   * De-serialization for compound Bloom filter metadata. Must be consistent
   * with what {@link CompoundBloomFilterWriter} does.
   *
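   * <p>The metadata layout, in read order, is: total byte size (long), hash
   * function count (int), hash type (int), total key count (long), total
   * max keys (long), number of chunks (int), the comparator class name as
   * a byte array, followed by the root-level chunk index.
   *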
   * @param meta serialized Bloom filter metadata without any magic blocks
   * @param reader the HFile reader used to load Bloom chunks on demand
   * @throws IOException if the metadata cannot be deserialized
   */
  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
      throws IOException {
    this.reader = reader;

    totalByteSize = meta.readLong();
    hashCount = meta.readInt();
    hashType = meta.readInt();
    totalKeyCount = meta.readLong();
    totalMaxKeys = meta.readLong();
    numChunks = meta.readInt();
    byte[] comparatorClassName = Bytes.readByteArray(meta);
    if (comparatorClassName.length != 0) {
      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
    } else {
      // Fallback for metadata written without a comparator class name. In 2.0
      // the RAW_COMPARATOR name is not written, so when reading back such
      // metadata we default to RAW_COMPARATOR. See HBASE-16189.
      comparator = KeyValue.RAW_COMPARATOR;
    }

    hash = Hash.getInstance(hashType);
    if (hash == null) {
      throw new IllegalArgumentException("Invalid hash type: " + hashType);
    }

    index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
    index.readRootIndex(meta, numChunks);
  }

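  /**
   * Checks whether the given key may be present in this file. Locates the
   * Bloom chunk that could contain the key via the root block index, loads
   * that chunk through the reader (using the block cache and a positional
   * read), and tests membership against it. The {@code bloom} argument is
   * unused because chunks are loaded on demand.
   */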
  @Override
  public boolean contains(byte[] key, int keyOffset, int keyLength,
      ByteBuffer bloom) {
    // Store the result in a local variable so that, when per-chunk testing
    // statistics are enabled, we can update them before returning.
    boolean result;

    int block = index.rootBlockContainingKey(key, keyOffset,
        keyLength);
    if (block < 0) {
      result = false; // This key is not in the file.
    } else {
      HFileBlock bloomBlock;
      try {
        // We cache the block and use a positional read.
        bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
            index.getRootBlockDataSize(block), true, true, false, true,
            BlockType.BLOOM_CHUNK, null);
      } catch (IOException ex) {
        // The Bloom filter chunk could not be loaded; surface the failure.
        throw new IllegalArgumentException(
            "Failed to load Bloom block for key "
                + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
      }

      ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
      result = ByteBloomFilter.contains(key, keyOffset, keyLength,
          bloomBuf, bloomBlock.headerSize(),
          bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
    }

    if (numQueriesPerChunk != null && block >= 0) {
      // Update statistics. Only used in unit tests.
      ++numQueriesPerChunk[block];
      if (result) {
        ++numPositivesPerChunk[block];
      }
    }

    return result;
  }

  @Override
  public boolean supportsAutoLoading() {
    return true;
  }

  public int getNumChunks() {
    return numChunks;
  }

  @Override
  public KVComparator getComparator() {
    return comparator;
  }

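  /**
   * Enables per-chunk query and positive counters; only used in unit tests.
   * A minimal sketch of the flow, with illustrative {@code bf} and
   * {@code rowKey} variables:
   * <pre>{@code
   * bf.enableTestingStats();
   * bf.contains(rowKey, 0, rowKey.length, null);
   * System.out.println(bf.formatTestingStats());
   * // e.g. "chunk #0: queries=1, positives=0, positiveRatio=0.0;"
   * }</pre>
   */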
  public void enableTestingStats() {
    numQueriesPerChunk = new long[numChunks];
    numPositivesPerChunk = new long[numChunks];
  }

  public String formatTestingStats() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < numChunks; ++i) {
      sb.append("chunk #");
      sb.append(i);
      sb.append(": queries=");
      sb.append(numQueriesPerChunk[i]);
      sb.append(", positives=");
      sb.append(numPositivesPerChunk[i]);
      sb.append(", positiveRatio=");
      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
      sb.append(";\n");
    }
    return sb.toString();
  }

  public long getNumQueriesForTesting(int chunk) {
    return numQueriesPerChunk[chunk];
  }

  public long getNumPositivesForTesting(int chunk) {
    return numPositivesPerChunk[chunk];
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(ByteBloomFilter.formatStats(this));
    sb.append(ByteBloomFilter.STATS_RECORD_SEP).append("Number of chunks: ")
        .append(numChunks);
    sb.append(ByteBloomFilter.STATS_RECORD_SEP).append("Comparator: ")
        .append(comparator.getClass().getSimpleName());
    return sb.toString();
  }

}