/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MID_KEY_METADATA_SIZE;
import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Does not perform any kind of encoding/decoding.
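 * <p>
 * A minimal usage sketch; {@code chunk} and {@code out} are hypothetical stand-ins for a
 * populated {@link BlockIndexChunk} and a {@link java.io.DataOutput}:
 *
 * <pre>
 * HFileIndexBlockEncoder encoder = NoOpIndexBlockEncoder.INSTANCE;
 * encoder.encode(chunk, false, out); // writes a plain non-root index block
 * encoder.encode(chunk, true, out);  // writes a plain root index block
 * </pre>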
 */
@InterfaceAudience.Private
public class NoOpIndexBlockEncoder implements HFileIndexBlockEncoder {

  public static final NoOpIndexBlockEncoder INSTANCE = new NoOpIndexBlockEncoder();

  /** Cannot be instantiated. Use {@link #INSTANCE} instead. */
  private NoOpIndexBlockEncoder() {
  }

  @Override
  public void saveMetadata(HFile.Writer writer) {
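    // No-op: this encoder stores no extra metadata in the HFile.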
  }

  @Override
  public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
    throws IOException {
    if (rootIndexBlock) {
      writeRoot(blockIndexChunk, out);
    } else {
      writeNonRoot(blockIndexChunk, out);
    }
  }

  /**
   * Writes the block index chunk in the non-root index block format. This format contains the
   * number of entries, an index of integer offsets for quick binary search on variable-length
   * records, and tuples of block offset, on-disk block size, and the first key for each entry.
   */
  private void writeNonRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    // The number of entries in the block.
    out.writeInt(blockIndexChunk.getNumEntries());

    if (
      blockIndexChunk.getSecondaryIndexOffsetMarks().size() != blockIndexChunk.getBlockKeys().size()
    ) {
      throw new IOException("Corrupted block index chunk writer: "
        + blockIndexChunk.getBlockKeys().size() + " entries but "
        + blockIndexChunk.getSecondaryIndexOffsetMarks().size() + " secondary index items");
    }

    // For each entry, write a "secondary index" of relative offsets to the
    // entries from the end of the secondary index. This works, because at
    // read time we read the number of entries and know where the secondary
    // index ends.
    for (int currentSecondaryIndex : blockIndexChunk.getSecondaryIndexOffsetMarks()) {
      out.writeInt(currentSecondaryIndex);
    }

    // We include one other element in the secondary index to calculate the
    // size of each entry more easily by subtracting secondary index elements.
    out.writeInt(blockIndexChunk.getCurTotalNonRootEntrySize());

    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      out.write(blockIndexChunk.getBlockKey(i));
    }
  }
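
  /*
   * On-disk layout produced by writeNonRoot above (all values big-endian, as written by
   * DataOutput):
   *
   *   int                 numEntries
   *   int[numEntries + 1] secondary index: each entry's offset relative to the end of the
   *                       secondary index, plus one trailing element holding the total entry size
   *   numEntries times:
   *     long   blockOffset
   *     int    onDiskDataSize
   *     byte[] firstKey (raw bytes; key lengths are recovered at read time by subtracting
   *            consecutive secondary index values and SECONDARY_INDEX_ENTRY_OVERHEAD)
   */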

  /**
   * Writes this chunk into the given output stream in the root block index format. This format is
   * similar to the {@link HFile} version 1 block index format, except that we store the on-disk
   * size of the block instead of its uncompressed size.
   * @param blockIndexChunk the block index chunk to write out
   * @param out             the data output stream to write the block index to. Typically a stream
   *                        writing into an {@link HFile} block.
   */
  private void writeRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      Bytes.writeByteArray(out, blockIndexChunk.getBlockKey(i));
    }
  }
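
  // Root format per entry, as written above: long blockOffset, int onDiskDataSize, then the key
  // via Bytes.writeByteArray (a vint length followed by the key bytes), which is exactly what
  // Bytes.readByteArray reverses in readRootIndex below.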

  @Override
  public IndexBlockEncoding getIndexBlockEncoding() {
    return IndexBlockEncoding.NONE;
  }

  @Override
  public EncodedSeeker createSeeker() {
    return new NoOpEncodedSeeker();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

  protected static class NoOpEncodedSeeker implements EncodedSeeker {

    protected long[] blockOffsets;
    protected int[] blockDataSizes;
    protected int rootCount = 0;

    // Mid-key metadata.
    protected long midLeafBlockOffset = -1;
    protected int midLeafBlockOnDiskSize = -1;
    protected int midKeyEntry = -1;

    private Cell[] blockKeys;
    private CellComparator comparator;
    protected int searchTreeLevel;

    /** Pre-computed mid-key */
    private AtomicReference<Cell> midKey = new AtomicReference<>();

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);
      }

      if (blockKeys != null) {
        heapSize += ClassSize.REFERENCE;
        // Adding array + references overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);

        // Adding blockKeys
        for (Cell key : blockKeys) {
          heapSize += ClassSize.align(key.heapSize());
        }
      }
      // Add comparator and the midkey atomicreference
      heapSize += 2 * ClassSize.REFERENCE;
      // Add rootCount and searchTreeLevel
      heapSize += 2 * Bytes.SIZEOF_INT;

      return ClassSize.align(heapSize);
    }

    @Override
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    @Override
    public Cell getRootBlockKey(int i) {
      return blockKeys[i];
    }

    @Override
    public int getRootBlockCount() {
      return rootCount;
    }

    @Override
    public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      this.comparator = comparator;
      this.searchTreeLevel = treeLevel;
      init(blk, numEntries);
    }

    private void init(HFileBlock blk, int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // After reading the root index, the trailing checksum bytes must be
      // subtracted from what remains in the stream to tell whether the
      // mid-key metadata is present.
      int checkSumBytes = blk.totalChecksumBytes();
      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
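      // Mid-key metadata is a fixed MID_KEY_METADATA_SIZE bytes: the mid leaf block's offset
      // (long), its on-disk size (int), and the mid-key's entry index within that block (int).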
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    private DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

    private void readRootIndex(DataInput in, final int numEntries) throws IOException {
      blockOffsets = new long[numEntries];
      initialize(numEntries);
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }

    private void initialize(int numEntries) {
      blockKeys = new Cell[numEntries];
    }

    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      // Create the blockKeys as Cells once when the reader is opened
      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }

    @Override
    public Cell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException {
      if (rootCount == 0) {
        throw new IOException("HFile empty");
      }

      Cell targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException(
            "Have to read the middle leaf block but " + "no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,
          midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);
        try {
          ByteBuff b = midLeafBlock.getBufferWithoutHeader();
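          // The leaf block is in the non-root index format: an entry count, a secondary index of
          // (numDataBlocks + 1) ints, then the entries. The arithmetic below mirrors
          // getNonRootIndexedKey(), specialized to the midKeyEntry-th entry of this block.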
          int numDataBlocks = b.getIntAfterPosition(0);
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (midKeyEntry + 1));
          int keyLen = b.getIntAfterPosition(Bytes.SIZEOF_INT * (midKeyEntry + 2)) - keyRelOffset
            - SECONDARY_INDEX_ENTRY_OVERHEAD;
          int keyOffset =
            Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset + SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] bytes = b.toBytes(keyOffset, keyLen);
          targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
        } finally {
          midLeafBlock.release();
        }
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

    @Override
    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
      boolean cacheBlocks, boolean pread, boolean isCompaction,
      DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader)
      throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // the next indexed key
      Cell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block = null;
      KeyValue.KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
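      // Walk down the index tree from the chosen root-level entry, reading intermediate and then
      // leaf index blocks until a data block is reached, taking at most searchTreeLevel hops.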
      while (true) {
        try {
          // Must reset to null here: otherwise, if readBlock throws, the
          // finally block below would release the previously assigned block a
          // second time. (See HBASE-22422)
          block = null;
          if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
            // Avoid reading the same block again, even with caching turned off.
            // This is crucial for compaction-type workload which might have
            // caching turned off. This is like a one-block cache inside the
            // scanner.
            block = currentBlock;
          } else {
            // Call HFile's caching block reader API. We always cache index
            // blocks, otherwise we might get terrible performance.
            boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
            BlockType expectedBlockType;
            if (lookupLevel < searchTreeLevel - 1) {
              expectedBlockType = BlockType.INTERMEDIATE_INDEX;
            } else if (lookupLevel == searchTreeLevel - 1) {
              expectedBlockType = BlockType.LEAF_INDEX;
            } else {
              // this also accounts for ENCODED_DATA
              expectedBlockType = BlockType.DATA;
            }
            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
          }

          if (block == null) {
            throw new IOException("Failed to read block at offset " + currentOffset
              + ", onDiskSize=" + currentOnDiskSize);
          }

          // Found a data block, break the loop and check our level in the tree.
          if (block.getBlockType().isData()) {
            break;
          }

          // Not a data block. This must be a leaf-level or intermediate-level
          // index block. We don't allow going deeper than searchTreeLevel.
          if (++lookupLevel > searchTreeLevel) {
            throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
              + ", searchTreeLevel=" + searchTreeLevel);
          }

          // Locate the entry corresponding to the given key in the non-root
          // (leaf or intermediate-level) index block.
          ByteBuff buffer = block.getBufferWithoutHeader();
          index = HFileBlockIndex.BlockIndexReader.locateNonRootIndexEntry(buffer, key, comparator);
          if (index == -1) {
            // The key sorts before the first key of this non-root index block,
            // which should not happen if the root-level lookup above succeeded.
            throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"
              + " first key of the non-root index block " + block);
          }

          currentOffset = buffer.getLong();
          currentOnDiskSize = buffer.getInt();

          // Only update next indexed key if there is a next indexed key in the current level
          byte[] nonRootIndexedKey = getNonRootIndexedKey(buffer, index + 1);
          if (nonRootIndexedKey != null) {
            tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);
            nextIndexedKey = tmpNextIndexKV;
          }
        } finally {
          if (block != null && !block.getBlockType().isData()) {
            // Release the block immediately if it is not the data block
            block.release();
          }
        }
      }

      if (lookupLevel != searchTreeLevel) {
        assert block.getBlockType().isData();
        // We reached a data block before descending through the expected
        // number of index levels. Release the block so its ref count is
        // decremented before we throw.
        block.release();
        throw new IOException("Reached a data block at level " + lookupLevel
          + " but the number of levels is " + searchTreeLevel);
      }

      // set the next indexed key for the current block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }

    @Override
    public int rootBlockContainingKey(Cell key) {
      // The comparator is non-null here, since this lookup runs against the
      // root-level block keys and was set in initRootIndex.
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1; see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.
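      // Example: with blockKeys = [k0, k2, k4], a lookup for k3 yields pos = -3,
      // so i = 2 and we return 1, since blockKeys[1] = k2 <= k3 < k4 = blockKeys[2].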

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=" + rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append((blockKeys[i])).append("\n  offset=").append(blockOffsets[i])
          .append(", dataSize=" + blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }

    /**
     * The indexed key at the ith position in the nonRootIndex. The position starts at 0.
     * @param nonRootIndex the buffer of the non-root index block to read from
     * @param i            the ith position
     * @return the indexed key at the ith position, or null if i is out of range
     */
    protected byte[] getNonRootIndexedKey(ByteBuff nonRootIndex, int i) {
      int numEntries = nonRootIndex.getInt(0);
      if (i < 0 || i >= numEntries) {
        return null;
      }

      // Entries start after the number of entries and the secondary index.
      // The secondary index takes numEntries + 1 ints.
      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
      // Targetkey's offset relative to the end of secondary index
      int targetKeyRelOffset = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 1));

      // The offset of the target key in the blockIndex buffer
      int targetKeyOffset = entriesOffset // Skip secondary index
        + targetKeyRelOffset // Skip all entries until mid
        + SECONDARY_INDEX_ENTRY_OVERHEAD; // Skip offset and on-disk-size

      // We subtract the two consecutive secondary index elements, which
      // gives us the size of the whole (offset, onDiskSize, key) tuple. We
      // then need to subtract the overhead of offset and onDiskSize.
      int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) - targetKeyRelOffset
        - SECONDARY_INDEX_ENTRY_OVERHEAD;

      // TODO check whether we can make BB backed Cell here? So can avoid bytes copy.
      return nonRootIndex.toBytes(targetKeyOffset, targetKeyLength);
    }
  }
}