// org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter — from the hbase-server artifact (server functionality for HBase)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilterChunk;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
/**
* Adds methods required for writing a compound Bloom filter to the data
* section of an {@link org.apache.hadoop.hbase.io.hfile.HFile} to the
* {@link CompoundBloomFilter} class.
*/
@InterfaceAudience.Private
public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
implements BloomFilterWriter, InlineBlockWriter {
private static final Logger LOG =
LoggerFactory.getLogger(CompoundBloomFilterWriter.class);
/** The current chunk being written to */
private BloomFilterChunk chunk;
/** Previous chunk, so that we can create another similar chunk */
private BloomFilterChunk prevChunk;
/** Maximum fold factor */
private int maxFold;
/** The size of individual Bloom filter chunks to create */
private int chunkByteSize;
/** The prev Cell that was processed */
private Cell prevCell;
/** A Bloom filter chunk enqueued for writing */
private static class ReadyChunk {
int chunkId;
byte[] firstKey;
BloomFilterChunk chunk;
}
private Queue readyChunks = new LinkedList<>();
/** The first key in the current Bloom filter chunk. */
private byte[] firstKeyInChunk = null;
private HFileBlockIndex.BlockIndexWriter bloomBlockIndexWriter =
new HFileBlockIndex.BlockIndexWriter();
/** Whether to cache-on-write compound Bloom filter chunks */
private boolean cacheOnWrite;
private BloomType bloomType;
/**
* @param chunkByteSizeHint
* each chunk's size in bytes. The real chunk size might be different
* as required by the fold factor.
* @param errorRate
* target false positive rate
* @param hashType
* hash function type to use
* @param maxFold
* maximum degree of folding allowed
* @param bloomType
* the bloom type
*/
public CompoundBloomFilterWriter(int chunkByteSizeHint, float errorRate,
int hashType, int maxFold, boolean cacheOnWrite,
CellComparator comparator, BloomType bloomType) {
chunkByteSize = BloomFilterUtil.computeFoldableByteSize(
chunkByteSizeHint * 8L, maxFold);
this.errorRate = errorRate;
this.hashType = hashType;
this.maxFold = maxFold;
this.cacheOnWrite = cacheOnWrite;
this.comparator = comparator;
this.bloomType = bloomType;
}
@Override
public boolean shouldWriteBlock(boolean closing) {
enqueueReadyChunk(closing);
return !readyChunks.isEmpty();
}
/**
* Enqueue the current chunk if it is ready to be written out.
*
* @param closing true if we are closing the file, so we do not expect new
* keys to show up
*/
private void enqueueReadyChunk(boolean closing) {
if (chunk == null ||
(chunk.getKeyCount() < chunk.getMaxKeys() && !closing)) {
return;
}
if (firstKeyInChunk == null) {
throw new NullPointerException("Trying to enqueue a chunk, " +
"but first key is null: closing=" + closing + ", keyCount=" +
chunk.getKeyCount() + ", maxKeys=" + chunk.getMaxKeys());
}
ReadyChunk readyChunk = new ReadyChunk();
readyChunk.chunkId = numChunks - 1;
readyChunk.chunk = chunk;
readyChunk.firstKey = firstKeyInChunk;
readyChunks.add(readyChunk);
long prevMaxKeys = chunk.getMaxKeys();
long prevByteSize = chunk.getByteSize();
chunk.compactBloom();
if (LOG.isTraceEnabled() && prevByteSize != chunk.getByteSize()) {
LOG.trace("Compacted Bloom chunk #" + readyChunk.chunkId + " from ["
+ prevMaxKeys + " max keys, " + prevByteSize + " bytes] to ["
+ chunk.getMaxKeys() + " max keys, " + chunk.getByteSize()
+ " bytes]");
}
totalMaxKeys += chunk.getMaxKeys();
totalByteSize += chunk.getByteSize();
firstKeyInChunk = null;
prevChunk = chunk;
chunk = null;
}
@Override
public void append(Cell cell) throws IOException {
if (cell == null)
throw new NullPointerException();
enqueueReadyChunk(false);
if (chunk == null) {
if (firstKeyInChunk != null) {
throw new IllegalStateException("First key in chunk already set: "
+ Bytes.toStringBinary(firstKeyInChunk));
}
// This will be done only once per chunk
if (bloomType == BloomType.ROW) {
firstKeyInChunk = CellUtil.copyRow(cell);
} else {
firstKeyInChunk =
PrivateCellUtil
.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(cell));
}
allocateNewChunk();
}
chunk.add(cell);
this.prevCell = cell;
++totalKeyCount;
}
@Override
public void beforeShipped() throws IOException {
if (this.prevCell != null) {
this.prevCell = KeyValueUtil.toNewKeyCell(this.prevCell);
}
}
@Override
public Cell getPrevCell() {
return this.prevCell;
}
private void allocateNewChunk() {
if (prevChunk == null) {
// First chunk
chunk = BloomFilterUtil.createBySize(chunkByteSize, errorRate,
hashType, maxFold, bloomType);
} else {
// Use the same parameters as the last chunk, but a new array and
// a zero key count.
chunk = prevChunk.createAnother();
}
if (chunk.getKeyCount() != 0) {
throw new IllegalStateException("keyCount=" + chunk.getKeyCount()
+ " > 0");
}
chunk.allocBloom();
++numChunks;
}
@Override
public void writeInlineBlock(DataOutput out) throws IOException {
// We don't remove the chunk from the queue here, because we might need it
// again for cache-on-write.
ReadyChunk readyChunk = readyChunks.peek();
BloomFilterChunk readyChunkBloom = readyChunk.chunk;
readyChunkBloom.writeBloom(out);
}
@Override
public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
ReadyChunk readyChunk = readyChunks.remove();
bloomBlockIndexWriter.addEntry(readyChunk.firstKey, offset, onDiskSize);
}
@Override
public BlockType getInlineBlockType() {
return BlockType.BLOOM_CHUNK;
}
private class MetaWriter implements Writable {
protected MetaWriter() {}
@Override
public void readFields(DataInput in) throws IOException {
throw new IOException("Cant read with this class.");
}
/**
* This is modeled after {@link CompoundBloomFilterWriter.MetaWriter} for simplicity,
* although the two metadata formats do not have to be consistent. This
* does have to be consistent with how {@link
* CompoundBloomFilter#CompoundBloomFilter(DataInput,
* org.apache.hadoop.hbase.io.hfile.HFile.Reader)} reads fields.
*/
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(VERSION);
out.writeLong(getByteSize());
out.writeInt(prevChunk.getHashCount());
out.writeInt(prevChunk.getHashType());
out.writeLong(getKeyCount());
out.writeLong(getMaxKeys());
// Fields that don't have equivalents in ByteBloomFilter.
out.writeInt(numChunks);
if (comparator != null) {
Bytes.writeByteArray(out, Bytes.toBytes(comparator.getClass().getName()));
} else {
// Internally writes a 0 vint if the byte[] is null
Bytes.writeByteArray(out, null);
}
// Write a single-level index without compression or block header.
bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");
}
}
@Override
public void compactBloom() {
}
@Override
public Writable getMetaWriter() {
return new MetaWriter();
}
@Override
public Writable getDataWriter() {
return null;
}
@Override
public boolean getCacheOnWrite() {
return cacheOnWrite;
}
}