org.apache.hadoop.hbase.regionserver.wal.WALCellCodec Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver.wal;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.BaseDecoder;
import org.apache.hadoop.hbase.codec.BaseEncoder;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
import org.apache.hadoop.hbase.io.util.Dictionary;
import org.apache.hadoop.hbase.io.util.StreamUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.io.IOUtils;
import com.google.protobuf.ByteString;
/**
* Compression in this class is lifted off Compressor/KeyValueCompression.
* This is a pure coincidence... they are independent and don't have to be compatible.
*
* This codec is used at server side for writing cells to WAL as well as for sending edits
* as part of the distributed splitting process.
*/
@InterfaceAudience.LimitedPrivate({HBaseInterfaceAudience.COPROC, HBaseInterfaceAudience.PHOENIX, HBaseInterfaceAudience.CONFIG})
public class WALCellCodec implements Codec {
/** Configuration key for the class to use when encoding cells in the WAL */
public static final String WAL_CELL_CODEC_CLASS_KEY = "hbase.regionserver.wal.codec";
protected final CompressionContext compression;
protected final ByteStringUncompressor statelessUncompressor = new ByteStringUncompressor() {
@Override
public byte[] uncompress(ByteString data, Dictionary dict) throws IOException {
return WALCellCodec.uncompressByteString(data, dict);
}
};
/**
* All subclasses must implement a no argument constructor
*/
public WALCellCodec() {
this.compression = null;
}
/**
* Default constructor - all subclasses must implement a constructor with this signature
* if they are to be dynamically loaded from the {@link Configuration}.
* @param conf configuration to configure this
* @param compression compression the codec should support, can be null to indicate no
* compression
*/
public WALCellCodec(Configuration conf, CompressionContext compression) {
this.compression = compression;
}
public static Class> getWALCellCodecClass(Configuration conf) {
return conf.getClass(WAL_CELL_CODEC_CLASS_KEY, WALCellCodec.class);
}
/**
* Create and setup a {@link WALCellCodec} from the {@code cellCodecClsName} and
* CompressionContext, if {@code cellCodecClsName} is specified.
* Otherwise Cell Codec classname is read from {@link Configuration}.
* Fully prepares the codec for use.
* @param conf {@link Configuration} to read for the user-specified codec. If none is specified,
* uses a {@link WALCellCodec}.
* @param compression compression the codec should use
* @return a {@link WALCellCodec} ready for use.
* @throws UnsupportedOperationException if the codec cannot be instantiated
*/
public static WALCellCodec create(Configuration conf, String cellCodecClsName,
CompressionContext compression) throws UnsupportedOperationException {
if (cellCodecClsName == null) {
cellCodecClsName = getWALCellCodecClass(conf).getName();
}
return ReflectionUtils.instantiateWithCustomCtor(cellCodecClsName, new Class[]
{ Configuration.class, CompressionContext.class }, new Object[] { conf, compression });
}
/**
* Create and setup a {@link WALCellCodec} from the
* CompressionContext.
* Cell Codec classname is read from {@link Configuration}.
* Fully prepares the codec for use.
* @param conf {@link Configuration} to read for the user-specified codec. If none is specified,
* uses a {@link WALCellCodec}.
* @param compression compression the codec should use
* @return a {@link WALCellCodec} ready for use.
* @throws UnsupportedOperationException if the codec cannot be instantiated
*/
public static WALCellCodec create(Configuration conf,
CompressionContext compression) throws UnsupportedOperationException {
String cellCodecClsName = getWALCellCodecClass(conf).getName();
return ReflectionUtils.instantiateWithCustomCtor(cellCodecClsName, new Class[]
{ Configuration.class, CompressionContext.class }, new Object[] { conf, compression });
}
public interface ByteStringCompressor {
ByteString compress(byte[] data, Dictionary dict) throws IOException;
}
public interface ByteStringUncompressor {
byte[] uncompress(ByteString data, Dictionary dict) throws IOException;
}
// TODO: it sucks that compression context is in WAL.Entry. It'd be nice if it was here.
// Dictionary could be gotten by enum; initially, based on enum, context would create
// an array of dictionaries.
static class BaosAndCompressor extends ByteArrayOutputStream implements ByteStringCompressor {
public ByteString toByteString() {
return ByteString.copyFrom(this.buf, 0, this.count);
}
@Override
public ByteString compress(byte[] data, Dictionary dict) throws IOException {
writeCompressed(data, dict);
ByteString result = ByteString.copyFrom(this.buf, 0, this.count);
reset(); // Only resets the count - we reuse the byte array.
return result;
}
private void writeCompressed(byte[] data, Dictionary dict) throws IOException {
assert dict != null;
short dictIdx = dict.findEntry(data, 0, data.length);
if (dictIdx == Dictionary.NOT_IN_DICTIONARY) {
write(Dictionary.NOT_IN_DICTIONARY);
StreamUtils.writeRawVInt32(this, data.length);
write(data, 0, data.length);
} else {
StreamUtils.writeShort(this, dictIdx);
}
}
}
private static byte[] uncompressByteString(ByteString bs, Dictionary dict) throws IOException {
InputStream in = bs.newInput();
byte status = (byte)in.read();
if (status == Dictionary.NOT_IN_DICTIONARY) {
byte[] arr = new byte[StreamUtils.readRawVarint32(in)];
int bytesRead = in.read(arr);
if (bytesRead != arr.length) {
throw new IOException("Cannot read; wanted " + arr.length + ", but got " + bytesRead);
}
if (dict != null) dict.addEntry(arr, 0, arr.length);
return arr;
} else {
// Status here is the higher-order byte of index of the dictionary entry.
short dictIdx = StreamUtils.toShort(status, (byte)in.read());
byte[] entry = dict.getEntry(dictIdx);
if (entry == null) {
throw new IOException("Missing dictionary entry for index " + dictIdx);
}
return entry;
}
}
static class CompressedKvEncoder extends BaseEncoder {
private final CompressionContext compression;
public CompressedKvEncoder(OutputStream out, CompressionContext compression) {
super(out);
this.compression = compression;
}
@Override
public void write(Cell cell) throws IOException {
// We first write the KeyValue infrastructure as VInts.
StreamUtils.writeRawVInt32(out, KeyValueUtil.keyLength(cell));
StreamUtils.writeRawVInt32(out, cell.getValueLength());
// To support tags
int tagsLength = cell.getTagsLength();
StreamUtils.writeRawVInt32(out, tagsLength);
// Write row, qualifier, and family; use dictionary
// compression as they're likely to have duplicates.
write(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), compression.rowDict);
write(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
compression.familyDict);
write(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
compression.qualifierDict);
// Write timestamp, type and value as uncompressed.
StreamUtils.writeLong(out, cell.getTimestamp());
out.write(cell.getTypeByte());
out.write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
if (tagsLength > 0) {
if (compression.tagCompressionContext != null) {
// Write tags using Dictionary compression
compression.tagCompressionContext.compressTags(out, cell.getTagsArray(),
cell.getTagsOffset(), tagsLength);
} else {
// Tag compression is disabled within the WAL compression. Just write the tags bytes as
// it is.
out.write(cell.getTagsArray(), cell.getTagsOffset(), tagsLength);
}
}
}
private void write(byte[] data, int offset, int length, Dictionary dict) throws IOException {
short dictIdx = Dictionary.NOT_IN_DICTIONARY;
if (dict != null) {
dictIdx = dict.findEntry(data, offset, length);
}
if (dictIdx == Dictionary.NOT_IN_DICTIONARY) {
out.write(Dictionary.NOT_IN_DICTIONARY);
StreamUtils.writeRawVInt32(out, length);
out.write(data, offset, length);
} else {
StreamUtils.writeShort(out, dictIdx);
}
}
}
static class CompressedKvDecoder extends BaseDecoder {
private final CompressionContext compression;
public CompressedKvDecoder(InputStream in, CompressionContext compression) {
super(in);
this.compression = compression;
}
@Override
protected Cell parseCell() throws IOException {
int keylength = StreamUtils.readRawVarint32(in);
int vlength = StreamUtils.readRawVarint32(in);
int tagsLength = StreamUtils.readRawVarint32(in);
int length = 0;
if(tagsLength == 0) {
length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength;
} else {
length = KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + keylength + vlength + tagsLength;
}
byte[] backingArray = new byte[length];
int pos = 0;
pos = Bytes.putInt(backingArray, pos, keylength);
pos = Bytes.putInt(backingArray, pos, vlength);
// the row
int elemLen = readIntoArray(backingArray, pos + Bytes.SIZEOF_SHORT, compression.rowDict);
checkLength(elemLen, Short.MAX_VALUE);
pos = Bytes.putShort(backingArray, pos, (short)elemLen);
pos += elemLen;
// family
elemLen = readIntoArray(backingArray, pos + Bytes.SIZEOF_BYTE, compression.familyDict);
checkLength(elemLen, Byte.MAX_VALUE);
pos = Bytes.putByte(backingArray, pos, (byte)elemLen);
pos += elemLen;
// qualifier
elemLen = readIntoArray(backingArray, pos, compression.qualifierDict);
pos += elemLen;
// timestamp, type and value
int tsTypeValLen = length - pos;
if (tagsLength > 0) {
tsTypeValLen = tsTypeValLen - tagsLength - KeyValue.TAGS_LENGTH_SIZE;
}
IOUtils.readFully(in, backingArray, pos, tsTypeValLen);
pos += tsTypeValLen;
// tags
if (tagsLength > 0) {
pos = Bytes.putAsShort(backingArray, pos, tagsLength);
if (compression.tagCompressionContext != null) {
compression.tagCompressionContext.uncompressTags(in, backingArray, pos, tagsLength);
} else {
IOUtils.readFully(in, backingArray, pos, tagsLength);
}
}
return new KeyValue(backingArray, 0, length);
}
private int readIntoArray(byte[] to, int offset, Dictionary dict) throws IOException {
byte status = (byte)in.read();
if (status == Dictionary.NOT_IN_DICTIONARY) {
// status byte indicating that data to be read is not in dictionary.
// if this isn't in the dictionary, we need to add to the dictionary.
int length = StreamUtils.readRawVarint32(in);
IOUtils.readFully(in, to, offset, length);
dict.addEntry(to, offset, length);
return length;
} else {
// the status byte also acts as the higher order byte of the dictionary entry.
short dictIdx = StreamUtils.toShort(status, (byte)in.read());
byte[] entry = dict.getEntry(dictIdx);
if (entry == null) {
throw new IOException("Missing dictionary entry for index " + dictIdx);
}
// now we write the uncompressed value.
Bytes.putBytes(to, offset, entry, 0, entry.length);
return entry.length;
}
}
private static void checkLength(int len, int max) throws IOException {
if (len < 0 || len > max) {
throw new IOException("Invalid length for compresesed portion of keyvalue: " + len);
}
}
}
public static class EnsureKvEncoder extends BaseEncoder {
public EnsureKvEncoder(OutputStream out) {
super(out);
}
@Override
public void write(Cell cell) throws IOException {
checkFlushed();
// Make sure to write tags into WAL
KeyValueUtil.oswrite(cell, this.out, true);
}
}
@Override
public Decoder getDecoder(InputStream is) {
return (compression == null)
? new KeyValueCodecWithTags.KeyValueDecoder(is) : new CompressedKvDecoder(is, compression);
}
@Override
public Encoder getEncoder(OutputStream os) {
return (compression == null)
? new EnsureKvEncoder(os) : new CompressedKvEncoder(os, compression);
}
public ByteStringCompressor getByteStringCompressor() {
// TODO: ideally this should also encapsulate compressionContext
return new BaosAndCompressor();
}
public ByteStringUncompressor getByteStringUncompressor() {
// TODO: ideally this should also encapsulate compressionContext
return this.statelessUncompressor;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy