org.apache.hadoop.io.file.tfile.BCFile Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile;
import java.org.apache.hadoop.shaded.io.Closeable;
import java.org.apache.hadoop.shaded.io.DataInput;
import java.org.apache.hadoop.shaded.io.DataInputStream;
import java.org.apache.hadoop.shaded.io.DataOutput;
import java.org.apache.hadoop.shaded.io.DataOutputStream;
import java.org.apache.hadoop.shaded.io.IOException;
import java.org.apache.hadoop.shaded.io.InputStream;
import java.org.apache.hadoop.shaded.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.BytesWritable;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.Compressor;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.Decompressor;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile.CompareUtils.Scalar;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile.CompareUtils.ScalarComparator;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile.CompareUtils.ScalarLong;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile.Compression.Algorithm;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile.Utils.Version;
import org.apache.hadoop.shaded.org.slf4j.Logger;
import org.apache.hadoop.shaded.org.slf4j.LoggerFactory;
/**
* Block Compressed file, the underlying physical storage layer for TFile.
* BCFile provides the basic block level compression for the data block and meta
* blocks. It is separated from TFile as it may be used for other
* block-compressed file implementations.
*/
final class BCFile {
// The current version of the BCFile implementation; increment it (major or
// minor) when enough changes have been made.
static final Version API_VERSION = new Version((short) 1, (short) 0);
static final Logger LOG = LoggerFactory.getLogger(BCFile.class);
/**
 * Not instantiable: BCFile only groups its static members and nested
 * classes.
 */
private BCFile() {
  // intentionally empty
}
/**
* BCFile writer, the entry point for creating a new BCFile.
*/
static public class Writer implements Closeable {
private final FSDataOutputStream out;
private final Configuration conf;
// the single meta block containing index of compressed data blocks
final DataIndex dataIndex;
// index for meta blocks
final MetaIndex metaIndex;
boolean blkInProgress = false;
private boolean metaBlkSeen = false;
private boolean closed = false;
long errorCount = 0;
// reusable buffers.
private BytesWritable fsOutputBuffer;
/**
* Call-back interface to register a block after a block is closed.
*/
private static interface BlockRegister {
/**
* Register a block that is fully closed.
*
* @param raw
* The size of block in terms of uncompressed bytes.
* @param offsetStart
* The start offset of the block.
* @param offsetEnd
* One byte after the end of the block. Compressed block size is
* offsetEnd - offsetStart.
*/
public void register(long raw, long offsetStart, long offsetEnd);
}
/**
* Intermediate class that maintain the state of a Writable Compression
* Block.
*/
private static final class WBlockState {
private final Algorithm org.apache.hadoop.shaded.com.ressAlgo;
private Compressor org.apache.hadoop.shaded.com.ressor; // !null only if using native
// Hadoop org.apache.hadoop.shaded.com.ression
private final FSDataOutputStream fsOut;
private final long posStart;
private final SimpleBufferedOutputStream fsBufferedOutput;
private OutputStream out;
/**
* @param org.apache.hadoop.shaded.com.ressionAlgo
* The org.apache.hadoop.shaded.com.ression algorithm to be used to for org.apache.hadoop.shaded.com.ression.
* @throws IOException
*/
public WBlockState(Algorithm org.apache.hadoop.shaded.com.ressionAlgo, FSDataOutputStream fsOut,
BytesWritable fsOutputBuffer, Configuration conf) throws IOException {
this.org.apache.hadoop.shaded.com.ressAlgo = org.apache.hadoop.shaded.com.ressionAlgo;
this.fsOut = fsOut;
this.posStart = fsOut.getPos();
fsOutputBuffer.setCapacity(TFile.getFSOutputBufferSize(conf));
this.fsBufferedOutput =
new SimpleBufferedOutputStream(this.fsOut, fsOutputBuffer.getBytes());
this.org.apache.hadoop.shaded.com.ressor = org.apache.hadoop.shaded.com.ressAlgo.getCompressor();
try {
this.out =
org.apache.hadoop.shaded.com.ressionAlgo.createCompressionStream(fsBufferedOutput,
org.apache.hadoop.shaded.com.ressor, 0);
} catch (IOException e) {
org.apache.hadoop.shaded.com.ressAlgo.returnCompressor(org.apache.hadoop.shaded.com.ressor);
throw e;
}
}
/**
* Get the output stream for BlockAppender's consumption.
*
* @return the output stream suitable for writing block data.
*/
OutputStream getOutputStream() {
return out;
}
/**
* Get the current position in file.
*
* @return The current byte offset in underlying file.
* @throws IOException
*/
long getCurrentPos() throws IOException {
return fsOut.getPos() + fsBufferedOutput.size();
}
long getStartPos() {
return posStart;
}
/**
* Current size of org.apache.hadoop.shaded.com.ressed data.
*
* @return
* @throws IOException
*/
long getCompressedSize() throws IOException {
long ret = getCurrentPos() - posStart;
return ret;
}
/**
* Finishing up the current block.
*/
public void finish() throws IOException {
try {
if (out != null) {
out.flush();
out = null;
}
} finally {
org.apache.hadoop.shaded.com.ressAlgo.returnCompressor(org.apache.hadoop.shaded.com.ressor);
org.apache.hadoop.shaded.com.ressor = null;
}
}
}
/**
 * Access point to stuff data into a block.
 *
 * TODO: Change DataOutputStream to something else that tracks the size as
 * long instead of int. Currently, we will wrap around if the row block size
 * is greater than 4GB.
 */
public class BlockAppender extends DataOutputStream {
  private final BlockRegister blockRegister;
  private final WBlockState wBlkState;
  @SuppressWarnings("hiding")
  private boolean closed = false;

  /**
   * Constructor
   *
   * @param register
   *          the block register, which is called when the block is closed.
   * @param wbs
   *          The writable compression block state.
   */
  BlockAppender(BlockRegister register, WBlockState wbs) {
    super(wbs.getOutputStream());
    this.blockRegister = register;
    this.wBlkState = wbs;
  }

  /**
   * Get the raw size of the block.
   *
   * @return the number of uncompressed bytes written through the
   *         BlockAppender so far, taken from {@code size()} and
   *         reinterpreted as an unsigned 32-bit value.
   * @throws IOException
   */
  public long getRawSize() throws IOException {
    // Expecting the size() of a block not exceeding 4GB. Assuming the
    // size() will wrap to negative integer if it exceeds 2GB.
    // NOTE(review): java.io.DataOutputStream documents that its counter is
    // clamped to Integer.MAX_VALUE on overflow - confirm this assumption.
    final long unsignedIntMask = 0x00000000ffffffffL;
    return size() & unsignedIntMask;
  }

  /**
   * Get the compressed size of the block in progress.
   *
   * @return the number of compressed bytes written to the underlying FS
   *         file. The size may be smaller than actual need to compress the
   *         all data written due to internal buffering inside the
   *         compressor.
   * @throws IOException
   */
  public long getCompressedSize() throws IOException {
    return wBlkState.getCompressedSize();
  }

  @Override
  public void flush() {
    // The down stream is a special kind of stream that finishes a
    // compression block upon flush. So we disable flush() here.
  }

  /**
   * Signaling the end of write to the block. The block register will be
   * called for registering the finished block. Calling this more than once
   * has no further effect.
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }
    try {
      // Counted as an error until finishing and registering both succeed.
      ++errorCount;
      wBlkState.finish();
      long rawBytes = getRawSize();
      long blockStart = wBlkState.getStartPos();
      long blockEnd = wBlkState.getCurrentPos();
      blockRegister.register(rawBytes, blockStart, blockEnd);
      --errorCount;
    } finally {
      closed = true;
      blkInProgress = false;
    }
  }
}
/**
 * Constructor. Writes the BCFile magic bytes at the start of the output.
 *
 * @param fout
 *          FS output stream; must be positioned at offset zero.
 * @param compressionName
 *          Name of the compression algorithm, which will be used for all
 *          data blocks.
 * @param conf
 *          Configuration kept for creating block states later.
 * @throws IOException
 *           if the stream is not at offset zero or the magic bytes cannot
 *           be written.
 * @see Compression#getSupportedAlgorithms
 */
public Writer(FSDataOutputStream fout, String compressionName,
    Configuration conf) throws IOException {
  if (fout.getPos() != 0) {
    throw new IOException("Output file not at zero offset.");
  }
  this.out = fout;
  this.conf = conf;
  dataIndex = new DataIndex(compressionName);
  metaIndex = new MetaIndex();
  fsOutputBuffer = new BytesWritable();
  Magic.write(fout);
}
/**
 * Close the BCFile Writer. Attempting to use the Writer after calling
 * {@code close} is not allowed and may lead to undetermined results.
 *
 * When no block has failed to close, this writes the data index as the last
 * meta block, followed by the meta index and the trailer (offset of the
 * meta index, API version, magic bytes). Repeated calls are no-ops.
 *
 * @throws IOException
 *           if writing the indexes or the trailer fails.
 * @throws IllegalStateException
 *           if a block appender is still open.
 */
@Override
public void close() throws IOException {
  if (closed) {
    return;
  }
  try {
    if (errorCount != 0) {
      // Some block appender did not close cleanly; write nothing more.
      return;
    }
    if (blkInProgress) {
      throw new IllegalStateException(
          "Close() called with active block appender.");
    }
    // add metaBCFileIndex to metaIndex as the last meta block
    BlockAppender indexAppender =
        prepareMetaBlock(DataIndex.BLOCK_NAME,
            getDefaultCompressionAlgorithm());
    try {
      dataIndex.write(indexAppender);
    } finally {
      indexAppender.close();
    }
    // Meta Index and the trailing section are written out directly.
    long metaIndexOffset = out.getPos();
    metaIndex.write(out);
    out.writeLong(metaIndexOffset);
    API_VERSION.write(out);
    Magic.write(out);
    out.flush();
  } finally {
    closed = true;
  }
}
/**
 * @return the compression algorithm recorded in the data index, which is
 *         used for data blocks and for meta blocks without an explicit
 *         algorithm.
 */
private Algorithm getDefaultCompressionAlgorithm() {
  final Algorithm fileDefault = dataIndex.getDefaultCompressionAlgorithm();
  return fileDefault;
}
/**
 * Create a Meta Block compressed with the given algorithm and mark it as
 * the block in progress.
 *
 * @param name
 *          The name of the Meta Block; must not already be registered.
 * @param compressAlgo
 *          The compression algorithm to use for this block.
 * @return The BlockAppender stream for the new block.
 * @throws IOException
 *           if the block state cannot be created.
 * @throws MetaBlockAlreadyExists
 *           If the meta block with the name already exists.
 * @throws IllegalStateException
 *           if another block is still open.
 */
private BlockAppender prepareMetaBlock(String name, Algorithm compressAlgo)
    throws IOException, MetaBlockAlreadyExists {
  if (blkInProgress == true) {
    throw new IllegalStateException(
        "Cannot create Meta Block until previous block is closed.");
  }
  if (metaIndex.getMetaByName(name) != null) {
    throw new MetaBlockAlreadyExists("name=" + name);
  }
  MetaBlockRegister mbr = new MetaBlockRegister(name, compressAlgo);
  WBlockState wbs =
      new WBlockState(compressAlgo, out, fsOutputBuffer, conf);
  BlockAppender ba = new BlockAppender(mbr, wbs);
  blkInProgress = true;
  // From here on prepareDataBlock() refuses to create data blocks.
  metaBlkSeen = true;
  return ba;
}
/**
 * Create a Meta Block and obtain an output stream for adding data into the
 * block. There can only be one BlockAppender stream active at any time.
 * Regular Blocks may not be created after the first Meta Blocks. The caller
 * must call BlockAppender.close() to conclude the block creation.
 *
 * @param name
 *          The name of the Meta Block. The name must not conflict with
 *          existing Meta Blocks.
 * @param compressionName
 *          The name of the compression algorithm to be used.
 * @return The BlockAppender stream
 * @throws IOException
 * @throws MetaBlockAlreadyExists
 *           If the meta block with the name already exists.
 */
public BlockAppender prepareMetaBlock(String name, String compressionName)
    throws IOException, MetaBlockAlreadyExists {
  return prepareMetaBlock(name,
      Compression.getCompressionAlgorithmByName(compressionName));
}
/**
 * Create a Meta Block and obtain an output stream for adding data into the
 * block. The Meta Block will be compressed with the same compression
 * algorithm as data blocks. There can only be one BlockAppender stream
 * active at any time. Regular Blocks may not be created after the first
 * Meta Blocks. The caller must call BlockAppender.close() to conclude the
 * block creation.
 *
 * @param name
 *          The name of the Meta Block. The name must not conflict with
 *          existing Meta Blocks.
 * @return The BlockAppender stream
 * @throws MetaBlockAlreadyExists
 *           If the meta block with the name already exists.
 * @throws IOException
 */
public BlockAppender prepareMetaBlock(String name) throws IOException,
    MetaBlockAlreadyExists {
  Algorithm sameAsDataBlocks = getDefaultCompressionAlgorithm();
  return prepareMetaBlock(name, sameAsDataBlocks);
}
/**
 * Create a Data Block and obtain an output stream for adding data into the
 * block. There can only be one BlockAppender stream active at any time.
 * Data Blocks may not be created after the first Meta Blocks. The caller
 * must call BlockAppender.close() to conclude the block creation.
 *
 * @return The BlockAppender stream
 * @throws IOException
 * @throws IllegalStateException
 *           if another block is still open, or a meta block has already
 *           been created.
 */
public BlockAppender prepareDataBlock() throws IOException {
  if (blkInProgress) {
    throw new IllegalStateException(
        "Cannot create Data Block until previous block is closed.");
  }
  if (metaBlkSeen) {
    throw new IllegalStateException(
        "Cannot create Data Block after Meta Blocks.");
  }
  WBlockState blockState =
      new WBlockState(getDefaultCompressionAlgorithm(), out,
          fsOutputBuffer, conf);
  BlockAppender appender =
      new BlockAppender(new DataBlockRegister(), blockState);
  blkInProgress = true;
  return appender;
}
/**
* Callback to make sure a meta block is added to the internal list when its
* stream is closed.
*/
private class MetaBlockRegister implements BlockRegister {
private final String name;
private final Algorithm org.apache.hadoop.shaded.com.ressAlgo;
MetaBlockRegister(String name, Algorithm org.apache.hadoop.shaded.com.ressAlgo) {
this.name = name;
this.org.apache.hadoop.shaded.com.ressAlgo = org.apache.hadoop.shaded.com.ressAlgo;
}
@Override
public void register(long raw, long begin, long end) {
metaIndex.addEntry(new MetaIndexEntry(name, org.apache.hadoop.shaded.com.ressAlgo,
new BlockRegion(begin, end - begin, raw)));
}
}
/**
 * Callback that appends a data block's region to the writer's data index
 * when the block is being closed.
 */
private class DataBlockRegister implements BlockRegister {
  DataBlockRegister() {
    // no state of its own
  }

  @Override
  public void register(long raw, long begin, long end) {
    long compressedLength = end - begin;
    BlockRegion finished = new BlockRegion(begin, compressedLength, raw);
    dataIndex.addBlockRegion(finished);
  }
}
}
/**
* BCFile Reader, interface to read the file's data and meta blocks.
*/
static public class Reader implements Closeable {
private final FSDataInputStream in;
private final Configuration conf;
final DataIndex dataIndex;
// Index for meta blocks
final MetaIndex metaIndex;
final Version version;
/**
* Intermediate class that maintain the state of a Readable Compression
* Block.
*/
static private final class RBlockState {
private final Algorithm org.apache.hadoop.shaded.com.ressAlgo;
private Decompressor decompressor;
private final BlockRegion region;
private final InputStream in;
public RBlockState(Algorithm org.apache.hadoop.shaded.com.ressionAlgo, FSDataInputStream fsin,
BlockRegion region, Configuration conf) throws IOException {
this.org.apache.hadoop.shaded.com.ressAlgo = org.apache.hadoop.shaded.com.ressionAlgo;
this.region = region;
this.decompressor = org.apache.hadoop.shaded.com.ressionAlgo.getDecompressor();
try {
this.in =
org.apache.hadoop.shaded.com.ressAlgo
.createDecompressionStream(new BoundedRangeFileInputStream(
fsin, this.region.getOffset(), this.region
.getCompressedSize()), decompressor, TFile
.getFSInputBufferSize(conf));
} catch (IOException e) {
org.apache.hadoop.shaded.com.ressAlgo.returnDecompressor(decompressor);
throw e;
}
}
/**
* Get the output stream for BlockAppender's consumption.
*
* @return the output stream suitable for writing block data.
*/
public InputStream getInputStream() {
return in;
}
public String getCompressionName() {
return org.apache.hadoop.shaded.com.ressAlgo.getName();
}
public BlockRegion getBlockRegion() {
return region;
}
public void finish() throws IOException {
try {
in.close();
} finally {
org.apache.hadoop.shaded.com.ressAlgo.returnDecompressor(decompressor);
decompressor = null;
}
}
}
/**
 * Access point to read a block.
 */
public static class BlockReader extends DataInputStream {
  private final RBlockState rBlkState;
  private boolean closed = false;

  BlockReader(RBlockState rbs) {
    super(rbs.getInputStream());
    rBlkState = rbs;
  }

  /**
   * Finishing reading the block. Release all resources. Calling this more
   * than once has no further effect.
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }
    try {
      // rBlkState itself is kept: people may access stats after calling
      // close().
      rBlkState.finish();
    } finally {
      closed = true;
    }
  }

  /**
   * Get the name of the compression algorithm used to compress the block.
   *
   * @return name of the compression algorithm.
   */
  public String getCompressionName() {
    return rBlkState.getCompressionName();
  }

  /**
   * Get the uncompressed size of the block.
   *
   * @return uncompressed size of the block.
   */
  public long getRawSize() {
    BlockRegion blockRegion = rBlkState.getBlockRegion();
    return blockRegion.getRawSize();
  }

  /**
   * Get the compressed size of the block.
   *
   * @return compressed size of the block.
   */
  public long getCompressedSize() {
    BlockRegion blockRegion = rBlkState.getBlockRegion();
    return blockRegion.getCompressedSize();
  }

  /**
   * Get the starting position of the block in the file.
   *
   * @return the starting position of the block in the file.
   */
  public long getStartPos() {
    BlockRegion blockRegion = rBlkState.getBlockRegion();
    return blockRegion.getOffset();
  }
}
/**
 * Constructor. Reads the trailer at the end of the file, verifies the magic
 * bytes and the version, then loads the meta index and the data index.
 *
 * @param fin
 *          FS input stream.
 * @param fileLength
 *          Length of the corresponding file
 * @param conf
 *          Configuration kept for creating block readers.
 * @throws IOException
 *           if the file cannot be read or the magic bytes do not match.
 * @throws RuntimeException
 *           if the file's version is not compatible with
 *           {@code BCFile.API_VERSION}.
 */
public Reader(FSDataInputStream fin, long fileLength, Configuration conf)
    throws IOException {
  this.in = fin;
  this.conf = conf;
  // move the cursor to the beginning of the tail, containing: offset to the
  // meta block index, version and magic
  fin.seek(fileLength - Magic.size() - Version.size() - Long.SIZE
      / Byte.SIZE);
  long offsetIndexMeta = fin.readLong();
  version = new Version(fin);
  Magic.readAndVerify(fin);
  if (!version.compatibleWith(BCFile.API_VERSION)) {
    throw new RuntimeException("Incompatible BCFile fileBCFileVersion.");
  }
  // read meta index
  fin.seek(offsetIndexMeta);
  metaIndex = new MetaIndex(fin);
  // read data:BCFile.index, the data block index
  BlockReader blockR = getMetaBlock(DataIndex.BLOCK_NAME);
  try {
    dataIndex = new DataIndex(blockR);
  } finally {
    blockR.close();
  }
}
/**
 * Get the name of the default compression algorithm.
 *
 * @return the name of the default compression algorithm.
 */
public String getDefaultCompressionName() {
  Algorithm fileDefault = dataIndex.getDefaultCompressionAlgorithm();
  return fileDefault.getName();
}
/**
 * Get the version that was read from the trailer of the BCFile being read.
 *
 * @return version of BCFile file being read.
 */
public Version getBCFileVersion() {
  return this.version;
}
/**
 * Get the version of the BCFile API implemented by this class.
 *
 * @return version of BCFile API.
 */
public Version getAPIVersion() {
  return BCFile.API_VERSION;
}
/**
 * Finishing reading the BCFile. Release all resources. Currently a no-op.
 */
@Override
public void close() {
  // nothing to release at present
}
/**
 * Get the number of data blocks.
 *
 * @return the number of block regions held by the data index.
 */
public int getBlockCount() {
  final int numBlocks = dataIndex.getBlockRegionList().size();
  return numBlocks;
}
/**
 * Stream access to a Meta Block.
 *
 * @param name
 *          meta block name
 * @return BlockReader input stream for reading the meta block.
 * @throws IOException
 * @throws MetaBlockDoesNotExist
 *           The Meta Block with the given name does not exist.
 */
public BlockReader getMetaBlock(String name) throws IOException,
    MetaBlockDoesNotExist {
  MetaIndexEntry entry = metaIndex.getMetaByName(name);
  if (entry != null) {
    BlockRegion where = entry.getRegion();
    return createReader(entry.getCompressionAlgorithm(), where);
  }
  throw new MetaBlockDoesNotExist("name=" + name);
}
/**
 * Stream access to a Data Block.
 *
 * @param blockIndex
 *          0-based data block index.
 * @return BlockReader input stream for reading the data block.
 * @throws IOException
 * @throws IndexOutOfBoundsException
 *           if {@code blockIndex} is negative or not smaller than the
 *           number of data blocks.
 */
public BlockReader getDataBlock(int blockIndex) throws IOException {
  boolean inRange = blockIndex >= 0 && blockIndex < getBlockCount();
  if (!inRange) {
    throw new IndexOutOfBoundsException(String.format(
        "blockIndex=%d, numBlocks=%d", blockIndex, getBlockCount()));
  }
  BlockRegion region = dataIndex.getBlockRegionList().get(blockIndex);
  Algorithm algo = dataIndex.getDefaultCompressionAlgorithm();
  return createReader(algo, region);
}
private BlockReader createReader(Algorithm org.apache.hadoop.shaded.com.ressAlgo, BlockRegion region)
throws IOException {
RBlockState rbs = new RBlockState(org.apache.hadoop.shaded.com.ressAlgo, in, region, conf);
return new BlockReader(rbs);
}
/**
 * Find the smallest Block index whose starting offset is greater than or
 * equal to the specified offset.
 *
 * @param offset
 *          User-specific offset.
 * @return the index to the data Block if such block exists; or -1
 *         otherwise.
 */
public int getBlockIndexNear(long offset) {
  // Typed list instead of a raw ArrayList so the lookup is type-checked.
  ArrayList<BlockRegion> list = dataIndex.getBlockRegionList();
  int idx =
      Utils.lowerBound(list, new ScalarLong(offset), new ScalarComparator());
  // An index equal to the list size means no block qualified.
  if (idx == list.size()) {
    return -1;
  }
  return idx;
}
}
/**
 * Index for all Meta blocks, keyed by meta block name.
 */
static class MetaIndex {
  // use a tree map, for getting a meta block entry by name
  final Map<String, MetaIndexEntry> index;

  /**
   * Create an empty index (for write).
   */
  public MetaIndex() {
    index = new TreeMap<String, MetaIndexEntry>();
  }

  /**
   * Construct the index from a file (for read): a VInt entry count followed
   * by that many serialized entries.
   *
   * @param in
   *          input positioned at the start of the serialized meta index.
   * @throws IOException
   *           if reading or decoding an entry fails.
   */
  public MetaIndex(DataInput in) throws IOException {
    int count = Utils.readVInt(in);
    index = new TreeMap<String, MetaIndexEntry>();
    for (int nx = 0; nx < count; nx++) {
      MetaIndexEntry indexEntry = new MetaIndexEntry(in);
      index.put(indexEntry.getMetaName(), indexEntry);
    }
  }

  /**
   * Add an entry under its meta name, replacing any entry already stored
   * under that name.
   */
  public void addEntry(MetaIndexEntry indexEntry) {
    index.put(indexEntry.getMetaName(), indexEntry);
  }

  /**
   * @param name
   *          meta block name.
   * @return the entry stored under the name, or null if there is none.
   */
  public MetaIndexEntry getMetaByName(String name) {
    return index.get(name);
  }

  /**
   * Serialize the index: the entry count as a VInt, then every entry in the
   * map's iteration order.
   *
   * @throws IOException
   *           if writing fails.
   */
  public void write(DataOutput out) throws IOException {
    Utils.writeVInt(out, index.size());
    for (MetaIndexEntry indexEntry : index.values()) {
      indexEntry.write(out);
    }
  }
}
/**
* An entry describes a meta block in the MetaIndex.
*/
static final class MetaIndexEntry {
private final String metaName;
private final Algorithm org.apache.hadoop.shaded.com.ressionAlgorithm;
private final static String defaultPrefix = "data:";
private final BlockRegion region;
public MetaIndexEntry(DataInput in) throws IOException {
String fullMetaName = Utils.readString(in);
if (fullMetaName.startsWith(defaultPrefix)) {
metaName =
fullMetaName.substring(defaultPrefix.length(), fullMetaName
.length());
} else {
throw new IOException("Corrupted Meta region Index");
}
org.apache.hadoop.shaded.com.ressionAlgorithm =
Compression.getCompressionAlgorithmByName(Utils.readString(in));
region = new BlockRegion(in);
}
public MetaIndexEntry(String metaName, Algorithm org.apache.hadoop.shaded.com.ressionAlgorithm,
BlockRegion region) {
this.metaName = metaName;
this.org.apache.hadoop.shaded.com.ressionAlgorithm = org.apache.hadoop.shaded.com.ressionAlgorithm;
this.region = region;
}
public String getMetaName() {
return metaName;
}
public Algorithm getCompressionAlgorithm() {
return org.apache.hadoop.shaded.com.ressionAlgorithm;
}
public BlockRegion getRegion() {
return region;
}
public void write(DataOutput out) throws IOException {
Utils.writeString(out, defaultPrefix + metaName);
Utils.writeString(out, org.apache.hadoop.shaded.com.ressionAlgorithm.getName());
region.write(out);
}
}
/**
 * Index of all compressed data blocks.
 */
static class DataIndex {
  // Name of the meta block under which the data index is stored.
  final static String BLOCK_NAME = "BCFile.index";
  private final Algorithm defaultCompressionAlgorithm;
  // for data blocks, each entry specifies a block's offset, compressed size
  // and raw size
  private final ArrayList<BlockRegion> listRegions;

  /**
   * Deserialize the index from a file (for read): the default algorithm
   * name, a VInt region count, then that many regions.
   *
   * @param in
   *          input positioned at the start of the serialized data index.
   * @throws IOException
   *           if reading fails.
   */
  public DataIndex(DataInput in) throws IOException {
    defaultCompressionAlgorithm =
        Compression.getCompressionAlgorithmByName(Utils.readString(in));
    int n = Utils.readVInt(in);
    listRegions = new ArrayList<BlockRegion>(n);
    for (int i = 0; i < n; i++) {
      BlockRegion region = new BlockRegion(in);
      listRegions.add(region);
    }
  }

  /**
   * Create an empty index (for write).
   *
   * @param defaultCompressionAlgorithmName
   *          name of the algorithm recorded as the file default.
   */
  public DataIndex(String defaultCompressionAlgorithmName) {
    this.defaultCompressionAlgorithm =
        Compression
            .getCompressionAlgorithmByName(defaultCompressionAlgorithmName);
    listRegions = new ArrayList<BlockRegion>();
  }

  public Algorithm getDefaultCompressionAlgorithm() {
    return defaultCompressionAlgorithm;
  }

  /**
   * @return the internal list of block regions (not a copy).
   */
  public ArrayList<BlockRegion> getBlockRegionList() {
    return listRegions;
  }

  public void addBlockRegion(BlockRegion region) {
    listRegions.add(region);
  }

  /**
   * Serialize the index in the layout the reading constructor expects.
   *
   * @throws IOException
   *           if writing fails.
   */
  public void write(DataOutput out) throws IOException {
    Utils.writeString(out, defaultCompressionAlgorithm.getName());
    Utils.writeVInt(out, listRegions.size());
    for (BlockRegion region : listRegions) {
      region.write(out);
    }
  }
}
/**
 * The fixed 16-byte signature that marks a BCFile; it is written with
 * {@link #write} and checked with {@link #readAndVerify}.
 */
static final class Magic {
  private final static byte[] AB_MAGIC_BCFILE = {
      // 16 bytes in total
      (byte) 0xd1, (byte) 0x11, (byte) 0xd3, (byte) 0x68,
      (byte) 0x91, (byte) 0xb5, (byte) 0xd7, (byte) 0xb6,
      (byte) 0x39, (byte) 0xdf, (byte) 0x41, (byte) 0x40,
      (byte) 0x92, (byte) 0xba, (byte) 0xe1, (byte) 0x50
  };

  /**
   * Read {@link #size()} bytes and compare them with the signature.
   *
   * @param in
   *          input positioned at the expected signature.
   * @throws IOException
   *           if the bytes cannot be read in full or differ from the
   *           signature.
   */
  public static void readAndVerify(DataInput in) throws IOException {
    byte[] candidate = new byte[size()];
    in.readFully(candidate);
    boolean matches = Arrays.equals(candidate, AB_MAGIC_BCFILE);
    if (matches) {
      return;
    }
    throw new IOException("Not a valid BCFile.");
  }

  /**
   * Write the signature bytes to the output.
   *
   * @throws IOException
   *           if writing fails.
   */
  public static void write(DataOutput out) throws IOException {
    out.write(AB_MAGIC_BCFILE);
  }

  /**
   * @return the length of the signature in bytes.
   */
  public static int size() {
    final int length = AB_MAGIC_BCFILE.length;
    return length;
  }
}
/**
* Block region.
*/
static final class BlockRegion implements Scalar {
private final long offset;
private final long org.apache.hadoop.shaded.com.ressedSize;
private final long rawSize;
public BlockRegion(DataInput in) throws IOException {
offset = Utils.readVLong(in);
org.apache.hadoop.shaded.com.ressedSize = Utils.readVLong(in);
rawSize = Utils.readVLong(in);
}
public BlockRegion(long offset, long org.apache.hadoop.shaded.com.ressedSize, long rawSize) {
this.offset = offset;
this.org.apache.hadoop.shaded.com.ressedSize = org.apache.hadoop.shaded.com.ressedSize;
this.rawSize = rawSize;
}
public void write(DataOutput out) throws IOException {
Utils.writeVLong(out, offset);
Utils.writeVLong(out, org.apache.hadoop.shaded.com.ressedSize);
Utils.writeVLong(out, rawSize);
}
public long getOffset() {
return offset;
}
public long getCompressedSize() {
return org.apache.hadoop.shaded.com.ressedSize;
}
public long getRawSize() {
return rawSize;
}
@Override
public long magnitude() {
return offset;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy