/*
* Licensed to the University of California, Berkeley under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package tachyon.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.primitives.Ints;
import tachyon.Constants;
import tachyon.client.ReadType;
import tachyon.client.TachyonFS;
import tachyon.client.TachyonFile;
import tachyon.client.file.FileInStream;
import tachyon.conf.TachyonConf;
import tachyon.util.io.BufferUtils;

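/**
 * An input stream for reading a file through the Hadoop API. Reads are served from the Tachyon
 * client stream when the data is available in Tachyon, and fall back to the under storage system
 * (e.g. HDFS) otherwise.
 */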
public class HdfsFileInputStream extends InputStream implements Seekable, PositionedReadable {
private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE);
private long mCurrentPosition;
private TachyonFS mTFS;
private long mFileId;
private Path mHdfsPath;
private Configuration mHadoopConf;
private int mHadoopBufferSize;
private Statistics mStatistics;
private TachyonFile mTachyonFile;
private FSDataInputStream mHdfsInputStream = null;
private FileInStream mTachyonFileInputStream = null;
private boolean mClosed = false;
private int mBufferLimit = 0;
private int mBufferPosition = 0;
private byte[] mBuffer;
private final TachyonConf mTachyonConf;
/**
* @param tfs the TachyonFS
* @param fileId the file id
* @param hdfsPath the HDFS path
* @param conf Hadoop configuration
* @param bufferSize the buffer size
* @param stats filesystem statistics
* @param tachyonConf Tachyon configuration
* @throws IOException if the underlying file does not exist or its stream cannot be created
*/
public HdfsFileInputStream(TachyonFS tfs, long fileId, Path hdfsPath, Configuration conf,
int bufferSize, FileSystem.Statistics stats, TachyonConf tachyonConf) throws IOException {
LOG.debug("HdfsFileInputStream({}, {}, {}, {}, {}, {})", tfs, fileId, hdfsPath, conf,
bufferSize, stats);
mTachyonConf = tachyonConf;
long bufferBytes = mTachyonConf.getBytes(Constants.USER_FILE_BUFFER_BYTES);
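    // The local read buffer is sized at four times the configured user file buffer bytes.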
mBuffer = new byte[Ints.checkedCast(bufferBytes) * 4];
mCurrentPosition = 0;
mTFS = tfs;
mFileId = fileId;
mHdfsPath = hdfsPath;
mHadoopConf = conf;
mHadoopBufferSize = bufferSize;
mStatistics = stats;
mTachyonFile = mTFS.getFile(mFileId);
if (mTachyonFile == null) {
throw new FileNotFoundException("File " + hdfsPath + " with FID " + fileId
+ " is not found.");
}
mTachyonFile.setUFSConf(mHadoopConf);
mTachyonFileInputStream = mTachyonFile.getInStream(ReadType.CACHE);
}
/**
   * This method is not supported in HdfsFileInputStream.
*
* @return N/A
* @throws IOException always
*/
@Override
public int available() throws IOException {
throw new IOException("Not supported");
}
@Override
public void close() throws IOException {
if (mTachyonFileInputStream != null) {
mTachyonFileInputStream.close();
}
if (mHdfsInputStream != null) {
mHdfsInputStream.close();
}
mClosed = true;
}
/**
   * Lazily sets mHdfsInputStream to a stream from the under storage system, positioned at
   * mCurrentPosition. This is a no-op if the stream is already open.
*
* @throws IOException if opening the file fails
*/
// TODO(calvin): Consider removing this when the recovery logic is available in FileInStream
private void getHdfsInputStream() throws IOException {
if (mHdfsInputStream == null) {
FileSystem fs = mHdfsPath.getFileSystem(mHadoopConf);
mHdfsInputStream = fs.open(mHdfsPath, mHadoopBufferSize);
mHdfsInputStream.seek(mCurrentPosition);
}
}
/**
   * Sets mHdfsInputStream to a stream from the under storage system, positioned at the given
   * position. mCurrentPosition is not updated to position.
   *
   * @param position the position in the file to seek to
   * @throws IOException if opening the file fails
*/
private void getHdfsInputStream(long position) throws IOException {
if (mHdfsInputStream == null) {
FileSystem fs = mHdfsPath.getFileSystem(mHadoopConf);
mHdfsInputStream = fs.open(mHdfsPath, mHadoopBufferSize);
}
mHdfsInputStream.seek(position);
}
@Override
public long getPos() throws IOException {
return mCurrentPosition;
}
@Override
public int read() throws IOException {
if (mClosed) {
throw new IOException("Cannot read from a closed stream.");
}
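    // Try the Tachyon client stream first; on an IOException the stream is closed and the read
    // falls through to the under storage system below.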
if (mTachyonFileInputStream != null) {
int ret = 0;
try {
        ret = mTachyonFileInputStream.read();
        // Do not advance the position on EOF (ret == -1).
        if (ret != -1) {
          if (mStatistics != null) {
            mStatistics.incrementBytesRead(1);
          }
          mCurrentPosition ++;
        }
        return ret;
} catch (IOException e) {
LOG.error(e.getMessage(), e);
mTachyonFileInputStream.close();
mTachyonFileInputStream = null;
}
}
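    // The Tachyon stream is unavailable or failed; read a byte from HDFS instead.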
getHdfsInputStream();
return readFromHdfsBuffer();
}
@Override
public int read(byte[] b) throws IOException {
throw new IOException("Not supported");
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
if (mClosed) {
throw new IOException("Cannot read from a closed stream.");
}
if (mTachyonFileInputStream != null) {
int ret = 0;
try {
        ret = mTachyonFileInputStream.read(b, off, len);
        // Do not advance the position on EOF (ret == -1).
        if (ret != -1) {
          if (mStatistics != null) {
            mStatistics.incrementBytesRead(ret);
          }
          mCurrentPosition += ret;
        }
        return ret;
} catch (IOException e) {
LOG.error(e.getMessage(), e);
mTachyonFileInputStream.close();
mTachyonFileInputStream = null;
}
}
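    // Fall back to HDFS. Note that this path returns at most a single byte per call, so callers
    // relying on read(byte[], int, int) to fill a range must loop.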
getHdfsInputStream();
int byteRead = readFromHdfsBuffer();
    // byteRead is an unsigned byte; if it's -1 then we have hit EOF
if (byteRead == -1) {
return -1;
}
// Convert byteRead back to a signed byte
b[off] = (byte) byteRead;
return 1;
}
@Override
public synchronized int read(long position, byte[] buffer, int offset, int length)
throws IOException {
if (mClosed) {
throw new IOException("Cannot read from a closed stream.");
}
int ret = -1;
long oldPos = getPos();
if ((position < 0) || (position >= mTachyonFile.length())) {
return ret;
}
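    // Perform the read at the requested position, then restore the original stream position,
    // as required by the PositionedReadable contract.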
if (mTachyonFileInputStream != null) {
try {
mTachyonFileInputStream.seek(position);
ret = mTachyonFileInputStream.read(buffer, offset, length);
if (mStatistics != null && ret != -1) {
mStatistics.incrementBytesRead(ret);
}
return ret;
} finally {
mTachyonFileInputStream.seek(oldPos);
}
}
try {
getHdfsInputStream(position);
ret = mHdfsInputStream.read(buffer, offset, length);
if (mStatistics != null && ret != -1) {
mStatistics.incrementBytesRead(ret);
}
return ret;
} finally {
if (mHdfsInputStream != null) {
mHdfsInputStream.seek(oldPos);
}
}
}
/**
   * Similar to read(), returns a single unsigned byte from the HDFS buffer, or -1 if there is no
   * more data to be read. This method also fills the HDFS buffer with new data if it is empty.
   *
   * @return the next value in the stream from 0 to 255, or -1 if there is no more data to be read
   * @throws IOException if the bulk read from HDFS fails
*/
private int readFromHdfsBuffer() throws IOException {
if (mBufferPosition < mBufferLimit) {
if (mStatistics != null) {
mStatistics.incrementBytesRead(1);
}
mCurrentPosition ++;
return BufferUtils.byteToInt(mBuffer[mBufferPosition ++]);
}
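    // The buffer is exhausted; refill it with a bulk read from HDFS, retrying zero-byte reads.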
LOG.error("Reading from HDFS directly");
while ((mBufferLimit = mHdfsInputStream.read(mBuffer)) == 0) {
LOG.error("Read 0 bytes in readFromHdfsBuffer for " + mHdfsPath);
}
if (mBufferLimit == -1) {
return -1;
}
mBufferPosition = 0;
if (mStatistics != null) {
mStatistics.incrementBytesRead(1);
}
mCurrentPosition ++;
return BufferUtils.byteToInt(mBuffer[mBufferPosition ++]);
}
/**
   * This method is not supported in HdfsFileInputStream.
*
* @throws IOException always
*/
@Override
public void readFully(long position, byte[] buffer) throws IOException {
throw new IOException("Not supported");
}
/**
   * This method is not supported in HdfsFileInputStream.
*
* @throws IOException always
*/
@Override
public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
throw new IOException("Not supported");
}
@Override
public void seek(long pos) throws IOException {
if (pos == mCurrentPosition) {
return;
}
if (pos < 0) {
throw new IOException("Seek position is negative: " + pos);
}
if (pos > mTachyonFile.length()) {
throw new IOException("Seek position is past EOF: " + pos + ", fileSize = "
+ mTachyonFile.length());
}
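    // Seek the Tachyon stream when it is open; otherwise seek the HDFS stream and invalidate
    // the local buffer.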
if (mTachyonFileInputStream != null) {
mTachyonFileInputStream.seek(pos);
} else {
getHdfsInputStream(pos);
// TODO(calvin): Optimize for the case when the data is still valid in the buffer
// Invalidate buffer
mBufferLimit = -1;
}
mCurrentPosition = pos;
}
/**
   * This method is not supported in HdfsFileInputStream.
*
* @return N/A
* @throws IOException always
*/
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
throw new IOException("Not supported");
}
}
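For reference, below is a minimal, hypothetical usage sketch; it is not part of the original file. User code does not normally construct HdfsFileInputStream directly: Tachyon's Hadoop FileSystem shim (tachyon.hadoop.TFS) is expected to wrap it in an FSDataInputStream when a tachyon:// path is opened. The master address, file path, and example class name below are placeholders, and the sketch assumes the Tachyon client jar is on the classpath with the tachyon:// scheme mapped to TFS (fs.tachyon.impl) in the Hadoop configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TachyonReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder URI; assumes the tachyon:// scheme is registered in the Hadoop configuration.
    Path path = new Path("tachyon://master:19998/data/input.txt");
    FileSystem fs = path.getFileSystem(conf);
    // fs.open() returns an FSDataInputStream, here expected to be backed by HdfsFileInputStream:
    // cached reads come from Tachyon, misses fall back to the under storage system.
    try (FSDataInputStream in = fs.open(path)) {
      byte[] buf = new byte[8192];
      int n;
      while ((n = in.read(buf, 0, buf.length)) != -1) {
        System.out.write(buf, 0, n);
      }
      System.out.flush();
    }
  }
}

The three-argument read is used deliberately: HdfsFileInputStream throws IOException("Not supported") from read(byte[]), while FSDataInputStream inherits DataInputStream.read(byte[]), which delegates to the three-argument overload.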