org.apache.hadoop.fs.s3a.S3AInputStream (from the hadoop-aws artifact)

This module contains code to support integration with Amazon Web Services.
It also declares the dependencies needed to work with AWS services.
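
Before the source itself, a quick usage sketch (not part of the original file): applications normally obtain this stream indirectly, by opening an s3a:// path through the Hadoop FileSystem API, rather than constructing S3AInputStream themselves. The bucket, path, and credential values below are placeholders; credentials may also come from the environment or IAM roles.

// Minimal sketch, assuming a reachable bucket "my-bucket" and valid
// credentials; all names here are placeholders, not part of this class.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3AReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.access.key", "YOUR_ACCESS_KEY"); // placeholder
    conf.set("fs.s3a.secret.key", "YOUR_SECRET_KEY"); // placeholder
    FileSystem fs = FileSystem.get(URI.create("s3a://my-bucket/"), conf);
    try (FSDataInputStream in = fs.open(new Path("s3a://my-bucket/data/sample.txt"))) {
      byte[] buf = new byte[4096];
      int n = in.read(buf, 0, buf.length); // delegates to S3AInputStream.read
      System.out.println("read " + n + " bytes");
    }
  }
}
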
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs.s3a;

import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3ObjectInputStream;

import org.apache.hadoop.fs.FSExceptionMessages;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem;

import org.slf4j.Logger;

import java.io.EOFException;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.SocketException;
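
/**
 * Input stream for an object stored in S3, wrapping the SDK's
 * S3ObjectInputStream. Reads are served from a single HTTP connection;
 * seek() is implemented by aborting the current connection and issuing a
 * new ranged GET, and socket errors during a read trigger one
 * reopen-and-retry at the current position.
 */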
public class S3AInputStream extends FSInputStream {
  private long pos;
  private boolean closed;
  private S3ObjectInputStream wrappedStream;
  private FileSystem.Statistics stats;
  private AmazonS3Client client;
  private String bucket;
  private String key;
  private long contentLength;

  public static final Logger LOG = S3AFileSystem.LOG;
  public static final long CLOSE_THRESHOLD = 4096;
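
  /**
   * Create the stream. No request is made to S3 here; the object is
   * opened lazily by openIfNeeded() or reopen().
   */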
  public S3AInputStream(String bucket, String key, long contentLength,
      AmazonS3Client client, FileSystem.Statistics stats) {
    this.bucket = bucket;
    this.key = key;
    this.contentLength = contentLength;
    this.client = client;
    this.stats = stats;
    this.pos = 0;
    this.closed = false;
    this.wrappedStream = null;
  }
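
  // Lazily open the object on first use, starting at offset 0.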
  private void openIfNeeded() throws IOException {
    if (wrappedStream == null) {
      reopen(0);
    }
  }
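
  /**
   * (Re)open the object at the given position: abort any existing stream,
   * validate the offset, then issue a ranged GET from pos to the end of
   * the object.
   */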
  private synchronized void reopen(long pos) throws IOException {
    if (wrappedStream != null) {
      LOG.debug("Aborting old stream to open at pos {}", pos);
      wrappedStream.abort();
    }

    if (pos < 0) {
      throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK + " " + pos);
    }

    if (contentLength > 0 && pos > contentLength - 1) {
      throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF + " " + pos);
    }

    LOG.debug("Actually opening file {} at pos {}", key, pos);

    // Request the byte range [pos, contentLength - 1] so the new stream
    // starts exactly at the requested position.
    GetObjectRequest request = new GetObjectRequest(bucket, key);
    request.setRange(pos, contentLength - 1);
    wrappedStream = client.getObject(request).getObjectContent();

    if (wrappedStream == null) {
      throw new IOException("Null IO stream");
    }

    this.pos = pos;
  }

  @Override
  public synchronized long getPos() throws IOException {
    return pos;
  }
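
  /**
   * Seek by reopening the object at the target offset; every seek to a
   * different position costs a new ranged GET request.
   */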
  @Override
  public synchronized void seek(long pos) throws IOException {
    checkNotClosed();
    if (this.pos == pos) {
      return;
    }
    LOG.debug("Reopening {} to seek to offset {} (current pos {})",
        key, pos, this.pos);
    reopen(pos);
  }
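
  // An S3 object has no alternate replicas, so there is never a new
  // source to switch to.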
  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    return false;
  }
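
  /**
   * Read a single byte. On a socket timeout or socket error, the
   * connection is reopened at the current position and the read is
   * retried once.
   */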
  @Override
  public synchronized int read() throws IOException {
    checkNotClosed();
    openIfNeeded();

    int byteRead;
    try {
      byteRead = wrappedStream.read();
    } catch (SocketTimeoutException e) {
      LOG.info("Got timeout while trying to read from stream, trying to recover: {}", e.toString());
      reopen(pos);
      byteRead = wrappedStream.read();
    } catch (SocketException e) {
      LOG.info("Got socket exception while trying to read from stream, trying to recover: {}", e.toString());
      reopen(pos);
      byteRead = wrappedStream.read();
    }

    if (byteRead >= 0) {
      pos++;
      if (stats != null) {
        stats.incrementBytesRead(1);
      }
    }
    return byteRead;
  }
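
  /**
   * Read up to len bytes into buf, with the same single-retry recovery
   * as read().
   */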
  @Override
  public synchronized int read(byte[] buf, int off, int len) throws IOException {
    checkNotClosed();
    openIfNeeded();

    int byteRead;
    try {
      byteRead = wrappedStream.read(buf, off, len);
    } catch (SocketTimeoutException e) {
      LOG.info("Got timeout while trying to read from stream, trying to recover: {}", e.toString());
      reopen(pos);
      byteRead = wrappedStream.read(buf, off, len);
    } catch (SocketException e) {
      LOG.info("Got socket exception while trying to read from stream, trying to recover: {}", e.toString());
      reopen(pos);
      byteRead = wrappedStream.read(buf, off, len);
    }

    if (byteRead > 0) {
      pos += byteRead;
      if (stats != null) {
        stats.incrementBytesRead(byteRead);
      }
    }
    return byteRead;
  }

  private void checkNotClosed() throws IOException {
    if (closed) {
      throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
    }
  }
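
  /**
   * Close the stream. Near the end of the object the HTTP connection is
   * closed normally so it can be reused; further from the end it is
   * aborted, because closing would first read the remaining payload.
   */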
  @Override
  public synchronized void close() throws IOException {
    super.close();
    closed = true;
    if (wrappedStream != null) {
      if (contentLength - pos <= CLOSE_THRESHOLD) {
        // Close, rather than abort, so that the http connection can be reused.
        wrappedStream.close();
      } else {
        // Abort, rather than just close, the underlying stream. Otherwise, the
        // remaining object payload is read from S3 while closing the stream.
        wrappedStream.abort();
      }
    }
  }

  @Override
  public synchronized int available() throws IOException {
    checkNotClosed();

    long remaining = this.contentLength - this.pos;
    if (remaining > Integer.MAX_VALUE) {
      return Integer.MAX_VALUE;
    }
    return (int) remaining;
  }

  @Override
  public boolean markSupported() {
    return false;
  }
}
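
A second hedged sketch of how the close-vs-abort logic above plays out for a caller that reads only the tail of a large object: after seeking near end-of-file, fewer than CLOSE_THRESHOLD (4096) bytes remain, so close() returns the HTTP connection to the pool instead of aborting it. The path is a placeholder and the object is assumed to be at least 1 KB long.

// Sketch only: path and sizes are assumptions, not part of the class above.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3ATailReadSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(URI.create("s3a://my-bucket/"), new Configuration());
    Path path = new Path("s3a://my-bucket/logs/big.log");
    long length = fs.getFileStatus(path).getLen();
    try (FSDataInputStream in = fs.open(path)) {
      in.seek(length - 1024);  // ranged GET covering the last 1 KB
      byte[] tail = new byte[1024];
      in.readFully(tail);      // served by S3AInputStream.read(buf, off, len)
    } // close(): remaining bytes <= CLOSE_THRESHOLD, so the connection is
      // closed for reuse rather than aborted
  }
}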