org.archive.io.ReplayInputStream Maven / Gradle / Ivy
The newest version!
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.io;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.commons.io.IOUtils;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;
/**
* Replays the bytes recorded from a RecordingInputStream or
* RecordingOutputStream.
*
* This InputStream supports mark and reset.
*
* @author gojomo
*/
public class ReplayInputStream extends SeekInputStream
{
private static final int DEFAULT_BUFFER_SIZE = 256*1024; // 256KiB
private BufferedSeekInputStream diskStream;
private byte[] buffer;
private long position;
/**
* Total size of stream content.
*
* Size of data to replay.
*/
private long size = -1;
/**
* Where the response body starts, if marked
*/
protected long responseBodyStart = -1;
/**
* Constructor.
*
* @param buffer Buffer to read from.
* @param size Size of data to replay.
* @param responseBodyStart Start of the response body.
* @param backingFilename Backing file that sits behind the buffer. If
* size
> than buffer then we go to backing file to read
* data that is beyond buffer.length.
*
* @throws IOException If we fail to open an input stream on
* backing file.
*/
public ReplayInputStream(byte[] buffer, long size, long responseBodyStart,
String backingFilename)
throws IOException
{
this(buffer, size, backingFilename);
this.responseBodyStart = responseBodyStart;
}
/**
* Constructor.
*
* @param buffer Buffer to read from.
* @param size Size of data to replay.
* @param backingFilename Backing file that sits behind the buffer. If
* size
> than buffer then we go to backing file to read
* data that is beyond buffer.length.
* @throws IOException If we fail to open an input stream on
* backing file.
*/
public ReplayInputStream(byte[] buffer, long size, String backingFilename)
throws IOException
{
this.buffer = buffer;
this.size = size;
if (size > buffer.length) {
setupDiskStream(new File(backingFilename));
}
}
protected void setupDiskStream(File backingFile) throws IOException {
RandomAccessInputStream rais = new RandomAccessInputStream(backingFile);
diskStream = new BufferedSeekInputStream(rais, 4096);
}
protected File backingFile;
/**
* Create a ReplayInputStream from the given source stream. Requires
* reading the entire stream (and possibly overflowing to a temporary
* file). Primary reason for doing so would be to have a repositionable
* version of the original stream's contents.
*
* If created via this constructor, use the destroy() method to ensure
* prompt deletion of any associated tmp file when done.
*
* @param fillStream
* @throws IOException
*/
public ReplayInputStream(InputStream fillStream) throws IOException {
this.buffer = new byte[DEFAULT_BUFFER_SIZE];
long count = ArchiveUtils.readFully(fillStream, buffer);
if(fillStream.available()>0) {
this.backingFile = File.createTempFile("tid"+Thread.currentThread().getId(), "ris");
count += FileUtils.readFullyToFile(fillStream, backingFile);
setupDiskStream(backingFile);
}
this.size = count;
}
/**
* Close & destroy any internally-generated temporary files.
*/
public void destroy() {
IOUtils.closeQuietly(this);
if(backingFile!=null) {
FileUtils.deleteSoonerOrLater(backingFile);
}
}
public long setToResponseBodyStart() throws IOException {
position(responseBodyStart);
return this.position;
}
/* (non-Javadoc)
* @see java.io.InputStream#read()
*/
public int read() throws IOException {
if (position == size) {
return -1; // EOF
}
if (position < buffer.length) {
// Convert to unsigned int.
int c = buffer[(int) position] & 0xFF;
position++;
return c;
}
int c = diskStream.read();
if (c >= 0) {
position++;
}
return c;
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#read(byte[], int, int)
*/
public int read(byte[] b, int off, int len) throws IOException {
if (position == size) {
return -1; // EOF
}
if (position < buffer.length) {
int toCopy = (int)Math.min(size - position,
Math.min(len, buffer.length - position));
System.arraycopy(buffer, (int)position, b, off, toCopy);
if (toCopy > 0) {
position += toCopy;
}
return toCopy;
}
// into disk zone
int read = diskStream.read(b,off,len);
if(read>0) {
position += read;
}
return read;
}
public void readFullyTo(OutputStream os) throws IOException {
readFullyTo(this, os);
}
public static void readFullyTo(InputStream in, OutputStream os) throws IOException {
byte[] buf = new byte[4096];
int c = in.read(buf);
while (c != -1) {
os.write(buf,0,c);
c = in.read(buf);
}
}
/*
* Like 'readFullyTo', but only reads the header-part.
* Starts from the beginning each time it is called.
*/
public void readHeaderTo(OutputStream os) throws IOException {
position = 0;
byte[] buf = new byte[(int)responseBodyStart];
int c = read(buf,0,buf.length);
if(c != -1) {
os.write(buf,0,c);
}
}
/*
* Like 'readFullyTo', but only reads the content-part.
*/
public void readContentTo(OutputStream os) throws IOException {
setToResponseBodyStart();
readFullyTo(os);
}
/**
* Convenience method to copy content out to target stream.
* @param os stream to write content to
* @param maxSize maximum count of bytes to copy
* @throws IOException
*/
public void readContentTo(OutputStream os, long maxSize) throws IOException {
setToResponseBodyStart();
byte[] buf = new byte[4096];
int c = read(buf);
long tot = 0;
while (c != -1 && tot < maxSize) {
os.write(buf,0,c);
c = read(buf);
tot += c;
}
}
/* (non-Javadoc)
* @see java.io.InputStream#close()
*/
public void close() throws IOException {
super.close();
if(diskStream != null) {
diskStream.close();
}
}
/**
* Total size of stream content.
* @return Returns the size.
*/
public long getSize()
{
return size;
}
/**
* Total size of header.
* @return the size of the header.
*/
public long getHeaderSize()
{
return responseBodyStart;
}
/**
* Total size of content.
* @return the size of the content.
*/
public long getContentSize()
{
return size - responseBodyStart;
}
/**
* @return Amount THEORETICALLY remaining (TODO: Its not theoretical
* seemingly. The class implemetentation depends on it being exact).
*/
public long remaining() {
return size - position;
}
/**
* Reposition the stream.
*
* @param p the new position for this stream
* @throws IOException if an IO error occurs
*/
public void position(long p) throws IOException {
if (p < 0) {
throw new IOException("Negative seek offset.");
}
if (p > size) {
throw new IOException("Desired position exceeds size.");
}
if (p < buffer.length) {
// Only seek file if necessary
if (position > buffer.length) {
diskStream.position(0);
}
} else {
diskStream.position(p - buffer.length);
}
this.position = p;
}
public long position() throws IOException {
return position;
}
protected byte[] getBuffer() {
return buffer;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy