// org.archive.io.RepositionableInputStream Maven / Gradle / Ivy
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.io;
import it.unimi.dsi.fastutil.io.RepositionableStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
* Wrapper around an {@link InputStream} to make a primitive repositionable
* stream. Uses a {@link BufferedInputStream}. Calls mark on every read so
* we'll remember at least the last thing read (you can only back up over the
* last thing read -- not the last 2 or 3 things read). Used by
* GzippedInputStream when reading streams over a network. Wraps an
* HTTP, etc., stream so we can back it up if need be after the
* GZIP inflater has done a fill of its full buffer though it only needed
* the first few bytes to finish decompressing the current GZIP member.
*
* TODO: More robust implementation. Tried to use the it.unimi.dsi.io
* FastBufferedInputStream, but it relies on FileChannel ByteBuffers and, if
* those are not present -- as would be the case when reading from a network
* stream, the main application for this class -- it expects the underlying
* stream to implement the RepositionableStream interface: a chicken-or-egg
* problem.
* @author stack
*/
public class RepositionableInputStream extends BufferedInputStream implements
        RepositionableStream {
    /**
     * Offset, in bytes from where this wrapper started reading, of the next
     * byte a read will return. Updated by the read methods, by
     * {@link #position(long)} and by {@link #reset()}.
     * NOTE(review): skip(long) is not overridden in this class, so skipped
     * bytes are presumably not reflected here -- confirm whether any caller
     * relies on skip.
     */
    private long position = 0;

    /**
     * Value of {@link #position} when {@link #mark(int)} was last called, or
     * -1 when no caller-level mark is outstanding.
     */
    private long markPosition = -1;

    /**
     * Value of {@link #position} at the most recent call to
     * {@code super.mark}, whether it was made by {@link #mark(int)} or
     * implicitly by one of the read methods. This is the offset that
     * {@link #reset()} restores.
     */
    private long bufferMarkPosition = 0;

    /**
     * Wraps <code>in</code> using the default buffer size of
     * {@link BufferedInputStream}.
     *
     * @param in Stream to wrap.
     */
    public RepositionableInputStream(InputStream in) {
        super(in);
    }

    /**
     * Wraps <code>in</code> using a buffer of <code>size</code> bytes.
     *
     * @param in Stream to wrap.
     * @param size Buffer size handed to {@link BufferedInputStream}.
     */
    public RepositionableInputStream(InputStream in, int size) {
        super(in, size);
    }

    /**
     * Reads up to <code>b.length</code> bytes into <code>b</code>.
     *
     * @param b Destination buffer.
     * @return Count of bytes read, or -1 at end of stream.
     * @throws IOException If the underlying read fails.
     */
    @Override
    public int read(byte[] b) throws IOException {
        // Delegate only; read(byte[], int, int) does the position
        // accounting. The inherited read(byte[]) already dispatches to that
        // method, so adding the count here as well would double it.
        return read(b, 0, b.length);
    }

    /**
     * Reads up to <code>ct</code> bytes into <code>b</code> starting at
     * index <code>offset</code>, advancing {@link #position()} by the count
     * read.
     *
     * @param b Destination buffer.
     * @param offset Index in <code>b</code> at which to store the first byte.
     * @param ct Maximum count of bytes to read.
     * @return Count of bytes read, or -1 at end of stream.
     * @throws IOException If the underlying read fails.
     */
    @Override
    public synchronized int read(byte[] b, int offset, int ct)
            throws IOException {
        // Mark the underlying stream so that we'll remember what we are about
        // to read unless a mark has been set in this RepositionableStream
        // (We have two levels of mark). In this latter case we want the
        // underlying stream to preserve its mark position so it aligns with
        // this RS when reset is called.
        if (!isMarked()) {
            // The read limit is the count of bytes requested; the offset
            // into the destination array has no bearing on it.
            markBuffer(ct);
        }
        int read = super.read(b, offset, ct);
        if (read != -1) {
            position += read;
        }
        return read;
    }

    /**
     * Reads a single byte, advancing {@link #position()} by one on success.
     *
     * @return The byte read (0-255), or -1 at end of stream.
     * @throws IOException If the underlying read fails.
     */
    @Override
    public int read() throws IOException {
        // Same two-level mark handling as read(byte[], int, int): only mark
        // the buffer ourselves when the caller has no mark outstanding.
        if (!isMarked()) {
            markBuffer(1);
        }
        int c = super.read();
        if (c != -1) {
            position++;
        }
        return c;
    }

    /**
     * Moves the read position to <code>offset</code>, which must fall inside
     * the bytes currently held in the buffer. Clears any caller-level mark.
     *
     * @param offset Offset to move to.
     * @throws IllegalAccessError If <code>offset</code> is outside the
     * currently buffered range.
     */
    public void position(final long offset) {
        if (this.position == offset) {
            return;
        }
        // Stream offsets covered by buf[0] (inclusive) to buf[count]
        // (exclusive).
        long lowerBound = this.position - this.pos;
        long upperBound = lowerBound + this.count;
        if (offset < lowerBound || offset >= upperBound) {
            throw new IllegalAccessError("Offset goes outside " +
                "current this.buf (TODO: Do buffer fills if positive)");
        }
        // Safe to narrow: the bounds check above keeps the difference within
        // the buffer, whose indices are ints.
        this.pos += (int)(offset - this.position);
        this.position = offset;
        // Clear any mark.
        this.markPosition = -1;
    }

    /**
     * Sets a caller-level mark at the current position.
     *
     * @param readlimit Read limit passed through to
     * {@link BufferedInputStream#mark(int)}.
     */
    @Override
    public void mark(int readlimit) {
        this.markPosition = this.position;
        markBuffer(readlimit);
    }

    /**
     * Returns to the most recently marked point and clears the caller-level
     * mark. When {@link #mark(int)} has not been called since the last reset
     * or reposition, the mark in force is the implicit one the read methods
     * set just before reading.
     *
     * @throws IOException If the superclass has no valid mark to reset to;
     * {@link #position()} is then left unchanged.
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        // Restore the offset recorded when the buffer was last marked rather
        // than markPosition, which is -1 whenever the mark in force is an
        // implicit one.
        this.position = this.bufferMarkPosition;
        this.markPosition = -1;
    }

    /**
     * @return True if a caller-level mark, set via {@link #mark(int)}, is
     * outstanding.
     */
    protected boolean isMarked() {
        return this.markPosition != -1;
    }

    /**
     * @return Offset of the next byte to be read.
     */
    public long position() {
        return this.position;
    }

    /**
     * Marks the superclass buffer and records the stream offset the mark
     * corresponds to, so that {@link #reset()} can restore it.
     *
     * @param readlimit Read limit passed to
     * {@link BufferedInputStream#mark(int)}.
     */
    private void markBuffer(int readlimit) {
        super.mark(readlimit);
        this.bufferMarkPosition = this.position;
    }
}