
// stream.io.SequentialFileInputStream Maven / Gradle / Ivy
/*
* streams library
*
* Copyright (C) 2011-2014 by Christian Bockermann, Hendrik Blom
*
* streams is a library, API and runtime environment for processing high
* volume data streams. It is composed of three submodules "stream-api",
* "stream-core" and "stream-runtime".
*
* The streams library (and its submodules) is free software: you can
* redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* The stream.ai library (and its submodules) is distributed in the hope
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package stream.io;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * An input stream that reads from a sequence of files. It is intended to
 * provide the opposite of a split of a file into parts: each part is read in
 * order of its last-modification time.
 *
 * At the end of the current file, the stream waits for new data to be
 * appended to that file, or for a new file to be created whose name matches
 * the configured pattern.
 *
 * This class performs no synchronization of its own.
 *
 * @author Christian Bockermann &lt;chris@jwall.org&gt;
 */
public class SequentialFileInputStream extends InputStream {

    /* A global logger for this class */
    static Logger log = LoggerFactory
            .getLogger(SequentialFileInputStream.class);

    /*
     * This constant object implements a comparator for files, which
     * determines the order in which this file stream processes files: by
     * last-modification time, ties broken by file name.
     *
     * The ordering depends on the (mutable) modification time, so it is only
     * used for short-lived sorted snapshots, never for long-lived sets.
     */
    final static Comparator<File> FILE_ORDER = new Comparator<File>() {
        @Override
        public int compare(File arg0, File arg1) {
            if (arg0 == arg1 || arg0.compareTo(arg1) == 0) {
                return 0;
            }
            long modified0 = arg0.lastModified();
            long modified1 = arg1.lastModified();
            if (modified0 != modified1) {
                return modified0 < modified1 ? -1 : 1;
            }
            return arg0.getName().compareTo(arg1.getName());
        }
    };

    /* The time gap (ms) this stream puts itself to sleep until new data is available */
    Integer sleep = 500;

    /* The initial file this reader was created from */
    File file = null;

    /* The file we currently read from */
    File current = null;

    /* The next file to read */
    File next = null;

    /* The input-stream of the current file */
    FileInputStream reader;

    /* The channel of the current input-stream; all reads go through it */
    FileChannel channel;

    /* Regular expression that names of follow-up files have to match */
    String pattern = "";

    /* Set once the stream was closed or the maximum waiting time was exceeded */
    boolean closed = false;

    /* Maximum time (ms) a single read() may wait for data; negative means "forever" */
    long maxWaitingTime = -1L;

    /* Number of bytes read from the current file */
    long read = 0L;

    /* Number of bytes read from all files so far */
    long total = 0L;

    /* Time (ms) the current read() call has been waiting so far */
    long waitingTime = 0L;

    boolean removeAfterRead = true;

    /*
     * Files that have already been opened by openNextFile(). A hash set is
     * used on purpose: membership must not depend on the modification time of
     * a file, which may still change after the file was added.
     */
    Set<File> finished = new HashSet<File>();

    /* Single-byte buffer used to transfer one byte per read() call */
    final ByteBuffer buffer = ByteBuffer.allocate(1);

    /**
     * Creates a new SequentialFileInputStream, which will read the specified
     * file and any subsequent files that match the file's name, possibly with
     * an appended number, i.e. for the file {@code /tmp/test.log} the stream
     * will read
     *
     * {@code /tmp/test.log}, {@code /tmp/test.log.1},
     * {@code /tmp/test.log.2}, ...
     *
     * The trailing digits may as well be time-stamps or the like. The files
     * are read in order of their last-modification time. Files that have been
     * read are not removed.
     *
     * @param file
     *            The initial file.
     * @throws IOException
     *             if the initial file exists but cannot be opened.
     */
    public SequentialFileInputStream(File file) throws IOException {
        this(file, false);
    }

    /**
     * Creates a SequentialFileInputStream which optionally removes files once
     * the stream proceeds to the next file of the sequence. Whether the old
     * files are removed is determined by the {@code removeAfterRead} flag.
     *
     * Follow-up files are those whose name is the name of the initial file,
     * optionally followed by a dot and digits.
     *
     * @param file
     *            The initial file to start with.
     * @param removeAfterRead
     *            Whether the old files should be removed or not.
     * @throws IOException
     *             if the initial file exists but cannot be opened.
     */
    public SequentialFileInputStream(File file, boolean removeAfterRead)
            throws IOException {
        // Quote the name so that characters such as '.' or '(' in the file
        // name are matched literally instead of being interpreted as regex.
        this(file, Pattern.quote(file.getName()) + "(\\.\\d+)?$",
                removeAfterRead);
    }

    /**
     * Creates a SequentialFileInputStream with an explicit name pattern.
     *
     * @param file
     *            The initial file, or a directory to scan for matching files.
     * @param pattern
     *            Regular expression that the names of follow-up files have to
     *            match (see {@link String#matches(String)}).
     * @param removeAfterRead
     *            Whether the old files should be removed or not.
     * @throws IOException
     *             if the initial file exists but cannot be opened.
     */
    public SequentialFileInputStream(File file, String pattern,
            boolean removeAfterRead) throws IOException {
        this.file = file;
        this.current = this.file;
        this.pattern = pattern;
        this.removeAfterRead = removeAfterRead;
        if (current.isFile()) {
            reader = new FileInputStream(current);
            // read() goes through the channel, so it has to be set up
            // together with the reader.
            channel = reader.getChannel();
        }
    }

    /**
     * Checks whether the given file is an unread follow-up file of this
     * sequence.
     *
     * @param f
     *            The file to check.
     * @return {@code true} if {@code f} is a regular file other than the
     *         initial file, its name matches the pattern and it has not been
     *         opened by this stream before.
     */
    public boolean matchesSequence(File f) {
        if (f.getAbsolutePath().equals(file.getAbsolutePath())) {
            return false;
        }
        if (f.isFile() && !f.equals(file) && f.getName().matches(pattern)) {
            return !finished.contains(f);
        }
        return false;
    }

    /**
     * Lists the entries of the directory that holds the sequence: the initial
     * file itself if it is a directory, its parent directory otherwise.
     *
     * @return the directory entries; an empty array if the directory cannot
     *         be determined or listed.
     */
    private File[] listDirectory() {
        File dir = file.isDirectory() ? file : file.getParentFile();
        if (dir == null) {
            // A relative path such as "test.log" has no parent component.
            dir = file.getAbsoluteFile().getParentFile();
        }
        File[] entries = (dir == null) ? null : dir.listFiles();
        // listFiles() yields null if the directory is missing or unreadable.
        return (entries == null) ? new File[0] : entries;
    }

    /**
     * Sleeps for the configured sleep interval.
     *
     * @throws InterruptedIOException
     *             if the thread is interrupted while sleeping; the interrupt
     *             status of the thread is restored before throwing.
     */
    private void pause() throws InterruptedIOException {
        try {
            Thread.sleep(sleep);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            InterruptedIOException interrupted = new InterruptedIOException(
                    "Interrupted while waiting for new data after " + current);
            interrupted.initCause(e);
            throw interrupted;
        }
    }

    /**
     * This method checks if there exists a next file in the sequence.
     *
     * @return {@code true} if a new file exists, which may indicate that the
     *         current file is finished.
     */
    protected boolean hasNext() {
        for (File f : listDirectory()) {
            if (matchesSequence(f) && !f.equals(current)) {
                return true;
            }
        }
        return false;
    }

    /**
     * This method closes the current file and opens the next file in the
     * sequence. If no next file exists, this method will block until one has
     * been created.
     *
     * @throws IOException
     *             if the old file cannot be closed or the next file cannot be
     *             opened; an {@link InterruptedIOException} if the thread is
     *             interrupted while waiting for the next file.
     */
    protected void openNextFile() throws IOException {
        log.debug("Current file {} seems to have ended, checking for next one",
                current);
        boolean proceeded = false;
        do {
            // Snapshot of all unread candidates, oldest first.
            SortedSet<File> sequence = new TreeSet<File>(FILE_ORDER);
            for (File f : listDirectory()) {
                if (matchesSequence(f)) {
                    sequence.add(f);
                }
            }
            for (File candidate : sequence) {
                log.debug(" file: {} (modified: {})", candidate,
                        candidate.lastModified());
            }
            if (!sequence.isEmpty()) {
                if (reader != null) {
                    log.debug("Closing old reader on file {}...", current);
                    reader.close();
                }
                // 'current' may be a directory (or not exist at all) when the
                // stream was created on a directory, so only remove files.
                if (removeAfterRead && current.isFile()) {
                    log.debug("Removing file {}", current);
                    if (!current.delete()) {
                        log.warn("Could not remove file {}", current);
                    }
                }
                log.debug("Read {} bytes from {}", read, current);
                current = sequence.first();
                finished.add(current);
                read = 0L;
                reader = new FileInputStream(current);
                channel = reader.getChannel();
                log.debug("Now reading from '{}'", current);
                proceeded = true;
            } else {
                log.debug("After reading {} bytes from {}", read, current);
                log.debug(" a total of {} bytes read so far", total);
                log.debug(
                        "No sequential file found for {}, sleeping for {} ms and checking again...",
                        file, sleep);
                pause();
            }
        } while (!proceeded);
    }

    /**
     * Reads the next byte of the current file. At the end of the current file
     * the stream proceeds to the next file of the sequence if one exists;
     * otherwise it blocks until new data arrives or the maximum waiting time
     * is exceeded.
     *
     * @return the next byte as a value in the range 0..255, or -1 if the
     *         stream is closed or the maximum waiting time was exceeded.
     * @throws IOException
     *             if reading fails; an {@link InterruptedIOException} if the
     *             thread is interrupted while waiting for data.
     * @see java.io.InputStream#read()
     */
    @Override
    public int read() throws IOException {
        waitingTime = 0;
        if (closed) {
            return -1;
        }
        while (reader == null) {
            openNextFile();
        }
        buffer.clear();
        int data = channel.read(buffer);
        while (data == -1) {
            buffer.clear();
            if (hasNext()) {
                openNextFile();
            } else {
                pause();
                waitingTime += sleep;
                if (maxWaitingTime >= 0 && waitingTime > maxWaitingTime) {
                    closed = true;
                    log.debug("Total sleeping time exhausted!");
                    return -1;
                }
            }
            log.debug("Trying to read from {} again...", current);
            data = channel.read(buffer);
        }
        read++;
        total++;
        // Mask to 0..255: returning the signed byte would turn 0xFF into -1,
        // which callers interpret as end of stream.
        return buffer.get(0) & 0xFF;
    }

    /**
     * Closes the file that is currently being read and marks this stream as
     * closed, so that subsequent calls to {@link #read()} return -1.
     *
     * @throws IOException
     *             if closing the underlying file fails.
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        closed = true;
        if (reader != null) {
            reader.close();
        }
    }

    /**
     * @see java.io.InputStream#markSupported()
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * @see java.io.InputStream#available()
     */
    @Override
    public int available() throws IOException {
        if (reader == null) {
            return super.available();
        }
        return reader.available();
    }

    /**
     * @return the sleep interval in milliseconds
     */
    public Integer getSleep() {
        return sleep;
    }

    /**
     * @param sleep
     *            the sleep interval in milliseconds to set
     * @throws IllegalArgumentException
     *             if {@code sleep} is {@code null} or negative
     */
    public void setSleep(Integer sleep) {
        if (sleep == null || sleep < 0) {
            throw new IllegalArgumentException(
                    "sleep must be a non-negative number of milliseconds, got: "
                            + sleep);
        }
        this.sleep = sleep;
    }

    /**
     * @return the maxWaitingTime
     */
    public long getMaxWaitingTime() {
        return maxWaitingTime;
    }

    /**
     * @param maxWaitingTime
     *            the maximum time in milliseconds a read may wait for new
     *            data; a negative value disables the limit
     */
    public void setMaxWaitingTime(long maxWaitingTime) {
        this.maxWaitingTime = maxWaitingTime;
    }

    /**
     * @return the removeAfterRead
     */
    public boolean isRemoveAfterRead() {
        return removeAfterRead;
    }

    /**
     * @param removeAfterRead
     *            the removeAfterRead to set
     */
    public void setRemoveAfterRead(boolean removeAfterRead) {
        this.removeAfterRead = removeAfterRead;
    }

    /**
     * @return a copy of the file currently being read (with an absolute
     *         path), or {@code null} if there is none.
     */
    public File getCurrentFile() {
        if (current == null) {
            return null;
        }
        return new File(current.getAbsolutePath());
    }
}