// src.it.unimi.dsi.io.OfflineIterable Maven / Gradle / Ivy
package it.unimi.dsi.io;
/*
* DSI utilities
*
* Copyright (C) 2005-2017 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.NoSuchElementException;
import org.slf4j.LoggerFactory;
/** An iterable that offers elements that were previously stored offline using specialized
* serialization methods. At construction, you provide a {@linkplain #OfflineIterable(it.unimi.dsi.io.OfflineIterable.Serializer, Object) serializer}
* that establishes how elements are written offline; after that, you can
* {@linkplain #add(Object) add elements} one at a time or in a {@linkplain #addAll(Iterable) bulk way}.
* At any moment, you can {@linkplain #iterator() get} an {@link OfflineIterable.OfflineIterator OfflineIterator}
* on this object that returns all the elements added so far. Note that the returned iterator caches the current number of elements,
* so each iterator will return just the elements added at the time of its creation.
*
* Warning: The store object provided at {@linkplain OfflineIterable#OfflineIterable(it.unimi.dsi.io.OfflineIterable.Serializer, Object)
* construction time} is shared by all iterators.
*
*
Closing
*
* Both {@link OfflineIterable} and {@link OfflineIterable.OfflineIterator OfflineIterator} are {@link SafelyCloseable} (the latter will
* close its input stream when hasNext()
returns false), but for better resource management you should close them after usage.
*
* @author Sebastiano Vigna
* @since 0.9.2
*/
public class OfflineIterable implements Iterable, SafelyCloseable, Size64 {
public static final long serialVersionUID = 1L;
/** An iterator returned by an {@link OfflineIterable}. */
public final static class OfflineIterator implements ObjectIterator, SafelyCloseable {
/** The data input stream that accesses the file of the related {@link OfflineIterable}. */
private final DataInputStream dis;
/** The number of elements in the related {@link OfflineIterable}. */
private final long size;
/** The serializer used to store and read the elements of this iterable. */
private final Serializer super A, B> serializer;
/** An object that is (re)used by the iterator(s) iterating on this iterable. */
private final B store;
/** The number of elements read by this iterator. */
private long read;
/** Whether this iterator has been closed. */
private boolean closed = false;
private OfflineIterator(final DataInputStream dis, final Serializer super A, B> serializer, final B store, final long size) {
this.dis = dis;
this.serializer = serializer;
this.store = store;
this.size = size;
}
@Override
public boolean hasNext() {
if (read >= size) close();
return read < size;
}
@Override
public B next() {
if (!hasNext()) throw new NoSuchElementException();
try {
serializer.read(dis, store);
}
catch (IOException e) {
throw new RuntimeException(e);
}
read++;
return store;
}
@Override
public void close() {
if (!closed) {
try {
dis.close();
}
catch (IOException e) {
throw new RuntimeException(e);
}
closed = true;
}
}
@Override
protected void finalize() throws Throwable {
try {
if (! closed) {
LoggerFactory.getLogger(this.getClass()).warn("This " + this.getClass().getName() + " [" + toString() + "] should have been closed.");
close();
}
}
finally {
super.finalize();
}
}
}
/** Determines a strategy to serialize and deserialize elements.
*/
public interface Serializer {
/** Writes out an element.
*
* @param x the element to be written.
* @param dos the stream where the element should be written.
* @throws IOException if an exception occurs while writing.
*/
public void write(A x, DataOutput dos) throws IOException;
/** Reads an element.
*
* @param dis the stream whence the element should be read.
* @param x the object where the element will be read.
* @throws IOException if an exception occurs while reading.
*/
public void read(DataInput dis, B x) throws IOException;
}
/** The serializer used to store and read the elements of this iterable. */
private final Serializer super T, U> serializer;
/** The file where elements are serialized. */
private final File file;
/** A data output stream associated with {@link #file}. */
private final DataOutputStream dos;
/** An object that is (re)used by the iterator(s) iterating on this iterable. */
private final U store;
/** The number of elements written so far. */
private long size;
/** Whether this iterable has been closed. */
private boolean closed;
/** The fast buffered output stream associated with {@link #dos}. */
private FastBufferedOutputStream fbos;
/** Creates an offline iterable with given serializer.
*
* @param serializer the serializer to be used.
* @param store an object that is (re)used by the iterator(s) iterating on this iterable.
* @throws IOException
*/
public OfflineIterable(final Serializer super T, U> serializer, final U store) throws IOException {
this.serializer = serializer;
this.store = store;
file = File.createTempFile(OfflineIterable.class.getSimpleName(), "elmts");
file.deleteOnExit();
fbos = new FastBufferedOutputStream(new FileOutputStream(file));
dos = new DataOutputStream(fbos);
}
/** Adds a new element at the end of this iterable.
*
* @param x the element to be added.
* @throws IOException
*/
public void add(T x) throws IOException {
serializer.write(x, dos);
size++;
}
/** Adds all the elements of the given iterable at the end of this iterable.
*
* @param it the iterable producing the elements to be added.
* @throws IOException
*/
public void addAll(Iterable it) throws IOException {
for (T x: it) add(x);
}
@Override
public OfflineIterator iterator() {
try {
dos.flush();
final DataInputStream dis = new DataInputStream(new FastBufferedInputStream(new FileInputStream(file)));
return new OfflineIterator<>(dis, serializer, store, size);
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
public void clear() throws IOException {
if (closed) throw new IOException("This" + this.getClass().getName() + " [" + toString() + "] has been closed.");
size = 0;
dos.flush();
fbos.position(0);
}
@Override
public void close() {
if (!closed) {
try {
dos.close();
file.delete();
}
catch (IOException e) {
throw new RuntimeException(e);
}
closed = true;
}
}
@Override
protected void finalize() throws Throwable {
try {
if (! closed) {
LoggerFactory.getLogger(this.getClass()).warn("This " + this.getClass().getName() + " [" + toString() + "] should have been closed.");
close();
}
}
finally {
super.finalize();
}
}
/** Returns the number of elements added so far, unless it is too big to fit in an integer (in which case this method will throw an
* exception).
*
* @return the number of elements added so far.
* @deprecated Use {@link #size64()} instead.
*/
@Override
@Deprecated
public int size() {
final long size64 = size64();
if (size64 > Integer.MAX_VALUE) throw new IllegalStateException("The number of elements of this bit list (" + size64 + ") exceeds Integer.MAX_INT");
return (int)size64;
}
/** Returns the number of elements added so far.
*
* @return the number of elements added so far.
*/
@Override
public long size64() {
return size;
}
}