All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unimi.dsi.io.OfflineIterable Maven / Gradle / Ivy

Go to download

Blazegraph modifications to the DSI utils. These are forked from version 1.10.0 under LGPLv2.1.

There is a newer version: 2.1.4
Show newest version
package it.unimi.dsi.io;


/*		 
 * DSI utilities
 *
 * Copyright (C) 2005-2009 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.NoSuchElementException;

import org.apache.log4j.Logger;

/** An iterable that offers elements that were previously stored offline using specialized
 *  serialization methods. At construction, you provide a {@linkplain #OfflineIterable(it.unimi.dsi.io.OfflineIterable.Serializer, Object) serializer}
 *  that establishes how elements are written offline; after that, you can
 *  {@linkplain #add(Object) add elements} one at a time or in a {@linkplain #addAll(Iterable) bulk way}. 
 *  At any moment, you can {@linkplain #iterator() get} an {@link OfflineIterable.OfflineIterator OfflineIterator} 
 *  on this object that returns all the elements added so far. Note that the returned iterator caches the current number of elements,
 *  so each iterator will return just the elements added at the time of its creation.
 *  
 *  

Warning: The store object provided at {@linkplain OfflineIterable#OfflineIterable(it.unimi.dsi.io.OfflineIterable.Serializer, Object ) * construction time} is shared by all iterators. * *

Closing

* *

Both {@link OfflineIterable} and {@link OfflineIterable.OfflineIterator OfflineIterator} are {@link SafelyCloseable} (the latter will * close its input stream when hasNext() returns false), but for better resource management you should close them after usage. * *

Store reuse

* * @author Sebastiano Vigna * @since 0.9.2 */ public class OfflineIterable implements Iterable,SafelyCloseable { public static final long serialVersionUID = 1L; private static final Logger LOGGER = Util.getLogger( OfflineIterable.class ); /** An iterator returned by an {@link OfflineIterable}. */ public final static class OfflineIterator extends AbstractObjectIterator implements SafelyCloseable { /** The data input stream that accesses the file of the related {@link OfflineIterable}. */ private final DataInputStream dis; /** The number of elements in the related {@link OfflineIterable}. */ private final long size; /** The serializer used to store and read the elements of this iterable. */ private final Serializer serializer; /** An object that is (re)used by the iterator(s) iterating on this iterable. */ private final B store; /** The number of elements read by this iterator. */ private long read; /** Whether this iterator has been closed. */ private boolean closed = false; private OfflineIterator( DataInputStream dis, final Serializer serializer, B store, long size ) { this.dis = dis; this.serializer = serializer; this.store = store; this.size = size; } public boolean hasNext() { if ( read >= size ) close(); return read < size; } public B next() { if ( !hasNext() ) throw new NoSuchElementException(); try { serializer.read( dis, store ); } catch ( IOException e ) { throw new RuntimeException( e ); } read++; return store; } public void close() { if ( !closed ) { try { dis.close(); } catch ( IOException e ) { throw new RuntimeException( e ); } closed = true; } } protected void finalize() throws Throwable { try { if ( ! closed ) { LOGGER.warn( "This " + this.getClass().getName() + " [" + toString() + "] should have been closed." ); close(); } } finally { super.finalize(); } } } /** Determines a strategy to serialize and deserialize elements. */ public interface Serializer { /** Writes out an element. * * @param x the element to be written. 
* @param dos the stream where the element should be written. * @throws IOException if an exception occurs while writing. */ public void write( A x, DataOutputStream dos ) throws IOException; /** Reads an element. * * @param dis the stream whence the element should be read. * @param x the object where the element will be read. * @throws IOException if an exception occurs while reading. */ public void read( DataInputStream dis, B x ) throws IOException; } /** The serializer used to store and read the elements of this iterable. */ private final Serializer serializer; /** The file where elements are serialized. */ private final File file; /** A data output stream associated with {@link #file}. */ private final DataOutputStream dos; /** An object that is (re)used by the iterator(s) iterating on this iterable. */ private final U store; /** The number of elements written so far. */ private long size; /** Whether this iterable has been closed. */ private boolean closed = false; /** Creates an offline iterable with given serializer. * * @param serializer the serializer to be used. * @param store an object that is (re)used by the iterator(s) iterating on this iterable. * @throws IOException */ public OfflineIterable( final Serializer serializer, final U store ) throws IOException { this.serializer = serializer; this.store = store; file = File.createTempFile( OfflineIterable.class.getSimpleName(), "elmts" ); file.deleteOnExit(); dos = new DataOutputStream( new FastBufferedOutputStream( new FileOutputStream( file ) ) ); } /** Adds a new element at the end of this iterable. * * @param x the element to be added. * @throws IOException */ public void add( T x ) throws IOException { serializer.write( x, dos ); size++; } /** Adds all the elements of the given iterable at the end of this iterable. * * @param it the iterable producing the elements to be added. 
* @throws IOException */ public void addAll( Iterable it ) throws IOException { for ( T x: it ) add( x ); } public OfflineIterator iterator() { try { dos.flush(); final DataInputStream dis = new DataInputStream( new FastBufferedInputStream( new FileInputStream( file ) ) ); return new OfflineIterator( dis, serializer, store, size ); } catch ( IOException e ) { throw new RuntimeException( e ); } } public void close() { if ( !closed ) { try { dos.close(); file.delete(); } catch ( IOException e ) { throw new RuntimeException( e ); } closed = true; } } protected void finalize() throws Throwable { try { if ( ! closed ) { LOGGER.warn( "This " + this.getClass().getName() + " [" + toString() + "] should have been closed." ); close(); } } finally { super.finalize(); } } /** Returns the number of elements added so far, unless it is too big to fit in an integer (in which case this method will throw an * exception). * * @return the number of elements added so far. */ public int size() { final long length = length(); if ( length > Integer.MAX_VALUE ) throw new IllegalStateException( "The number of elements of this bit list (" + length + ") exceeds Integer.MAX_INT" ); return (int)length; } /** Returns the number of elements added so far. * * @return the number of elements added so far. */ public long length() { return size; } }