All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unimi.dsi.io.FileLinesCollection Maven / Gradle / Ivy

Go to download

Blazegraph modifications to the DSI utils. These are forked from version 1.10.0 under LGPLv2.1.

There is a newer version: 2.1.4
Show newest version
package it.unimi.dsi.io;


/*		 
 * DSI utilities
 *
 * Copyright (C) 2005-2009 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.lang.MutableString;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

/** A wrapper exhibiting the lines of a file as a {@link java.util.Collection}.
 *
 * <p><strong>Warning</strong>: the lines returned by iterators generated by
 * instances of this class <em>are not cacheable</em>. The returned value is
 * a {@link it.unimi.dsi.lang.MutableString} instance that is reused
 * at each call, and that is <em>modified by a call to {@link java.util.Iterator#hasNext() hasNext()}</em>.
 * Thus, for instance,
 * <pre>
 *    ObjectIterators.unwrap( fileLinesCollection.iterator() );
 * </pre>
 * will not give the expected results. Use {@link #allLines()} to get
 * the list of all lines (each one an independent copy of the shared {@link it.unimi.dsi.lang.MutableString}).
 * Note also that {@link #toString()} will return a single string containing all
 * file lines, each followed by the string associated to the system property <code>line.separator</code>.
 *
 * <p>An instance of this class allows to access the lines of a file as a
 * {@link java.util.Collection}. Using {@linkplain java.util.Collection#contains(java.lang.Object)
 * direct access} is strongly discouraged (it will require a full scan of the file), but
 * the {@link #iterator()} can be fruitfully used to scan the file, and can be called any
 * number of times, as it opens an independent input stream at each call. For the
 * same reason, the returned iterator type ({@link it.unimi.dsi.io.FileLinesCollection.FileLinesIterator})
 * is {@link java.io.Closeable}, and should be closed after usage.
 *
 * <p>Using a suitable {@linkplain #FileLinesCollection(CharSequence, String, boolean) constructor}, it is possible
 * to specify that the file is compressed in gzip format (in this case, it will be opened using a {@link GZIPInputStream}).
 *
 * <p>Note that the first call to {@link #size()} will require a full file scan.
 *
 * @author Sebastiano Vigna
 * @since 0.9.2
 */
public class FileLinesCollection extends AbstractCollection<MutableString> {
	/** The filename upon which this file-lines collection is based. */
	private final String filename;
	/** The encoding of {@link #filename}, or <code>null</code> for the standard platform encoding. */
	private final String encoding;
	/** The cached size of the collection, or -1 if it has not been computed yet. */
	private int size = -1;
	/** Whether {@link #filename} is zipped. */
	private final boolean zipped;

	/** Creates a file-lines collection for the specified filename with the specified encoding.
	 *
	 * @param filename a filename.
	 * @param encoding an encoding, or <code>null</code> for the standard platform encoding.
	 */
	public FileLinesCollection( final CharSequence filename, final String encoding ) {
		this( filename, encoding, false );
	}

	/** Creates a file-lines collection for the specified filename with the specified encoding, optionally assuming
	 * that the file is compressed using gzip format.
	 *
	 * @param filename a filename.
	 * @param encoding an encoding, or <code>null</code> for the standard platform encoding.
	 * @param zipped whether <code>filename</code> is zipped.
	 */
	public FileLinesCollection( final CharSequence filename, final String encoding, final boolean zipped ) {
		this.zipped = zipped;
		this.filename = filename.toString();
		this.encoding = encoding;
	}

	/** An iterator over the lines of a {@link FileLinesCollection}.
	 *
	 * <p>Instances of this class open an {@link java.io.InputStream}, and thus should be {@linkplain Closeable#close() closed} after
	 * usage. A &ldquo;safety-net&rdquo; finaliser tries to take care of the cases in which
	 * closing an instance is impossible. An exhausted iterator, however, will be closed automagically.
	 *
	 * <p>Every call to {@link #next()} returns the <em>same</em> {@link MutableString} instance, whose
	 * content is overwritten by the following call to {@link #hasNext()}.
	 */
	public static final class FileLinesIterator implements Iterator<MutableString>, SafelyCloseable {
		/** The underlying reader, or <code>null</code> once this iterator has been closed. */
		private FastBufferedReader fbr;
		/** The buffer that is refilled at each line and returned by {@link #next()}. */
		final MutableString s = new MutableString();
		/** The result of the last read: {@link #s} if a line was read, <code>null</code> at end of file. */
		MutableString next;
		/** Whether a new line must be read before answering {@link #hasNext()}. */
		boolean toAdvance = true;

		private FileLinesIterator( final String filename, final String encoding, final boolean zipped ) {
			try {
				fbr = new FastBufferedReader( openReader( filename, encoding, zipped ) );
			}
			catch ( IOException e ) {
				throw new RuntimeException( e );
			}
		}

		/** Opens a character reader on a file, honouring both the gzip flag and the encoding.
		 *
		 * <p>The gzip flag is applied whether or not an encoding is given; a <code>null</code>
		 * encoding selects the platform default charset. If anything fails after the file has been
		 * opened (e.g., the file is not in gzip format or the encoding is unsupported), the file
		 * is closed before the exception propagates, so no descriptor is leaked.
		 *
		 * @param filename the file to open.
		 * @param encoding a charset name, or <code>null</code> for the platform default.
		 * @param zipped whether the file content must be decompressed with gzip.
		 * @return a reader over the (possibly decompressed) content of the file.
		 * @throws IOException if the file cannot be opened, is not valid gzip, or the encoding is unsupported.
		 */
		private static InputStreamReader openReader( final String filename, final String encoding, final boolean zipped ) throws IOException {
			final FileInputStream file = new FileInputStream( filename );
			boolean opened = false;
			try {
				// Fully qualified so that this class needs no additional import.
				final java.io.InputStream in = zipped ? new GZIPInputStream( file ) : file;
				final InputStreamReader reader = encoding != null
						? new InputStreamReader( in, encoding )
						: new InputStreamReader( in );
				opened = true;
				return reader;
			}
			finally {
				if ( ! opened ) {
					try {
						file.close();
					}
					catch ( IOException ignored ) {
						// The failure that brought us here is the one worth reporting.
					}
				}
			}
		}

		/** Reads the next line, if necessary, and reports whether one is available.
		 *
		 * <p>Note that a call to this method that actually reads a line overwrites the
		 * content of the string returned by the previous call to {@link #next()}.
		 * When the end of file is reached this iterator closes itself. After
		 * {@link #close()} this method reports that no further line is available.
		 *
		 * @return true if a line is available.
		 * @throws RuntimeException wrapping any {@link IOException} raised while reading.
		 */
		public boolean hasNext() {
			if ( toAdvance ) {
				final FastBufferedReader reader = fbr;
				if ( reader == null ) next = null; // Already closed: behave as an exhausted iterator.
				else {
					try {
						next = reader.readLine( s );
						if ( next == null ) close();
					}
					catch ( IOException e ) {
						throw new RuntimeException( e );
					}
				}
				toAdvance = false;
			}

			return next != null;
		}

		/** Returns the next line.
		 *
		 * @return the next line, as a shared, reused {@link MutableString}; callers that need
		 * to keep it must {@linkplain MutableString#copy() copy} it.
		 * @throws NoSuchElementException if there are no more lines.
		 */
		public MutableString next() {
			if ( ! hasNext() ) throw new NoSuchElementException();
			toAdvance = true;
			return s;
		}

		/** Unsupported.
		 *
		 * @throws UnsupportedOperationException always.
		 */
		public void remove() {
			throw new UnsupportedOperationException();
		}

		/** Closes the underlying reader. Calling this method more than once is harmless.
		 *
		 * @throws RuntimeException wrapping any {@link IOException} raised while closing.
		 */
		public synchronized void close() {
			if ( fbr == null ) return;
			try {
				fbr.close();
			}
			catch ( IOException e ) {
				throw new RuntimeException( e );
			}
			finally {
				// Drop the reference even on failure, so that we never try to close twice.
				fbr = null;
			}
		}

		protected synchronized void finalize() throws Throwable {
			try {
				if ( fbr != null ) close();
			}
			finally {
				super.finalize();
			}
		}
	}

	/** Returns a new iterator over the lines of the file; each call opens the file anew.
	 *
	 * @return a closeable iterator over the lines of the file.
	 * @throws RuntimeException wrapping any {@link IOException} raised while opening the file.
	 */
	public FileLinesIterator iterator() {
		return new FileLinesIterator( filename, encoding, zipped );
	}

	/** Returns the number of lines of the file. The first successful call scans the
	 * whole file; the result is then cached.
	 *
	 * @return the number of lines of the file.
	 */
	public synchronized int size() {
		if ( size == -1 ) {
			final FileLinesIterator i = iterator();
			// Count locally, so that a failure halfway through never leaves a partial count cached.
			int count = 0;
			try {
				while( i.hasNext() ) {
					count++;
					i.next();
				}
			}
			finally {
				i.close();
			}
			size = count;
		}
		return size;
	}

	/** Returns all lines of the file wrapped by this file-lines collection.
	 *
	 * @return all lines of the file wrapped by this file-lines collection, each one an independent copy.
	 */
	public ObjectList<MutableString> allLines() {
		final ObjectArrayList<MutableString> result = new ObjectArrayList<MutableString>();
		final FileLinesIterator i = iterator();
		try {
			while( i.hasNext() ) result.add( i.next().copy() );
		}
		finally {
			i.close();
		}
		return result;
	}

	/** Returns the whole content of the file as a single string.
	 *
	 * @return all lines of the file, each followed by the value of the system property <code>line.separator</code>.
	 */
	public String toString() {
		final MutableString separator = new MutableString( System.getProperty( "line.separator" ) );
		final MutableString s = new MutableString();
		final FileLinesIterator i = iterator();
		try {
			while( i.hasNext() ) s.append( i.next() ).append( separator );
		}
		finally {
			i.close();
		}
		return s.toString();
	}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy