All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.io.FileLinesCollection Maven / Gradle / Ivy

Go to download

The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Università degli Studi di Milano.

There is a newer version: 2.7.3
Show newest version
package it.unimi.dsi.io;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

/*
 * DSI utilities
 *
 * Copyright (C) 2005-2020 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.lang.MutableString;

/** A wrapper exhibiting the lines of a file as a {@link java.util.Collection}.
 *
 * 

Warning: the lines returned by iterators generated by * instances of this class are not cacheable. The returned value is * a {@link it.unimi.dsi.lang.MutableString} instance that is reused * at each call, and that is modified by a call to {@link java.util.Iterator#hasNext() hasNext()}. * Thus, for instance, *

 *    ObjectIterators.unwrap(fileLinesColletion.iterator());
 * 
* will not give the expected results. Use {@link #allLines()} to get * the list of all lines (again, under the form of compact {@link it.unimi.dsi.lang.MutableString}s). * Note also that {@link #toString()} will return a single string containing all * file lines separated by the string associated with the system property line.separator. * *

An instance of this class allows to access the lines of a file as a * {@link java.util.Collection}. Using {@linkplain java.util.Collection#contains(java.lang.Object) * direct access} is strongly discouraged (it will require a full scan of the file), but * the {@link #iterator()} can be fruitfully used to scan the file, and can be called any * number of times, as it opens an independent input stream at each call. For the * same reason, the returned iterator type ({@link it.unimi.dsi.io.FileLinesCollection.FileLinesIterator}) * is {@link java.io.Closeable}, and should be closed after usage. * *

Using a suitable {@linkplain #FileLinesCollection(CharSequence, String, boolean) constructor}, it is possible * to specify that the file is compressed in gzip format (in this case, it will be opened using a {@link GZIPInputStream}). * *

Note that the first call to {@link #size()} will require a full file scan. * * @author Sebastiano Vigna * @since 0.9.2 */ public class FileLinesCollection extends AbstractCollection { /** The filename upon which this file-lines collection is based. */ private final String filename; /** The encoding of {@link #filename}, or {@code null} for the standard platform encoding. */ private final String encoding; /** The cached size of the collection. */ private int size = -1; /** Whether {@link #filename} is zipped. */ private final boolean zipped; /** Creates a file-lines collection for the specified filename with the specified encoding. * * @param filename a filename. * @param encoding an encoding. */ public FileLinesCollection(final CharSequence filename, final String encoding) { this(filename, encoding, false); } /** Creates a file-lines collection for the specified filename with the specified encoding, optionally assuming * that the file is compressed using gzip format. * * @param filename a filename. * @param encoding an encoding. * @param zipped whether filename is zipped. */ public FileLinesCollection(final CharSequence filename, final String encoding, final boolean zipped) { this.zipped = zipped; this.filename = filename.toString(); this.encoding = encoding; } /** An iterator over the lines of a {@link FileLinesCollection}. * *

Instances of this class open an {@link java.io.InputStream}, and thus should be {@linkplain Closeable#close() closed} after * usage. A “safety-net” finaliser tries to take care of the cases in which * closing an instance is impossible. An exhausted iterator, however, will be closed automagically. */ public static final class FileLinesIterator implements Iterator, SafelyCloseable { private FastBufferedReader fbr; MutableString s = new MutableString(), next; boolean toAdvance = true; private FileLinesIterator(final String filename, final String encoding, final boolean zipped) { try { fbr = encoding != null ? new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(new FileInputStream(filename)) : new FileInputStream(filename), encoding)) : new FastBufferedReader(new FileReader(filename)); } catch (final IOException e) { throw new RuntimeException(e); } } @Override public boolean hasNext() { if (toAdvance) { try { next = fbr.readLine(s); if (next == null) close(); } catch (final IOException e) { throw new RuntimeException(e); } toAdvance = false; } return next != null; } @Override public MutableString next() { if (! 
hasNext()) throw new NoSuchElementException(); toAdvance = true; return s; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public synchronized void close() { if (fbr == null) return; try { fbr.close(); } catch (final IOException e) { throw new RuntimeException(e); } finally { fbr = null; } } @SuppressWarnings("deprecation") @Override protected synchronized void finalize() throws Throwable { try { if (fbr != null) close(); } finally { super.finalize(); } } } @Override public FileLinesIterator iterator() { return new FileLinesIterator(filename, encoding, zipped); } @Override public synchronized int size() { if (size == -1) { final FileLinesIterator i = iterator(); size = 0; while(i.hasNext()) { size++; i.next(); } i.close(); } return size; } /** Returns all lines of the file wrapped by this file-lines collection. * * @return all lines of the file wrapped by this file-lines collection. */ public ObjectList allLines() { final ObjectArrayList result = new ObjectArrayList<>(); for(final Iterator i = iterator(); i.hasNext();) result.add(i.next().copy()); return result; } @Override public Object[] toArray() { throw new UnsupportedOperationException("Use allLines()"); } @Override public T[] toArray(final T[] a) { throw new UnsupportedOperationException("Use allLines()"); } @Override public String toString() { final MutableString separator = new MutableString(System.getProperty("line.separator")); final MutableString s = new MutableString(); for(final MutableString l: this) s.append(l).append(separator); return s.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy