All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.io.FileLinesCollection Maven / Gradle / Ivy

Go to download

The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Università degli Studi di Milano.

There is a newer version: 2.7.3
Show newest version
/*
 * DSI utilities
 *
 * Copyright (C) 2005-2020 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

package it.unimi.dsi.big.io;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigList;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.SafelyCloseable;
import it.unimi.dsi.lang.MutableString;

/** A wrapper exhibiting the lines of a file as a {@link java.util.Collection}.
 *
 * 

Warning: the lines returned by iterators generated by * instances of this class are not cacheable. The returned value is * a {@link it.unimi.dsi.lang.MutableString} instance that is reused * at each call, and that is modified by a call to {@link java.util.Iterator#hasNext() hasNext()}. * Thus, for instance, *

 *    ObjectIterators.unwrap(fileLinesColletion.iterator());
 * 
* will not give the expected results. Use {@link #allLines()} to get * the {@linkplain ObjectBigList big list} of all lines (again, under the form of compact {@link it.unimi.dsi.lang.MutableString}s). * Note also that {@link #toString()} will return a single string containing all * file lines separated by the string associated with the system property line.separator. * *

An instance of this class allows to access the lines of a file as a * {@link java.util.Collection}. Using {@linkplain java.util.Collection#contains(java.lang.Object) * direct access} is strongly discouraged (it will require a full scan of the file), but * the {@link #iterator()} can be fruitfully used to scan the file, and can be called any * number of times, as it opens an independent input stream at each call. For the * same reason, the returned iterator type ({@link it.unimi.dsi.io.FileLinesCollection.FileLinesIterator}) * is {@link java.io.Closeable}, and should be closed after usage. * *

Using a suitable {@linkplain #FileLinesCollection(CharSequence, String, boolean) constructor}, it is possible * to specify that the file is compressed in gzip format (in this case, it will be opened using a {@link GZIPInputStream}). * *

Note that the first call to {@link #size64()} will require a full file scan. * * @author Sebastiano Vigna * @since 2.0 */ public class FileLinesCollection extends AbstractCollection implements Size64 { /** The filename upon which this file-lines collection is based. */ private final String filename; /** The encoding of {@link #filename}, or {@code null} for the standard platform encoding. */ private final String encoding; /** The cached size of the collection. */ private long size = -1; /** Whether {@link #filename} is zipped. */ private final boolean zipped; /** Creates a file-lines collection for the specified filename with the specified encoding. * * @param filename a filename. * @param encoding an encoding. */ public FileLinesCollection(final CharSequence filename, final String encoding) { this(filename, encoding, false); } /** Creates a file-lines collection for the specified filename with the specified encoding, optionally assuming * that the file is compressed using gzip format. * * @param filename a filename. * @param encoding an encoding. * @param zipped whether filename is zipped. */ public FileLinesCollection(final CharSequence filename, final String encoding, final boolean zipped) { this.zipped = zipped; this.filename = filename.toString(); this.encoding = encoding; } /** An iterator over the lines of a {@link FileLinesCollection}. * *

Instances of this class open an {@link java.io.InputStream}, and thus should be {@linkplain Closeable#close() closed} after * usage. A “safety-net” finaliser tries to take care of the cases in which * closing an instance is impossible. An exhausted iterator, however, will be closed automagically. */ public static final class FileLinesIterator implements Iterator, SafelyCloseable { private FastBufferedReader fbr; MutableString s = new MutableString(), next; boolean toAdvance = true; private FileLinesIterator(final String filename, final String encoding, final boolean zipped) { try { fbr = encoding != null ? new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(new FileInputStream(filename)) : new FileInputStream(filename), encoding)) : new FastBufferedReader(new FileReader(filename)); } catch (final IOException e) { throw new RuntimeException(e); } } @Override public boolean hasNext() { if (toAdvance) { try { next = fbr.readLine(s); if (next == null) close(); } catch (final IOException e) { throw new RuntimeException(e); } toAdvance = false; } return next != null; } @Override public MutableString next() { if (! 
hasNext()) throw new NoSuchElementException(); toAdvance = true; return s; } @Override public synchronized void close() { if (fbr == null) return; try { fbr.close(); } catch (final IOException e) { throw new RuntimeException(e); } finally { fbr = null; } } @SuppressWarnings("deprecation") @Override protected synchronized void finalize() throws Throwable { try { if (fbr != null) close(); } finally { super.finalize(); } } } @Override public FileLinesIterator iterator() { return new FileLinesIterator(filename, encoding, zipped); } @Override @Deprecated public synchronized int size() { return (int)Math.min(Integer.MAX_VALUE, size); } @Override public synchronized long size64() { if (size == -1) { final FileLinesIterator i = iterator(); size = 0; while(i.hasNext()) { size++; i.next(); } i.close(); } return size; } /** Returns all lines of the file wrapped by this file-lines collection. * * @return all lines of the file wrapped by this file-lines collection. */ public ObjectBigList allLines() { final ObjectBigList result = new ObjectBigArrayBigList<>(); for(final Iterator i = iterator(); i.hasNext();) result.add(i.next().copy()); return result; } @Override @Deprecated public Object[] toArray() { throw new UnsupportedOperationException("Use allLines()"); } @Override @Deprecated public T[] toArray(final T[] a) { throw new UnsupportedOperationException("Use allLines()"); } @Override public String toString() { final MutableString separator = new MutableString(System.getProperty("line.separator")); final MutableString s = new MutableString(); for(final MutableString l: this) s.append(l).append(separator); return s.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy