All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.io.FileLinesCollection Maven / Gradle / Ivy

/*
 * DSI utilities
 *
 * Copyright (C) 2005-2023 Sebastiano Vigna
 *
 * This program and the accompanying materials are made available under the
 * terms of the GNU Lesser General Public License v2.1 or later,
 * which is available at
 * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
 * or the Apache Software License 2.0, which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
 */

package it.unimi.dsi.io;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.lang.MutableString;

/**
 * A wrapper exhibiting the lines of a file as a {@link java.util.Collection}.
 *
 * 

* Warning: the lines returned by iterators generated by instances of this class * are not cacheable. The returned value is a {@link it.unimi.dsi.lang.MutableString} * instance that is reused at each call, and that is modified by a call to * {@link java.util.Iterator#hasNext() hasNext()}. Thus, for instance, * *

 * ObjectIterators.unwrap(fileLinesColletion.iterator());
 * 
* * will not give the expected results. Use {@link #allLines()} to get the list of all lines (again, * under the form of compact {@link it.unimi.dsi.lang.MutableString}s). Note also that * {@link #toString()} will return a single string containing all file lines separated by the string * associated with the system property line.separator. * *

* An instance of this class allows to access the lines of a file as a {@link java.util.Collection}. * Using {@linkplain java.util.Collection#contains(java.lang.Object) direct access} is strongly * discouraged (it will require a full scan of the file), but the {@link #iterator()} can be * fruitfully used to scan the file, and can be called any number of times, as it opens an * independent input stream at each call. For the same reason, the returned iterator type * ({@link it.unimi.dsi.io.FileLinesCollection.FileLinesIterator}) is {@link java.io.Closeable}, and * should be closed after usage. * *

* Using a suitable {@linkplain #FileLinesCollection(CharSequence, String, boolean) constructor}, it * is possible to specify that the file is compressed in gzip format (in this case, it * will be opened using a {@link GZIPInputStream}). * *

* Note that the first call to {@link #size()} will require a full file scan. * * @author Sebastiano Vigna * @since 0.9.2 * @deprecated Please use {@link FileLinesMutableStringIterable} instead; the {@code zipped} option of this class * can be simulated by passing a {@link GZIPInputStream} as decompressor. */ @Deprecated public class FileLinesCollection extends AbstractCollection { /** The filename upon which this file-lines collection is based. */ private final String filename; /** The encoding of {@link #filename}, or {@code null} for the standard platform encoding. */ private final String encoding; /** The cached size of the collection. */ private int size = -1; /** Whether {@link #filename} is zipped. */ private final boolean zipped; /** Creates a file-lines collection for the specified filename with the specified encoding. * * @param filename a filename. * @param encoding an encoding. */ public FileLinesCollection(final CharSequence filename, final String encoding) { this(filename, encoding, false); } /** Creates a file-lines collection for the specified filename with the specified encoding, optionally assuming * that the file is compressed using gzip format. * * @param filename a filename. * @param encoding an encoding. * @param zipped whether filename is zipped. */ public FileLinesCollection(final CharSequence filename, final String encoding, final boolean zipped) { this.zipped = zipped; this.filename = filename.toString(); this.encoding = encoding; } /** * An iterator over the lines of a {@link FileLinesCollection}. * *

* Instances of this class open an {@link java.io.InputStream}, and thus should be * {@linkplain Closeable#close() closed} after usage. A “safety-net” finaliser tries to * take care of the cases in which closing an instance is impossible. An exhausted iterator, * however, will be closed automagically. * * @deprecated Please use * {@link FileLinesMutableStringIterable#iterator(java.io.InputStream, java.nio.charset.Charset, Class)}; * the {@code zipped} option of this class can be simulated by passing a * {@link GZIPInputStream} as decompressor. */ @Deprecated public static final class FileLinesIterator implements Iterator, SafelyCloseable { private FastBufferedReader fbr; MutableString s = new MutableString(), next; boolean toAdvance = true; private FileLinesIterator(final String filename, final String encoding, final boolean zipped) { try { fbr = encoding != null ? new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(new FileInputStream(filename)) : new FileInputStream(filename), encoding)) : new FastBufferedReader(new FileReader(filename)); } catch (final IOException e) { throw new RuntimeException(e); } } @Override public boolean hasNext() { if (toAdvance) { try { next = fbr.readLine(s); if (next == null) close(); } catch (final IOException e) { throw new RuntimeException(e); } toAdvance = false; } return next != null; } @Override public MutableString next() { if (! 
hasNext()) throw new NoSuchElementException(); toAdvance = true; return s; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public synchronized void close() { if (fbr == null) return; try { fbr.close(); } catch (final IOException e) { throw new RuntimeException(e); } finally { fbr = null; } } @Override protected synchronized void finalize() throws Throwable { try { if (fbr != null) close(); } finally { super.finalize(); } } } @Override public FileLinesIterator iterator() { return new FileLinesIterator(filename, encoding, zipped); } @Override public synchronized int size() { if (size == -1) { final FileLinesIterator i = iterator(); size = 0; while(i.hasNext()) { size++; i.next(); } i.close(); } return size; } /** Returns all lines of the file wrapped by this file-lines collection. * * @return all lines of the file wrapped by this file-lines collection. */ public ObjectList allLines() { final ObjectArrayList result = new ObjectArrayList<>(); for(final Iterator i = iterator(); i.hasNext();) result.add(i.next().copy()); return result; } @Override public Object[] toArray() { throw new UnsupportedOperationException("Use allLines()"); } @Override public T[] toArray(final T[] a) { throw new UnsupportedOperationException("Use allLines()"); } @Override public String toString() { final MutableString separator = new MutableString(System.getProperty("line.separator")); final MutableString s = new MutableString(); for(final MutableString l: this) s.append(l).append(separator); return s.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy