All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.io.FileLinesByteArrayIterable Maven / Gradle / Ivy

/*
 * DSI utilities
 *
 * Copyright (C) 2020-2023 Sebastiano Vigna
 *
 * This program and the accompanying materials are made available under the
 * terms of the GNU Lesser General Public License v2.1 or later,
 * which is available at
 * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
 * or the Apache Software License 2.0, which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
 */

package it.unimi.dsi.io;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import it.unimi.dsi.fastutil.BigList;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream.LineTerminator;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import it.unimi.dsi.fastutil.objects.ObjectIterables;
import it.unimi.dsi.fastutil.objects.ObjectList;

/**
 * A wrapper exhibiting the lines of a file as an {@link Iterable} of byte arrays.
 *
 * 

* An instance of this class makes it possible to access the lines of a file as an {@link Iterable} * of byte arrays. Reading is performed using * {@link FastBufferedInputStream#readLine(byte[], EnumSet)}, and follows the rules defined therein. * No decoding is performed. * *

* The result of a call to {@link #iterator()} can be used to scan the file; each call will open an * independent input stream. The returned iterator type * ({@link it.unimi.dsi.io.FileLinesByteArrayIterable.FileLinesIterator FileLinesIterator}) is * {@link java.io.Closeable}, and should be closed after usage. Exhausted iterators, however, will * be closed automagically. * *

* Using a suitable {@linkplain #FileLinesByteArrayIterable(String, long, Class) constructor} it is * possible to specify a decompression class, which must extend {@link InputStream} and provide a * constructor accepting an {@link InputStream} (e.g., {@link GZIPInputStream} if the file is * compressed in gzip format). * *

* Convenience {@linkplain #iterator(InputStream, Class, EnumSet) static methods} makes it possible * to build on the fly an iterator over an input stream using the same conventions. * *

* This class implements {@link #size64()}, which will return the number of lines of the file, * computed with a full scan at the first invocation. However, it is also possible to specify at * construction time the number of lines in the file to skip the first scan. It is responsibility of * the caller that the specified size and the actual number of lines in the file do match. * * @author Sebastiano Vigna * @since 2.6.17 */ public class FileLinesByteArrayIterable implements Iterable, Size64 { /** The filename upon which this file-lines collection is based. */ private final String filename; /** * A constructor for a stream decompressor for this iterable, or {@code null} for no compression. */ private final Constructor decompressor; /** A set of terminators for the underlying {@link FastBufferedInputStream}. */ private final EnumSet terminators; /** The cached size of this iterable. */ private long size = -1; /** * Creates a file-lines byte-array iterable for the specified filename. * * @param filename a filename. */ public FileLinesByteArrayIterable(final String filename) { this(filename, -1); } /** * Creates a file-lines byte-array iterable for the specified filename and size. * * @param filename a filename. * @param size the number of lines in the file. */ public FileLinesByteArrayIterable(final String filename, final long size) { this(filename, size, FastBufferedInputStream.ALL_TERMINATORS); } /** * Creates a file-lines byte-array iterable for the specified filename and size using the given line * terminators. * * @param filename a filename. * @param size the number of lines in the file. * @param terminators line terminators for the underlying {@link FastBufferedInputStream}. */ public FileLinesByteArrayIterable(final String filename, final long size, final EnumSet terminators) { this.filename = filename; this.size = size; this.terminators = terminators; this.decompressor = null; } /** * Creates a file-lines byte-array iterable for the specified filename, optionally assuming that the * file is compressed. * * @param filename a filename. * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or * {@code null} for no decompression. */ public FileLinesByteArrayIterable(final String filename, final Class decompressor) throws NoSuchMethodException, SecurityException { this(filename, -1, decompressor); } /** * Creates a file-lines byte-array iterable for the specified filename and size, optionally assuming * that the file is compressed. * * @param filename a filename. * @param size the number of lines in the file. * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or * {@code null} for no decompression. */ public FileLinesByteArrayIterable(final String filename, final long size, final Class decompressor) throws NoSuchMethodException, SecurityException { this(filename, size, FastBufferedInputStream.ALL_TERMINATORS, decompressor); } /** * Creates a file-lines byte-array iterable for the specified filename and size using the given line * terminators and optionally assuming that the file is compressed. * * @param filename a filename. * @param size the number of lines in the file. * @param terminators line terminators for the underlying {@link FastBufferedInputStream}. * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or * {@code null} for no decompression. */ public FileLinesByteArrayIterable(final String filename, final long size, final EnumSet terminators, final Class decompressor) throws NoSuchMethodException, SecurityException { this.filename = filename; this.size = size; this.terminators = terminators; this.decompressor = decompressor != null ? decompressor.getConstructor(InputStream.class) : null; } /** * An iterator over the lines of a {@link FileLinesByteArrayIterable}. * *

* Instances of this class open an {@link java.io.InputStream}, and thus should be * {@linkplain Closeable#close() closed} after usage. A “safety-net” finaliser tries to * take care of the cases in which closing an instance is impossible. An exhausted iterator, * however, will be closed automagically. */ public static final class FileLinesIterator implements Iterator, SafelyCloseable { private final EnumSet terminators; private FastBufferedInputStream fbis; private byte[] buffer = new byte[1024]; private boolean ready; private int read; private FileLinesIterator(final InputStream ibs, final EnumSet terminators) { this.fbis = new FastBufferedInputStream(ibs); this.terminators = terminators; } @Override public boolean hasNext() { if (ready) return true; if (fbis == null) return false; ready = false; try { read = 0; int len; while((len = fbis.readLine(buffer, read, buffer.length - read, terminators)) == buffer.length - read) { ready = true; read += len; buffer = ByteArrays.grow(buffer, buffer.length + 1); } if (len != -1) { ready = true; read += len; } if (len == -1) close(); } catch (final IOException e) { throw new RuntimeException(e); } return ready; } @Override public byte[] next() { if (! hasNext()) throw new NoSuchElementException(); ready = false; return Arrays.copyOf(buffer, read); } @Override public synchronized void close() { if (fbis == null) return; try { fbis.close(); } catch (final IOException e) { throw new RuntimeException(e); } finally { fbis = null; } } @Override protected synchronized void finalize() throws Throwable { try { if (fbis != null) close(); } finally { super.finalize(); } } } @Override public FileLinesIterator iterator() { try { final InputStream inputStream = decompressor == null ? new FileInputStream(filename) : decompressor.newInstance(new FileInputStream(filename)); return new FileLinesIterator(inputStream, terminators); } catch (final Exception e) { throw new RuntimeException(e); } } /** * A convenience method returning a one-off {@link FileLinesIterator} reading from an input stream. * * @param inputStream an input stream. * @return an iterator returning the lines contained in the provided input stream. */ public static FileLinesIterator iterator(final InputStream inputStream) { return iterator(inputStream, null); } /** * A convenience method returning a one-off {@link FileLinesIterator} reading from an input stream. * * @param inputStream an input stream. * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or * {@code null} for no decompression. * @return an iterator returning the lines contained in the provided input stream. */ public static FileLinesIterator iterator(final InputStream inputStream, final Class decompressor) { return iterator(inputStream, decompressor, FastBufferedInputStream.ALL_TERMINATORS); } /** * A convenience method returning a one-off {@link FileLinesIterator} reading from an input stream. * * @param inputStream an input stream. * @param terminators line terminators for the underlying {@link FastBufferedInputStream}. * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or * {@code null} for no decompression. * @return an iterator returning the lines contained in the provided input stream. */ public static FileLinesIterator iterator(InputStream inputStream, final Class decompressor, final EnumSet terminators) { try { if (decompressor != null) inputStream = decompressor.getConstructor(InputStream.class).newInstance(inputStream); return new FileLinesIterator(inputStream, terminators); } catch (final Exception e) { throw new RuntimeException(e); } } @Override public synchronized long size64() { if (size == -1) size = ObjectIterables.size(this); return size; } /** * Returns all lines as a {@linkplain java.util.List list}. * * @implSpec This method iterates over the lines of the file and accumulates the resulting byte * arrays in a standard list. Thus, it will throw an exception on files with more than * {@link Integer#MAX_VALUE} lines. * * @return all lines of the file wrapped by this file-lines byte-array iterable. * @see #allLinesBig() */ public ObjectList allLines() { final ObjectArrayList result = new ObjectArrayList<>(); for (final byte[] a : this) result.add(a); return result; } /** * Returns all lines as a {@linkplain BigList big list}. * * @implSpec This method iterates over the lines of the file and accumulates the resulting byte * arrays. in a {@linkplain BigList big list}. Thus, it supports files with more than * {@link Integer#MAX_VALUE} lines. * * @return all lines of the file wrapped by this file-lines byte-array iterable. * @see #allLines() */ public ObjectBigArrayBigList allLinesBig() { final ObjectBigArrayBigList result = new ObjectBigArrayBigList<>(); for (final byte[] a : this) result.add(a); return result; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy