All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.aliasi.io.FileLineReader Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.

The newest version!
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.io;

import com.aliasi.util.Iterators;
import com.aliasi.util.Streams;
import com.aliasi.util.Strings;

import java.io.LineNumberReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import java.util.zip.GZIPInputStream;

/**
 * A {@code FileLineReader} instance represents the lines of a file.  The
 * lines may be streamed through an iterator or returned all at once.
 * This class encapsulates good practices for resources and adapts
 * file line reading to for-each loops by implementing the {@link
 * Iterable} interface.
 *
 * <p>The definition of a line is defined by the {@link
 * java.io.BufferedReader} class's {@link
 * java.io.BufferedReader#readLine()} method.
 *
 * <p>The two standard usage patterns are streaming and all-at-once
 * reading.  For streaming, this class implements the {@link Iterable}
 * interface, so for-each loops work:
 *
 * <blockquote><pre>{@code
 * File file = ...;
 * FileLineReader lines = new FileLineReader(file,"UTF-8");
 * for (String line : lines) {
 *     processLine(line);
 * }
 * lines.close();}</pre></blockquote>
 *
 * The iterable implementation reads a line at a time using a buffered
 * reader, so is scalable.
 *
 * <p>The {@link #readLines()} method returns the lines all at once
 * as a collection, and automatically closes all resources used:
 *
 * <blockquote><pre>{@code
 * List<String> lines = new FileLineReader(file,"UTF-8").readLines();}</pre></blockquote>
 *
 * The lines are read into the list, so enough memory should be
 * available to hold the entire file.
 *
 * <p>Instances of this class may be used as ordinary line number
 * readers, too.  The {@link #getLineNumber()} method is particularly
 * useful for error reporting.
 *
 * @author  Bob Carpenter
 * @version 3.9
 * @since   Lingpipe3.8
 */
public class FileLineReader
    extends LineNumberReader
    implements Iterable<String> {

    /**
     * Construct a new file lines iterator from the specified file
     * using the specified character encoding, assuming no
     * compression.
     *
     * <p><b>Warning:</b> The iterator should be closed using the
     * {@link #close()} method to avoid any dangling file references.
     * Closing the JVM also closes, so short programs may avoid
     * closing the streams explicitly.
     *
     * <p>If the file is not found or the encoding is not supported,
     * any file-system resources allocated will be released and an
     * {@code IOException} thrown.
     *
     * @param file File from which to read lines.
     * @param encoding Character encoding.
     * @throws java.io.FileNotFoundException If the file is not found.
     * @throws UnsupportedEncodingException If the specified encoding
     * is not supported.
     * @throws IOException If there is another I/O error opening the file.
     */
    public FileLineReader(File file, String encoding)
        throws IOException {

        this(file,encoding,false);
    }

    /**
     * Construct a new file lines iterator from the specified file
     * using the specified character encoding, uncompressing gzipped
     * input if the compression flag is true.
     *
     * <p><b>Warning:</b> The iterator should be closed using the
     * {@link #close()} method to avoid any dangling file references.
     * Closing the JVM also closes, so short programs may avoid
     * closing the streams explicitly.
     *
     * <p>If the file is not found or the encoding is not supported,
     * any file-system resources allocated will be released and an
     * {@code IOException} thrown.
     *
     * @param file File from which to read lines.
     * @param encoding Character encoding.
     * @param gzipped Set to {@code true} if file is gzipped.
     * @throws java.io.FileNotFoundException If the file is not found.
     * @throws UnsupportedEncodingException If the specified encoding
     * is not supported.
     * @throws IOException If there is another I/O error opening the
     * file or reading the gzip header.
     */
    public FileLineReader(File file, String encoding, boolean gzipped)
        throws IOException {

        super(buildReader(file,encoding,gzipped));
    }

    /**
     * Returns an iterator over the remaining lines of the file.
     * Because it buffers one line ahead, any use of {@link
     * #getLineNumber()} from this class will be one greater than it
     * should be.
     *
     * <p>Each call creates a new iterator that pulls lines from this
     * same underlying reader through {@link #readLine()}.  An
     * {@code IOException} raised while fetching a line is rethrown
     * from the iterator's line-fetching step as an
     * {@code IllegalStateException} with the original exception as
     * its cause.
     *
     * <p>There is no concurrent protection for this method, so it
     * should only be used from a single thread.
     *
     * @return An iterator over the lines not yet read from this reader.
     */
    public Iterator<String> iterator() {
        return new Iterators.Buffered<String>() {
            @Override
            public String bufferNext() {
                try {
                    // readLine() returns null at end of stream
                    return readLine();
                } catch (IOException e) {
                    throw new IllegalStateException("I/O error reading",e);
                }
            }
        };
    }

    /**
     * Returns the list of lines remaining to be read from this line
     * iterator and closes all resources.  If this method is called
     * before any calls to {@link #iterator()}, it returns all the
     * lines read from the file.
     *
     * <p>The reader is closed whether or not reading succeeds.
     *
     * @return The list of lines read from the file.
     * @throws IOException If there is an underlying I/O error
     * closing the file.
     */
    public List<String> readLines() throws IOException {
        List<String> lineList = new ArrayList<String>();
        try {
            for (String line : this) {
                lineList.add(line);
            }
        } finally {
            close();
        }
        return lineList;
    }

    /**
     * Return the list of lines read from the specified file using
     * the specified character encoding.  The file is opened without
     * decompression and is closed before this method returns.
     *
     * @param in File whose lines are read.
     * @param encoding Character encoding to decode chars in files.
     * @return The list of lines read from the file.
     * @throws UnsupportedEncodingException If the encoding is not
     * supported on the JVM.
     * @throws IOException If there is an underlying I/O error
     * reading from the file.
     */
    public static List<String> readLines(File in, String encoding)
        throws IOException, UnsupportedEncodingException {

        FileLineReader reader = new FileLineReader(in,encoding);
        return reader.readLines();
    }

    /**
     * Return the array of lines read from the specified file using
     * the specified character encoding.
     *
     * @param in File whose lines are read.
     * @param encoding Character encoding to decode chars in files.
     * @return The lines read from the file.
     * @throws UnsupportedEncodingException If the encoding is not
     * supported on the JVM.
     * @throws IOException If there is an underlying I/O error
     * reading from the file.
     */
    public static String[] readLineArray(File in, String encoding)
        throws IOException, UnsupportedEncodingException {

        List<String> lineList = readLines(in,encoding);
        return lineList.toArray(Strings.EMPTY_STRING_ARRAY);
    }

    /**
     * Opens the specified file and wraps it in a character reader
     * for the specified encoding, inserting a gzip decompression
     * layer if requested.  If construction fails part way through
     * for any reason, checked or unchecked, the streams opened so
     * far are closed before the exception propagates.
     *
     * @param file File to open.
     * @param encoding Character encoding used to decode the bytes.
     * @param gzipped Set to {@code true} to decompress gzipped input.
     * @return Reader over the decoded characters of the file.
     * @throws IOException If the file cannot be opened, the gzip
     * header cannot be read, or the encoding is not supported.
     */
    static Reader buildReader(File file, String encoding, boolean gzipped)
        throws IOException {

        InputStream in = null;
        InputStream zipIn = null;
        boolean success = false;
        try {
            in = new FileInputStream(file);
            zipIn = gzipped ? new GZIPInputStream(in) : in;
            Reader reader = new InputStreamReader(zipIn,encoding);
            success = true;
            return reader;
        } finally {
            // Release on every failure path, including unchecked
            // exceptions such as a null encoding; on success the
            // caller owns the returned reader.
            if (!success) {
                Streams.closeQuietly(zipIn);
                Streams.closeQuietly(in);
            }
        }
    }

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy