edu.stanford.nlp.io.IOUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.io;
import edu.stanford.nlp.util.*;
import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileChannel;
import java.util.*;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* Helper Class for various I/O related things.
*
* @author Kayur Patel
* @author Teg Grenager
* @author Christopher Manning
*/
public class IOUtils {
// Default char-buffer size; used by EolPreservingLineReaderIterable when no explicit size is given.
private static final int SLURP_BUFFER_SIZE = 16000;
// NOTE(review): presumably a buffer size for gzip file streams; not referenced in the visible part of this file.
private static final int GZIP_FILE_BUFFER_SIZE = 65536;
/** The platform line separator, read once from the "line.separator" system property. */
public static final String eolChar = System.getProperty("line.separator");
/** Encoding name that {@link #slurpFile(String)} passes on when the caller supplies none. */
public static final String defaultEncoding = "utf-8";
// A class of static methods: the private constructor prevents instantiation from outside.
private IOUtils() { }
/**
 * Serializes an object into the file with the given name by delegating to
 * {@link #writeObjectToFile(Object, File)}.
 * The file is silently gzipped if the filename ends with .gz.
 *
 * @param o Object to be written to file
 * @param filename Name of the file to write
 * @return File containing the object
 * @throws IOException If the file can't be written
 */
public static File writeObjectToFile(Object o, String filename) throws IOException {
  File target = new File(filename);
  return writeObjectToFile(o, target);
}
/**
 * Serializes an object into the given File, overwriting any existing content.
 * The file is silently gzipped if the filename ends with .gz.
 *
 * @param o Object to be written to file
 * @param file The File to write
 * @return File containing the object
 * @throws IOException If the File cannot be written
 */
public static File writeObjectToFile(Object o, File file) throws IOException {
  final boolean append = false;  // this overload always overwrites
  return writeObjectToFile(o, file, append);
}
/**
 * Serializes an object into the given File. The file is silently gzipped if the filename ends with .gz.
 * All streams are closed before returning, including when writing fails.
 *
 * @param o Object to be written to file
 * @param file The File to write
 * @param append If true, append to this file instead of overwriting it
 * @return File containing the object (the same instance that was passed in)
 * @throws IOException If the File cannot be written
 */
public static File writeObjectToFile(Object o, File file, boolean append) throws IOException {
  // The raw file stream is its own resource so that it is closed even if a wrapper constructor throws.
  try (OutputStream fos = new FileOutputStream(file, append)) {
    OutputStream os = fos;
    if (file.getName().endsWith(".gz")) {
      os = new GZIPOutputStream(os);
    }
    // Closing the ObjectOutputStream flushes the buffer and finishes the gzip stream (if any).
    try (ObjectOutputStream oos = new ObjectOutputStream(new BufferedOutputStream(os))) {
      oos.writeObject(o);
    }
  }
  return file;
}
/**
 * Serializes an object into the file with the specified name, squashing exceptions.
 * Unlike {@link #writeObjectToFile(Object, String)}, the output is always gzipped,
 * regardless of the file name.
 *
 * @param o Object to be written to file
 * @param filename Name of the file to write
 * @return File containing the object, or null if an exception was caught
 *     (the exception's stack trace is printed to stderr)
 */
public static File writeObjectToFileNoExceptions(Object o, String filename) {
  try {
    File file = new File(filename);
    // The raw file stream is a separate resource so it is closed even if a wrapper constructor throws.
    try (OutputStream fos = new FileOutputStream(file);
         ObjectOutputStream oos = new ObjectOutputStream(
             new BufferedOutputStream(new GZIPOutputStream(fos)))) {
      oos.writeObject(o);
    }
    return file;
  } catch (Exception e) {
    e.printStackTrace();
    // Honor the documented contract: a failed write is signalled by null.
    return null;
  }
}
/**
 * Serializes an object into a freshly created temp file that is registered
 * for deletion when the JVM exits.
 *
 * @param o Object to be written to file
 * @param filename Prefix used for the temp file's name (the suffix is ".tmp")
 * @return File containing the object
 * @throws IOException If the file cannot be created or written
 */
public static File writeObjectToTempFile(Object o, String filename) throws IOException {
  File tempFile = File.createTempFile(filename, ".tmp");
  tempFile.deleteOnExit();
  writeObjectToFile(o, tempFile);
  return tempFile;
}
/**
 * Serializes an object into a temp file, squashing exceptions. On failure a
 * message and the stack trace are printed to stderr.
 *
 * @param o Object to be written to file
 * @param filename Prefix used for the temp file's name
 * @return File containing the object, or null if an exception was caught
 */
public static File writeObjectToTempFileNoExceptions(Object o, String filename) {
  File written;
  try {
    written = writeObjectToTempFile(o, filename);
  } catch (Exception e) {
    System.err.println("Error writing object to file " + filename);
    e.printStackTrace();
    written = null;
  }
  return written;
}
/**
 * Opens a buffered output stream onto the given path; when the path ends
 * with ".gz" the buffered stream is additionally wrapped in a GZIPOutputStream.
 *
 * @param path The file path to write to
 * @return The (possibly gzipping) output stream
 * @throws IOException If the file cannot be opened or the gzip header cannot be written
 */
private static OutputStream getBufferedOutputStream(String path) throws IOException {
  OutputStream buffered = new BufferedOutputStream(new FileOutputStream(path));
  return path.endsWith(".gz") ? new GZIPOutputStream(buffered) : buffered;
}
//++ todo [cdm, Aug 2012]: Do we need the below methods? They're kind of weird in unnecessarily bypassing using a Writer.
/**
 * Writes a string to a file. The output is gzipped if the path ends with ".gz".
 * The underlying stream is always closed, even if encoding or writing fails.
 *
 * @param contents The string to write
 * @param path The file path
 * @param encoding The encoding to encode in
 * @throws IOException In case of failure
 */
public static void writeStringToFile(String contents, String path, String encoding) throws IOException {
  try (OutputStream writer = getBufferedOutputStream(path)) {
    writer.write(contents.getBytes(encoding));
  }
}
/**
* Writes a string to a file, as UTF-8.
*
* @param contents The string to write
* @param path The file path
* @throws IOException In case of failure
*/
/**
 * Writes a string to a file, squashing exceptions (their stack trace is
 * printed to stderr). The output is gzipped if the path ends with ".gz",
 * otherwise it is written through a buffered stream.
 *
 * @param contents The string to write
 * @param path The file path
 * @param encoding The encoding to encode in
 */
public static void writeStringToFileNoExceptions(String contents, String path, String encoding) {
  OutputStream out = null;
  try {
    out = path.endsWith(".gz")
        ? new GZIPOutputStream(new FileOutputStream(path))
        : new BufferedOutputStream(new FileOutputStream(path));
    out.write(contents.getBytes(encoding));
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    // null-safe: does nothing if the stream was never opened
    closeIgnoringExceptions(out);
  }
}
/**
 * Writes a string to a newly created temporary file. The content is gzipped
 * if {@code path} ends with ".gz"; note that the temp file's own name always
 * ends in ".tmp".
 *
 * @param contents The string to write
 * @param path Prefix used for the temp file's name
 * @param encoding The encoding to encode in
 * @throws IOException In case of failure
 * @return The File written to
 */
public static File writeStringToTempFile(String contents, String path, String encoding) throws IOException {
  File tmp = File.createTempFile(path, ".tmp");
  // The stream must be closed: otherwise buffered bytes / the gzip trailer never reach the file.
  try (OutputStream fos = new FileOutputStream(tmp);
       OutputStream writer = path.endsWith(".gz")
           ? new GZIPOutputStream(fos)
           : new BufferedOutputStream(fos)) {
    writer.write(contents.getBytes(encoding));
  }
  return tmp;
}
/**
 * Writes a string to a temporary file, encoded as UTF-8. The created File
 * is not returned by this overload.
 *
 * @param contents The string to write
 * @param path Prefix used for the temp file's name
 * @throws IOException In case of failure
 */
public static void writeStringToTempFile(String contents, String path) throws IOException {
  final String utf8 = "UTF-8";
  writeStringToTempFile(contents, path, utf8);
}
/**
 * Writes a string to a temporary file, squashing exceptions (their stack
 * trace is printed to stderr). The content is gzipped if {@code path} ends
 * with ".gz".
 *
 * @param contents The string to write
 * @param path Prefix used for the temp file's name
 * @param encoding The encoding to encode in
 * @return The File that was written to; null if the temp file could not be created
 */
public static File writeStringToTempFileNoExceptions(String contents, String path, String encoding) {
  File tempFile = null;
  OutputStream out = null;
  try {
    tempFile = File.createTempFile(path, ".tmp");
    out = path.endsWith(".gz")
        ? new GZIPOutputStream(new FileOutputStream(tempFile))
        : new BufferedOutputStream(new FileOutputStream(tempFile));
    out.write(contents.getBytes(encoding));
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    closeIgnoringExceptions(out);
  }
  return tempFile;
}
/**
 * Writes a string to a temporary file as UTF-8, squashing exceptions.
 * The created File is not returned by this overload.
 *
 * @param contents The string to write
 * @param path Prefix used for the temp file's name
 */
public static void writeStringToTempFileNoExceptions(String contents, String path) {
  final String utf8 = "UTF-8";
  writeStringToTempFileNoExceptions(contents, path, utf8);
}
//-- todo [cdm, Aug 2012]: Do we need the above methods? They're kind of weird in unnecessarily bypassing using a Writer.
// todo [cdm, Sep 2013]: Can we remove this next method and its friends? (Weird in silently gzipping, overlaps other functionality.)
/**
 * Read an object from a stored file. The file is first read as gzipped data;
 * if that fails with a ZipException it is re-read as plain serialized data,
 * so it is silently ungzipped regardless of name.
 * All streams are closed before returning, including on failure.
 *
 * @param file The file pointing to the object to be retrieved
 * @param <T> The type the caller expects; the cast is unchecked
 * @throws IOException If file cannot be read
 * @throws ClassNotFoundException If reading serialized object fails
 * @return The object read from the file.
 */
public static <T> T readObjectFromFile(File file) throws IOException, ClassNotFoundException {
  // The raw file stream is its own resource so it is closed even when the gzip header check throws.
  try (InputStream fis = new FileInputStream(file);
       ObjectInputStream ois = new ObjectInputStream(
           new BufferedInputStream(new GZIPInputStream(fis)))) {
    Object o = ois.readObject();
    return ErasureUtils.uncheckedCast(o);
  } catch (java.util.zip.ZipException e) {
    // Not gzip data: retry from the start of the file as a plain object stream.
    try (InputStream fis = new FileInputStream(file);
         ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(fis))) {
      Object o = ois.readObject();
      return ErasureUtils.uncheckedCast(o);
    }
  }
}
/**
 * Opens a DataInputStream on top of the stream returned by
 * {@link #getInputStreamFromURLOrClasspathOrFileSystem(String)}.
 *
 * @param filenameUrlOrClassPath The URL, classpath resource or file to read
 * @return A DataInputStream over that resource
 * @throws IOException If the resource cannot be opened
 */
public static DataInputStream getDataInputStream(String filenameUrlOrClassPath) throws IOException {
  InputStream source = getInputStreamFromURLOrClasspathOrFileSystem(filenameUrlOrClassPath);
  return new DataInputStream(source);
}
/**
 * Opens a DataOutputStream on top of the stream produced by
 * getBufferedOutputStream for the given file name.
 *
 * @param filename The file to write to
 * @return A DataOutputStream over that file
 * @throws IOException If the file cannot be opened
 */
public static DataOutputStream getDataOutputStream(String filename) throws IOException {
  OutputStream sink = getBufferedOutputStream(filename);
  return new DataOutputStream(sink);
}
/**
 * Read an object from a stored file. The file can be anything obtained
 * via a URL, the filesystem, or the classpath (eg in a jar file).
 * The stream is closed before returning, including on failure.
 *
 * @param filename The file pointing to the object to be retrieved
 * @param <T> The type the caller expects; the cast is unchecked
 * @throws IOException If file cannot be read
 * @throws ClassNotFoundException If reading serialized object fails
 * @return The object read from the file.
 */
public static <T> T readObjectFromURLOrClasspathOrFileSystem(String filename) throws IOException, ClassNotFoundException {
  // The underlying stream is a separate resource so it is closed even if the
  // ObjectInputStream constructor rejects the stream header.
  try (InputStream is = getInputStreamFromURLOrClasspathOrFileSystem(filename);
       ObjectInputStream ois = new ObjectInputStream(is)) {
    Object o = ois.readObject();
    return ErasureUtils.uncheckedCast(o);
  }
}
/**
 * Reads a serialized object via {@link #readObjectFromURLOrClasspathOrFileSystem(String)},
 * first printing {@code msg}, the path and " ... " to stderr, and calling
 * {@code Timing.done()} once the object has been read.
 * NOTE(review): presumably {@code done()} reports the elapsed time; confirm against Timing.
 *
 * @param msg Message printed before the path
 * @param path URL, classpath resource or file to read the object from
 * @param <T> The type the caller expects; the cast is unchecked
 * @return The object read
 * @throws RuntimeIOException Wrapping any IOException or ClassNotFoundException
 */
public static <T> T readObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(String msg, String path) {
  T obj;
  try {
    Timing timing = new Timing();
    System.err.print(msg + ' ' + path + " ... ");
    obj = IOUtils.readObjectFromURLOrClasspathOrFileSystem(path);
    timing.done();
  } catch (IOException | ClassNotFoundException e) {
    throw new RuntimeIOException(e);
  }
  return obj;
}
/**
 * Reads the next object from an already opened ObjectInputStream.
 * The stream is left open for the caller.
 *
 * @param ois The stream to read from
 * @param <T> The type the caller expects; the cast is unchecked
 * @return The object read
 * @throws IOException If the stream cannot be read
 * @throws ClassNotFoundException If reading serialized object fails
 */
public static <T> T readObjectFromObjectStream(ObjectInputStream ois) throws IOException,
    ClassNotFoundException {
  Object o = ois.readObject();
  return ErasureUtils.uncheckedCast(o);
}
/**
 * Read an object from a stored file, delegating to {@link #readObjectFromFile(File)}.
 *
 * @param filename The filename of the object to be retrieved
 * @param <T> The type the caller expects; the cast is unchecked
 * @throws IOException If file cannot be read
 * @throws ClassNotFoundException If reading serialized object fails
 * @return The object read from the file.
 */
public static <T> T readObjectFromFile(String filename) throws IOException,
    ClassNotFoundException {
  Object o = readObjectFromFile(new File(filename));
  return ErasureUtils.uncheckedCast(o);
}
/**
 * Read an object from a stored gzipped file without throwing exceptions.
 * Any IOException or ClassNotFoundException has its stack trace printed to stderr.
 * The stream is closed before returning, including on failure.
 *
 * @param file The file pointing to the object to be retrieved
 * @param <T> The type the caller expects; the cast is unchecked
 * @return The object read from the file, or null if an exception occurred.
 */
public static <T> T readObjectFromFileNoExceptions(File file) {
  Object o = null;
  // The raw file stream is its own resource so it is closed even if a wrapper constructor throws.
  try (InputStream fis = new FileInputStream(file);
       ObjectInputStream ois = new ObjectInputStream(
           new BufferedInputStream(new GZIPInputStream(fis)))) {
    o = ois.readObject();
  } catch (IOException | ClassNotFoundException e) {
    e.printStackTrace();
  }
  return ErasureUtils.uncheckedCast(o);
}
/**
 * Counts the lines of a text resource opened with {@link #readerFromString(String)}.
 * The reader is closed before returning, including on failure.
 *
 * @param textFileOrUrl The file, classpath resource or URL to read
 * @return The number of lines, as delimited by BufferedReader.readLine()
 * @throws IOException If there is an I/O problem
 */
public static int lineCount(String textFileOrUrl) throws IOException {
  try (BufferedReader r = readerFromString(textFileOrUrl)) {
    int numLines = 0;
    while (r.readLine() != null) {
      numLines++;
    }
    return numLines;
  }
}
/**
 * Opens a buffered ObjectOutputStream onto the given path. The output is
 * gzipped if the path ends with ".gz". The caller is responsible for closing
 * the returned stream.
 *
 * @param serializePath The file to write to
 * @return An ObjectOutputStream over that file
 * @throws IOException If the file cannot be opened
 */
public static ObjectOutputStream writeStreamFromString(String serializePath)
    throws IOException {
  if (serializePath.endsWith(".gz")) {
    return new ObjectOutputStream(new BufferedOutputStream(
        new GZIPOutputStream(new FileOutputStream(serializePath))));
  }
  return new ObjectOutputStream(new BufferedOutputStream(
      new FileOutputStream(serializePath)));
}
/**
 * Opens an ObjectInputStream on top of the stream returned by
 * {@link #getInputStreamFromURLOrClasspathOrFileSystem(String)}. The caller
 * is responsible for closing the returned stream.
 *
 * @param filenameOrUrl The URL, classpath resource or file to read
 * @return An ObjectInputStream over that resource
 * @throws IOException If the resource cannot be opened
 */
public static ObjectInputStream readStreamFromString(String filenameOrUrl)
    throws IOException {
  return new ObjectInputStream(getInputStreamFromURLOrClasspathOrFileSystem(filenameOrUrl));
}
/**
 * Locates this file either in the CLASSPATH or in the file system. The CLASSPATH takes priority.
 * The classpath is probed with the name as given, then with backslashes
 * turned into forward slashes, then with runs of slashes collapsed.
 * If the name ends with ".gz" the stream is wrapped in a GZIPInputStream;
 * if that wrapping fails a message is printed to stderr and the unwrapped
 * stream is returned instead.
 *
 * @param name The file or resource name
 * @throws FileNotFoundException If the name is not on the classpath and cannot be opened as a file
 * @return The InputStream of name (never null)
 */
private static InputStream findStreamInClasspathOrFileSystem(String name) throws FileNotFoundException {
  // Even though this may look like a regular file, it may be a path inside a
  // jar in the CLASSPATH, so the classpath is checked first.
  ClassLoader loader = IOUtils.class.getClassLoader();
  InputStream stream = loader.getResourceAsStream(name);
  if (stream == null) {
    // windows File.separator is \, but getting resources only works with /
    String forwardSlashed = name.replaceAll("\\\\", "/");
    stream = loader.getResourceAsStream(forwardSlashed);
    if (stream == null) {
      // Classpath doesn't like double slashes (e.g., /home/user//foo.txt)
      stream = loader.getResourceAsStream(forwardSlashed.replaceAll("/+", "/"));
    }
  }
  if (stream == null) {
    // not in the CLASSPATH: load from the file system
    stream = new FileInputStream(name);
  }
  if ( ! name.endsWith(".gz")) {
    return stream;
  }
  try {
    return new GZIPInputStream(stream);
  } catch (IOException e) {
    System.err.println("Resource or file looks like a gzip file, but is not: " + name);
    return stream;
  }
}
/**
 * Check if this path exists either in the classpath or on the filesystem.
 * The classpath is probed with the name as given and with backslashes
 * replaced by forward slashes. Any stream opened while probing is closed again.
 *
 * @param name The file or resource name.
 * @return true if a call to {@link IOUtils#getBufferedReaderFromClasspathOrFileSystem(String)} would return a valid stream.
 */
public static boolean existsInClasspathOrFileSystem(String name) {
  InputStream is = IOUtils.class.getClassLoader().getResourceAsStream(name);
  if (is == null) {
    is = IOUtils.class.getClassLoader().getResourceAsStream(name.replaceAll("\\\\", "/"));
  }
  if (is != null) {
    // The stream was only opened to test for existence; release it right away.
    closeIgnoringExceptions(is);
    return true;
  }
  return new File(name).exists();
}
/**
 * Locates this file either using the given URL, or in the CLASSPATH, or in the file system.
 * The CLASSPATH takes priority over the file system!
 * Names starting with http:// or https:// are opened as URLs (and gunzipped
 * if they end with ".gz"). Anything else is looked up on the classpath and
 * file system and, if not found there, tried once more as a URL.
 * The returned stream is always buffered.
 *
 * @param textFileOrUrl The URL, classpath resource name or file name to open
 * @return An InputStream for loading a resource
 * @throws IOException If the name cannot be resolved or the URL cannot be opened
 */
public static InputStream getInputStreamFromURLOrClasspathOrFileSystem(String textFileOrUrl)
    throws IOException {
  InputStream raw;
  boolean looksLikeHttp = textFileOrUrl.matches("https?://.*");
  if ( ! looksLikeHttp) {
    try {
      raw = findStreamInClasspathOrFileSystem(textFileOrUrl);
    } catch (FileNotFoundException notFound) {
      try {
        // Maybe this happens to be some other format of URL?
        raw = new URL(textFileOrUrl).openConnection().getInputStream();
      } catch (IOException e2) {
        // Don't make the original exception a cause, since it is almost certainly bogus
        throw new IOException("Unable to resolve \"" +
            textFileOrUrl + "\" as either " +
            "class path, filename or URL");
      }
    }
  } else {
    raw = new URL(textFileOrUrl).openConnection().getInputStream();
    if (textFileOrUrl.endsWith(".gz")) {
      try {
        raw = new GZIPInputStream(raw);
      } catch (IOException e) {
        // deliberately ignored: the stream is then used without gunzipping
      }
    }
  }
  // buffer this stream. even gzip streams benefit from buffering,
  // such as for the shift reduce parser
  return new BufferedInputStream(raw);
}
/**
 * Quietly opens a File. If the file ends with a ".gz" extension,
 * automatically opens a GZIPInputStream to wrap the constructed
 * (buffered) FileInputStream. If the gzip wrapping fails, the file stream
 * is closed again before the exception is rethrown.
 *
 * @param file The file to open
 * @return An InputStream over the file's (uncompressed) content
 * @throws RuntimeIOException Wrapping any IOException raised while opening
 */
public static InputStream inputStreamFromFile(File file) throws RuntimeIOException {
  InputStream is = null;
  try {
    is = new BufferedInputStream(new FileInputStream(file));
    if (file.getName().endsWith(".gz")) {
      is = new GZIPInputStream(is);
    }
    return is;
  } catch (IOException e) {
    // If the GZIPInputStream constructor threw, 'is' still refers to the open file stream.
    closeIgnoringExceptions(is);
    throw new RuntimeIOException(e);
  }
}
/**
 * Open a BufferedReader to a File, decoding it as UTF-8. If the file's
 * getName() ends in .gz, it is interpreted as a gzipped file (and uncompressed).
 *
 * @param file What to read from
 * @return The BufferedReader
 * @throws RuntimeIOException If there is an I/O problem
 */
public static BufferedReader readerFromFile(File file) {
  InputStream in = null;
  try {
    in = inputStreamFromFile(file);
    Reader decoded = new InputStreamReader(in, "UTF-8");
    return new BufferedReader(decoded);
  } catch (IOException ioe) {
    // release the stream before reporting the failure
    IOUtils.closeIgnoringExceptions(in);
    throw new RuntimeIOException(ioe);
  }
}
// todo [cdm 2014]: get rid of this method, using other methods. This will change the semantics to null meaning UTF-8, but that seems better in 2015.
/**
 * Open a BufferedReader to a File using the given encoding. If the file's
 * getName() ends in .gz, it is interpreted as a gzipped file (and uncompressed).
 * If the encoding passed in is null, then the system default encoding is used.
 *
 * @param file What to read from
 * @param encoding What charset to use. A null String is interpreted as platform default encoding
 * @return The BufferedReader
 * @throws RuntimeIOException If there is an I/O problem
 */
public static BufferedReader readerFromFile(File file, String encoding) {
  InputStream in = null;
  try {
    in = inputStreamFromFile(file);
    Reader decoded = (encoding != null)
        ? new InputStreamReader(in, encoding)
        : new InputStreamReader(in);
    return new BufferedReader(decoded);
  } catch (IOException ioe) {
    // release the stream before reporting the failure
    IOUtils.closeIgnoringExceptions(in);
    throw new RuntimeIOException(ioe);
  }
}
/**
 * Open a BufferedReader on stdin, decoding with the platform default encoding.
 *
 * @return The BufferedReader
 */
public static BufferedReader readerFromStdin() {
  InputStreamReader stdin = new InputStreamReader(System.in);
  return new BufferedReader(stdin);
}
/**
 * Open a BufferedReader on stdin, decoding with the specified character encoding.
 *
 * @param encoding CharSet encoding. May be null, in which case the
 *     platform default encoding is used
 * @return The BufferedReader
 * @throws IOException If there is an I/O problem (e.g. the encoding is unsupported)
 */
public static BufferedReader readerFromStdin(String encoding) throws IOException {
  InputStreamReader stdin = (encoding != null)
      ? new InputStreamReader(System.in, encoding)
      : new InputStreamReader(System.in);
  return new BufferedReader(stdin);
}
/**
 * Open a BufferedReader to a file, class path entry or URL specified by a String name,
 * decoding it as UTF-8. The underlying stream comes from
 * {@link #getInputStreamFromURLOrClasspathOrFileSystem(String)}: a name starting
 * with https?:// is tried as a URL; otherwise it is tried as a resource on the
 * CLASSPATH, then as a local file, and finally as some other kind of URL.
 * If the String ends in .gz, it is interpreted as a gzipped file (and uncompressed).
 *
 * @param textFileOrUrl What to read from
 * @return The BufferedReader
 * @throws IOException If there is an I/O problem
 */
public static BufferedReader readerFromString(String textFileOrUrl)
    throws IOException {
  InputStream source = getInputStreamFromURLOrClasspathOrFileSystem(textFileOrUrl);
  return new BufferedReader(new InputStreamReader(source, "UTF-8"));
}
/**
 * Open a BufferedReader to a file or URL specified by a String name. If the
 * String starts with https?://, then it is first tried as a URL, otherwise it
 * is next tried as a resource on the CLASSPATH, and then finally it is tried
 * as a local file or other network-available file. If the String ends in .gz, it
 * is interpreted as a gzipped file (and uncompressed). The content is decoded
 * with the given encoding.
 * If the encoding passed in is null, then the system default encoding is used.
 * If the encoding is not supported, the opened stream is closed before the
 * exception is propagated.
 *
 * @param textFileOrUrl What to read from
 * @param encoding CharSet encoding. May be null, in which case the
 *     platform default encoding is used
 * @return The BufferedReader
 * @throws IOException If there is an I/O problem
 */
public static BufferedReader readerFromString(String textFileOrUrl,
    String encoding) throws IOException {
  InputStream is = getInputStreamFromURLOrClasspathOrFileSystem(textFileOrUrl);
  if (encoding == null) {
    return new BufferedReader(new InputStreamReader(is));
  }
  try {
    return new BufferedReader(new InputStreamReader(is, encoding));
  } catch (UnsupportedEncodingException e) {
    // Don't leak the already opened stream when the charset name is bad.
    closeIgnoringExceptions(is);
    throw e;
  }
}
/**
 * Returns an Iterable of the lines in the file, decoded with the platform
 * default encoding.
 *
 * The file reader will be closed when the iterator is exhausted. IO errors
 * will throw an (unchecked) RuntimeIOException
 *
 * @param path The file (or classpath resource or URL) whose lines are to be read.
 * @return An Iterable containing the lines from the file.
 */
public static Iterable<String> readLines(String path) {
  return readLines(path, null);
}
/**
 * Returns an Iterable of the lines in the file.
 *
 * The file reader will be closed when the iterator is exhausted. IO errors
 * will throw an (unchecked) RuntimeIOException
 *
 * @param path The file (or classpath resource or URL) whose lines are to be read.
 * @param encoding The encoding to use when reading lines; null means the platform default.
 * @return An Iterable containing the lines from the file.
 */
public static Iterable<String> readLines(String path, String encoding) {
  return new GetLinesIterable(path, null, encoding);
}
/**
 * Returns an Iterable of the lines in the file, read without any stream
 * wrapper and decoded with the platform default encoding.
 *
 * The file reader will be closed when the iterator is exhausted.
 *
 * @param file The file whose lines are to be read.
 * @return An Iterable containing the lines from the file.
 */
public static Iterable<String> readLines(final File file) {
  return readLines(file, null, null);
}
/**
* Returns an Iterable of the lines in the file.
*
* The file reader will be closed when the iterator is exhausted.
*
* @param file The file whose lines are to be read.
* @param fileInputStreamWrapper
* The class to wrap the InputStream with, e.g. GZIPInputStream. Note
* that the class must have a constructor that accepts an
* InputStream.
* @return An Iterable containing the lines from the file.
*/
public static Iterable readLines(final File file,
final Class extends InputStream> fileInputStreamWrapper) {
return readLines(file, fileInputStreamWrapper, null);
}
/**
* Returns an Iterable of the lines in the file, wrapping the generated
* FileInputStream with an instance of the supplied class. IO errors will
* throw an (unchecked) RuntimeIOException
*
* @param file The file whose lines are to be read.
* @param fileInputStreamWrapper
* The class to wrap the InputStream with, e.g. GZIPInputStream. Note
* that the class must have a constructor that accepts an
* InputStream.
* @param encoding The encoding to use when reading lines.
* @return An Iterable containing the lines from the file.
*/
public static Iterable readLines(final File file,
final Class extends InputStream> fileInputStreamWrapper,
final String encoding) {
return new GetLinesIterable(file, fileInputStreamWrapper, encoding);
}
static class GetLinesIterable implements Iterable {
final File file;
final String path;
final Class extends InputStream> fileInputStreamWrapper;
final String encoding;
// TODO: better programming style would be to make this two
// separate classes, but we don't expect to make more versions of
// this class anyway
GetLinesIterable(final File file,
final Class extends InputStream> fileInputStreamWrapper,
final String encoding) {
this.file = file;
this.path = null;
this.fileInputStreamWrapper = fileInputStreamWrapper;
this.encoding = encoding;
}
GetLinesIterable(final String path,
final Class extends InputStream> fileInputStreamWrapper,
final String encoding) {
this.file = null;
this.path = path;
this.fileInputStreamWrapper = fileInputStreamWrapper;
this.encoding = encoding;
}
private InputStream getStream() throws IOException {
if (file != null) {
return new FileInputStream(file);
} else if (path != null) {
return getInputStreamFromURLOrClasspathOrFileSystem(path);
} else {
throw new AssertionError("No known path to read");
}
}
public Iterator iterator() {
return new Iterator() {
protected BufferedReader reader = this.getReader();
protected String line = this.getLine();
public boolean hasNext() {
return this.line != null;
}
public String next() {
String nextLine = this.line;
if (nextLine == null) {
throw new NoSuchElementException();
}
line = getLine();
return nextLine;
}
protected String getLine() {
try {
String result = this.reader.readLine();
if (result == null) {
this.reader.close();
}
return result;
} catch (IOException e) {
throw new RuntimeIOException(e);
}
}
protected BufferedReader getReader() {
try {
InputStream stream = getStream();
if (fileInputStreamWrapper != null) {
stream = fileInputStreamWrapper.getConstructor(InputStream.class).newInstance(stream);
}
if (encoding == null) {
return new BufferedReader(new InputStreamReader(stream));
} else {
return new BufferedReader(new InputStreamReader(stream, encoding));
}
} catch (Exception e) {
throw new RuntimeIOException(e);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
}
/**
 * Given a reader, returns the lines from the reader as an Iterable.
 * The reader is consumed as the returned Iterable is iterated; it is not
 * closed by this method.
 *
 * @param r input reader
 * @param includeEol whether to keep eol-characters in the returned strings
 * @return iterable of lines (as strings)
 */
public static Iterable<String> getLineIterable(Reader r, boolean includeEol) {
  if (includeEol) {
    return new EolPreservingLineReaderIterable(r);
  } else {
    return new LineReaderIterable((r instanceof BufferedReader) ? (BufferedReader) r : new BufferedReader(r));
  }
}
/**
 * Given a reader, returns the lines from the reader as an Iterable, using
 * the given buffer size. The buffer size is not applied when eol-characters
 * are dropped and {@code r} is already a BufferedReader, because the reader
 * is then used as is.
 *
 * @param r input reader
 * @param bufferSize size of the character buffer used for reading
 * @param includeEol whether to keep eol-characters in the returned strings
 * @return iterable of lines (as strings)
 */
public static Iterable<String> getLineIterable(Reader r, int bufferSize, boolean includeEol) {
  if (includeEol) {
    return new EolPreservingLineReaderIterable(r, bufferSize);
  } else {
    return new LineReaderIterable((r instanceof BufferedReader) ? (BufferedReader) r : new BufferedReader(r, bufferSize));
  }
}
/**
 * Line iterator that uses BufferedReader.readLine().
 * EOL-characters are automatically discarded and not included in the strings returned.
 * All iterators obtained from one instance share the same underlying reader,
 * and the reader is not closed by this class.
 */
private static final class LineReaderIterable implements Iterable<String> {
  private final BufferedReader reader;

  private LineReaderIterable(BufferedReader reader) {
    this.reader = reader;
  }

  @Override
  public Iterator<String> iterator() {
    return new Iterator<String>() {
      // One line is always prefetched so that hasNext() needs no I/O.
      private String next = getNext();

      private String getNext() {
        try {
          return reader.readLine();
        } catch (IOException ex) {
          throw new RuntimeIOException(ex);
        }
      }

      @Override
      public boolean hasNext() {
        return this.next != null;
      }

      @Override
      public String next() {
        String nextLine = this.next;
        if (nextLine == null) {
          throw new NoSuchElementException();
        }
        next = getNext();
        return nextLine;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }
    };
  }
}
/**
 * Line iterator that preserves the eol-character exactly as read from reader.
 * Line endings are: \r\n,\n,\r
 * Lines returned by this iterator will include the eol-characters.
 * All iterators obtained from one instance share the same underlying reader,
 * and the reader is not closed by this class.
 **/
private static final class EolPreservingLineReaderIterable implements Iterable<String> {
  private final Reader reader;
  private final int bufferSize;

  private EolPreservingLineReaderIterable(Reader reader) {
    this(reader, SLURP_BUFFER_SIZE);
  }

  private EolPreservingLineReaderIterable(Reader reader, int bufferSize) {
    this.reader = reader;
    this.bufferSize = bufferSize;
  }

  @Override
  public Iterator<String> iterator() {
    return new Iterator<String>() {
      private String next;
      private boolean done = false;
      // Accumulates the current line; it may span several buffer refills.
      private final StringBuilder sb = new StringBuilder(80);
      private final char[] charBuffer = new char[bufferSize];
      // Next unread position in charBuffer; -1 means the buffer must be refilled.
      private int charBufferPos = -1;
      private int charsInBuffer = 0;
      // True when the previously scanned character was '\r' (carriage return).
      private boolean lastWasCR = false;

      /** Returns the next line including its eol-characters, or null at end of input. */
      private String getNext() {
        try {
          while (true) {
            if (charBufferPos < 0) {
              charsInBuffer = reader.read(charBuffer);
              if (charsInBuffer < 0) {
                // No more input: hand out whatever is left as the final line.
                if (sb.length() > 0) {
                  String line = sb.toString();
                  // resets the buffer
                  sb.setLength(0);
                  return line;
                } else {
                  return null;
                }
              }
              charBufferPos = 0;
            }
            boolean eolReached = copyUntilEol();
            if (eolReached) {
              String line = sb.toString();
              // resets the buffer
              sb.setLength(0);
              return line;
            }
          }
        } catch (IOException ex) {
          throw new RuntimeIOException(ex);
        }
      }

      /**
       * Copies characters from the buffer into sb up to and including the
       * next line ending. Returns true if a line ending was reached, false
       * if the buffer was exhausted first.
       */
      private boolean copyUntilEol() {
        for (int i = charBufferPos; i < charsInBuffer; i++) {
          if (charBuffer[i] == '\n') {
            // line end: copy it (including the '\n') into our string builder
            sb.append(charBuffer, charBufferPos, i - charBufferPos + 1);
            // advance character buffer pos
            charBufferPos = i + 1;
            lastWasCR = false;
            return true; // end of line reached
          } else if (lastWasCR) {
            // A lone '\r' ended the line; the current character belongs to the next one.
            // This also covers i == charBufferPos, i.e. the '\r' was the last character
            // of the previous buffer and is already in sb (zero chars are appended then).
            sb.append(charBuffer, charBufferPos, i - charBufferPos);
            // advance character buffer pos
            charBufferPos = i;
            lastWasCR = false;
            return true; // end of line reached
          }
          lastWasCR = (charBuffer[i] == '\r');
        }
        // Buffer exhausted without a line ending: keep what we have and ask for a refill.
        sb.append(charBuffer, charBufferPos, charsInBuffer - charBufferPos);
        // reset character buffer pos
        charBufferPos = -1;
        return false;
      }

      @Override
      public boolean hasNext() {
        if (done) return false;
        if (next == null) {
          next = getNext();
        }
        if (next == null) {
          done = true;
        }
        return !done;
      }

      @Override
      public String next() {
        if (!hasNext()) { throw new NoSuchElementException(); }
        String res = next;
        next = null;
        return res;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }
    };
  }
}
/**
 * Closes an IO resource, swallowing any IOException, so that it can be used
 * in a finally block without even more exception handling stuff.
 * From a suggestion in a talk by Josh Bloch.
 *
 * @param c The IO resource to close (e.g., a Stream/Reader); null is allowed and ignored
 */
public static void closeIgnoringExceptions(Closeable c) {
  if (c == null) {
    return;
  }
  try {
    c.close();
  } catch (IOException ioe) {
    // ignore
  }
}
/**
 * Iterate over all the files in the directory, recursively, without any
 * filtering on the file path.
 *
 * @param dir The root directory.
 * @return All files within the directory.
 */
public static Iterable<File> iterFilesRecursive(final File dir) {
  return iterFilesRecursive(dir, (Pattern) null);
}
/**
 * Iterate over all the files in the directory, recursively.
 *
 * @param dir The root directory.
 * @param ext A string that must be at the end of all files (e.g. ".txt");
 *     it is matched literally, not as a regular expression
 * @return All files within the directory ending in the given extension.
 */
public static Iterable<File> iterFilesRecursive(final File dir,
    final String ext) {
  return iterFilesRecursive(dir, Pattern.compile(Pattern.quote(ext) + "$"));
}
/**
 * Iterate over all the files in the directory, recursively. Directories
 * themselves are never returned. A directory whose content cannot be listed
 * is skipped.
 *
 * @param dir The root directory.
 * @param pattern A regular expression that the file path must match. This uses
 *          Matcher.find(), so use ^ and $ to specify endpoints. May be null,
 *          in which case every file is returned.
 * @return All files within the directory.
 */
public static Iterable<File> iterFilesRecursive(final File dir,
    final Pattern pattern) {
  return new Iterable<File>() {
    @Override
    public Iterator<File> iterator() {
      return new AbstractIterator<File>() {
        // Pending files and directories, processed in FIFO order.
        private final Queue<File> files = new LinkedList<>(Collections
            .singleton(dir));
        // The next file to hand out, prefetched; null once the traversal is finished.
        private File file = this.findNext();

        @Override
        public boolean hasNext() {
          return this.file != null;
        }

        @Override
        public File next() {
          File result = this.file;
          if (result == null) {
            throw new NoSuchElementException();
          }
          this.file = this.findNext();
          return result;
        }

        /** Advances through the queue to the next matching non-directory entry. */
        private File findNext() {
          File next = null;
          while (!this.files.isEmpty() && next == null) {
            next = this.files.remove();
            if (next.isDirectory()) {
              // listFiles() returns null on an I/O error or if the directory is unreadable.
              File[] children = next.listFiles();
              if (children != null) {
                files.addAll(Arrays.asList(children));
              }
              next = null;
            } else if (pattern != null) {
              if (!pattern.matcher(next.getPath()).find()) {
                next = null;
              }
            }
          }
          return next;
        }
      };
    }
  };
}
/**
 * Returns all the text in the given File, passing a null encoding on to
 * {@link #slurpFile(File, String)}.
 *
 * @param file The file to read from
 * @return The text in the file
 * @throws IOException If there is an I/O problem
 */
public static String slurpFile(File file) throws IOException {
  final String noEncoding = null;
  return slurpFile(file, noEncoding);
}
/**
 * Returns all the text in the given File, read through
 * encodedInputStreamReader and slurpReader.
 *
 * @param file The file to read from
 * @param encoding The character encoding to assume. This may be null, and
 *       the platform default character encoding is used.
 * @return The text in the file
 * @throws IOException If there is an I/O problem
 */
public static String slurpFile(File file, String encoding) throws IOException {
  FileInputStream in = new FileInputStream(file);
  Reader decoded = IOUtils.encodedInputStreamReader(in, encoding);
  return IOUtils.slurpReader(decoded);
}
/**
 * Returns all the text in the given gzipped file, passing a null encoding
 * to encodedInputStreamReader.
 *
 * @param filename Name of the gzipped file to read
 * @return The uncompressed text in the file
 * @throws IOException If there is an I/O problem
 */
public static String slurpGZippedFile(String filename) throws IOException {
  GZIPInputStream gunzipped = new GZIPInputStream(new FileInputStream(filename));
  Reader decoded = encodedInputStreamReader(gunzipped, null);
  return IOUtils.slurpReader(decoded);
}
/**
 * Returns all the decompressed text of the given gzipped File. No character
 * encoding is specified (null is passed to the reader factory).
 *
 * @param file The gzip-compressed file to read
 * @return The decompressed text of the file
 * @throws IOException If the file cannot be opened or is not in gzip format
 */
public static String slurpGZippedFile(File file) throws IOException {
  // Own the file stream here so it is released even when the GZIPInputStream
  // constructor rejects the header; the second close after slurpReader has
  // already closed the reader chain is harmless.
  try (InputStream fileIn = new FileInputStream(file)) {
    Reader r = encodedInputStreamReader(new GZIPInputStream(fileIn), null);
    return IOUtils.slurpReader(r);
  }
}
/**
 * Reads everything from the named source, decoded with the given encoding,
 * and returns it as a single String.
 *
 * @param filename Name of the source, as understood by readerFromString
 * @param encoding The character encoding to read with
 * @return The full text that was read
 * @throws IOException If a reader cannot be opened for the name
 */
public static String slurpFile(String filename, String encoding)
throws IOException {
  final Reader source = readerFromString(filename, encoding);
  final String contents = IOUtils.slurpReader(source);
  return contents;
}
/**
 * Unchecked-exception variant of {@code slurpFile(String, String)}: returns
 * all the text in the given file, read with the given encoding. Any
 * IOException raised while doing so is rethrown wrapped in a
 * RuntimeIOException, so callers who can tolerate a missing or unreadable
 * file should catch that exception.
 *
 * @param filename Name of the file to read
 * @param encoding The character encoding to read with
 * @return The text in the file
 */
public static String slurpFileNoExceptions(String filename, String encoding) {
  final String contents;
  try {
    contents = slurpFile(filename, encoding);
  } catch (IOException ioFailure) {
    throw new RuntimeIOException("slurpFile IO problem", ioFailure);
  }
  return contents;
}
/**
 * Returns all the text in the given file, read with the class-wide
 * {@code defaultEncoding}.
 *
 * @param filename Name of the file to read
 * @return The text in the file.
 * @throws IOException If the two-argument overload reports one
 */
public static String slurpFile(String filename) throws IOException {
  final String encoding = defaultEncoding;
  return slurpFile(filename, encoding);
}
/**
 * Best-effort variant of {@code slurpURL(URL, String)}: returns all the text
 * at the given URL, or null if anything goes wrong. In the failure case the
 * stack trace of the exception is printed.
 *
 * @param u The URL to read
 * @param encoding The character encoding to decode the content with
 * @return The text at the URL, or null on any failure
 */
public static String slurpURLNoExceptions(URL u, String encoding) {
  String text = null;
  try {
    text = IOUtils.slurpURL(u, encoding);
  } catch (Exception problem) {
    problem.printStackTrace();
  }
  return text;
}
/**
 * Returns all the text at the given URL, decoded with the given encoding.
 * <p>
 * The content is read line by line and every line, including the last, is
 * followed by the platform line separator in the result. A read timeout of
 * 30 seconds is set on the connection. If opening the input stream times
 * out, a message is printed to stderr and the empty string is returned;
 * a timeout while reading lines is propagated as an exception.
 *
 * @param u The URL to read
 * @param encoding Name of the character encoding used to decode the content
 * @return The text at the URL, or the empty string if opening the stream timed out
 * @throws IOException If the connection cannot be opened or read, or the
 *           encoding is not supported
 */
public static String slurpURL(URL u, String encoding) throws IOException {
  String lineSeparator = System.getProperty("line.separator");
  URLConnection uc = u.openConnection();
  uc.setReadTimeout(30000);
  final InputStream is;
  try {
    is = uc.getInputStream();
  } catch (SocketTimeoutException e) {
    System.err.println("Time out. Return empty string");
    return "";
  }
  StringBuilder buff = new StringBuilder(SLURP_BUFFER_SIZE); // make biggish
  // Declaring the raw stream as its own resource guarantees it is closed even
  // if building the reader fails (e.g. unsupported encoding) or a read throws.
  try (InputStream in = is;
       BufferedReader br = new BufferedReader(new InputStreamReader(in, encoding))) {
    for (String temp; (temp = br.readLine()) != null; ) {
      buff.append(temp);
      buff.append(lineSeparator);
    }
  }
  return buff.toString();
}
/**
 * Extracts the character encoding declared in the {@code charset} parameter
 * of a connection's content type.
 * <p>
 * Falls back to {@code defaultEncoding} when the connection reports no
 * content type, when no {@code charset} parameter is present, or when the
 * parameter is empty. Double quotes around the value are removed. If several
 * {@code charset} parameters are present, the last one wins.
 *
 * @param connection The connection whose content type is inspected
 * @return The declared charset name, or {@code defaultEncoding} if none is declared
 */
public static String getUrlEncoding(URLConnection connection) {
  String contentType = connection.getContentType();
  if (contentType == null) {
    // getContentType() returns null when the content type is not known.
    return defaultEncoding;
  }
  String charset = defaultEncoding; // might or might not be right....
  for (String value : contentType.split(";")) {
    value = value.trim();
    if (value.toLowerCase(Locale.ENGLISH).startsWith("charset=")) {
      String declared = value.substring("charset=".length()).trim();
      // Parameter values may be written as quoted strings: charset="utf-8".
      if (declared.length() >= 2 && declared.charAt(0) == '"'
          && declared.charAt(declared.length() - 1) == '"') {
        declared = declared.substring(1, declared.length() - 1).trim();
      }
      if (!declared.isEmpty()) {
        charset = declared;
      }
    }
  }
  return charset;
}
/**
 * Returns all the text at the given URL, decoded with the encoding that
 * {@code getUrlEncoding} determines for the connection.
 * <p>
 * The content is read line by line and every line, including the last, is
 * followed by the platform line separator in the result.
 *
 * @param u The URL to read
 * @return The text at the URL
 * @throws IOException If the connection cannot be opened or read, or the
 *           determined encoding is not supported
 */
public static String slurpURL(URL u) throws IOException {
  String lineSeparator = System.getProperty("line.separator");
  URLConnection uc = u.openConnection();
  String encoding = getUrlEncoding(uc);
  StringBuilder buff = new StringBuilder(SLURP_BUFFER_SIZE); // make biggish
  // Declaring the raw stream as its own resource guarantees it is closed even
  // if building the reader fails (e.g. unsupported encoding) or a read throws.
  try (InputStream is = uc.getInputStream();
       BufferedReader br = new BufferedReader(new InputStreamReader(is, encoding))) {
    for (String temp; (temp = br.readLine()) != null; ) {
      buff.append(temp);
      buff.append(lineSeparator);
    }
  }
  return buff.toString();
}
/**
 * Best-effort variant of {@code slurpURL(URL)}: returns all the text at the
 * given URL, or null if anything goes wrong. In the failure case the stack
 * trace of the exception is printed.
 *
 * @param u The URL to read
 * @return The text at the URL, or null on any failure
 */
public static String slurpURLNoExceptions(URL u) {
  String text = null;
  try {
    text = slurpURL(u);
  } catch (Exception problem) {
    problem.printStackTrace();
  }
  return text;
}
/**
 * Returns all the text at the URL given in String form.
 *
 * @param path The URL to read, as a String
 * @return The text at the URL
 * @throws Exception If the String cannot be turned into a URL or the URL
 *           cannot be read
 */
public static String slurpURL(String path) throws Exception {
  final URL target = new URL(path);
  return slurpURL(target);
}
/**
 * Best-effort variant of {@code slurpURL(String)}: returns all the text at
 * the given URL. If the URL cannot be read (malformed, non-existent, etc.),
 * the stack trace of the exception is printed and null is returned.
 *
 * @param path The URL to read, as a String
 * @return The text at the URL, or null on any failure
 */
public static String slurpURLNoExceptions(String path) {
  String text = null;
  try {
    text = slurpURL(path);
  } catch (Exception problem) {
    problem.printStackTrace();
  }
  return text;
}
/**
 * Returns all the text in the given File, read without naming a character
 * encoding (a null encoding is passed to the reader factory). Any
 * IOException raised while opening the file is rethrown wrapped in an
 * unchecked RuntimeIOException, so callers who can tolerate a missing file
 * should catch that exception.
 *
 * @param file The file to read from
 * @return The text in the file
 */
public static String slurpFileNoExceptions(File file) {
  final String contents;
  try {
    final InputStream rawBytes = new FileInputStream(file);
    final Reader decoded = encodedInputStreamReader(rawBytes, null);
    contents = IOUtils.slurpReader(decoded);
  } catch (IOException ioFailure) {
    throw new RuntimeIOException(ioFailure);
  }
  return contents;
}
/**
 * Unchecked-exception variant of {@code slurpFile(String)}: returns all the
 * text in the named file. Any IOException raised while doing so is rethrown
 * wrapped in a RuntimeIOException, so callers who can tolerate a missing or
 * unreadable file should catch that exception.
 *
 * @param filename Name of the file to read
 * @return The text in the file
 */
public static String slurpFileNoExceptions(String filename) {
  final String contents;
  try {
    contents = slurpFile(filename);
  } catch (IOException ioFailure) {
    throw new RuntimeIOException(ioFailure);
  }
  return contents;
}
/**
 * Returns all the text from the given Reader.
 * Closes the Reader when done, whether reading succeeded or failed.
 * <p>
 * Any exception raised while reading or closing is rethrown wrapped in an
 * unchecked RuntimeIOException.
 *
 * @param reader The reader to drain; must not be null
 * @return The text read from the reader.
 */
public static String slurpReader(Reader reader) {
  // Checked outside the try so that a null argument still surfaces as a plain
  // NullPointerException instead of being wrapped below.
  Objects.requireNonNull(reader, "reader");
  StringBuilder buff = new StringBuilder();
  // try-with-resources closes the reader on every path, including a failed read.
  try (BufferedReader r = new BufferedReader(reader)) {
    char[] chars = new char[SLURP_BUFFER_SIZE];
    int amountRead;
    while ((amountRead = r.read(chars, 0, SLURP_BUFFER_SIZE)) >= 0) {
      buff.append(chars, 0, amountRead);
    }
  } catch (Exception e) {
    // Deliberately broad: callers have always received every failure from
    // the read loop as a RuntimeIOException.
    throw new RuntimeIOException("slurpReader IO problem", e);
  }
  return buff.toString();
}
/**
 * Copies every byte that can be read from the input stream to the output
 * stream, in chunks of up to 4096 bytes. Neither stream is flushed or closed
 * by this method.
 *
 * @param input
 *          The input bytes.
 * @param output
 *          Where the bytes should be written.
 * @throws IOException If reading from the input or writing to the output fails
 */
public static void writeStreamToStream(InputStream input, OutputStream output)
throws IOException {
  final byte[] chunk = new byte[4096];
  // read() reports -1 once the end of the input has been reached.
  for (int count = input.read(chunk); count != -1; count = input.read(chunk)) {
    output.write(chunk, 0, count);
  }
}
/**
* Read in a CSV formatted file with a header row.
*
* @param path - path to CSV file
* @param quoteChar - character for enclosing strings, defaults to "
* @param escapeChar - character for escaping quotes appearing in quoted strings; defaults to " (i.e. "" is used for " inside quotes, consistent with Excel)
* @return a list of maps representing the rows of the csv. The maps' keys are the header strings and their values are the row contents
* @throws IOException
*/
public static List