// au.id.jericho.lib.html.Util (Maven / Gradle / Ivy)
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.4
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package au.id.jericho.lib.html;
import java.util.*;
import java.io.*;
/**
* Contains miscellaneous utility methods not directly associated with the HTML Parser library.
*/
public final class Util {
// Buffer size, in characters, used by getString(Reader) when reading text.
private static final int BUFFER_SIZE=2048;
// Platform line separator, read once from the "line.separator" system property.
// NOTE(review): presumably the record terminator written by the CSV output code; its use is not visible here - confirm.
private static final String CSVNewLine=System.getProperty("line.separator");
// Private constructor: this final class cannot be instantiated from outside.
private Util() {}
/**
* Returns the text loaded from the specified <code>Reader</code> as a string.
* <p>
* If a <code>null</code> argument is supplied to this method, an empty string is returned.
* Note that in version 2.0 a <code>null</code> argument resulted in a <code>null</code> return value.
* <p>
* To load text from an <code>InputStream</code>, use <code>getString(new InputStreamReader(inputStream))</code>.
* <p>
* A non-<code>null</code> reader is always closed before this method returns,
* whether the text was read successfully or an exception was thrown.
*
* @param reader the <code>java.io.Reader</code> from which to load the text, may be <code>null</code>.
* @return the text loaded from the specified <code>java.io.Reader</code> as a string, never <code>null</code>.
* @throws java.io.IOException if an I/O error occurs while reading from or closing the reader.
*/
public static String getString(final Reader reader) throws IOException {
if (reader==null) return "";
try {
// Read directly from the supplied reader in BUFFER_SIZE chunks. Wrapping it in a
// BufferedReader of the same size gains nothing, because bulk reads at least as
// large as the buffer are passed straight through to the underlying reader.
final char[] copyBuffer=new char[BUFFER_SIZE];
final StringBuffer sb=new StringBuffer();
int charsRead;
while ((charsRead=reader.read(copyBuffer,0,BUFFER_SIZE))!=-1)
sb.append(copyBuffer,0,charsRead);
return sb.toString();
} finally {
// Single point of closure, reached on both normal and exceptional exit,
// so the reader is closed exactly once.
reader.close();
}
}
/**
* Outputs the specified array of strings to the specified <code>Writer</code> in the format of a line for a CSV file.
* <p>
* "CSV" stands for <i>Comma Separated Values</i>.
* There is no formal specification for a CSV file, so there is significant variation in
* the way different applications handle issues like the encoding of different data types and special characters.
* <p>
* Generally, a CSV file contains a list of records separated by line breaks, with each record consisting of a list of
* field values separated by commas.
* Each record in the file should contain the same number of field values, with the values at each position representing the same
* type of data in all the records. In this way the file can also be divided into columns, often with the first line of the
* file containing the column labels.
* <p>
* Columns can have different data types such as text, numeric, date / time and boolean.
* A text value is often delimited with single (<code>'</code>) or double-quotes (<code>"</code>),
* especially if the value contains a comma, line feed, or other special character that is significant to the syntax.
* Encoding techniques for including quote characters themselves in text values vary widely.
* Values of other types are generally unquoted to distinguish them from text values.
* <p>
* This method produces output that is readable by MS-Excel, conforming to the following rules:
* <p>
* <ul>
* <li>All values are considered to be of type text, except for the constants {@link Config#ColumnValueTrue}
* and {@link Config#ColumnValueFalse}, representing the boolean values <code>true</code> and <code>false</code> respectively.
* <li>All text values are enclosed in double-quotes.
* <li>Double-quote characters contained in text values are encoded using two consecutive double-quotes (<code>""</code>).
* <li><code>null</code> values are represented as empty fields.
* <li>The end of each record is represented by a carriage-return / line-feed (CR/LF) pair.
* <li>Line breaks inside text values are represented by a single line feed (LF) character.
* </ul>
*
* @param writer the destination <code>java.io.Writer</code> for the output.
* @param values the array of strings to output as a single CSV line.
* @throws java.io.IOException if an I/O error occurs.
* @see FormFields#getColumnLabels()
* @see FormFields#getColumnValues(Map)
*/
public static void outputCSVLine(final Writer writer, final String[] values) throws IOException {
for (int i=0; i