All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.okapi.common.UnicodeBOMWriter Maven / Gradle / Ivy

There is a newer version: 1.46.0
Show newest version
/*
 * 09/24/2004
 *
 * UnicodeWriter.java - Writes Unicode output with the proper BOM.
 * Copyright (C) 2004 Robert Futrell
 * robert_futrell at users.sourceforge.net
 * http://fifesoft.com/rsyntaxtextarea
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA.
 */
package net.sf.okapi.common;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;


/**
 * Writes Unicode text to an output stream.  If the specified encoding is a
 * Unicode, then the text is preceeded by the proper Unicode BOM.  If it is any
 * other encoding, this class behaves just like OutputStreamWriter.
 * This class is here because Java's OutputStreamWriter apparently
 * doesn't believe in writing BOMs.
 * 

* * For optimum performance, it is recommended that you wrap all instances of * UnicodeWriter with a java.io.BufferedWriter. * * @author Robert Futrell * @version 0.7 */ public class UnicodeBOMWriter extends Writer { /** * If this system property evaluates to "false", ignoring * case, files written out as UTF-8 will not have a BOM written for them. * Otherwise (even if the property is not set), UTF-8 files will have a * BOM written. */ public static final String PROPERTY_WRITE_UTF8_BOM = "UnicodeWriter.writeUtf8BOM"; /** * The writer actually doing the writing. */ private OutputStreamWriter internalOut; private static final byte[] UTF8_BOM = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF }; private static final byte[] UTF16LE_BOM = new byte[] { (byte)0xFF, (byte)0xFE }; private static final byte[] UTF16BE_BOM = new byte[] { (byte)0xFE, (byte)0xFF }; private static final byte[] UTF32LE_BOM = new byte[] { (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00 }; private static final byte[] UTF32BE_BOM = new byte[] { (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF }; /** * This is a utility constructor since the vast majority of the time, this * class will be used to write Unicode files. * * @param fileName The file to which to write the Unicode output. * @param encoding The encoding to use. * @throws UnsupportedEncodingException If the specified encoding is not * supported. * @throws IOException If an IO exception occurs. */ public UnicodeBOMWriter(String fileName, String encoding) throws UnsupportedEncodingException, IOException { this(new FileOutputStream(fileName), encoding); } /** * This is a utility constructor since the vast majority of the time, this * class will be used to write Unicode files. * * @param file The file to which to write the Unicode output. * @param encoding The encoding to use. * @throws UnsupportedEncodingException If the specified encoding is not * supported. * @throws IOException If an IO exception occurs. */ public UnicodeBOMWriter(File file, String encoding) throws UnsupportedEncodingException, IOException { this(new FileOutputStream(file), encoding); } /** * Creates a new writer. * * @param out The output stream to write. * @param encoding The encoding to use. * @throws UnsupportedEncodingException If the specified encoding is not * supported. * @throws IOException If an IO exception occurs. */ public UnicodeBOMWriter(OutputStream out, String encoding) throws UnsupportedEncodingException, IOException { init(out, encoding); } /** * Closes this writer. * * @throws IOException If an IO exception occurs. */ public void close() throws IOException { internalOut.close(); } /** * Flushes the stream. * * @throws IOException If an IO exception occurs. */ public void flush() throws IOException { internalOut.flush(); } /** * Returns the encoding being used to write this output stream (i.e., the * encoding of the file). * * @return The encoding of the stream. */ public String getEncoding() { return internalOut.getEncoding(); } /** * Initializes the internal output stream and writes the BOM if the * specified encoding is a Unicode encoding. * * @param out The output stream we are writing. * @param encoding The encoding in which to write. * @throws UnsupportedEncodingException If the specified encoding isn't * supported. * @throws IOException If an I/O error occurs while writing a BOM. */ private void init(OutputStream out, String encoding) throws UnsupportedEncodingException, IOException { internalOut = new OutputStreamWriter(out, encoding); // Write the proper BOM if they specified a Unicode encoding. // NOTE: Creating an OutputStreamWriter with encoding "UTF-16" // DOES write out the BOM; "UTF-16LE", "UTF-16BE", "UTF-32", "UTF-32LE" // and "UTF-32BE" don't. switch (encoding) { case "UTF-8": if (writeUtf8BOM()) { out.write(UTF8_BOM, 0, UTF8_BOM.length); } break; case "UTF-16LE": out.write(UTF16LE_BOM, 0, UTF16LE_BOM.length); break; case "UTF-16BE": /*"UTF-16".equals(encoding) || */ out.write(UTF16BE_BOM, 0, UTF16BE_BOM.length); break; case "UTF-32LE": out.write(UTF32LE_BOM, 0, UTF32LE_BOM.length); break; case "UTF-32": case "UTF-32BE": out.write(UTF32BE_BOM, 0, UTF32BE_BOM.length); break; } } /** * Writes a portion of an array of characters. * * @param cbuf The buffer of characters. * @param off The offset from which to start writing characters. * @param len The number of characters to write. * @throws IOException If an I/O error occurs. */ public void write(char[] cbuf, int off, int len) throws IOException { internalOut.write(cbuf, off, len); } /** * Writes a single character. * * @param c An integer specifying the character to write. * @throws IOException If an IO error occurs. */ public void write(int c) throws IOException { internalOut.write(c); } /** * Writes a portion of a string. * * @param str The string from which to write. * @param off The offset from which to start writing characters. * @param len The number of characters to write. * @throws IOException If an IO error occurs. */ public void write(String str, int off, int len) throws IOException { internalOut.write(str, off, len); } /** * Returns whether UTF-8 files should have a BOM in them when written. * * @return Whether to write a BOM for UTF-8 files. */ private boolean writeUtf8BOM() { String prop = System.getProperty(PROPERTY_WRITE_UTF8_BOM); if (prop!=null && Boolean.valueOf(prop).equals(Boolean.FALSE)) { return false; } return true; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy