// Source listing of net.sf.okapi.common.UnicodeBOMWriter from the okapi-lib artifact
// (available via Maven / Gradle / Ivy). Note: a newer version of the artifact, 1.47.0, exists.
/*===========================================================================
  Copyright (C) 2021 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
===========================================================================*/

package net.sf.okapi.common;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Locale;

/**
 * A UnicodeBOMWriter is a thin wrapper around {@link OutputStreamWriter},
 * except that it adds a BOM (Byte Order Mark) at the beginning of the output
 * when the charset is one of the Unicode encodings handled by this class
 * (UTF-8, UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, UTF-32LE). For UTF-16 no BOM
 * is written by this class because {@link OutputStreamWriter} already writes
 * one. For any other charset no BOM is written.
 *
 * For documentation on the behavior in case of invalid Unicode sequences
 * (malformed surrogates, etc.) see the doc for {@link OutputStreamWriter}.
 *
 * @see Charset
 * @see OutputStream
 * @see OutputStreamWriter
 */
public class UnicodeBOMWriter extends Writer {
	private static final byte[] BOM_UTF8 = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
	private static final byte[] BOM_UTF16BE = { (byte) 0xFE, (byte) 0xFF };
	private static final byte[] BOM_UTF16LE = { (byte) 0xFF, (byte) 0xFE };
	private static final byte[] BOM_UTF32BE = { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF };
	private static final byte[] BOM_UTF32LE = { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 };

	/** Buffered, charset-encoding writer that receives all character output. */
	private final Writer wrappedWriter;

	/**
	 * Creates a UnicodeBOMWriter that uses the named charset.
	 * <p>
	 * The charset name is resolved before the file is opened, so an invalid
	 * or unsupported charset name leaves an existing file untouched.
	 *
	 * @param file        The output {@link File} to write to
	 * @param charsetName The name of a supported {@link Charset}
	 *
	 * @exception IOException If the file cannot be opened or there is any
	 *                        problem writing the BOM to the stream
	 * @exception java.nio.charset.IllegalCharsetNameException
	 *                        If the given charset name is illegal
	 * @exception java.nio.charset.UnsupportedCharsetException
	 *                        If the named charset is not supported
	 * @exception IllegalArgumentException If {@code charsetName} is null
	 */
	public UnicodeBOMWriter(File file, String charsetName) throws IOException {
		// Validate the charset BEFORE opening the stream: opening truncates the
		// file, and a failure afterwards would also leak the open stream.
		final Charset cs = Charset.forName(charsetName);
		final FileOutputStream fos = new FileOutputStream(file);
		try {
			// The BOM goes straight to the raw stream; nothing has been written
			// through the encoder yet, so it is guaranteed to come first.
			writeBomIfNeeded(fos, cs.name());
		} catch (IOException e) {
			// Do not leak the file handle when the BOM cannot be written.
			try {
				fos.close();
			} catch (IOException closeFailure) {
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
		wrappedWriter = new BufferedWriter(new OutputStreamWriter(fos, cs));
	}

	/**
	 * Creates a UnicodeBOMWriter that uses the named charset.
	 *
	 * @param fileName    The name of the output file
	 * @param charsetName The name of a supported {@link Charset}
	 *
	 * @exception IOException If the file cannot be opened or there is any
	 *                        problem writing the BOM to the stream
	 * @exception java.nio.charset.IllegalCharsetNameException
	 *                        If the given charset name is illegal
	 * @exception java.nio.charset.UnsupportedCharsetException
	 *                        If the named charset is not supported
	 * @exception IllegalArgumentException If {@code charsetName} is null
	 */
	public UnicodeBOMWriter(String fileName, String charsetName) throws IOException {
		this(new File(fileName), charsetName);
	}

	/**
	 * Writes the BOM bytes matching the given charset directly to the stream.
	 * Charsets not listed below get no BOM.
	 *
	 * @param out         The raw stream to receive the BOM bytes
	 * @param charsetName The charset name as returned by {@link Charset#name()}
	 *
	 * @exception IOException If writing to the stream fails
	 */
	private static void writeBomIfNeeded(OutputStream out, String charsetName) throws IOException {
		switch (charsetName) {
			case "UTF-8":
				out.write(BOM_UTF8);
				break;
			case "UTF-16": // OutputStreamWriter already writes a BOM
				break;
			case "UTF-16BE":
				out.write(BOM_UTF16BE);
				break;
			case "UTF-16LE":
				out.write(BOM_UTF16LE);
				break;
			case "UTF-32": // Intentional fall-through: gets the big-endian BOM
			case "UTF-32BE":
				out.write(BOM_UTF32BE);
				break;
			case "UTF-32LE":
				out.write(BOM_UTF32LE);
				break;
			default: // Not a charset handled here: no BOM
				break;
		}
	}

	/**
	 * Writes a portion of a character array to the wrapped writer.
	 *
	 * @param cbuf Array of characters
	 * @param off  Offset from which to start writing characters
	 * @param len  Number of characters to write
	 *
	 * @exception IOException If the wrapped writer fails
	 */
	@Override
	public void write(char[] cbuf, int off, int len) throws IOException {
		wrappedWriter.write(cbuf, off, len);
	}

	/**
	 * Flushes the wrapped writer.
	 *
	 * @exception IOException If the wrapped writer fails
	 */
	@Override
	public void flush() throws IOException {
		wrappedWriter.flush();
	}

	/**
	 * Closes the wrapped writer.
	 *
	 * @exception IOException If the wrapped writer fails
	 */
	@Override
	public void close() throws IOException {
		wrappedWriter.close();
	}
}