// net.sf.okapi.common.UnicodeBOMWriter (Maven / Gradle / Ivy)
/*===========================================================================
Copyright (C) 2021 by the Okapi Framework contributors
-----------------------------------------------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===========================================================================*/
package net.sf.okapi.common;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Locale;
/**
 * A UnicodeBOMWriter is a thin wrapper around {@link OutputStreamWriter},
 * except that it adds a BOM (Byte Order Mark) at the beginning of the output
 * for the Unicode encodings it recognizes (UTF-8, UTF-16BE/LE, UTF-32,
 * UTF-32BE/LE). For any other charset no BOM is added by this class.
 *
 * For documentation on the behavior in case of invalid Unicode sequences
 * (malformed surrogates, etc.) see the doc for {@link OutputStreamWriter}.
 *
 * @see Charset
 * @see OutputStream
 * @see OutputStreamWriter
 */
public class UnicodeBOMWriter extends Writer {
    private static final byte[] BOM_UTF8 = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
    private static final byte[] BOM_UTF16BE = { (byte) 0xFE, (byte) 0xFF };
    private static final byte[] BOM_UTF16LE = { (byte) 0xFF, (byte) 0xFE };
    private static final byte[] BOM_UTF32BE = { (byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF };
    private static final byte[] BOM_UTF32LE = { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 };

    /** The buffered, charset-encoding writer that all character output is delegated to. */
    private final Writer wrappedWriter;

    /**
     * Creates an UnicodeBOMWriter that uses the named charset.
     *
     * The charset name is resolved before the file is opened, so an invalid
     * name neither creates/truncates the file nor leaves a stream open.
     *
     * @param file The output {@link File} to write to
     * @param charsetName The name (or alias) of a supported {@link Charset}
     *
     * @exception IOException If the file cannot be opened or there is any
     * problem writing the BOM to the stream
     * @exception IllegalArgumentException If the charset name is {@code null},
     * illegal ({@code java.nio.charset.IllegalCharsetNameException}) or not
     * supported ({@code java.nio.charset.UnsupportedCharsetException})
     */
    public UnicodeBOMWriter(File file, String charsetName) throws IOException {
        // Resolve first: Charset.forName throws unchecked exceptions, and doing it
        // after opening the stream would leak the stream and truncate the file.
        final Charset cs = Charset.forName(charsetName);
        final FileOutputStream fos = new FileOutputStream(file);
        try {
            final Writer writer = new BufferedWriter(new OutputStreamWriter(fos, cs));
            // The BOM goes straight to the raw stream; nothing has been written
            // through the (buffered) writer yet, so it ends up first in the file.
            // cs.name() is the canonical name, so aliases such as "utf8" also match.
            writeBomIfNeeded(fos, cs.name());
            wrappedWriter = writer;
        } catch (IOException | RuntimeException | Error e) {
            // Setup failed: nobody else holds the stream, so release it here.
            try {
                fos.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Creates an UnicodeBOMWriter that uses the named charset.
     *
     * @param fileName The name of the output file
     * @param charsetName The name (or alias) of a supported {@link Charset}
     *
     * @exception IOException If the file cannot be opened or there is any
     * problem writing the BOM to the stream
     * @exception IllegalArgumentException If the charset name is {@code null},
     * illegal or not supported
     */
    public UnicodeBOMWriter(String fileName, String charsetName) throws IOException {
        this(new File(fileName), charsetName);
    }

    /**
     * Writes the BOM matching the given canonical charset name directly to the
     * stream. Charsets that are not listed get no BOM.
     *
     * @param out The raw output stream to receive the BOM bytes
     * @param charsetName The canonical charset name (as returned by {@link Charset#name()})
     *
     * @exception IOException If writing to the stream fails
     */
    private static void writeBomIfNeeded(OutputStream out, String charsetName) throws IOException {
        switch (charsetName) {
        case "UTF-8":
            out.write(BOM_UTF8);
            break;
        case "UTF-16": // OutputStreamWriter already writes a BOM
            break;
        case "UTF-16BE":
            out.write(BOM_UTF16BE);
            break;
        case "UTF-16LE":
            out.write(BOM_UTF16LE);
            break;
        case "UTF-32": // Intentional fall-through: plain UTF-32 gets the big-endian BOM
        case "UTF-32BE":
            out.write(BOM_UTF32BE);
            break;
        case "UTF-32LE":
            out.write(BOM_UTF32LE);
            break;
        default:
            break;
        }
    }

    /**
     * Writes a portion of a character array by delegating to the wrapped writer.
     *
     * @param cbuf The characters to write
     * @param off The offset of the first character to write
     * @param len The number of characters to write
     *
     * @exception IOException If the wrapped writer fails
     */
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException {
        wrappedWriter.write(cbuf, off, len);
    }

    /**
     * Flushes the wrapped writer.
     *
     * @exception IOException If the wrapped writer fails
     */
    @Override
    public void flush() throws IOException {
        wrappedWriter.flush();
    }

    /**
     * Closes the wrapped writer (and with it the underlying file stream).
     *
     * @exception IOException If the wrapped writer fails
     */
    @Override
    public void close() throws IOException {
        wrappedWriter.close();
    }
}