// com.phloc.charset.utf7.UTF7StyleCharsetEncoder Maven / Gradle / Ivy
/* ====================================================================
* Copyright (c) 2006 J.T. Beetstra
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* ====================================================================
*/
package com.phloc.charset.utf7;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import javax.annotation.Nonnull;
/**
 * Encoder shared by the UTF-7 charset variants and the modified-UTF-7 charset.
 * <p>
 * Characters the charset can encode directly are written as a single byte;
 * all other characters are written as base 64 sextets, bracketed by the
 * charset's shift and unshift bytes.
 * </p>
 * <p>
 * On Sun Java VMs before 1.6 this class deliberately deviates from the
 * specification in order to work around a bug in
 * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}, a method that
 * cannot be overridden.
 * </p>
 *
 * @see "JDK bug 6221056"
 * @author Jaap Beetstra
 */
final class UTF7StyleCharsetEncoder extends CharsetEncoder
{
  /** Average bytes-per-char figure handed to the super constructor. */
  private static final float AVG_BYTES_PER_CHAR = 1.5f;
  /** Maximum bytes-per-char figure handed to the super constructor. */
  private static final float MAX_BYTES_PER_CHAR = 5.0f;
  /**
   * <code>true</code> only on a Sun VM reporting specification version 1.4 or
   * 1.5, where the flush workaround in
   * {@link #encodeLoop(CharBuffer, ByteBuffer)} is needed.
   */
  private static final boolean s_bUseUglyHackToForceCallToFlushInJava5;

  static
  {
    final String sSpecVersion = System.getProperty ("java.specification.version");
    final String sVmVendor = System.getProperty ("java.vm.vendor");
    final boolean bIsPreJava6 = "1.4".equals (sSpecVersion) || "1.5".equals (sSpecVersion);
    s_bUseUglyHackToForceCallToFlushInJava5 = bIsPreJava6 && "Sun Microsystems Inc.".equals (sVmVendor);
  }

  /** Charset consulted to find out which characters may be written directly. */
  private final UTF7StyleCharset m_aCharset;
  /** Helper that maps sextet values to output bytes. */
  private final Base64Util m_aBase64;
  /** Byte that opens a base 64 section (taken from the charset). */
  private final byte m_nShift;
  /** Byte that closes a base 64 section (taken from the charset). */
  private final byte m_nUnshift;
  /** If set, the unshift byte is always written when leaving base 64 mode. */
  private final boolean m_bStrict;
  /** Whether the encoder is currently inside a base 64 section. */
  private boolean m_bBase64mode;
  /** Number of bits (always fewer than six between calls) still pending. */
  private int m_nBitsToOutput;
  /** The pending bits, left-aligned inside a six bit group. */
  private int m_nSextet;

  /**
   * Creates an encoder for the passed charset.
   *
   * @param aCharset
   *        The owning charset; supplies the shift and unshift bytes and
   *        decides which characters can be encoded directly.
   * @param aBase64
   *        The base 64 helper used to turn sextets into bytes.
   * @param bStrict
   *        <code>true</code> to always emit the unshift byte when a base 64
   *        section ends.
   */
  UTF7StyleCharsetEncoder (@Nonnull final UTF7StyleCharset aCharset,
                           @Nonnull final Base64Util aBase64,
                           final boolean bStrict)
  {
    super (aCharset, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
    m_aCharset = aCharset;
    m_aBase64 = aBase64;
    m_bStrict = bStrict;
    m_nShift = aCharset.shift ();
    m_nUnshift = aCharset.unshift ();
  }

  /**
   * Returns the encoder to its initial state: outside base 64 mode with no
   * pending bits.
   */
  @Override
  protected void implReset ()
  {
    m_nBitsToOutput = 0;
    m_nSextet = 0;
    m_bBase64mode = false;
  }

  /**
   * {@inheritDoc}
   * <p>
   * When a base 64 section is still open, the pending sextet (if any) and the
   * unshift byte are written. As the specification requires,
   * <code>CoderResult.OVERFLOW</code> is returned if fewer than two bytes are
   * free in the output buffer. Be aware that on JDKs before Java 6, calling
   * flush again afterwards triggers a bug in
   * {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} which makes it
   * throw an <code>IllegalStateException</code>; that method is
   * <code>final</code> and therefore cannot be overridden.
   * </p>
   *
   * @see "JDK bug 6227608"
   * @param out
   *        The output byte buffer
   * @return A coder-result object describing the reason for termination
   */
  @Override
  protected CoderResult implFlush (final ByteBuffer out)
  {
    // Nothing is pending unless a base 64 section is open
    if (!m_bBase64mode)
      return CoderResult.UNDERFLOW;

    // Worst case: one partial sextet plus the unshift byte
    if (out.remaining () < 2)
      return CoderResult.OVERFLOW;

    if (m_nBitsToOutput != 0)
      out.put (m_aBase64.getChar (m_nSextet));
    out.put (m_nUnshift);
    return CoderResult.UNDERFLOW;
  }

  /**
   * {@inheritDoc}
   * <p>
   * This method may return <code>CoderResult.OVERFLOW</code> even though the
   * output buffer has enough room. That is done on purpose to make the broken
   * implementation of
   * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} call flush
   * (the buggy method is <code>final</code>, thus cannot be overridden).
   * </p>
   * <p>
   * <code>String.getBytes()</code>, however, fails when
   * <code>CoderResult.OVERFLOW</code> is returned, because it assumes that it
   * always allocates enough bytes (maxBytesPerChar * nr_of_chars). As an
   * extra check the size of the input buffer is therefore compared against
   * the size of the output buffer. A static flag records whether a broken
   * Java version is in use.
   * </p>
   * <p>
   * The trailing bytes cannot simply be written here, since more characters
   * than those available in the input buffer might still be waiting to be
   * encoded.
   * </p>
   *
   * @see "JDK bug 6221056"
   * @param in
   *        The input character buffer
   * @param out
   *        The output byte buffer
   * @return A coder-result object describing the reason for termination
   */
  @Override
  protected CoderResult encodeLoop (final CharBuffer in, final ByteBuffer out)
  {
    while (in.hasRemaining ())
    {
      // Demand room for four bytes before consuming a character, so that none
      // of the branches below can overrun the output buffer
      if (out.remaining () < 4)
        return CoderResult.OVERFLOW;

      final char cCurrent = in.get ();

      if (m_aCharset.canEncodeDirectly (cCurrent))
      {
        // Close an open base 64 section (if any), then emit the char as-is
        _unshift (out, cCurrent);
        out.put ((byte) cCurrent);
        continue;
      }

      if (m_bBase64mode || cCurrent != m_nShift)
      {
        _encodeBase64 (cCurrent, out);
        continue;
      }

      // The shift character itself, outside base 64 mode: written as the
      // shift byte immediately followed by the unshift byte
      out.put (m_nShift);
      out.put (m_nUnshift);
    }

    /*
     * Required to trick JDK 1.5 and earlier into flushing when using
     * Charset.encode(String), Charset.encode(CharBuffer) or
     * CharsetEncoder.encode(CharBuffer). Without this, the last few bytes may
     * be missing. The limit comparison skips the trick when the output buffer
     * has exactly the maxBytesPerChar * nr_of_chars size.
     */
    final boolean bForceFlush = m_bBase64mode &&
                                s_bUseUglyHackToForceCallToFlushInJava5 &&
                                out.limit () != MAX_BYTES_PER_CHAR * in.limit ();
    return bForceFlush ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
  }

  /**
   * Writes the bytes necessary to leave base 64 mode; does nothing when base
   * 64 mode is not active. A pending partial sextet is written first. The
   * unshift byte follows if the base 64 helper reports that it contains
   * <code>ch</code>, if <code>ch</code> equals the unshift byte, or if strict
   * mode is enabled. Afterwards the base 64 state is cleared.
   *
   * @param out
   *        The output byte buffer
   * @param ch
   *        The directly encodable character that will be written next
   */
  private void _unshift (final ByteBuffer out, final char ch)
  {
    if (m_bBase64mode)
    {
      if (m_nBitsToOutput != 0)
        out.put (m_aBase64.getChar (m_nSextet));

      final boolean bWriteUnshift = m_aBase64.contains (ch) || ch == m_nUnshift || m_bStrict;
      if (bWriteUnshift)
        out.put (m_nUnshift);

      m_nBitsToOutput = 0;
      m_nSextet = 0;
      m_bBase64mode = false;
    }
  }

  /**
   * Writes the bytes necessary to encode a character in base 64 mode,
   * entering base 64 mode (and writing the shift byte) first if necessary.
   * Every sextet that is fully determined is written; the fields
   * <code>m_nBitsToOutput</code> and <code>m_nSextet</code> remember the bits
   * that do not yet make up a complete sextet.
   *
   * @param ch
   *        The character to encode
   * @param out
   *        The output byte buffer
   */
  private void _encodeBase64 (final char ch, final ByteBuffer out)
  {
    if (!m_bBase64mode)
    {
      out.put (m_nShift);
      m_bBase64mode = true;
    }

    // The 16 bits of the character join whatever bits are still pending
    m_nBitsToOutput += 16;
    while (m_nBitsToOutput >= 6)
    {
      m_nBitsToOutput -= 6;
      // Pending high bits (first pass only) plus the next bits of the char
      m_nSextet = (m_nSextet + (ch >> m_nBitsToOutput)) & 0x3F;
      out.put (m_aBase64.getChar (m_nSextet));
      m_nSextet = 0;
    }

    // Keep the leftover low bits, left-aligned within a six bit group
    m_nSextet = (ch << (6 - m_nBitsToOutput)) & 0x3F;
  }
}