com.helger.charset.utf7.UTF7StyleCharsetEncoder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of ph-charset Show documentation
Java additional charsets
There is a newer version: 9.5.5
/**
 * Copyright (C) 2014-2020 Philip Helger (www.helger.com)
 * philip[at]helger[dot]com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.helger.charset.utf7;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

import javax.annotation.Nonnull;

/**
 * 
 * The CharsetEncoder used to encode both variants of the UTF-7 charset and the
 * modified-UTF-7 charset.
 * 
 * 
 * Please note this class does not behave strictly according to the
 * specification in Sun Java VMs before 1.6. This is done to get around
 * a bug in the implementation of
 * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}. Unfortunately,
 * that method cannot be overridden.
 * 
 *
 * @see JDK
 *      bug 6221056
 * @author Jaap Beetstra
 */
final class UTF7StyleCharsetEncoder extends CharsetEncoder
{
  private static final float AVG_BYTES_PER_CHAR = 1.5f;
  private static final float MAX_BYTES_PER_CHAR = 5.0f;

  private final AbstractUTF7StyleCharset m_aCharset;
  private final UTF7Base64Helper m_aBase64;
  private final byte m_nShift;
  private final byte m_nUnshift;
  private final boolean m_bStrict;
  private boolean m_bBase64mode;
  private int m_nBitsToOutput;
  private int m_nSextet;

  UTF7StyleCharsetEncoder (@Nonnull final AbstractUTF7StyleCharset aCharset,
                           @Nonnull final UTF7Base64Helper aBase64,
                           final boolean bStrict)
  {
    super (aCharset, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
    m_aCharset = aCharset;
    m_aBase64 = aBase64;
    m_bStrict = bStrict;
    m_nShift = aCharset.shift ();
    m_nUnshift = aCharset.unshift ();
  }

  @Override
  protected void implReset ()
  {
    m_bBase64mode = false;
    m_nSextet = 0;
    m_nBitsToOutput = 0;
  }

  /**
   * {@inheritDoc}
   * 
   * Note that this method might return CoderResult.OVERFLOW (as is
   * required by the specification) if insufficient space is available in the
   * output buffer. However, calling it again on JDKs before Java 6 triggers a
   * bug in {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} causing it
   * to throw an IllegalStateException (the buggy method is final,
   * thus cannot be overridden).
   * 
   *
   * @see 
   *      JDK bug 6227608
   * @param out
   *        The output byte buffer
   * @return A coder-result object describing the reason for termination
   */
  @Override
  protected CoderResult implFlush (final ByteBuffer out)
  {
    if (m_bBase64mode)
    {
      if (out.remaining () < 2)
        return CoderResult.OVERFLOW;
      if (m_nBitsToOutput != 0)
        out.put (m_aBase64.getChar (m_nSextet));
      out.put (m_nUnshift);
    }
    return CoderResult.UNDERFLOW;
  }

  /**
   * {@inheritDoc}
   * 
   * Note that this method might return CoderResult.OVERFLOW, even
   * though there is sufficient space available in the output buffer. This is
   * done to force the broken implementation of
   * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} to call flush
   * (the buggy method is final, thus cannot be overridden).
   * 
   * 
   * However, String.getBytes() fails if CoderResult.OVERFLOW is returned, since
   * this assumes it always allocates sufficient bytes (maxBytesPerChar *
   * nr_of_chars). Thus, as an extra check, the size of the input buffer is
   * compared against the size of the output buffer. A static variable is used
   * to indicate if a broken java version is used.
   * 
   * 
   * It is not possible to directly write the last few bytes, since more bytes
   * might be waiting to be encoded then those available in the input buffer.
   * 
   *
   * @see 
   *      JDK bug 6221056
   * @param in
   *        The input character buffer
   * @param out
   *        The output byte buffer
   * @return A coder-result object describing the reason for termination
   */
  @Override
  protected CoderResult encodeLoop (final CharBuffer in, final ByteBuffer out)
  {
    while (in.hasRemaining ())
    {
      if (out.remaining () < 4)
        return CoderResult.OVERFLOW;
      final char ch = in.get ();
      if (m_aCharset.canEncodeDirectly (ch))
      {
        _unshift (out, ch);
        out.put ((byte) ch);
      }
      else
        if (!m_bBase64mode && ch == m_nShift)
        {
          out.put (m_nShift);
          out.put (m_nUnshift);
        }
        else
          _encodeBase64 (ch, out);
    }

    return CoderResult.UNDERFLOW;
  }

  /**
   * 
   * Writes the bytes necessary to leave base 64 mode. This might include
   * an unshift character.
   * 
   *
   * @param out
   * @param ch
   */
  private void _unshift (final ByteBuffer out, final char ch)
  {
    if (!m_bBase64mode)
      return;
    if (m_nBitsToOutput != 0)
      out.put (m_aBase64.getChar (m_nSextet));
    if (m_aBase64.contains (ch) || ch == m_nUnshift || m_bStrict)
      out.put (m_nUnshift);
    m_bBase64mode = false;
    m_nSextet = 0;
    m_nBitsToOutput = 0;
  }

  /**
   * 
   * Writes the bytes necessary to encode a character in base 64 mode.
   * All bytes which are fully determined will be written. The fields
   * bitsToOutput and sextet are used to remember the
   * bytes not yet fully determined.
   * 
   *
   * @param out
   * @param ch
   */
  private void _encodeBase64 (final char ch, final ByteBuffer out)
  {
    if (!m_bBase64mode)
      out.put (m_nShift);
    m_bBase64mode = true;
    m_nBitsToOutput += 16;
    while (m_nBitsToOutput >= 6)
    {
      m_nBitsToOutput -= 6;
      m_nSextet += (ch >> m_nBitsToOutput);
      m_nSextet &= 0x3F;
      out.put (m_aBase64.getChar (m_nSextet));
      m_nSextet = 0;
    }
    m_nSextet = (ch << (6 - m_nBitsToOutput)) & 0x3F;
  }
}