com.ibm.icu.charset.CharsetEncoderICU Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j-charset Show documentation
icu4j-charset is a supplemental library for icu4j, implementing Java Charset SPI.
There is a newer version: 76.1
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
 *******************************************************************************
 * Copyright (C) 2006-2013, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 *******************************************************************************
 */

package com.ibm.icu.charset;

import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

import com.ibm.icu.impl.Assert;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;

/**
 * An abstract class that provides framework methods of decoding operations for concrete
 * subclasses.
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
public abstract class CharsetEncoderICU extends CharsetEncoder {

    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    byte[] errorBuffer = new byte[30];

    int errorBufferLength = 0;

    /** these are for encodeLoopICU */
    int fromUnicodeStatus;

    int fromUChar32;

    boolean useSubChar1;

    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */

    char[] preFromUArray = new char[EXT_MAX_UCHARS];

    int preFromUBegin;

    int preFromULength; /* negative: replay */

    char[] invalidUCharBuffer = new char[2];

    int invalidUCharLength;

    Object fromUContext;

    private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;

    private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;

    CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
        @Override
        public CoderResult call(CharsetEncoderICU encoder, Object context,
                CharBuffer source, ByteBuffer target, IntBuffer offsets,
                char[] buffer, int length, int cp, CoderResult cr) {
            if (cr.isUnmappable()) {
                return onUnmappableInput.call(encoder, context, source, target,
                        offsets, buffer, length, cp, cr);
            } else /* if (cr.isMalformed()) */ {
                return onMalformedInput.call(encoder, context, source, target,
                        offsets, buffer, length, cp, cr);
            }
            // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr);

        }
    };

    /*
     * Constructs a new encoder for the given charset
     *
     * @param cs
     *            for which the decoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }

    /**
     * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
     * that will convert a Unicode codepoint sequence to a byte sequence, but
     * the encoded byte sequence will round trip convert to a different
     * Unicode codepoint sequence.
     * @return true if the converter uses fallback, false otherwise.
     * @stable ICU 3.8
     */
    public boolean isFallbackUsed() {
        return useFallback;
    }

    /**
     * Sets whether this Encoder can use fallbacks?
     * @param usesFallback true if the user wants the converter to take
     *  advantage of the fallback mapping, false otherwise.
     * @stable ICU 3.8
     */
    public void setFallbackUsed(boolean usesFallback) {
        useFallback = usesFallback;
    }

    /*
     * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
     * @param c A codepoint
     */
    final boolean isFromUUseFallback(int c) {
        return (useFallback) || isUnicodePrivateUse(c);
    }

    /**
     * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
     */
    static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
        return (iUseFallback) || isUnicodePrivateUse(c);
    }

    private static final boolean isUnicodePrivateUse(int c) {
        // First test for U+E000 to optimize for the most common characters.
        return c >= 0xE000 && (c <= 0xF8FF ||
                c >= 0xF0000 && (c <= 0xFFFFD ||
                (c >= 0x100000 && c <= 0x10FFFD)));
    }

    /**
     * Sets the action to be taken if an illegal sequence is encountered
     *
     * @param newAction
     *            action to be taken
     * @exception IllegalArgumentException
     * @stable ICU 3.6
     */
    @Override
    protected void implOnMalformedInput(CodingErrorAction newAction) {
        onMalformedInput = getCallback(newAction);
    }

    /**
     * Sets the action to be taken if an illegal sequence is encountered
     *
     * @param newAction
     *            action to be taken
     * @exception IllegalArgumentException
     * @stable ICU 3.6
     */
    @Override
    protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
        onUnmappableInput = getCallback(newAction);
    }

    /**
     * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
     * You would normally call this twice to set both the malform and unmappable error. In this case,
     * newContext should remain the same since using a different newContext each time will negate the last
     * one used.
     * @param err CoderResult
     * @param newCallback CharsetCallback.Encoder
     * @param newContext Object
     * @stable ICU 4.0
     */
    public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
        if (err.isMalformed()) {
            onMalformedInput = newCallback;
        } else if (err.isUnmappable()) {
            onUnmappableInput = newCallback;
        } else {
            /* Error: Only malformed and unmappable are handled. */
        }

        if (fromUContext == null || !fromUContext.equals(newContext)) {
            setFromUContext(newContext);
        }
    }

    /**
     * Sets fromUContext used in callbacks.
     *
     * @param newContext Object
     * @exception IllegalArgumentException The object is an illegal argument for UContext.
     * @stable ICU 4.0
     */
    public final void setFromUContext(Object newContext) {
        fromUContext = newContext;
    }

    private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
        if (action == CodingErrorAction.REPLACE) {
            return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
        } else if (action == CodingErrorAction.IGNORE) {
            return CharsetCallback.FROM_U_CALLBACK_SKIP;
        } else /* if (action == CodingErrorAction.REPORT) */ {
            return CharsetCallback.FROM_U_CALLBACK_STOP;
        }
    }

    private static final CharBuffer EMPTY = CharBuffer.allocate(0);

    /**
     * Flushes any characters saved in the converter's internal buffer and
     * resets the converter.
     * @param out action to be taken
     * @return result of flushing action and completes the decoding all input.
     *         Returns CoderResult.UNDERFLOW if the action succeeds.
     * @stable ICU 3.6
     */
    @Override
    protected CoderResult implFlush(ByteBuffer out) {
        return encode(EMPTY, out, null, true);
    }

    /**
     * Resets the from Unicode mode of converter
     * @stable ICU 3.6
     */
    @Override
    protected void implReset() {
        errorBufferLength = 0;
        fromUnicodeStatus = 0;
        fromUChar32 = 0;
        fromUnicodeReset();
    }

    private void fromUnicodeReset() {
        preFromUBegin = 0;
        preFromUFirstCP = UConverterConstants.U_SENTINEL;
        preFromULength = 0;
    }

    /**
     * Encodes one or more chars. The default behaviour of the
     * converter is stop and report if an error in input stream is encountered.
     * To set different behaviour use @see CharsetEncoder.onMalformedInput()
     * @param in buffer to decode
     * @param out buffer to populate with decoded result
     * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
     *         action succeeds or more input is needed for completing the decoding action.
     * @stable ICU 3.6
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
            // The Java framework should have already substituted what was left.
            fromUChar32 = 0;
            //fromUnicodeReset();
            return CoderResult.UNDERFLOW;
        }
        in.position(in.position() + fromUCountPending());
        /* do the conversion */
        CoderResult ret = encode(in, out, null, false);
        setSourcePosition(in);
        /* No need to reset to keep the proper state of the encoder.
         if (ret.isUnderflow() && in.hasRemaining()) {
            // The Java framework is going to substitute what is left.
            //fromUnicodeReset();
        } */
        return ret;
    }

    /*
     * Implements ICU semantics of buffer management
     * @param source
     * @param target
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);

    /*
     * Implements ICU semantics for encoding the buffer
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets
     * @param flush true if, and only if, the invoker can provide no
     *  additional input bytes beyond those in the given buffer.
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    final CoderResult encode(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush) {

        /* check parameters */
        if (target == null || source == null) {
            throw new IllegalArgumentException();
        }

        /*
         * Make sure that the buffer sizes do not exceed the number range for
         * int32_t because some functions use the size (in units or bytes)
         * rather than comparing pointers, and because offsets are int32_t values.
         *
         * size_t is guaranteed to be unsigned and large enough for the job.
         *
         * Return with an error instead of adjusting the limits because we would
         * not be able to maintain the semantics that either the source must be
         * consumed or the target filled (unless an error occurs).
         * An adjustment would be targetLimit=t+0x7fffffff; for example.
         */

        /* flush the target overflow buffer */
        if (errorBufferLength > 0) {
            byte[] overflowArray;
            int i, length;

            overflowArray = errorBuffer;
            length = errorBufferLength;
            i = 0;
            do {
                if (target.remaining() == 0) {
                    /* the overflow buffer contains too much, keep the rest */
                    int j = 0;

                    do {
                        overflowArray[j++] = overflowArray[i++];
                    } while (i < length);

                    errorBufferLength = (byte) j;
                    return CoderResult.OVERFLOW;
                }

                /* copy the overflow contents to the target */
                target.put(overflowArray[i++]);
                if (offsets != null) {
                    offsets.put(-1); /* no source index available for old output */
                }
            } while (i < length);

            /* the overflow buffer is completely copied to the target */
            errorBufferLength = 0;
        }

        if (!flush && source.remaining() == 0 && preFromULength >= 0) {
            /* the overflow buffer is emptied and there is no new input: we are done */
            return CoderResult.UNDERFLOW;
        }

        /*
         * Do not simply return with a buffer overflow error if
         * !flush && t==targetLimit
         * because it is possible that the source will not generate any output.
         * For example, the skip callback may be called;
         * it does not output anything.
         */

        return fromUnicodeWithCallback(source, target, offsets, flush);

    }

    /*
     * Implementation note for m:n conversions
     *
     * While collecting source units to find the longest match for m:n conversion,
     * some source units may need to be stored for a partial match.
     * When a second buffer does not yield a match on all of the previously stored
     * source units, then they must be "replayed", i.e., fed back into the converter.
     *
     * The code relies on the fact that replaying will not nest -
     * converting a replay buffer will not result in a replay.
     * This is because a replay is necessary only after the _continuation_ of a
     * partial match failed, but a replay buffer is converted as a whole.
     * It may result in some of its units being stored again for a partial match,
     * but there will not be a continuation _during_ the replay which could fail.
     *
     * It is conceivable that a callback function could call the converter
     * recursively in a way that causes another replay to be stored, but that
     * would be an error in the callback function.
     * Such violations will cause assertion failures in a debug build,
     * and wrong output, but they will not cause a crash.
     */
    final CoderResult fromUnicodeWithCallback(CharBuffer source,
            ByteBuffer target, IntBuffer offsets, boolean flush) {
        int sBufferIndex;
        int sourceIndex;
        int errorInputLength;
        boolean converterSawEndOfInput, calledCallback;

        /* variables for m:n conversion */
        CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
        int replayArrayIndex = 0;
        CharBuffer realSource;
        boolean realFlush;

        CoderResult cr = CoderResult.UNDERFLOW;

        /* get the converter implementation function */
        sourceIndex = 0;

        if (preFromULength >= 0) {
            /* normal mode */
            realSource = null;
            realFlush = false;
        } else {
            /*
             * Previous m:n conversion stored source units from a partial match
             * and failed to consume all of them.
             * We need to "replay" them from a temporary buffer and convert them first.
             */
            realSource = source;
            realFlush = flush;

            //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
            replayArray.put(preFromUArray, 0, -preFromULength);
            source = replayArray;
            source.position(replayArrayIndex);
            source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
            flush = false;

            preFromULength = 0;
        }

        /*
         * loop for conversion and error handling
         *
         * loop {
         *   convert
         *   loop {
         *     update offsets
         *     handle end of input
         *     handle errors/call callback
         *   }
         * }
         */
        for (;;) {
            /* convert */
            cr = encodeLoop(source, target, offsets, flush);
            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv.preFromULength==0 because a replay (<0) will cause
             * s 0) {

                        /*
                         * if a converter handles offsets and updates the offsets
                         * pointer at the end, then offset should not change
                         * here;
                         * however, some converters do not handle offsets at all
                         * (sourceIndex<0) or may not update the offsets pointer
                         */
                 /*       offsets.position(offsets.position() + length);
                    }

                    if (sourceIndex >= 0) {
                        sourceIndex += (int) (source.position());
                    }
                } */

                if (preFromULength < 0) {
                    /*
                     * switch the source to new replay units (cannot occur while replaying)
                     * after offset handling and before end-of-input and callback handling
                     */
                    if (realSource == null) {
                        realSource = source;
                        realFlush = flush;

                        //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
                        replayArray.put(preFromUArray, 0, -preFromULength);

                        source = replayArray;
                        source.position(replayArrayIndex);
                        source.limit(replayArrayIndex - preFromULength);
                        flush = false;
                        if ((sourceIndex += preFromULength) < 0) {
                            sourceIndex = -1;
                        }

                        preFromULength = 0;
                    } else {
                        /* see implementation note before _fromUnicodeWithCallback() */
                        //agljport:todo U_ASSERT(realSource==NULL);
                        Assert.assrt(realSource == null);
                    }
                }

                /* update pointers */
                sBufferIndex = source.position();
                if (cr.isUnderflow()) {
                    if (sBufferIndex < source.limit()) {
                        /*
                         * continue with the conversion loop while there is still input left
                         * (continue converting by breaking out of only the inner loop)
                         */
                        break;
                    } else if (realSource != null) {
                        /* switch back from replaying to the real source and continue */
                        source = realSource;
                        flush = realFlush;
                        sourceIndex = source.position();
                        realSource = null;
                        break;
                    } else if (flush && fromUChar32 != 0) {
                        /*
                         * the entire input stream is consumed
                         * and there is a partial, truncated input sequence left
                         */

                        /* inject an error and continue with callback handling */
                        //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
                        cr = CoderResult.malformedForLength(1);
                        calledCallback = false; /* new error condition */
                    } else {
                        /* input consumed */
                        if (flush) {
                            /*
                             * return to the conversion loop once more if the flush
                             * flag is set and the conversion function has not
                             * successfully processed the end of the input yet
                             *
                             * (continue converting by breaking out of only the inner loop)
                             */
                            if (!converterSawEndOfInput) {
                                break;
                            }

                            /* reset the converter without calling the callback function */
                            implReset();
                        }

                        /* done successfully */
                        return cr;
                    }
                }

                /*U_FAILURE(*err) */
                {

                    if (calledCallback || cr.isOverflow()
                            || (!cr.isMalformed() && !cr.isUnmappable())) {
                        /*
                         * the callback did not or cannot resolve the error:
                         * set output pointers and return
                         *
                         * the check for buffer overflow is redundant but it is
                         * a high-runner case and hopefully documents the intent
                         * well
                         *
                         * if we were replaying, then the replay buffer must be
                         * copied back into the UConverter
                         * and the real arguments must be restored
                         */
                        if (realSource != null) {
                            int length;

                            //agljport:todo U_ASSERT(cnv.preFromULength==0);

                            length = source.remaining();
                            if (length > 0) {
                                //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
                                source.get(preFromUArray, 0, length);
                                preFromULength = (byte) -length;
                            }
                        }
                        return cr;
                    }
                }

                /* callback handling */
                {
                    int codePoint;

                    /* get and write the code point */
                    codePoint = fromUChar32;
                    errorInputLength = UTF16.append(invalidUCharBuffer, 0,
                            fromUChar32);
                    invalidUCharLength = errorInputLength;

                    /* set the converter state to deal with the next character */
                    fromUChar32 = 0;

                    /* call the callback function */
                    cr = fromCharErrorBehaviour.call(this, fromUContext,
                            source, target, offsets, invalidUCharBuffer,
                            invalidUCharLength, codePoint, cr);
                }

                /*
                 * loop back to the offset handling
                 *
                 * this flag will indicate after offset handling
                 * that a callback was called;
                 * if the callback did not resolve the error, then we return
                 */
                calledCallback = true;
            }
        }
    }

    /*
     * Ascertains if a given Unicode code point (32bit value for handling surrogates)
     * can be converted to the target encoding. If the caller wants to test if a
     * surrogate pair can be converted to target encoding then the
     * responsibility of assembling the int value lies with the caller.
     * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
     *      *  while(i
     * or
     *      *  String src = new String(mySource);
     *  int i,codepoint;
     *  boolean passed = false;
     *  while(i0xfff)? 2:1;
     *      if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
     *          passed = false;
     *      }
     *  }
     * 
     *
     * @param codepoint Unicode code point as int value
     * @return true if a character can be converted
     */
    /* TODO This is different from Java's canEncode(char) API.
     * ICU's API should implement getUnicodeSet,
     * and override canEncode(char) which queries getUnicodeSet.
     * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
     */
    /*public boolean canEncode(int codepoint) {
        return true;
    }*/
    /**
     * Overrides super class method
     * @stable ICU 3.6
     */
    @Override
    public boolean isLegalReplacement(byte[] repl) {
        return true;
    }

    /*
     * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
     * @param cnv
     * @param bytesArray
     * @param bytesBegin
     * @param bytesLength
     * @param out
     * @param offsets
     * @param sourceIndex
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
            byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
            IntBuffer offsets, int sourceIndex) {

        //write bytes
        int obl = bytesLength;
        CoderResult cr = CoderResult.UNDERFLOW;
        int bytesLimit = bytesBegin + bytesLength;
        try {
            for (; bytesBegin < bytesLimit;) {
                out.put(bytesArray[bytesBegin]);
                bytesBegin++;
            }
            // success
            bytesLength = 0;
        } catch (BufferOverflowException ex) {
            cr = CoderResult.OVERFLOW;
        }

        if (offsets != null) {
            while (obl > bytesLength) {
                offsets.put(sourceIndex);
                --obl;
            }
        }
        //write overflow
        cnv.errorBufferLength = bytesLimit - bytesBegin;
        if (cnv.errorBufferLength > 0) {
            int index = 0;
            while (bytesBegin < bytesLimit) {
                cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
            }
            cr = CoderResult.OVERFLOW;
        }
        return cr;
    }

    /*
     * Returns the number of chars held in the converter's internal state
     * because more input is needed for completing the conversion. This function is
     * useful for mapping semantics of ICU's converter interface to those of iconv,
     * and this information is not needed for normal conversion.
     * @return The number of chars in the state. -1 if an error is encountered.
     */
    /*public*/int fromUCountPending() {
        if (preFromULength > 0) {
            return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
        } else if (preFromULength < 0) {
            return -preFromULength;
        } else if (fromUChar32 > 0) {
            return 1;
        } else if (preFromUFirstCP > 0) {
            return UTF16.getCharCount(preFromUFirstCP);
        }
        return 0;
    }

    /**
     *
     * @param source
     */
    private final void setSourcePosition(CharBuffer source) {

        // ok was there input held in the previous invocation of encodeLoop
        // that resulted in output in this invocation?
        source.position(source.position() - fromUCountPending());
    }

    /*
     * Write the codepage substitution character.
     * Subclasses to override this method.
     * For stateful converters, it is typically necessary to handle this
     * specifically for the converter in order to properly maintain the state.
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
            ByteBuffer target, IntBuffer offsets) {
        CharsetICU cs = (CharsetICU) encoder.charset();
        byte[] sub = encoder.replacement();
        if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
            return CharsetEncoderICU.fromUWriteBytes(encoder,
                    new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
                            .position());
        } else {
            return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
                    sub.length, target, offsets, source.position());
        }
    }

    /*
     * Write the characters to target.
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
            CharBuffer source, ByteBuffer target, IntBuffer offsets) {
        CoderResult cr = CoderResult.UNDERFLOW;

        /* This is a fun one.  Recursion can occur - we're basically going to
         * just retry shoving data through the same converter. Note, if you got
         * here through some kind of invalid sequence, you maybe should emit a
         * reset sequence of some kind. Since this IS an actual conversion,
         * take care that you've changed the callback or the data, or you'll
         * get an infinite loop.
         */

        int oldTargetPosition = target.position();
        int offsetIndex = source.position();

        cr = encoder.encode(source, target, null, false); /* no offsets and no flush */

        if (offsets != null) {
            while (target.position() != oldTargetPosition) {
                offsets.put(offsetIndex);
                oldTargetPosition++;
            }
        }

        /* Note, if you did something like used a stop subcallback, things would get interesting.
         * In fact, here's where we want to return the partially consumed in-source!
         */
        if (cr.isOverflow()) {
            /* Overflowed target. Now, we'll write into the charErrorBuffer.
             * It's a fixed size. If we overflow it...Hm
             */

            /* start the new target at the first free slot in the error buffer */
            int errBuffLen = encoder.errorBufferLength;
            ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
            newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
            encoder.errorBufferLength = 0;

            encoder.encode(source, newTarget, null, false);

            encoder.errorBuffer = newTarget.array();
            encoder.errorBufferLength = newTarget.position();
        }

        return cr;
    }

    /**
     * 
     * Handles a common situation where a character has been read and it may be
     * a lead surrogate followed by a trail surrogate. This method can change
     * the source position and will modify fromUChar32.
     * 
     *
     * 
     * If null is returned, then there was success in reading a
     * surrogate pair, the codepoint is stored in fromUChar32 and
     * fromUChar32 should be reset (to 0) after being read.
     * 
     *
     * @param source
     *            The encoding source.
     * @param lead
     *            A character that may be the first in a surrogate pair.
     * @return CoderResult.malformedForLength(1) or
     *         CoderResult.UNDERFLOW if there is a problem, or
     *         null if there isn't.
     * @see #handleSurrogates(CharBuffer, char)
     * @see #handleSurrogates(char[], int, int, char)
     */
    final CoderResult handleSurrogates(CharBuffer source, char lead) {
        if (!UTF16.isLeadSurrogate(lead)) {
            fromUChar32 = lead;
            return CoderResult.malformedForLength(1);
        }

        if (!source.hasRemaining()) {
            fromUChar32 = lead;
            return CoderResult.UNDERFLOW;
        }

        char trail = source.get();

        if (!UTF16.isTrailSurrogate(trail)) {
            fromUChar32 = lead;
            source.position(source.position() - 1);
            return CoderResult.malformedForLength(1);
        }

        fromUChar32 = UCharacter.getCodePoint(lead, trail);
        return null;
    }

    /**
     * 
     * Same as handleSurrogates(CharBuffer, char), but with arrays. As an added
     * requirement, the calling method must also increment the index if this method returns
     * null.
     * 
     *
     *
     * @param source
     *            The encoding source.
     * @param lead
     *            A character that may be the first in a surrogate pair.
     * @return CoderResult.malformedForLength(1) or
     *         CoderResult.UNDERFLOW if there is a problem, or null if
     *         there isn't.
     * @see #handleSurrogates(CharBuffer, char)
     * @see #handleSurrogates(char[], int, int, char)
     */
    final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
            int sourceLimit, char lead) {
        if (!UTF16.isLeadSurrogate(lead)) {
            fromUChar32 = lead;
            return CoderResult.malformedForLength(1);
        }

        if (sourceIndex >= sourceLimit) {
            fromUChar32 = lead;
            return CoderResult.UNDERFLOW;
        }

        char trail = sourceArray[sourceIndex];

        if (!UTF16.isTrailSurrogate(trail)) {
            fromUChar32 = lead;
            return CoderResult.malformedForLength(1);
        }

        fromUChar32 = UCharacter.getCodePoint(lead, trail);
        return null;
    }

    /**
     * Returns the maxCharsPerByte value for the Charset that created this encoder.
     * @return maxCharsPerByte
     * @stable ICU 4.8
     */
    public final float maxCharsPerByte() {
        return ((CharsetICU)(this.charset())).maxCharsPerByte;
    }

    /**
     * Calculates the size of a buffer for conversion from Unicode to a charset.
     * The calculated size is guaranteed to be sufficient for this conversion.
     *
     * It takes into account initial and final non-character bytes that are output
     * by some converters.
     * It does not take into account callbacks which output more than one charset
     * character sequence per call, like escape callbacks.
     * The default (substitution) callback only outputs one charset character sequence.
     *
     * @param length Number of chars to be converted.
     * @param maxCharSize Return value from maxBytesPerChar for the converter
     *                    that will be used.
     * @return Size of a buffer that will be large enough to hold the output of bytes
     *
     * @stable ICU 49
     */
    public static int getMaxBytesForString(int length, int maxCharSize) {
        return ((length + 10) * maxCharSize);
    }

}
    

    

    
            
    
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api