All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.charset.CharsetISO2022 Maven / Gradle / Ivy

Go to download

icu4j-charset is a supplemental library for icu4j, implementing Java Charset SPI.

There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2008-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;

import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;
import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

class CharsetISO2022 extends CharsetICU {
    private UConverterDataISO2022 myConverterData;
    private int variant;           // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}

    private static final byte[] SHIFT_IN_STR    = { 0x0f };
//    private static final byte[] SHIFT_OUT_STR   = { 0x0e };

    private static final byte CR    = 0x0D;
    private static final byte LF    = 0x0A;
/*
    private static final byte H_TAB = 0x09;
    private static final byte SPACE = 0x20;
*/
    private static final char HWKANA_START  = 0xff61;
    private static final char HWKANA_END    = 0xff9f;

    /*
     * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     * as bytes 21..7E. (Subtract 0x80.)
     * 96-character  sets with native bit values A0..FF are encoded in ISO 2022
     * as bytes 20..7F. (Subtract 0x80.)
     * Do not encode C1 control codes with native bytes 80..9F
     * as bytes 00..1F (C0 control codes).
     */
/*
    private static final char GR94_START    = 0xa1;
    private static final char GR94_END      = 0xfe;
*/
    private static final char GR96_START    = 0xa0;
    private static final char GR96_END      = 0xff;

    /* for ISO-2022-JP and -CN implementations */
    // typedef enum {
        /* shared values */
        private static final byte INVALID_STATE = -1;
        private static final byte ASCII         = 0;

        private static final byte SS2_STATE = 0x10;
        private static final byte SS3_STATE = 0x11;

        /* JP */
        private static final byte ISO8859_1 = 1;
        private static final byte ISO8859_7 = 2;
        private static final byte JISX201   = 3;
        private static final byte JISX208   = 4;
        private static final byte JISX212   = 5;
        private static final byte GB2312    = 6;
        private static final byte KSC5601   = 7;
        private static final byte HWKANA_7BIT  = 8; /* Halfwidth Katakana 7 bit */

        /* CN */
        /* the first few enum constants must keep their values because they corresponds to myConverterArray[] */
        private static final byte GB2312_1  = 1;
        private static final byte ISO_IR_165= 2;
        private static final byte CNS_11643 = 3;

        /*
         * these are used in StateEnum and ISO2022State variables,
         * but CNS_11643 must be used to index into myConverterArray[]
         */
        private static final byte CNS_11643_0 = 0x20;
        private static final byte CNS_11643_1 = 0x21;
        private static final byte CNS_11643_2 = 0x22;
        private static final byte CNS_11643_3 = 0x23;
        private static final byte CNS_11643_4 = 0x24;
        private static final byte CNS_11643_5 = 0x25;
        private static final byte CNS_11643_6 = 0x26;
        private static final byte CNS_11643_7 = 0x27;
    // } StateEnum;


    public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
        super(icuCanonicalName, javaCanonicalName, aliases);

        myConverterData = new UConverterDataISO2022();

        int versionIndex = icuCanonicalName.indexOf("version=");
        int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();

        myConverterData.version = version;

        if (icuCanonicalName.indexOf("locale=ja") > 0) {
            ISO2022InitJP(version);
        } else if (icuCanonicalName.indexOf("locale=zh") > 0) {
            ISO2022InitCN(version);
        } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {
            ISO2022InitKR(version);
        }

        myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
        myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
    }

    private void ISO2022InitJP(int version) {
        variant = ISO_2022_JP;

        maxBytesPerChar = 6;
        minBytesPerChar = 1;
        maxCharsPerByte = 1;
        // open the required converters and cache them
        if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {
            myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;
        }
        // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;
        myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData;
        if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) {
            myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;
        }
        if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) {
            myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
        }
        if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {
            myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;
        }

        // create a generic CharsetMBCS object
        myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
    }

    private void ISO2022InitCN(int version) {
        variant = ISO_2022_CN;

        maxBytesPerChar = 8;
        minBytesPerChar = 1;
        maxCharsPerByte = 1;
        // open the required coverters and cache them.
        myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
        if (version == 1) {
            myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;
        }
        myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;

        // create a generic CharsetMBCS object
        myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
    }

    private void ISO2022InitKR(int version) {
        variant = ISO_2022_KR;

        maxBytesPerChar = 8;
        minBytesPerChar = 1;
        maxCharsPerByte = 1;

        if (version == 1) {
            myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
            myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
        } else {
            myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");
        }

        myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
        myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
    }

    /*
     * ISO 2022 control codes must not be converted from Unicode
     * because they would mess up the byte stream.
     * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
     * corresponding to SO, SI, and ESC.
     */
    private static boolean IS_2022_CONTROL(int c) {
        return (c<0x20) && (((1<= 0xa1a1) &&
                ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
            return (value - 0x8080); /* shift down to 21..7e byte range */
        } else {
            return 0; /* not valid for ISO 2022 */
        }
    }

    /*
     * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that.
     *
     * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
     * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
     * unchanged.
     *
    private static int _2022ToGR94DBCS(int value) {
        int returnValue = value + 0x8080;

        if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) &&
                ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
            return returnValue;
        } else {
            return value;
        }
    }*/

    /* is the StateEnum charset value for a DBCS charset? */
    private static boolean IS_JP_DBCS(byte cs) {
        return ((JISX208 <= cs) && (cs <= KSC5601));
    }

    private static short CSM(short cs) {
        return (short)(1<= 0x10000 && !sharedData.mbcs.hasSupplementary()) {
            return 0;
        }
        /* convert the Unicode code point in c into codepage bytes */
        table = sharedData.mbcs.fromUnicodeTable;
        /* get the byte for the output */
        value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
        /* get the byte for the output */
        retval[0] = value & 0xff;
        if (value >= 0xf00) {
            return 1; /* roundtrip */
        } else if (useFallback ? value>=0x800 : value>=0xc00) {
            return -1; /* fallback taken */
        } else {
            return 0; /* no mapping */
        }
    }

    /*
     * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
     * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
     *
     * Note: The converter uses some leniency:
     * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
     *   all versions, not just JIS7 and JIS8.
     * - ICU does not distinguish between different version so of JIS X 0208.
     */
    private static final short jpCharsetMasks[] = {
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)),
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)),
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
        (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7))
    };

/*
    // typedef enum {
        private static final byte ASCII1 = 0;
        private static final byte LATIN1 = 1;
        private static final byte SBCS   = 2;
        private static final byte DBCS   = 3;
        private static final byte MBCS   = 4;
        private static final byte HWKANA = 5;
    // } Cnv2002Type;
*/

    private static class ISO2022State {
        private byte []cs;  /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
        private byte g;     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
        private byte prevG; /* g before single shift (SS2 or SS3) */

        ISO2022State() {
            cs = new byte[4];
        }

        void reset() {
            Arrays.fill(cs, (byte)0);
            g = 0;
            prevG = 0;
        }
    }

//    private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;
    private static final byte UCNV_2022_MAX_CONVERTERS  = 10;

    private static class UConverterDataISO2022 {
        UConverterSharedData []myConverterArray;
        CharsetEncoderMBCS currentEncoder;
        CharsetDecoderMBCS currentDecoder;
        CharsetMBCS currentConverter;
        ISO2022State toU2022State;
        ISO2022State fromU2022State;
        int key;
        int version;
        boolean isEmptySegment;

        UConverterDataISO2022() {
            myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];
            toU2022State = new ISO2022State();
            fromU2022State = new ISO2022State();
            key = 0;
            version = 0;
            isEmptySegment = false;
        }

        void reset() {
            toU2022State.reset();
            fromU2022State.reset();
            isEmptySegment = false;
        }
    }

    private static final byte ESC_2022 = 0x1B; /* ESC */

    // typedef enum {
        private static final byte INVALID_2022              = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */
        private static final byte VALID_NON_TERMINAL_2022   =  0;  /* so far corresponds to a valid iso 2022 escape sequence */
        private static final byte VALID_TERMINAL_2022       =  1;  /* corresponds to a valid iso 2022 escape sequence */
        private static final byte VALID_MAYBE_TERMINAL_2022 =  2;  /* so far matches one iso 2022 escape sequence, but by adding
                                                                     more characters might match another escape sequence */
    // } UCNV_TableStates_2022;

    /*
     * The way these state transition arrays work is:
     * ex : ESC$B is the sequence for JISX208
     *      a) First Iteration: char is ESC
     *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
     *             int x = normalize_esq_chars_2022[27] which is equal to 1
     *         ii) Search for this value in escSeqStateTable_Key_2022[]
     *             value of x is stored at escSeqStateTable_Key_2022[0]
     *        iii) Save this index as offset
     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
     *             escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022
     *      b) Switch on this state and continue to next char
     *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
     *             which is normalize_esq_chars_2022[36] == 4
     *         ii) x is currently 1(from above)
     *             x<<=5 -- x is now 32
     *             x+=normalize_esq_chars_2022[36]
     *             now x is 36
     *        iii) Search for this value in escSeqStateTable_Key_2022[]
     *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
     *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
     *      c) Switch on this state and continue to next char
     *          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
     *         ii) x is currently 36 (from above)
     *             x<<=5 -- x is now 1152
     *             x+= normalize_esq_chars_2022[66]
     *             now x is 1161
     *        iii) Search for this value in escSeqStateTable_Key_2022[]
     *             value of x is stored at escSeqStateTable_Key_2022[21], so offset is 21
     *         iv) Get state of this sequence from escSeqStateTable_Value_2022[1]
     *             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
     *          v) Get the converter name from escSeqStateTable_Result_2022[21] which is JISX208
     */
     /* Below are the 3 arrays depicting a state transition table */
     private static final byte normalize_esq_chars_2022[] = {
         /* 0       1       2       3       4       5       6       7       8       9 */
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      1,      0,      0,
            0,      0,      0,      0,      0,      0,      4,      7,     29,      0,
            2,     24,     26,     27,      0,      3,     23,      6,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      5,      8,      9,     10,     11,     12,
           13,     14,     15,     16,     17,     18,     19,     20,     25,     28,
            0,      0,     21,      0,      0,      0,      0,      0,      0,      0,
           22,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      0,      0
     };

     private static final short MAX_STATES_2022 = 74;
     private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = {
         /* 0        1          2         3        4          5         6         7         8         9 */
            1,      34,        36,       39,      55,        57,       60,       61,     1093,     1096,
         1097,    1098,      1099,     1100,     1101,     1102,     1103,     1104,     1105,     1106,
         1109,    1154,      1157,     1160,     1161,     1176,     1178,     1179,     1254,     1257,
         1768,    1773,      1957,    35105,    36933,    36936,    36937,    36938,    36939,    36940,
        36942,   36943,     36944,    36945,    36946,    36947,    36948,    37640,    37642,    37644,
        37646,   37711,     37744,    37745,    37746,    37747,    37748,    40133,    40136,    40138,
        40139,   40140,     40141,  1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,
     35947631, 35947635, 35947636, 35947638
     };

     private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = {
         /*         0                           1                           2                           3                       4               */
         VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
       VALID_MAYBE_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,   VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
         VALID_NON_TERMINAL_2022,   VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,    VALID_NON_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022,
             VALID_TERMINAL_2022,       VALID_TERMINAL_2022,        VALID_TERMINAL_2022,        VALID_TERMINAL_2022
     };

     /* Type def for refactoring changeState_2022 code */
     // typedef enum {
         private static final byte ISO_2022_JP = 1;
         private static final byte ISO_2022_KR = 2;
         private static final byte ISO_2022_CN = 3;
     // } Variant2022;

    /* const UConverterSharedData _ISO2022Data; */
    //private UConverterSharedData _ISO2022JPData;
    //private UConverterSharedData _ISO2022KRData;
    //private UConverterSharedData _ISO2022CNData;

    /******************** to unicode ********************/
    /****************************************************
     * Recognized escape sequenes are
     * (B  ASCII
     * .A  ISO-8859-1
     * .F  ISO-8859-7
     * (J  JISX-201
     * (I  JISX-201
     * $B  JISX-208
     * $@  JISX-208
     * $(D JISX-212
     * $A  GB2312
     * $(C KSC5601
     */
    private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = {
        /*     0               1               2               3               4               5               6               7               8               9    */
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
                ASCII,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        JISX201,    HWKANA_7BIT,        JISX201,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,        JISX208,         GB2312,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
            ISO8859_1,      ISO8859_7,        JISX208,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,        KSC5601,        JISX212,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE
    };

    private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = {
        /*     0               1               2               3               4               5               6               7               8               9    */
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,      SS2_STATE,      SS3_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,       GB2312_1,  INVALID_STATE,     ISO_IR_165,
          CNS_11643_1,    CNS_11643_2,    CNS_11643_3,    CNS_11643_4,    CNS_11643_5,    CNS_11643_6,    CNS_11643_7,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE,
        INVALID_STATE,  INVALID_STATE,  INVALID_STATE,  INVALID_STATE
    };

    /* runs through a state machine to determine the escape sequence - codepage correspondence */
    @SuppressWarnings("fallthrough")
    private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {
        CoderResult err = CoderResult.UNDERFLOW;
        boolean DONE = false;
        byte value;
        int key[] = {myConverterData.key};
        int offset[] = {0};
        int initialToULength = decoder.toULength;
        byte c;
        int malformLength = 0;

        value = VALID_NON_TERMINAL_2022;
        while (source.hasRemaining()) {
            c = source.get();
            malformLength++;
            decoder.toUBytesArray[decoder.toULength++] = c;
            value = getKey_2022(c, key, offset);

            switch(value) {

            case VALID_NON_TERMINAL_2022:
                /* continue with the loop */
                break;

            case VALID_TERMINAL_2022:
                key[0] = 0;
                DONE = true;
                break;

            case INVALID_2022:
                DONE = true;
                break;

            case VALID_MAYBE_TERMINAL_2022:
                /* not ISO_2022 itself, finish here */
                value = VALID_TERMINAL_2022;
                key[0] = 0;
                DONE = true;
                break;
            }
            if (DONE) {
                break;
            }
        }
// DONE:
        myConverterData.key = key[0];

        if (value == VALID_NON_TERMINAL_2022) {
            /* indicate that the escape sequence is incomplete: key !=0 */
            return err;
        } else if (value == INVALID_2022) {
            err = CoderResult.malformedForLength(malformLength);
        } else /* value == VALID_TERMINAL_2022 */ {
            switch (var) {
            case ISO_2022_JP: {
                byte tempState = nextStateToUnicodeJP[offset[0]];
                switch (tempState) {
                case INVALID_STATE:
                    err = CoderResult.malformedForLength(malformLength);
                    break;
                case SS2_STATE:
                    if (myConverterData.toU2022State.cs[2] != 0) {
                        if (myConverterData.toU2022State.g < 2) {
                            myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
                        }
                        myConverterData.toU2022State.g = 2;
                    } else {
                        /* illegal to have SS2 before a matching designator */
                        err = CoderResult.malformedForLength(malformLength);
                    }
                    break;
                /* case SS3_STATE: not used in ISO-2022-JP-x */
                case ISO8859_1:
                case ISO8859_7:
                    if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
                        err = CoderResult.unmappableForLength(malformLength);
                    } else {
                        /* G2 charset for SS2 */
                        myConverterData.toU2022State.cs[2] = tempState;
                    }
                    break;
                default:
                    if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
                        err = CoderResult.unmappableForLength(source.position() - 1);
                    } else {
                        /* G0 charset */
                        myConverterData.toU2022State.cs[0] = tempState;
                    }
                    break;
                } // end of switch
                break;
            }
            case ISO_2022_CN: {
                byte tempState = nextStateToUnicodeCN[offset[0]];
                switch (tempState) {
                case INVALID_STATE:
                    err = CoderResult.unmappableForLength(malformLength);
                    break;
                case SS2_STATE:
                    if (myConverterData.toU2022State.cs[2] != 0) {
                        if (myConverterData.toU2022State.g < 2) {
                            myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
                        }
                        myConverterData.toU2022State.g = 2;
                    } else {
                        /* illegal to have SS2 before a matching designator */
                        err = CoderResult.malformedForLength(malformLength);
                    }
                    break;
                case SS3_STATE:
                    if (myConverterData.toU2022State.cs[3] != 0) {
                        if (myConverterData.toU2022State.g < 2) {
                            myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
                        }
                        myConverterData.toU2022State.g = 3;
                    } else {
                        /* illegal to have SS3 before a matching designator */
                        err = CoderResult.malformedForLength(malformLength);
                    }
                    break;
                case ISO_IR_165:
                    if (myConverterData.version == 0) {
                        err = CoderResult.unmappableForLength(malformLength);
                        break;
                    }
                    /* fall through */
                case GB2312_1:
                    /* fall through */
                case CNS_11643_1:
                    myConverterData.toU2022State.cs[1] = tempState;
                    break;
                case CNS_11643_2:
                    myConverterData.toU2022State.cs[2] = tempState;
                    break;
                default:
                    /* other CNS 11643 planes */
                    if (myConverterData.version == 0) {
                        err = CoderResult.unmappableForLength(source.position() - 1);
                    } else {
                        myConverterData.toU2022State.cs[3] = tempState;
                    }
                    break;
                } //end of switch
            }
            break;
            case ISO_2022_KR:
                if (offset[0] == 0x30) {
                    /* nothing to be done, just accept this one escape sequence */
                } else {
                    err = CoderResult.unmappableForLength(malformLength);
                }
                break;
            default:
                err = CoderResult.malformedForLength(malformLength);
                break;
            } // end of switch
        }
        if (!err.isError()) {
            decoder.toULength = 0;
        } else if (err.isMalformed()) {
            if (decoder.toULength > 1) {
                /*
                 * Ticket 5691: consistent illegal sequences:
                 * - We include at least the first byte (ESC) in the illegal sequence.
                 * - If any of the non-initial bytes could be the start of a character,
                 *   we stop the illegal sequece before the first one of those.
                 *   In escape sequences, all following bytes are "printable", that is,
                 *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
                 *   they are valid single/lead bytes.
                 *   For simplicity, we always only report the initial ESC byte as the
                 *   illegal sequence and back out all other bytes we looked at.
                 */
                /* Back out some bytes. */
                int backOutDistance = decoder.toULength - 1;
                int bytesFromThisBuffer = decoder.toULength - initialToULength;
                if (backOutDistance <= bytesFromThisBuffer) {
                    /* same as initialToULength<=1 */
                    source.position(source.position() - backOutDistance);
                } else {
                    /* Back out bytes from the previous buffer: Need to replay them. */
                    decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
                    /* same as -(initalToULength-1) */
                    /* preToULength is negative! */
                    for (int i = 0; i < -(decoder.preToULength); i++) {
                        decoder.preToUArray[i] = decoder.toUBytesArray[i+1];
                    }
                    source.position(source.position() - bytesFromThisBuffer);
                }
                decoder.toULength = 1;
            }
        }

        return err;
    }

    private static byte getKey_2022(byte c, int[]key, int[]offset) {
        int togo;
        int low = 0;
        int hi = MAX_STATES_2022;
        int oldmid = 0;

        togo = normalize_esq_chars_2022[c&UConverterConstants.UNSIGNED_BYTE_MASK];

        if (togo == 0) {
            /* not a valid character anywhere in an escape sequence */
            key[0] = 0;
            offset[0] = 0;
            return INVALID_2022;
        }
        togo = (key[0] << 5) + togo;

        while (hi != low) { /* binary search */
            int mid = (hi+low) >> 1; /* Finds median */

            if (mid == oldmid) {
                break;
            }

            if (escSeqStateTable_Key_2022[mid] > togo) {
                hi = mid;
            } else if (escSeqStateTable_Key_2022[mid] < togo) {
                low = mid;
            } else /* we found it */ {
                key[0] = togo;
                offset[0] = mid;
                return escSeqStateTable_Value_2022[mid];
            }
            oldmid = mid;
        }
        return INVALID_2022;
    }

    /*
     * To Unicode Callback helper function
     */
    private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {
        CoderResult err = CoderResult.UNDERFLOW;
        if (sourceChar > 0xff) {
            cnv.toUBytesArray[0] = (byte)(sourceChar>>8);
            cnv.toUBytesArray[1] = (byte)sourceChar;
            cnv.toULength = 2;
        } else {
            cnv.toUBytesArray[0] = (byte)sourceChar;
            cnv.toULength = 1;
        }

        if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {
            err = CoderResult.unmappableForLength(1);
        } else {
            err = CoderResult.malformedForLength(1);
        }

        return err;
    }

    /****************************ISO-2022-JP************************************/
    private class CharsetDecoderISO2022JP extends CharsetDecoderICU {
        public CharsetDecoderISO2022JP(CharsetICU cs) {
            super(cs);
        }

        @Override
        protected void implReset() {
            super.implReset();
            myConverterData.reset();
        }
        /*
         * Map 00..7F to Unicode according to JIS X 0201.
         * */
        private int jisx201ToU(int value) {
            if (value < 0x5c) {
                return value;
            } else if (value == 0x5c) {
                return 0xa5;
            } else if (value == 0x7e) {
                return 0x203e;
            } else { /* value <= 0x7f */
                return value;
            }
        }
        /*
         * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS.
         * If either byte is outside 21..7E make sure that the result is not valid
         * for Shift-JIS so that the converter catches it.
         * Some invalid byte values already turn into equally invalid Shift-JIS
         * byte values and need not be tested explicitly.
         */
        private void _2022ToSJIS(char c1, char c2, byte []bytes) {
            if ((c1&1) > 0) {
                ++c1;
                if (c2 <= 0x5f) {
                    c2 += 0x1f;
                } else if (c2 <= 0x7e) {
                    c2 += 0x20;
                } else {
                    c2 = 0; /* invalid */
                }
            } else {
                if ((c2 >= 0x21) && (c2 <= 0x7e)) {
                    c2 += 0x7e;
                } else {
                    c2 = 0; /* invalid */
                }
            }

            c1 >>=1;
            if (c1 <= 0x2f) {
                c1 += 0x70;
            } else if (c1 <= 0x3f) {
                c1 += 0xb0;
            } else {
                c1 = 0; /* invalid */
            }
            bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1);
            bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);
        }

        @Override
        @SuppressWarnings("fallthrough")
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            boolean gotoGetTrail = false;
            boolean gotoEscape = false;
            CoderResult err = CoderResult.UNDERFLOW;
            byte []tempBuf = new byte[2];
            int targetUniChar = 0x0000;
            int mySourceChar = 0x0000;
            int mySourceCharTemp = 0x0000; // use for getTrail label call.
            byte cs; /* StateEnum */
            byte csTemp= 0; // use for getTrail label call.

            if (myConverterData.key != 0) {
                /* continue with a partial escape sequence */
                // goto escape;
                gotoEscape = true;
            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
                /* continue with a partial double-byte character */
                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
                toULength = 0;
                cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
                // goto getTrailByte;
                mySourceCharTemp = 0x99;
                gotoGetTrail = true;
            }

            while (source.hasRemaining() || gotoEscape || gotoGetTrail) {
                // This code is here for the goto escape label call above.
                if (gotoEscape) {
                    mySourceCharTemp = ESC_2022;
                }

                targetUniChar = UConverterConstants.missingCharMarker;

                if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
                    if (!gotoEscape && !gotoGetTrail) {
                        mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
                        mySourceCharTemp = mySourceChar;
                    }

                    switch (mySourceCharTemp) {
                    case UConverterConstants.SI:
                        if (myConverterData.version == 3) {
                            myConverterData.toU2022State.g = 0;
                            continue;
                        } else {
                            /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                            myConverterData.isEmptySegment = false;
                            break;
                        }

                    case UConverterConstants.SO:
                        if (myConverterData.version == 3) {
                            /* JIS7: switch to G1 half-width Katakana */
                            myConverterData.toU2022State.cs[1] = HWKANA_7BIT;
                            myConverterData.toU2022State.g = 1;
                            continue;
                        } else {
                            /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                            myConverterData.isEmptySegment = false; /* reset this, we have a different error */
                            break;
                        }

                    case ESC_2022:
                        if (!gotoEscape) {
                            source.position(source.position() - 1);
                        } else {
                            gotoEscape = false;
                        }
// escape:
                        {
                            int mySourceBefore = source.position();
                            int toULengthBefore = this.toULength;

                            err = changeState_2022(this, source, variant);

                            /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
                            if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
                                err = CoderResult.malformedForLength(source.position() - mySourceBefore);
                                this.toULength = toULengthBefore + (source.position() - mySourceBefore);
                            }
                        }

                        /* invalid or illegal escape sequence */
                        if(err.isError()){
                            myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
                            return err;
                        }
                        /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
                        if(myConverterData.key == 0) {
                            myConverterData.isEmptySegment = true;
                        }

                        continue;
                    /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
                    case CR:
                        /* falls through */
                    case LF:
                        /* automatically reset to single-byte mode */
                        if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {
                            myConverterData.toU2022State.cs[0] = ASCII;
                        }
                        myConverterData.toU2022State.cs[2] = 0;
                        myConverterData.toU2022State.g = 0;
                        /* falls through */
                    default :
                        /* convert one or two bytes */
                        myConverterData.isEmptySegment = false;
                        cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
                        csTemp = cs;
                        if (gotoGetTrail) {
                            csTemp = (byte)0x99;
                        }
                        if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {
                            /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
                            targetUniChar = mySourceChar + (HWKANA_START - 0xa1);

                            /* return from a single-shift state to the previous one */
                            if (myConverterData.toU2022State.g >= 2) {
                                myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
                            }
                        } else {
                            switch(csTemp) {
                            case ASCII:
                                if (mySourceChar <= 0x7f) {
                                    targetUniChar = mySourceChar;
                                }
                                break;
                            case ISO8859_1:
                                if (mySourceChar <= 0x7f) {
                                    targetUniChar = mySourceChar + 0x80;
                                }
                                /* return from a single-shift state to the prevous one */
                                myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
                                break;
                            case ISO8859_7:
                                if (mySourceChar <= 0x7f) {
                                    /* convert mySourceChar+0x80 to use a normal 8-bit table */
                                    targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,
                                            mySourceChar+0x80);
                                }
                                /* return from a single-shift state to the previous one */
                                myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
                                break;
                            case JISX201:
                                if (mySourceChar <= 0x7f) {
                                    targetUniChar = jisx201ToU(mySourceChar);
                                }
                                break;
                            case HWKANA_7BIT:
                                if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {
                                    /* 7-bit halfwidth Katakana */
                                    targetUniChar = mySourceChar + (HWKANA_START - 0x21);
                                    break;
                                }
                            default :
                                /* G0 DBCS */
                                if (gotoGetTrail || source.hasRemaining()) {
// getTrailByte:
                                    int tmpSourceChar;
                                    gotoGetTrail = false;
                                    short trailByte;
                                    boolean leadIsOk, trailIsOk;

                                    trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
                                    /*
                                     * Ticket 5691: consistent illegal sequences:
                                     * - We include at least the first byte in the illegal sequence.
                                     * - If any of the non-initial bytes could be the start of a character,
                                     *   we stop the illegal sequence before the first one of those.
                                     *
                                     * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
                                     * an ESC/SO/SI, we report only the first byte as the illegal sequence.
                                     * Otherwise we convert or report the pair of bytes.
                                     */
                                    leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
                                    trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
                                    if (leadIsOk && trailIsOk) {
                                        source.get();
                                        tmpSourceChar = (mySourceChar << 8) | trailByte;
                                        if (cs == JISX208) {
                                            _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);
                                            mySourceChar = tmpSourceChar;
                                        } else {
                                            /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
                                            mySourceChar = tmpSourceChar;
                                            if (cs == KSC5601) {
                                                tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
                                            }
                                            tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
                                            tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
                                        }
                                        targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);
                                    } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
                                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
                                        source.get();
                                        /* add another bit so that the code below writes 2 bytes in case of error */
                                        mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                                    }
                                } else {
                                    toUBytesArray[0] = (byte)mySourceChar;
                                    toULength = 1;
                                    // goto endloop
                                    return err;
                                }
                            } /* end of inner switch */
                        }
                        break;
                    } /* end of outer switch */

                    if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {
                        if (offsets != null) {
                            offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
                        }
                        target.put((char)targetUniChar);
                    } else if (targetUniChar > UConverterConstants.missingCharMarker) {
                        /* disassemble the surrogate pair and write to output */
                        targetUniChar -= 0x0010000;
                        target.put((char)(0xd800 + (char)(targetUniChar>>10)));
                        target.position(target.position()-1);
                        if (offsets != null) {
                            offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
                        }
                        target.get();
                        if (target.hasRemaining()) {
                            target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
                            target.position(target.position()-1);
                            if (offsets != null) {
                                offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
                            }
                            target.get();
                        } else {
                            charErrorBufferArray[charErrorBufferLength++] =
                                (char)(0xdc00+(char)(targetUniChar&0x3ff));
                        }
                    } else {
                        /* Call the callback function */
                        err = toUnicodeCallback(this, mySourceChar, targetUniChar);
                        break;
                    }
                } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */
                    err = CoderResult.OVERFLOW;
                    break;
                }
            }
//endloop:
            return err;
        }
    } // end of class CharsetDecoderISO2022JP

    /****************************ISO-2022-CN************************************/
    private class CharsetDecoderISO2022CN extends CharsetDecoderICU {
        public CharsetDecoderISO2022CN(CharsetICU cs) {
            super(cs);
        }

        @Override
        protected void implReset() {
            super.implReset();
            myConverterData.reset();
        }

        @Override
        @SuppressWarnings("fallthrough")
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
            byte[] tempBuf = new byte[3];
            int targetUniChar = 0x0000;
            int mySourceChar = 0x0000;
            int mySourceCharTemp = 0x0000;
            boolean gotoEscape = false;
            boolean gotoGetTrailByte = false;

            if (myConverterData.key != 0) {
                /* continue with a partial escape sequence */
                // goto escape;
                gotoEscape = true;
            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
                /* continue with a partial double-byte character */
                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
                toULength = 0;
                targetUniChar = UConverterConstants.missingCharMarker;
                // goto getTrailByte
                gotoGetTrailByte = true;
            }

            while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
                targetUniChar = UConverterConstants.missingCharMarker;

                if (target.hasRemaining() || gotoEscape) {
                    if (gotoEscape) {
                        mySourceChar = ESC_2022; // goto escape label
                        mySourceCharTemp = mySourceChar;
                    } else if (gotoGetTrailByte) {
                        mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp to go to default
                    } else {
                        mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
                        mySourceCharTemp = mySourceChar;
                    }

                    switch (mySourceCharTemp) {
                    case UConverterConstants.SI:
                        myConverterData.toU2022State.g = 0;
                        if (myConverterData.isEmptySegment) {
                            myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
                            err = CoderResult.malformedForLength(1);
                            this.toUBytesArray[0] = (byte)mySourceChar;
                            this.toULength = 1;
                            return err;
                        }
                        continue;

                    case UConverterConstants.SO:
                        if (myConverterData.toU2022State.cs[1] != 0) {
                            myConverterData.toU2022State.g = 1;
                            myConverterData.isEmptySegment = true;  /* Begin a new segment, empty so far */
                            continue;
                        } else {
                            /* illegal to have SO before a matching designator */
                            myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */
                            break;
                        }

                    case ESC_2022:
                        if (!gotoEscape) {
                            source.position(source.position()-1);
                        }
// escape label
                        gotoEscape = false;
                        {
                            int mySourceBefore = source.position();
                            int toULengthBefore = this.toULength;

                            err = changeState_2022(this, source, ISO_2022_CN);

                            /* After SO there must be at least one character before a designator (designator error handled separately) */
                            if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
                                err = CoderResult.malformedForLength(source.position() - mySourceBefore);
                                this.toULength = toULengthBefore + (source.position() - mySourceBefore);
                            }
                        }

                        /* invalid or illegal escape sequence */
                        if(err.isError()){
                            myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
                            return err;
                        }
                        continue;

                    /*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
                    case CR:
                        /* falls through */
                    case LF:
                        myConverterData.toU2022State.reset();
                        /* falls through */
                    default:
                        /* converter one or two bytes */
                        myConverterData.isEmptySegment = false;
                        if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {
                            if (source.hasRemaining() || gotoGetTrailByte) {
                                UConverterSharedData cnv;
                                byte tempState;
                                int tempBufLen;
                                boolean leadIsOk, trailIsOk;
                                short trailByte;
// getTrailByte: label
                                gotoGetTrailByte = false; // reset gotoGetTrailByte

                                trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
                                /*
                                 * Ticket 5691: consistent illegal sequences:
                                 * - We include at least the first byte in the illegal sequence.
                                 * - If any of the non-initial bytes could be the start of a character,
                                 *   we stop the illegal sequence before the first one of those.
                                 *
                                 * In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is
                                 * an ESC/SO/SI, we report only the first byte as the illegal sequence.
                                 * Otherwise we convert or report the pair of bytes.
                                 */
                                leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
                                trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
                                if (leadIsOk && trailIsOk) {
                                    source.get();
                                    tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
                                    if (tempState > CNS_11643_0) {
                                        cnv = myConverterData.myConverterArray[CNS_11643];
                                        tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
                                        tempBuf[1] = (byte)mySourceChar;
                                        tempBuf[2] = (byte)trailByte;
                                        tempBufLen = 3;
                                    } else {
                                        cnv = myConverterData.myConverterArray[tempState];
                                        tempBuf[0] = (byte)mySourceChar;
                                        tempBuf[1] = (byte)trailByte;
                                        tempBufLen = 2;
                                    }
                                    ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
                                    tempBuffer.limit(tempBufLen);
                                    targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
                                    mySourceChar = (mySourceChar << 8) | trailByte;

                                } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
                                    /* report a pair of illegal bytes if the second byte is not a DBCS starter */
                                    source.get();
                                    /* add another bit so that the code below writes 2 bytes in case of error */
                                    mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                                }
                                if (myConverterData.toU2022State.g >= 2) {
                                    /* return from a single-shift state to the previous one */
                                    myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
                                }
                            } else {
                                toUBytesArray[0] = (byte)mySourceChar;
                                toULength = 1;
                                // goto endloop;
                                return err;
                            }
                        } else {
                            if (mySourceChar <= 0x7f) {
                                targetUniChar = (char)mySourceChar;
                            }
                        }
                        break;
                    }
                    if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) {
                        if (offsets != null) {
                            offsets.array()[target.position()] = source.remaining() - (mySourceChar <= 0xff ? 1 : 2);
                        }
                        target.put((char)targetUniChar);
                    } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) {
                        /* disassemble the surrogate pair and write to output */
                        targetUniChar -= 0x0010000;
                        target.put((char)(0xd800+(char)(targetUniChar>>10)));
                        if (offsets != null) {
                            offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
                        }
                        if (target.hasRemaining()) {
                            target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
                            if (offsets != null) {
                                offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
                            }
                        } else {
                            charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));
                        }
                    } else {
                        /* Call the callback function */
                        err = toUnicodeCallback(this, mySourceChar, targetUniChar);
                        break;
                    }

                } else {
                    err = CoderResult.OVERFLOW;
                    break;
                }
            }

            return err;
        }

    }
    /************************ ISO-2022-KR ********************/
    private class CharsetDecoderISO2022KR extends CharsetDecoderICU {
        public CharsetDecoderISO2022KR(CharsetICU cs) {
            super(cs);
        }

        @Override
        protected void implReset() {
            super.implReset();
            setInitialStateToUnicodeKR();
            myConverterData.reset();
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
            int mySourceChar = 0x0000;
            int targetUniChar = 0x0000;
            byte[] tempBuf = new byte[2];
            boolean usingFallback;
            boolean gotoGetTrailByte = false;
            boolean gotoEscape = false;

            if (myConverterData.version == 1) {
                return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);
            }

            /* initialize state */
            usingFallback = isFallbackUsed();

            if (myConverterData.key != 0) {
                /* continue with a partial escape sequence */
                gotoEscape = true;
            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
                /* continue with a partial double-byte character */
                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
                toULength = 0;
                gotoGetTrailByte = true;
            }

            while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
                if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
                    if (!gotoGetTrailByte && !gotoEscape) {
                        mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
                    }

                    if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
                        myConverterData.toU2022State.g = 0;
                        if (myConverterData.isEmptySegment) {
                            myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
                            err = CoderResult.malformedForLength(1);
                            this.toUBytesArray[0] = (byte)mySourceChar;
                            this.toULength = 1;
                            return err;
                        }
                        /* consume the source */
                        continue;
                    } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {
                        myConverterData.toU2022State.g = 1;
                        myConverterData.isEmptySegment = true;
                        /* consume the source */
                        continue;
                    } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {
                        if (!gotoEscape) {
                            source.position(source.position()-1);
                        }
// escape label
                        gotoEscape = false; // reset gotoEscape flag
                        myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */
                        err = changeState_2022(this, source, ISO_2022_KR);
                        if (err.isError()) {
                            return err;
                        }
                        continue;
                    }
                    myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */
                    if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
                        if (source.hasRemaining() || gotoGetTrailByte) {
                            boolean leadIsOk, trailIsOk;
                            short trailByte;
// getTrailByte label
                            gotoGetTrailByte = false; // reset gotoGetTrailByte flag

                            trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
                            targetUniChar = UConverterConstants.missingCharMarker;
                            /*
                             * Ticket 5691: consistent illegal sequences:
                             * - We include at least the first byte in the illegal sequence.
                             * - If any of the non-initial bytes could be the start of a character,
                             *   we stop the illegal sequence before the first one of those.
                             *
                             * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
                             * an ESC/SO/SI, we report only the first byte as the illegal sequence.
                             * Otherwise we convert or report the pair of bytes.
                             */
                            leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
                            trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
                            if (leadIsOk && trailIsOk) {
                                source.get();
                                tempBuf[0] = (byte)(mySourceChar + 0x80);
                                tempBuf[1] = (byte)(trailByte + 0x80);
                                targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
                                mySourceChar = (char)((mySourceChar << 8) | trailByte);
                            } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
                                /* report a pair of illegal bytes if the second byte is not a DBCS starter */
                                source.get();
                                /* add another bit so that the code below writes 2 bytes in case of error */
                                mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);
                            }
                        } else {
                            toUBytesArray[0] = (byte)mySourceChar;
                            toULength = 1;
                            break;
                        }
                    } else if (mySourceChar <= 0x7f) {
                        int savedSourceLimit = source.limit();
                        int savedSourcePosition = source.position();
                        source.limit(source.position());
                        source.position(source.position()-1);
                        targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
                        source.limit(savedSourceLimit);
                        source.position(savedSourcePosition);
                    } else {
                        targetUniChar = 0xffff;
                    }
                    if (targetUniChar < 0xfffe) {
                        target.put((char)targetUniChar);
                        if (offsets != null) {
                            offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
                        }
                    } else {
                        /* Call the callback function */
                        err = toUnicodeCallback(this, mySourceChar, targetUniChar);
                        break;
                    }
                } else {
                    err = CoderResult.OVERFLOW;
                    break;
                }
            }

            return err;
        }

        protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
            int sourceStart;
            int sourceLimit;
            int argSource;
            int argTarget;
            boolean gotoEscape = false;
            int oldSourceLimit;

            /* remember the original start of the input for offsets */
            sourceStart = argSource = source.position();

            if (myConverterData.key != 0) {
                /* continue with a partial escape sequence */
                gotoEscape = true;
            }

            while (gotoEscape || (!err.isError() && source.hasRemaining())) {
                if (!gotoEscape) {
                    /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */
                    int oldSourcePos = source.position();
                    sourceLimit = getEndOfBuffer_2022(source);
                    source.position(oldSourcePos);
                    if (source.position() != sourceLimit) {
                        /*
                         * get the current partial byte sequence
                         *
                         * it needs to be moved between the public and the subconverter
                         * so that the conversion frameword, which only sees the public
                         * converter, can handle truncated and illegal input etc.
                         */
                        if (toULength > 0) {
                            cnv.toUBytesArray = toUBytesArray.clone();
                        }
                        cnv.toULength = toULength;

                        /*
                         * Convert up to the end of the input, or to before the next escape character.
                         * Does not handle conversion extensions because the preToU[] state etc.
                         * is not copied.
                         */
                        argTarget = target.position();
                        oldSourceLimit = source.limit(); // save the old source limit change to new one
                        source.limit(sourceLimit);
                        err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
                        source.limit(oldSourceLimit); // restore source limit;
                        if (offsets != null && sourceStart != argSource) {
                            /* update offsets to base them on the actual start of the input */
                            int delta = argSource - sourceStart;
                            while (argTarget < target.position()) {
                                int currentOffset = offsets.get();
                                offsets.position(offsets.position()-1);
                                if (currentOffset >= 0) {
                                    offsets.put(currentOffset + delta);
                                    offsets.position(offsets.position()-1);
                                }
                                offsets.get();
                                target.get();
                            }
                        }
                        argSource = source.position();

                        /* copy input/error/overflow buffers */
                        if (cnv.toULength > 0) {
                            toUBytesArray = cnv.toUBytesArray.clone();
                        }
                        toULength = cnv.toULength;

                        if (err.isOverflow()) {
                            if (cnv.charErrorBufferLength > 0) {
                                charErrorBufferArray = cnv.charErrorBufferArray.clone();
                            }
                            charErrorBufferLength = cnv.charErrorBufferLength;
                            cnv.charErrorBufferLength = 0;
                        }
                    }

                    if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
                        return err;
                    }
                }
// escape label
                gotoEscape = false;
                err = changeState_2022(this, source, ISO_2022_KR);
            }
            return err;
        }
    }

    /******************** from unicode **********************/
    /* preference order of JP charsets */
    private final static byte []jpCharsetPref = {
        ASCII,
        JISX201,
        ISO8859_1,
        JISX208,
        ISO8859_7,
        JISX212,
        GB2312,
        KSC5601,
        HWKANA_7BIT
    };
    /*
     * The escape sequences must be in order of the enum constants like JISX201 = 3,
     * not in order of jpCharsetPref[]!
     */
    private final static byte [][]escSeqChars = {
            { 0x1B, 0x28, 0x42},        /* (B  ASCII       */
            { 0x1B, 0x2E, 0x41},        /* .A  ISO-8859-1  */
            { 0x1B, 0x2E, 0x46},        /* .F  ISO-8859-7  */
            { 0x1B, 0x28, 0x4A},        /* (J  JISX-201    */
            { 0x1B, 0x24, 0x42},        /* $B  JISX-208    */
            { 0x1B, 0x24, 0x28, 0x44},  /* $(D JISX-212    */
            { 0x1B, 0x24, 0x41},        /* $A  GB2312      */
            { 0x1B, 0x24, 0x28, 0x43},  /* $(C KSC5601     */
            { 0x1B, 0x28, 0x49}         /* (I  HWKANA_7BIT */
    };
    /*
     * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
     * Katakana.
     * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
     * because Shift-JIS roundtrips half-width Katakana to single bytes.
     * These were the only fallbacks in ICU's jisx-208.ucm file.
     */
    private final static char []hwkana_fb = {
        0x2123,  /* U+FF61 */
        0x2156,
        0x2157,
        0x2122,
        0x2126,
        0x2572,
        0x2521,
        0x2523,
        0x2525,
        0x2527,
        0x2529,
        0x2563,
        0x2565,
        0x2567,
        0x2543,
        0x213C,  /* U+FF70 */
        0x2522,
        0x2524,
        0x2526,
        0x2528,
        0x252A,
        0x252B,
        0x252D,
        0x252F,
        0x2531,
        0x2533,
        0x2535,
        0x2537,
        0x2539,
        0x253B,
        0x253D,
        0x253F,  /* U+FF80 */
        0x2541,
        0x2544,
        0x2546,
        0x2548,
        0x254A,
        0x254B,
        0x254C,
        0x254D,
        0x254E,
        0x254F,
        0x2552,
        0x2555,
        0x2558,
        0x255B,
        0x255E,
        0x255F,  /* U+FF90 */
        0x2560,
        0x2561,
        0x2562,
        0x2564,
        0x2566,
        0x2568,
        0x2569,
        0x256A,
        0x256B,
        0x256C,
        0x256D,
        0x256F,
        0x2573,
        0x212B,
        0x212C   /* U+FF9F */
    };

    protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} };
    /****************************ISO-2022-JP************************************/
    private class CharsetEncoderISO2022JP extends CharsetEncoderICU {
        public CharsetEncoderISO2022JP(CharsetICU cs) {
            super(cs, fromUSubstitutionChar[0]);
        }

        @Override
        protected void implReset() {
            super.implReset();
            myConverterData.reset();
        }
        /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
        private int jisx201FromU(int value) {
            if (value <= 0x7f) {
                if (value != 0x5c && value != 0x7e) {
                    return value;
                }
            } else if (value == 0xa5) {
                return 0x5c;
            } else if (value == 0x203e) {
                return 0x7e;
            }
            return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);
        }

        /*
         * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
         * to JIS X 0208, and convert it to a pair of 21..7E bytes.
         * Return 0 if the byte pair is out of range.
         */
        private int _2022FromSJIS(int value) {
            short trail;

            if (value > 0xEFFC) {
                return 0; /* beyond JIS X 0208 */
            }

            trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);

            value &= 0xff00; /* lead byte */
            if (value <= 0x9f00) {
                value -= 0x7000;
            } else { /* 0xe000 <= value <= 0xef00 */
                value -= 0xb000;
            }

            value <<= 1;

            if (trail <= 0x9e) {
                value -= 0x100;
                if (trail <= 0x7e) {
                    value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK);
                } else {
                    value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK);
                }
            } else { /* trail <= 0xfc */
                value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);
            }

            return value;
        }
        /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
        @Override
        CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
                CharBuffer source, ByteBuffer target, IntBuffer offsets){
                CoderResult err = CoderResult.UNDERFLOW;
                byte[] buffer = new byte[8];
                int i = 0;
                byte[] subchar;
                subchar = encoder.replacement();

                byte cs;
                if (myConverterData.fromU2022State.g == 1) {
                    /* JIS7: switch from G1 to G0 */
                    myConverterData.fromU2022State.g = 0;
                    buffer[i++] = UConverterConstants.SI;
                }
                cs = myConverterData.fromU2022State.cs[0];

                if (cs != ASCII && cs != JISX201) {
                    /* not in ASCII or JIS X 0201: switch to ASCII */
                    myConverterData.fromU2022State.cs[0] = ASCII;
                    buffer[i++] = 0x1B;
                    buffer[i++] = 0x28;
                    buffer[i++] = 0x42;
                }

                buffer[i++] = subchar[0];

                err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);

                return err;
            }

        @Override
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
            int sourceChar;
            byte cs, g;
            int choiceCount;
            int len, outLen;
            byte[] choices = new byte[10];
            int targetValue = 0;
            boolean usingFallback;
            byte[] buffer = new byte[8];
            boolean getTrail = false; // use for getTrail label
            int oldSourcePos; // for proper error handling

            choiceCount = 0;

            /* check if the last codepoint of previous buffer was a lead surrogate */
            if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
                getTrail = true;
            }

            while (getTrail || source.hasRemaining()) {
                if (getTrail || target.hasRemaining()) {
                    oldSourcePos = source.position();
                    if (!getTrail) { /* skip if going to getTrail label */
                        sourceChar = source.get();
                    }
                    /* check if the char is a First surrogate */
                    if (getTrail || UTF16.isSurrogate((char)sourceChar)) {
                        if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
// getTrail:
                            if (getTrail) {
                                getTrail = false;
                            }
                            /* look ahead to find the trail surrogate */
                            if (source.hasRemaining()) {
                                /* test the following code unit */
                                char trail = source.get();
                                /* go back to the previous position */
                                source.position(source.position()-1);
                                if (UTF16.isTrailSurrogate(trail)) {
                                    source.get();
                                    sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
                                    fromUChar32 = 0x00;
                                    /* convert this supplementary code point */
                                    /* exit this condition tree */
                                } else {
                                    /* this is an unmatched lead code unit (1st surrogate) */
                                    /* callback(illegal) */
                                    err = CoderResult.malformedForLength(1);
                                    fromUChar32 = sourceChar;
                                    break;
                                }
                            } else {
                                /* no more input */
                                fromUChar32 = sourceChar;
                                break;
                            }
                        } else {
                            /* this is an unmatched trail code unit (2nd surrogate) */
                            /* callback(illegal) */
                            err = CoderResult.malformedForLength(1);
                            fromUChar32 = sourceChar;
                            break;
                        }
                    }

                    /* do not convert SO/SI/ESC */
                    if (IS_2022_CONTROL(sourceChar)) {
                        /* callback(illegal) */
                        err = CoderResult.malformedForLength(1);
                        fromUChar32 = sourceChar;
                        break;
                    }

                    /* do the conversion */

                    if (choiceCount == 0) {
                        char csm;
                        /*
                         * The csm variable keeps track of which charsets are allowed
                         * and not used yet while building the choices[].
                         */
                        csm = (char)jpCharsetMasks[myConverterData.version];
                        choiceCount = 0;

                        /* JIS7/8: try single-byte half-width Katakana before JISX208 */
                        if (myConverterData.version == 3 || myConverterData.version == 4) {
                            choices[choiceCount++] = HWKANA_7BIT;
                        }
                        /* Do not try single-bit half-width Katakana for other versions. */
                        csm &= ~CSM(HWKANA_7BIT);

                        /* try the current G0 charset */
                        choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];
                        csm &= ~CSM(cs);

                        /* try the current G2 charset */
                        if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {
                            choices[choiceCount++] = cs;
                            csm &= ~CSM(cs);
                        }

                        /* try all the other charsets */
                        for (int i = 0; i < jpCharsetPref.length; i++) {
                            cs = jpCharsetPref[i];
                            if ((CSM(cs) & csm) != 0) {
                                choices[choiceCount++] = cs;
                                csm &= ~CSM(cs);
                            }
                        }
                    }

                    cs = g = 0;
                    /*
                     * len==0:  no mapping found yet
                     * len<0:   found a fallback result:  continue looking for a roundtrip but no further fallbacks
                     * len>0:   found a roundtrip result, done
                     */
                    len = 0;
                    /*
                     * We will turn off usingFallBack after finding a fallback,
                     * but we still get fallbacks from PUA code points as usual.
                     * Therefore, we will also need to check that we don't overwrite
                     * an early fallback with a later one.
                     */
                    usingFallback = useFallback;

                    for (int i = 0; i < choiceCount && len <= 0; i++) {
                        int[] value = new int[1];
                        int len2;
                        byte cs0 = choices[i];
                        switch (cs0) {
                        case ASCII:
                            if (sourceChar <= 0x7f) {
                                targetValue = sourceChar;
                                len = 1;
                                cs = cs0;
                                g = 0;
                            }
                            break;
                        case ISO8859_1:
                            if (GR96_START <= sourceChar && sourceChar <= GR96_END) {
                                targetValue = sourceChar - 0x80;
                                len = 1;
                                cs = cs0;
                                g = 2;
                            }
                            break;
                        case HWKANA_7BIT:
                            if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
                                if (myConverterData.version == 3) {
                                    /* JIS7: use G1 (SO) */
                                    /* Shift U+FF61..U+FF9F to bytes 21..5F. */
                                    targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21)));
                                    len = 1;
                                    myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */
                                    g = 1;
                                } else if (myConverterData.version == 4) {
                                    /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
                                    /* Shift U+FF61..U+FF9F to bytes A1..DF. */
                                    targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));
                                    len = 1;

                                    cs = myConverterData.fromU2022State.cs[0];
                                    if (IS_JP_DBCS(cs)) {
                                        /* switch from a DBCS charset to JISX201 */
                                        cs = JISX201;
                                    }
                                    /* else stay in the current G0 charset */
                                    g = 0;
                                }
                                /* else do not use HWKANA_7BIT with other versions */
                            }
                            break;
                        case JISX201:
                            /* G0 SBCS */
                            value[0] = jisx201FromU(sourceChar);
                            if (value[0] <= 0x7f) {
                                targetValue = value[0];
                                len = 1;
                                cs = cs0;
                                g = 0;
                                usingFallback = false;
                            }
                            break;
                        case JISX208:
                            /* G0 DBCS from JIS table */
                            myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
                            myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
                            len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
                            //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
                            if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */
                                value[0] = _2022FromSJIS(value[0]);
                                if (value[0] != 0) {
                                    targetValue = value[0];
                                    len = len2;
                                    cs = cs0;
                                    g = 0;
                                    usingFallback = false;
                                }
                            } else if (len == 0 && usingFallback  && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
                                targetValue = hwkana_fb[sourceChar - HWKANA_START];
                                len = -2;
                                cs = cs0;
                                g = 0;
                                usingFallback = false;
                            }
                            break;
                        case ISO8859_7:
                            /* G0 SBCS forced to 7-bit output */
                            len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback);
                            if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) {
                                targetValue = value[0] - 0x80;
                                len = len2;
                                cs = cs0;
                                g = 2;
                                usingFallback = false;
                            }
                            break;
                        default :
                            /* G0 DBCS */
                            myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
                            myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
                            len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
                            //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
                            if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
                                if (cs0 == KSC5601) {
                                    /*
                                     * Check for valid bytes for the encoding scheme.
                                     * This is necessary because the sub-converter (windows-949)
                                     * has a broader encoding scheme than is valid for 2022.
                                     */
                                    value[0] = _2022FromGR94DBCS(value[0]);
                                    if (value[0] == 0) {
                                        break;
                                    }
                                }
                                targetValue = value[0];
                                len = len2;
                                cs = cs0;
                                g = 0;
                                usingFallback = false;
                            }
                            break;
                        }
                    }

                    if (len != 0) {
                        if (len < 0) {
                            len = -len; /* fallback */
                        }
                        outLen = 0;

                        /* write SI if necessary (only for JIS7 */
                        if (myConverterData.fromU2022State.g == 1 && g == 0) {
                            buffer[outLen++] = UConverterConstants.SI;
                            myConverterData.fromU2022State.g = 0;
                        }

                        /* write the designation sequence if necessary */
                        if (cs != myConverterData.fromU2022State.cs[g]) {
                            for (int i = 0; i < escSeqChars[cs].length; i++) {
                                buffer[outLen++] = escSeqChars[cs][i];
                            }
                            myConverterData.fromU2022State.cs[g] = cs;

                            /* invalidate the choices[] */
                            choiceCount = 0;
                        }

                        /* write the shift sequence if necessary */
                        if (g != myConverterData.fromU2022State.g) {
                            switch (g) {
                            /* case 0 handled before writing escapes */
                            case 1:
                                buffer[outLen++] = UConverterConstants.SO;
                                myConverterData.fromU2022State.g = 1;
                                break;
                            default : /* case 2 */
                                buffer[outLen++] = 0x1b;
                                buffer[outLen++] = 0x4e;
                                break;
                            /* case 3: no SS3 in ISO-2022-JP-x */
                            }
                        }

                        /* write the output bytes */
                        if (len == 1) {
                            buffer[outLen++] = (byte)targetValue;
                        } else { /* len == 2 */
                            buffer[outLen++] = (byte)(targetValue >> 8);
                            buffer[outLen++] = (byte)targetValue;
                        }
                    }else {
                        /*
                         * if we cannot find the character after checking all codepages
                         * then this is an error.
                         */
                        err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
                        fromUChar32 = sourceChar;
                        break;
                    }

                    if (sourceChar == CR || sourceChar == LF) {
                        /* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */
                        myConverterData.fromU2022State.cs[2] = 0;
                        choiceCount = 0;
                    }

                    /* output outLen>0 bytes in buffer[] */
                    if (outLen == 1) {
                        target.put(buffer[0]);
                        if (offsets != null) {
                            offsets.put(source.remaining() - 1); /* -1 known to be ASCII */
                        }
                    } else if (outLen == 2 && (target.position() + 2) <= target.limit()) {
                        target.put(buffer[0]);
                        target.put(buffer[1]);
                        if (offsets != null) {
                            int sourceIndex = source.position() - 1;
                            offsets.put(sourceIndex);
                            offsets.put(sourceIndex);
                        }
                    } else {
                        err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1);
                    }
                } else {
                    err = CoderResult.OVERFLOW;
                    break;
                }
            }

            /*
             * the end of the input stream and detection of truncated input
             * are handled by the framework, but for ISO-2022-JP conversion
             * we need to be in ASCII mode at the very end
             *
             * conditions:
             *  successful
             *  in SO mode or not in ASCII mode
             *  end of input and no truncated input
             */
            if (!err.isError() &&
                    (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&
                    flush && !source.hasRemaining() && fromUChar32 == 0) {
                int sourceIndex;

                outLen = 0;

                if (myConverterData.fromU2022State.g != 0) {
                    buffer[outLen++] = UConverterConstants.SI;
                    myConverterData.fromU2022State.g = 0;
                }

                if (myConverterData.fromU2022State.cs[0] != ASCII) {
                    for (int i = 0; i < escSeqChars[ASCII].length; i++) {
                        buffer[outLen++] = escSeqChars[ASCII][i];
                    }
                    myConverterData.fromU2022State.cs[0] = ASCII;
                }

                /* get the source index of the last input character */
                sourceIndex = source.position();
                if (sourceIndex > 0) {
                    --sourceIndex;
                    if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
                            (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
                        --sourceIndex;
                    }
                } else {
                    sourceIndex = -1;
                }

                err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);
            }
            return err;
        }
    }
    /****************************ISO-2022-CN************************************/
    /*
     * Rules for ISO-2022-CN Encoding:
     * i)   The designator sequence must appear once on a line before any instance
     *      of chracter set it designates.
     * ii)  If two lines contain characters from the same character set, both lines
     *      must include the designator sequence.
     * iii) Once the designator sequence is known, a shifting sequence has to be found
     *      to invoke the shifting
     * iv)  All lines start in ASCII and end in ASCII.
     * v)   Four shifting sequences are employed for this purpose:
     *      Sequence    ASCII Eq    Charsets
     *      ---------   ---------   --------
     *      SI                  US-ASCII
     *      SO                  CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
     *      SS2         N      CNS-11643-1992 Plane 2
     *      SS3         O      CNS-11643-1992 Planes 3-7
     * vi)
     *      SOdesignator    : ESC "$" ")" finalchar_for_SO
     *      SS2designator   : ESC "$" "*" finalchar_for_SS2
     *      SS3designator   : ESC "$" "+" finalchar_for_SS3
     *
     *      ESC $ ) A       Indicates the bytes following SO are Chinese
     *       characters as defined in GB 2312-80, until
     *       another SOdesignation appears
     *
     *      ESC $ ) E       Indicates the bytes following SO are as defined
     *       in ISO-IR-165 (for details, see section 2.1),
     *       until another SOdesignation appears
     *
     *      ESC $ ) G       Indicates the bytes following SO are as defined
     *       in CNS 11643-plane-1, until another SOdesignation appears
     *
     *      ESC $ * H       Indicates teh two bytes immediately following
     *       SS2 is a Chinese character as defined in CNS
     *       11643-plane-2, until another SS2designation
     *       appears
     *       (Meaning N must preceed ever 2 byte sequence.)
     *
     *      ESC $ + I       Indicates the immediate two bytes following SS3
     *       is a Chinese character as defined in CNS
     *       11643-plane-3, until another SS3designation
     *       appears
     *       (Meaning O must preceed every 2 byte sequence.)
     *
     *      ESC $ + J       Indicates the immediate two bytes following SS3
     *       is a Chinese character as defined in CNS
     *       11643-plane-4, until another SS3designation
     *       appears
     *       (In English: O must preceed every 2 byte sequence.)
     *
     *      ESC $ + K       Indicates the immediate two bytes following SS3
     *       is a Chinese character as defined in CNS
     *       11643-plane-5, until another SS3designation
     *       appears
     *
     *      ESC $ + L       Indicates the immediate two bytes following SS3
     *       is a Chinese character as defined in CNS
     *       11643-plane-6, until another SS3designation
     *       appears
     *
     *      ESC $ + M       Indicates the immediate two bytes following SS3
     *       is a Chinese character as defined in CNS
     *       11643-plane-7, until another SS3designation
     *       appears
     *
     *      As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
     *      has its own designation information before any Chinese chracters
     *      appears
     */

    /* The following are defined this way to make strings truely readonly */
    private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 };
    private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 };
    private final static byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, 0x24, 0x29, 0x47 };
    private final static byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, 0x24, 0x2A, 0x48 };
    private final static byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, 0x24, 0x2B, 0x49 };
    private final static byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, 0x24, 0x2B, 0x4A };
    private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B };
    private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C };
    private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D };

    /************************ ISO2022-CN Data *****************************/
    private final static byte[][] escSeqCharsCN = {
        SHIFT_IN_STR,
        GB_2312_80_STR,
        ISO_IR_165_STR,
        CNS_11643_1992_Plane_1_STR,
        CNS_11643_1992_Plane_2_STR,
        CNS_11643_1992_Plane_3_STR,
        CNS_11643_1992_Plane_4_STR,
        CNS_11643_1992_Plane_5_STR,
        CNS_11643_1992_Plane_6_STR,
        CNS_11643_1992_Plane_7_STR,
    };

    private class CharsetEncoderISO2022CN extends CharsetEncoderICU {
        public CharsetEncoderISO2022CN(CharsetICU cs) {
            super(cs, fromUSubstitutionChar[0]);
        }

        @Override
        protected void implReset() {
            super.implReset();
            myConverterData.reset();
        }

        /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
        @Override
        CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
            CharBuffer source, ByteBuffer target, IntBuffer offsets){
            CoderResult err = CoderResult.UNDERFLOW;
            byte[] buffer = new byte[8];
            int i = 0;
            byte[] subchar;
            subchar = encoder.replacement();

            if (myConverterData.fromU2022State.g != 0) {
                /* not in ASCII mode: switch to ASCII */
                myConverterData.fromU2022State.g = 0;
                buffer[i++] = UConverterConstants.SI;
            }
            buffer[i++] = subchar[0];

            err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);

            return err;
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
            int sourceChar;
            byte[] buffer = new byte[8];
            int len;
            byte[] choices = new byte[3];
            int choiceCount;
            int targetValue = 0;
            boolean usingFallback;
            boolean gotoGetTrail = false;
            int oldSourcePos; // For proper error handling

            choiceCount = 0;

            /* check if the last codepoint of previous buffer was a lead surrogate */
            if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
                // goto getTrail label
                gotoGetTrail = true;
            }

            while (source.hasRemaining() || gotoGetTrail) {
                if (target.hasRemaining() || gotoGetTrail) {
                    oldSourcePos = source.position();
                    if (!gotoGetTrail) {
                        sourceChar = source.get();
                    }
                    /* check if the char is a First surrogate */
                    if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
                        if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
// getTrail label
                            /* reset gotoGetTrail flag*/
                             gotoGetTrail = false;

                            /* look ahead to find the trail surrogate */
                            if (source.hasRemaining()) {
                                /* test the following code unit */
                                char trail = source.get();
                                source.position(source.position()-1);
                                if (UTF16.isTrailSurrogate(trail)) {
                                    source.get();
                                    sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
                                    fromUChar32 = 0x00;
                                    /* convert this supplementary code point */
                                    /* exit this condition tree */
                                } else {
                                    /* this is an unmatched lead code unit (1st surrogate) */
                                    /* callback(illegal) */
                                    err = CoderResult.malformedForLength(1);
                                    fromUChar32 = sourceChar;
                                    break;
                                }
                            } else {
                                /* no more input */
                                fromUChar32 = sourceChar;
                                break;
                            }
                        } else {
                            /* this is an unmatched trail code unit (2nd surrogate) */
                            /* callback(illegal) */
                            err = CoderResult.malformedForLength(1);
                            fromUChar32 = sourceChar;
                            break;
                        }
                    }

                    /* do the conversion */
                    if (sourceChar <= 0x007f) {
                        /* do not converter SO/SI/ESC */
                        if (IS_2022_CONTROL(sourceChar)) {
                            /* callback(illegal) */
                            err = CoderResult.malformedForLength(1);
                            fromUChar32 = sourceChar;
                            break;
                        }

                        /* US-ASCII */
                        if (myConverterData.fromU2022State.g == 0) {
                            buffer[0] = (byte)sourceChar;
                            len = 1;
                        } else {
                            buffer[0] = UConverterConstants.SI;
                            buffer[1] = (byte)sourceChar;
                            len = 2;
                            myConverterData.fromU2022State.g = 0;
                            choiceCount = 0;
                        }

                        if (sourceChar == CR || sourceChar == LF) {
                            /* reset the state at the end of a line */
                            myConverterData.fromU2022State.reset();
                            choiceCount = 0;
                        }
                    } else {
                        /* convert U+0080..U+10ffff */
                        int i;
                        byte cs, g;

                        if (choiceCount == 0) {
                            /* try the current SO/G1 converter first */
                            choices[0] = myConverterData.fromU2022State.cs[1];

                            /* default to GB2312_1 if none is designated yet */
                            if (choices[0] == 0) {
                                choices[0] = GB2312_1;
                            }
                            if (myConverterData.version == 0) {
                                /* ISO-2022-CN */
                                /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
                                if (choices[0] == GB2312_1) {
                                    choices[1] = CNS_11643_1;
                                } else {
                                    choices[1] = GB2312_1;
                                }

                                choiceCount = 2;
                            } else if (myConverterData.version == 1) {
                                /* ISO-2022-CN-EXT */

                                /* try one of the other converters */
                                switch (choices[0]) {
                                case GB2312_1:
                                    choices[1] = CNS_11643_1;
                                    choices[2] = ISO_IR_165;
                                    break;
                                case ISO_IR_165:
                                    choices[1] = GB2312_1;
                                    choices[2] = CNS_11643_1;
                                    break;
                                default :
                                    choices[1] = GB2312_1;
                                    choices[2] = ISO_IR_165;
                                    break;
                                }

                                choiceCount = 3;
                            } else {
                                /* ISO-2022-CN-CNS */
                                choices[0] = CNS_11643_1;
                                choices[1] = GB2312_1;

                                choiceCount = 2;
                            }
                        }

                        cs = g = 0;
                        /*
                         * len==0:  no mapping found yet
                         * len<0:   found a fallback result: continue looking for a roundtrip but no further fallbacks
                         * len>0:   found a roundtrip result, done
                         */
                        len = 0;
                        /*
                         * We will turn off usingFallback after finding a fallback,
                         * but we still get fallbacks from PUA code points as usual.
                         * Therefore, we will also need to check that we don't overwrite
                         * an early fallback with a later one.
                         */
                        usingFallback = useFallback;

                        for (i = 0; i < choiceCount && len <= 0; ++i) {
                            byte cs0 = choices[i];
                            if (cs0 > 0) {
                                int[] value = new int[1];
                                int len2;
                                if (cs0 > CNS_11643_0) {
                                    myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643];
                                    myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3;
                                    len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
                                    //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643],
                                    //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3);
                                    if (len2 == 3 || (len2 == -3 && len == 0)) {
                                        targetValue = value[0];
                                        cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80);
                                        if (len2 >= 0) {
                                            len = 2;
                                        } else {
                                            len = -2;
                                            usingFallback = false;
                                        }
                                        if (cs == CNS_11643_1) {
                                            g = 1;
                                        } else if (cs == CNS_11643_2) {
                                            g = 2;
                                        } else if (myConverterData.version == 1) { /* plane 3..7 */
                                            g = 3;
                                        } else {
                                            /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
                                            len = 0;
                                        }
                                    }
                                } else {
                                    /* GB2312_1 or ISO-IR-165 */
                                    myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
                                    myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
                                    len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
                                    //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0],
                                    //        sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
                                    if (len2 == 2 || (len2 == -2 && len == 0)) {
                                        targetValue = value[0];
                                        len = len2;
                                        cs = cs0;
                                        g = 1;
                                        usingFallback = false;
                                    }
                                }
                            }
                        }

                        if (len != 0) {
                            len = 0; /* count output bytes; it must have ben abs(len) == 2 */

                            /* write the designation sequence if necessary */
                            if (cs != myConverterData.fromU2022State.cs[g]) {
                                if (cs < CNS_11643) {
                                    for (int n = 0; n < escSeqCharsCN[cs].length; n++) {
                                        buffer[n] = escSeqCharsCN[cs][n];
                                    }
                                } else {
                                    for (int n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) {
                                        buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n];
                                    }
                                }
                                len = 4;
                                myConverterData.fromU2022State.cs[g] = cs;
                                if (g == 1) {
                                    /* changing the SO/G1 charset invalidates the choices[] */
                                    choiceCount = 0;
                                }
                            }

                            /* write the shift sequence if necessary */
                            if (g != myConverterData.fromU2022State.g) {
                                switch (g) {
                                case 1:
                                    buffer[len++] = UConverterConstants.SO;

                                    /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
                                    myConverterData.fromU2022State.g = 1;
                                    break;
                                case 2:
                                    buffer[len++] = 0x1b;
                                    buffer[len++] = 0x4e;
                                    break;
                                default: /* case 3 */
                                    buffer[len++] = 0x1b;
                                    buffer[len++] = 0x4f;
                                    break;
                                }
                            }

                            /* write the two output bytes */
                            buffer[len++] = (byte)(targetValue >> 8);
                            buffer[len++] = (byte)targetValue;
                        } else {
                            /* if we cannot find the character after checking all codepages
                             * then this is an error
                             */
                            err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
                            fromUChar32 = sourceChar;
                            break;
                        }
                    }
                    /* output len>0 bytes in buffer[] */
                    if (len == 1) {
                        target.put(buffer[0]);
                        if (offsets != null) {
                            offsets.put(source.position()-1);
                        }
                    } else if (len == 2 && (target.remaining() >= 2)) {
                        target.put(buffer[0]);
                        target.put(buffer[1]);
                        if (offsets != null) {
                            int sourceIndex = source.position();
                            offsets.put(sourceIndex);
                            offsets.put(sourceIndex);
                        }
                    } else {
                        err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position()-1);
                        if (err.isError()) {
                            break;
                        }
                    }
                } else {
                    err = CoderResult.OVERFLOW;
                    break;
                }
            } /* end while (source.hasRemaining() */

            /*
             * the end of the input stream and detection of truncated input
             * are handled by the framework, but for ISO-2022-CN conversion
             * we need to be in ASCII mode at the very end
             *
             * condtions:
             *   succesful
             *   not in ASCII mode
             *   end of input and no truncated input
             */
            if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {
                int sourceIndex;

                /* we are switching to ASCII */
                myConverterData.fromU2022State.g = 0;

                /* get the source index of the last input character */
                sourceIndex = source.position();
                if (sourceIndex > 0) {
                    --sourceIndex;
                    if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
                            (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
                        --sourceIndex;
                    }
                } else {
                    sourceIndex = -1;
                }

                err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
            }

            return err;
        }
    }
    /******************************** ISO-2022-KR *****************************/
    /*
     *   Rules for ISO-2022-KR encoding
     *   i) The KSC5601 designator sequence should appear only once in a file,
     *      at the begining of a line before any KSC5601 characters. This usually
     *      means that it appears by itself on the first line of the file
     *  ii) There are only 2 shifting sequences SO to shift into double byte mode
     *      and SI to shift into single byte mode
     */
    private class CharsetEncoderISO2022KR extends CharsetEncoderICU {
        public CharsetEncoderISO2022KR(CharsetICU cs) {
            super(cs, fromUSubstitutionChar[myConverterData.version]);
        }

        @Override
        protected void implReset() {
            super.implReset();
            myConverterData.reset();
            setInitialStateFromUnicodeKR(this);
        }

        /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
        @Override
        CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
            CharBuffer source, ByteBuffer target, IntBuffer offsets){
            CoderResult err = CoderResult.UNDERFLOW;
            byte[] buffer = new byte[8];
            int length, i = 0;
            byte[] subchar;

            subchar = encoder.replacement();
            length = subchar.length;

            if (myConverterData.version == 0) {
                if (length == 1) {
                    if (encoder.fromUnicodeStatus != 0) {
                        /* in DBCS mode: switch to SBCS */
                        encoder.fromUnicodeStatus = 0;
                        buffer[i++] = UConverterConstants.SI;
                    }
                    buffer[i++] = subchar[0];
                } else { /* length == 2 */
                    if (encoder.fromUnicodeStatus == 0) {
                        /* in SBCS mode: switch to DBCS */
                        encoder.fromUnicodeStatus = 1;
                        buffer[i++] = UConverterConstants.SO;
                    }
                    buffer[i++] = subchar[0];
                    buffer[i++] = subchar[1];
                }
                err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
            } else {
                /* save the subvonverter's substitution string */
                byte[] currentSubChars = myConverterData.currentEncoder.replacement();

                /* set our substitution string into the subconverter */
                myConverterData.currentEncoder.replaceWith(subchar);
                myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
                /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
                myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;
                err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);
                encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;

                /* restore the subconverter's substitution string */
                myConverterData.currentEncoder.replaceWith(currentSubChars);

                if (err.isOverflow()) {
                    if (myConverterData.currentEncoder.errorBufferLength > 0) {
                        encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
                    }
                    encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
                    myConverterData.currentEncoder.errorBufferLength = 0;
                }
            }

            return err;
        }

        private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;

            myConverterData.currentEncoder.fromUChar32 = fromUChar32;
            err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);
            fromUChar32 = myConverterData.currentEncoder.fromUChar32;

            if (err.isOverflow()) {
                if (myConverterData.currentEncoder.errorBufferLength > 0) {
                    errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
                }
                errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
                myConverterData.currentEncoder.errorBufferLength = 0;
            }

            return err;
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
            int[] targetByteUnit = { 0x0000 };
            int sourceChar = 0x0000;
            boolean isTargetByteDBCS;
            boolean oldIsTargetByteDBCS;
            boolean usingFallback;
            int length = 0;
            boolean gotoGetTrail = false; // for goto getTrail label call

            /*
             * if the version is 1 then the user is requesting
             * conversion with ibm-25546 pass the argument to
             * MBCS converter and return
             */
            if (myConverterData.version == 1) {
                return encodeLoopIBM(source, target, offsets, flush);
            }

            usingFallback = useFallback;
            isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;
            if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
                gotoGetTrail = true;
            }

            while (source.hasRemaining() || gotoGetTrail) {
                targetByteUnit[0] = UConverterConstants.missingCharMarker;

                if (target.hasRemaining() || gotoGetTrail) {
                    if (!gotoGetTrail) {
                        sourceChar = source.get();

                        /* do not convert SO/SI/ESC */
                        if (IS_2022_CONTROL(sourceChar)) {
                            /* callback(illegal) */
                            err = CoderResult.malformedForLength(1);
                            fromUChar32 = sourceChar;
                            break;
                        }
                        myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
                        length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);
                        //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
                        if (length < 0) {
                            length = -length; /* fallback */
                        }
                        /* only DBCS or SBCS characters are expected */
                        /* DB characters with high bit set to 1 are expected */
                        if (length > 2 || length == 0 ||
                                (length == 1 && targetByteUnit[0] > 0x7f) ||
                                (length ==2 &&
                                        ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||
                                        ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {
                            targetByteUnit[0] = UConverterConstants.missingCharMarker;
                        }
                    }
                    if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) {
                        oldIsTargetByteDBCS = isTargetByteDBCS;
                        isTargetByteDBCS = (targetByteUnit[0] > 0x00FF);
                        /* append the shift sequence */
                        if (oldIsTargetByteDBCS != isTargetByteDBCS) {
                            if (isTargetByteDBCS) {
                                target.put((byte)UConverterConstants.SO);
                            } else {
                                target.put((byte)UConverterConstants.SI);
                            }
                            if (offsets != null) {
                                offsets.put(source.position()-1);
                            }
                        }
                        /* write the targetUniChar to target */
                        if (targetByteUnit[0] <= 0x00FF) {
                            if (target.hasRemaining()) {
                                target.put((byte)targetByteUnit[0]);
                                if (offsets != null) {
                                    offsets.put(source.position()-1);
                                }
                            } else {
                                errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0];
                                err = CoderResult.OVERFLOW;
                            }
                        } else {
                            if (target.hasRemaining()) {
                                target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)));
                                if (offsets != null) {
                                    offsets.put(source.position()-1);
                                }
                                if (target.hasRemaining()) {
                                    target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)));
                                    if (offsets != null) {
                                        offsets.put(source.position()-1);
                                    }
                                } else {
                                    errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));
                                    err = CoderResult.OVERFLOW;
                                }

                            } else {
                                errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));
                                errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));
                                err = CoderResult.OVERFLOW;
                            }
                        }
                    } else {
                        /* oops.. the code point is unassigned
                         * set the error and reason
                         */

                        /* check if the char is a First surrogate */
                        if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {
                            if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
// getTrail label
                                // reset gotoGetTrail flag
                                gotoGetTrail = false;

                                /* look ahead to find the trail surrogate */
                                if (source.hasRemaining()) {
                                    /* test the following code unit */
                                    char trail = source.get();
                                    source.position(source.position()-1);
                                    if (UTF16.isTrailSurrogate(trail)) {
                                        source.get();
                                         sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
                                         err = CoderResult.unmappableForLength(2);
                                         /* convert this surrogate code point */
                                         /* exit this condition tree */
                                    } else {
                                        /* this is an unmatched lead code unit (1st surrogate) */
                                        /* callback(illegal) */
                                        err = CoderResult.malformedForLength(1);
                                    }
                                } else {
                                    /* no more input */
                                    err = CoderResult.UNDERFLOW;
                                }
                            } else {
                                /* this is an unmatched trail code unit (2nd surrogate ) */
                                /* callback(illegal) */
                                err = CoderResult.malformedForLength(1);
                            }
                        } else {
                            /* callback(unassigned) for a BMP code point */
                            err = CoderResult.unmappableForLength(1);
                        }

                        fromUChar32 = sourceChar;
                        break;
                    }
                } else {
                    err = CoderResult.OVERFLOW;
                    break;
                }
            }
            /*
             * the end of the input stream and detection of truncated input
             * are handled by the framework, but for ISO-2022-KR conversion
             * we need to be inASCII mode at the very end
             *
             * conditions:
             *  successful
             *  not in ASCII mode
             *  end of  input and no truncated input
             */
            if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {
                int sourceIndex;

                /* we are switching to ASCII */
                isTargetByteDBCS = false;

                /* get the source index of the last input character */
                sourceIndex = source.position();
                if (sourceIndex > 0) {
                    --sourceIndex;
                    if (UTF16.isTrailSurrogate(source.get(sourceIndex)) && UTF16.isLeadSurrogate(source.get(sourceIndex-1))) {
                        --sourceIndex;
                    }
                } else {
                    sourceIndex = -1;
                }

                CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
            }
            /*save the state and return */
            fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;

            return err;
        }
    }

    @Override
    public CharsetDecoder newDecoder() {
        switch (variant) {
        case ISO_2022_JP:
            return new CharsetDecoderISO2022JP(this);

        case ISO_2022_CN:
            return new CharsetDecoderISO2022CN(this);

        case ISO_2022_KR:
            setInitialStateToUnicodeKR();
            return new CharsetDecoderISO2022KR(this);

        default: /* should not happen */
            return null;
        }
    }

    @Override
    public CharsetEncoder newEncoder() {
        CharsetEncoderICU cnv;

        switch (variant) {
        case ISO_2022_JP:
            return new CharsetEncoderISO2022JP(this);

        case ISO_2022_CN:
            return new CharsetEncoderISO2022CN(this);

        case ISO_2022_KR:
            cnv = new CharsetEncoderISO2022KR(this);
            setInitialStateFromUnicodeKR(cnv);
            return cnv;

        default: /* should not happen */
            return null;
        }
    }

    private void setInitialStateToUnicodeKR() {
        if (myConverterData.version == 1) {
            myConverterData.currentDecoder.toUnicodeStatus = 0;     /* offset */
            myConverterData.currentDecoder.mode = 0;                /* state */
            myConverterData.currentDecoder.toULength = 0;           /* byteIndex */
        }
    }
    private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) {
        /* ISO-2022-KR the designator sequence appears only once
         * in a file so we append it only once
         */
        if (cnv.errorBufferLength == 0) {
            cnv.errorBufferLength = 4;
            cnv.errorBuffer[0] = 0x1b;
            cnv.errorBuffer[1] = 0x24;
            cnv.errorBuffer[2] = 0x29;
            cnv.errorBuffer[3] = 0x43;
        }
        if (myConverterData.version == 1) {
            ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A;
            myConverterData.currentEncoder.fromUChar32 = 0;
            myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */
        }
    }

    @Override
    void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
        int i;
        /*open a set and initialize it with code points that are algorithmically round-tripped */

        switch(variant){
        case ISO_2022_JP:
           /*include JIS X 0201 which is hardcoded */
            setFillIn.add(0xa5);
            setFillIn.add(0x203e);
            if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){
                /*include Latin-1 some variants of JP */
                setFillIn.add(0, 0xff);

            }
            else {
                /* include ASCII for JP */
                setFillIn.add(0, 0x7f);
             }
            if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){
            /*
             * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit
             * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana.
             * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width
             * Katakana via ESC.
             * However, we only emit (fromUnicode) half-width Katakana according to the
             * definition of each variant.
             *
             * When including fallbacks,
             * we need to include half-width Katakana Unicode code points for all JP variants because
             * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
             */
            /* include half-width Katakana for JP */
                setFillIn.add(HWKANA_START, HWKANA_END);
             }
            break;
        case ISO_2022_CN:
            /* Include ASCII for CN */
            setFillIn.add(0, 0x7f);
            break;
        case ISO_2022_KR:
            /* there is only one converter for KR */
          myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which);
          break;
        default:
            break;
        }

        //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until
        for(i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy