All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ctc.wstx.io.EBCDICCodec Maven / Gradle / Ivy

/* Woodstox XML processor
 *
 * Copyright (c) 2004 Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.io;

/**
 * This is a container class for EBCDIC code page(s) that we need
 * to properly bootstrap EBCDIC encoded xml documents.
 */
public final class EBCDICCodec
{
    /**
     * Lookup table for EBCDIC code page 037, indexed by the unsigned
     * EBCDIC byte value (0x00 - 0xFF).
     *<p>
     * Entries are the Unicode code points listed in the CP037 mapping,
     * with two adjustments made by {@link #buildCp037Mapping}:
     *<ul>
     * <li>NEL (0x85) is replaced by {@code BaseReader.CONVERT_NEL_TO}</li>
     * <li>every other code point in range 0x7F - 0x9F is stored negated</li>
     *</ul>
     * The array is shared and must never be modified after class
     * initialization; external callers get a copy via
     * {@link #getCp037Mapping}.
     */
    final static int[] sCodePage037 = buildCp037Mapping();

    private EBCDICCodec() { }

    /**
     * Returns the translation table for EBCDIC code page 037.
     *<p>
     * A fresh copy is returned on every call, so callers are free to keep
     * (or even modify) the result without affecting the shared table used
     * by other callers. Callers should hold on to the returned array rather
     * than calling this method repeatedly.
     *
     * @return Code table for EBCDIC code page 037 (US, Canada etc);
     *   256 entries, indexed by unsigned EBCDIC byte value
     */
    public static int[] getCp037Mapping()
    {
        // Defensive copy: handing out the static array itself would let any
        // caller corrupt the mapping for everyone else.
        return sCodePage037.clone();
    }

    /**
     * Builds the CP037 table: first the straight byte-to-Unicode mapping,
     * then the control character adjustments.
     *
     * @return Newly allocated 256-entry table
     */
    private static int[] buildCp037Mapping()
    {
        /* First, let's fill these in as per:
         *
         * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT
         *
         * One row per high nibble, one column per low nibble.
         */
        int[] table = {
            // 0x00 - 0x0F
            0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F,
            0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
            // 0x10 - 0x1F
            0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087,
            0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F,
            // 0x20 - 0x2F
            0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B,
            0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007,
            // 0x30 - 0x3F
            0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004,
            0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A,
            // 0x40 - 0x4F
            0x0020, 0x00A0, 0x00E2, 0x00E4, 0x00E0, 0x00E1, 0x00E3, 0x00E5,
            0x00E7, 0x00F1, 0x00A2, 0x002E, 0x003C, 0x0028, 0x002B, 0x007C,
            // 0x50 - 0x5F
            0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE, 0x00EF,
            0x00EC, 0x00DF, 0x0021, 0x0024, 0x002A, 0x0029, 0x003B, 0x00AC,
            // 0x60 - 0x6F
            0x002D, 0x002F, 0x00C2, 0x00C4, 0x00C0, 0x00C1, 0x00C3, 0x00C5,
            0x00C7, 0x00D1, 0x00A6, 0x002C, 0x0025, 0x005F, 0x003E, 0x003F,
            // 0x70 - 0x7F
            0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF,
            0x00CC, 0x0060, 0x003A, 0x0023, 0x0040, 0x0027, 0x003D, 0x0022,
            // 0x80 - 0x8F
            0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
            0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE, 0x00B1,
            // 0x90 - 0x9F
            0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070,
            0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6, 0x00A4,
            // 0xA0 - 0xAF
            0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078,
            0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x00DD, 0x00DE, 0x00AE,
            // 0xB0 - 0xBF
            0x005E, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x00A7, 0x00B6, 0x00BC,
            0x00BD, 0x00BE, 0x005B, 0x005D, 0x00AF, 0x00A8, 0x00B4, 0x00D7,
            // 0xC0 - 0xCF
            0x007B, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
            0x0048, 0x0049, 0x00AD, 0x00F4, 0x00F6, 0x00F2, 0x00F3, 0x00F5,
            // 0xD0 - 0xDF
            0x007D, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
            0x0051, 0x0052, 0x00B9, 0x00FB, 0x00FC, 0x00F9, 0x00FA, 0x00FF,
            // 0xE0 - 0xEF
            0x005C, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
            0x0059, 0x005A, 0x00B2, 0x00D4, 0x00D6, 0x00D2, 0x00D3, 0x00D5,
            // 0xF0 - 0xFF
            0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
            0x0038, 0x0039, 0x00B3, 0x00DB, 0x00DC, 0x00D9, 0x00DA, 0x009F
        };

        /* And then massage it so we can catch problems with control
         * chars same way as we'd do with, say, Latin1 input: DEL and
         * the C1 control range (0x7F - 0x9F) get stored as negative
         * values, so they are distinguishable from regular characters.
         */
        for (int i = 0; i < table.length; ++i) {
            int c = table[i];
            if (c < 0x7F || c > 0x9F) {
                continue;
            }
            /* 21-Sep-2007, TSa: Hmmh. In a way, NEL should be dealt with
             *   as per xml rules, and not allowed in xml declaration
             *   before encoding declaration. But that won't work well
             *   with real docs. Converting it earlier works better...
             *   so for now that seems the way it needs to be done. ;-/
             */
            table[i] = (c == 0x85) ? BaseReader.CONVERT_NEL_TO : -c;
        }
        return table;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy