// com.ctc.wstx.io.EBCDICCodec
/* Woodstox XML processor
*
* Copyright (c) 2004 Tatu Saloranta, [email protected]
*
* Licensed under the License specified in file LICENSE, included with
* the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ctc.wstx.io;
/**
 * Container class for the EBCDIC code page(s) needed to properly
 * bootstrap EBCDIC encoded xml documents.
 *<p>
 * Only code page 037 is currently included. Its table is indexed by the
 * unsigned EBCDIC byte value (0 - 255); each entry is the matching
 * character code, except for entries that decode into the range
 * 0x7F - 0x9F, which are post-processed (see {@link #getCp037Mapping}).
 */
public final class EBCDICCodec
{
    /**
     * Processed code page 037 table, shared within the package. It must
     * be treated as read-only; external callers get a copy via
     * {@link #getCp037Mapping}.
     */
    static final int[] sCodePage037 = flagControlChars(rawCp037Table());

    private EBCDICCodec() { }

    /**
     * Returns the lookup table for code page 037. The table has 256
     * entries, indexed by EBCDIC byte value. Entries whose decoded
     * character falls in the range 0x7F - 0x9F are stored negated, with
     * the exception of 0x85 (NEL), which is stored as
     * <code>BaseReader.CONVERT_NEL_TO</code>.
     *<p>
     * A fresh copy is returned on each call, so that modifications made
     * by one caller can not corrupt the shared table.
     *
     * @return Code table for EBCDIC code page 037 (US, Canada etc)
     */
    public static int[] getCp037Mapping()
    {
        int[] copy = new int[sCodePage037.length];
        System.arraycopy(sCodePage037, 0, copy, 0, copy.length);
        return copy;
    }

    /**
     * Builds the unmodified code page 037 table, as per:
     *
     * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT
     *
     * Each row below covers 16 consecutive EBCDIC byte values.
     */
    private static int[] rawCp037Table()
    {
        return new int[] {
            /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F,
            /* 0x08 */ 0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
            /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087,
            /* 0x18 */ 0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F,
            /* 0x20 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B,
            /* 0x28 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007,
            /* 0x30 */ 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004,
            /* 0x38 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A,
            /* 0x40 */ 0x0020, 0x00A0, 0x00E2, 0x00E4, 0x00E0, 0x00E1, 0x00E3, 0x00E5,
            /* 0x48 */ 0x00E7, 0x00F1, 0x00A2, 0x002E, 0x003C, 0x0028, 0x002B, 0x007C,
            /* 0x50 */ 0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE, 0x00EF,
            /* 0x58 */ 0x00EC, 0x00DF, 0x0021, 0x0024, 0x002A, 0x0029, 0x003B, 0x00AC,
            /* 0x60 */ 0x002D, 0x002F, 0x00C2, 0x00C4, 0x00C0, 0x00C1, 0x00C3, 0x00C5,
            /* 0x68 */ 0x00C7, 0x00D1, 0x00A6, 0x002C, 0x0025, 0x005F, 0x003E, 0x003F,
            /* 0x70 */ 0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF,
            /* 0x78 */ 0x00CC, 0x0060, 0x003A, 0x0023, 0x0040, 0x0027, 0x003D, 0x0022,
            /* 0x80 */ 0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
            /* 0x88 */ 0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE, 0x00B1,
            /* 0x90 */ 0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070,
            /* 0x98 */ 0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6, 0x00A4,
            /* 0xA0 */ 0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078,
            /* 0xA8 */ 0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x00DD, 0x00DE, 0x00AE,
            /* 0xB0 */ 0x005E, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x00A7, 0x00B6, 0x00BC,
            /* 0xB8 */ 0x00BD, 0x00BE, 0x005B, 0x005D, 0x00AF, 0x00A8, 0x00B4, 0x00D7,
            /* 0xC0 */ 0x007B, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
            /* 0xC8 */ 0x0048, 0x0049, 0x00AD, 0x00F4, 0x00F6, 0x00F2, 0x00F3, 0x00F5,
            /* 0xD0 */ 0x007D, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
            /* 0xD8 */ 0x0051, 0x0052, 0x00B9, 0x00FB, 0x00FC, 0x00F9, 0x00FA, 0x00FF,
            /* 0xE0 */ 0x005C, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
            /* 0xE8 */ 0x0059, 0x005A, 0x00B2, 0x00D4, 0x00D6, 0x00D2, 0x00D3, 0x00D5,
            /* 0xF0 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
            /* 0xF8 */ 0x0038, 0x0039, 0x00B3, 0x00DB, 0x00DC, 0x00D9, 0x00DA, 0x009F
        };
    }

    /**
     * Massages the given table in place so that problems with control
     * chars can be caught the same way as with, say, Latin1 input:
     * entries in the range 0x7F - 0x9F are negated, except for 0x85,
     * which is replaced with <code>BaseReader.CONVERT_NEL_TO</code>.
     *
     * @param table Raw code page table; modified in place
     *
     * @return The same array instance that was passed in
     */
    private static int[] flagControlChars(int[] table)
    {
        for (int i = 0; i < table.length; ++i) {
            int c = table[i];
            if (c < 0x7F || c > 0x9F) {
                continue;
            }
            /* 21-Sep-2007, TSa: Hmmh. In a way, NEL should be dealt with
             *   as per xml rules, and not allowed in xml declaration
             *   before encoding declaration. But that won't work well
             *   with real docs. Converting it earlier works better...
             *   so for now that seems the way it needs to be done. ;-/
             */
            table[i] = (c == 0x85) ? BaseReader.CONVERT_NEL_TO : -c;
        }
        return table;
    }
}