com.ibm.as400.access.GenerateConverterTable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jt400 Show documentation
Show all versions of jt400 Show documentation
The Open Source version of the IBM Toolbox for Java
The newest version!
///////////////////////////////////////////////////////////////////////////////
//
// JTOpen (IBM Toolbox for Java - OSS version)
//
// Filename: GenerateConverterTable.java
//
// The source code contained herein is licensed under the IBM Public License
// Version 1.0, which has been approved by the Open Source Initiative.
// Copyright (C) 1997-2016 International Business Machines Corporation and
// others. All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////
package com.ibm.as400.access;
import java.io.*;
import java.lang.reflect.Field;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.*;
public class GenerateConverterTable {
private static final String copyright = "Copyright (C) 1997-2016 International Business Machines Corporation and others.";
private static final int MAX_SURROGATE_LENGTH = 2000;
private static final int MAX_TO_EBCDIC_LENGTH = 20000;
static AS400 sys = null;
static Connection connection_ = null;
static boolean compress_ = true; // Compress the conversion table
// Note: turn this off for debugging purposes
static boolean codePointPerLine_ = false; // Should only 1 code point be
// printed per line
static boolean ascii_ = false; // Indicates if listed ccsids are ascii tables
// or not
static boolean bidi_ = false; // Indicates if listed ccsids are bidi tables or
// not
// Note: bidi_ and ascii_ cannot both be true
static boolean showOffsets_ = false; // Indicates of the offsets should be
// printed in the tables
static boolean useJdbc_ = false; // Use JDBC to retrieve the table information
public static void main(String[] args) {
if (args.length < 4) {
System.out.println(
"Usage: java com.ibm.as400.access.GenerateConverterTable system uid pwd [-nocompress] [-ascii] [-bidi] [-showOffsets] [-codePointPerLine] [-useJdbc] ccsid [ccsid2] [ccsid3] [ccsid4] ...");
System.exit(0);
}
try {
sys = new AS400(args[0], args[1], args[2]);
sys.connectService(AS400.CENTRAL);
} catch (Exception e) {
e.printStackTrace();
System.exit(0);
}
int start = 3;
if (args[start].equals("-nocompress")) {
compress_ = false;
++start;
}
if (args[start].equals("-ascii")) {
ascii_ = true;
++start;
}
if (args[start].equals("-bidi")) {
bidi_ = true;
++start;
}
if (args[start].equals("-showOffsets")) {
showOffsets_ = true;
++start;
}
if (args[start].equals("-codePointPerLine")) {
codePointPerLine_ = true;
++start;
}
if (args[start].equals("-useJdbc")) {
useJdbc_ = true;
try {
Class.forName("com.ibm.as400.access.AS400JDBCDriver");
connection_ = DriverManager.getConnection("jdbc:as400:" + args[0],
args[1], args[2]);
} catch (Exception e) {
e.printStackTrace();
System.exit(0);
}
++start;
}
for (int i = start; i < args.length; ++i) {
go((Integer.parseInt(args[i])));
}
}
static String formattedChar(char x) {
int num = 0xFFFF & (int) x;
String s = "\\u";
if (num < 16)
s += "0";
if (num < 256)
s += "0";
if (num < 4096)
s += "0";
s += Integer.toHexString(num).toUpperCase();
return s;
}
static void go(int ccsid) {
char[] tableToUnicode = new char[0];
char[] tableToUnicodeSpaces = new char[0];
char[] tableToEbcdic = new char[0];
char[][] surrogateTable = null;
char[][] tripletTable = null;
char[][] quadTable = null;
// int numTables1 = 1;
// int numTables2 = 1;
boolean ebcdicIsDBCS = false;
int doubleByteFormat = NLSTableDownload.DOUBLE_BYTE_FROM_CCSID;
int originalCcsid = ccsid;
try {
if (useJdbc_) {
if (ConvTable.isMixedCCSID(originalCcsid)) {
//
// If we have a mixed CCSID then we will download in two pieces
// We use the convention that a CCSID that starts with 10xxxxx is
// SINGLE BYTE part of mixed
// and a CCSID that starts with 20xxxxx is DOUBLE BYTE part of mixed
go(1000000 + ccsid);
go(2000000 + ccsid);
return;
} else {
Class.forName("com.ibm.as400.access.AS400JDBCDriver");
try {
if (ccsid > 2000000) {
ebcdicIsDBCS = true;
} else if (ccsid > 1000000) {
ebcdicIsDBCS = false;
} else {
ebcdicIsDBCS = jdbcIsDBCS(connection_, ccsid);
}
if (ebcdicIsDBCS) {
tableToUnicodeSpaces = jdbcToUnicodeSpacesDBCS(connection_,
ccsid);
tableToEbcdic = jdbcToEbcdicDBCS(connection_, ccsid);
} else {
tableToUnicode = jdbcToUnicode(connection_, ccsid);
tableToEbcdic = jdbcToEbcdic(connection_, ccsid);
}
} catch (Exception e) {
System.out.println("Error downloading table using JDBC ");
e.printStackTrace(System.out);
System.exit(1);
}
}
} else {
AS400ImplRemote impl = (AS400ImplRemote) sys.getImpl();
NLSTableDownload down = new NLSTableDownload(impl);
down.connect();
if (ccsid == 1089) // There are currently no tables for
// 1089->13488->1089;
// use 61952 instead, since it would be the same
// anyway.
{
System.out.println("Special case for ccsid 1089.");
System.out.println("Retrieving " + ccsid + "->61952 table...");
tableToUnicode = down.download(ccsid, 61952,
NLSTableDownload.SINGLE_BYTE_FROM_CCSID);
} else if (ccsid == 1376) {
// This is double byte so fall into bottom path
tableToUnicode = null;
} else if (ccsid == 1371) {
tableToUnicode = null;
doubleByteFormat = NLSTableDownload.MIXED_BYTE_FROM_CCSID;
} else {
System.out.println("Retrieving " + ccsid + "->13488 table...");
tableToUnicode = down.download(ccsid, 13488,
NLSTableDownload.SINGLE_BYTE_FROM_CCSID);
}
if (tableToUnicode == null || tableToUnicode.length == 0) {
String reason = "";
if (tableToUnicode == null) {
reason = "tableToUnicode is null";
} else {
reason = "tableToUnicode.length is 0";
}
if (ccsid == 1175) {
System.out.println("Aborting since CCSD 1175 failed to download");
throw new Exception("Aborting since CCSD 1175 failed to download");
}
System.out
.println(ccsid + " must be double-byte because download failed ("
+ reason + "). Performing secondary retrieve of " + ccsid
+ "->1200 table...");
ebcdicIsDBCS = true;
down.disconnect();
down.connect();
tableToUnicode = down.download(ccsid, 1200, doubleByteFormat);
}
down.disconnect();
down.connect();
if (ccsid == 1089) {
System.out.println("Special case for ccsid 1089.");
System.out.println("Retrieving 61952->" + ccsid + " table...");
tableToEbcdic = down.download(61952, ccsid,
NLSTableDownload.DOUBLE_BYTE_FROM_CCSID);
} else {
/* Use 1200 instead of 13488 */
System.out.println("Retrieving 1200->" + ccsid + " table...");
tableToEbcdic = down.download(1200, ccsid,
NLSTableDownload.DOUBLE_BYTE_FROM_CCSID);
}
}
System.out.println(" Size: " + tableToUnicode.length + " or "
+ tableToUnicodeSpaces.length);
if (tableToUnicode.length > 65536
|| tableToUnicodeSpaces.length > 131072) {
System.out.println("Size is > 65536 or 131072. Fixing table");
int next = 0;
int from = 0;
char[] newTable = new char[65536];
int lastFrom = 0;
int lastTo = 0;
boolean hasSpaces = false;
int tableLength = tableToUnicode.length;
if (tableLength > 0) { /* Old logic without spaces */
while (from < tableLength && next < 65536) {
int c = 0xFFFF & (int) tableToUnicode[from];
// If we didn't process a variation selector, ignore it.
while (c >= 0xFE00 && c <= 0xFE0F) {
from++;
c = 0xFFFF & (int) tableToUnicode[from];
}
if (next > 0xECAA && next <= 0xECD0) {
System.out.println("Next=0x" + Integer.toHexString(next) + " to="
+ Integer.toHexString(c));
}
int nextchar = 0;
if (from + 1 < tableToUnicode.length) {
nextchar = 0xFFFF & (int) tableToUnicode[from + 1];
}
if (
// in surrogate range
((c >= 0xD800) && (c <= 0xDFFF)) ||
// Uses Variation selector
((nextchar >= 0xFE00) && (nextchar <= 0xFE0F))
||
// Uses combining character
((nextchar == 0x309A) && (c != 0x3099))
|| /*
* In 835 there are two combining characters next to each
* other
*/
/* In that case, we do not combine */
(c != 0xFFfd && nextchar == 0x300)
|| (c != 0xffd && c != 0x300 && nextchar == 0x301) ||
// Weird cases..
(c == 0x2e5 && nextchar == 0x2e9)
|| (c == 0x2e9 && nextchar == 0x2e5)) {
// Mark as surrogate
newTable[next] = (char) 0xD800;
lastFrom = next;
lastTo = 0xD800;
// add to surrogate table
if (surrogateTable == null) {
surrogateTable = new char[65536][];
}
char[] pair = new char[2];
surrogateTable[next] = pair;
pair[0] = (char) (0xFFFF & (int) tableToUnicode[from]);
pair[1] = (char) (0xFFFF & (int) tableToUnicode[from + 1]);
/*
* System.out.println("Warning: Sub at offset "+Integer.
* toHexString(next )+" for "+Integer.toHexString(0xFFFF & (int)
* table1[from])+" "+Integer.toHexString(0xFFFF & (int)
* table1[from+1]));
*/
from += 2;
} else {
newTable[next] = (char) c;
if (c != 0xFFFD) {
lastFrom = next;
lastTo = c;
}
from++;
}
next++;
} /* while */
} else {
/* New logic with spaces */
tableLength = tableToUnicodeSpaces.length;
while (from < tableLength && next < 65536) {
int c0 = 0;
int c1 = 0;
int c2 = 0;
int c3 = 0;
int c4 = 0;
int characterCount = 0;
c0 = 0xFFFF & (int) tableToUnicodeSpaces[from];
// If we didn't process a variation selector, ignore it.
// In CCSID 1371 FE60 => FE00 and FE61 => FE01, etc
if (ccsid != 2001371) {
while (c0 >= 0xFE00 && c0 <= 0xFE0F) {
from++;
c0 = 0xFFFF & (int) tableToUnicodeSpaces[from];
}
}
c1 = 0xFFFF & (int) tableToUnicodeSpaces[from + 1];
if (c1 == 0x3000) {
characterCount = 1;
} else {
c2 = 0xFFFF & (int) tableToUnicodeSpaces[from + 2];
if (c2 == 0x3000) {
characterCount = 2;
} else {
c3 = 0xFFFF & (int) tableToUnicodeSpaces[from + 3];
if (c3 == 0x3000) {
characterCount = 3;
} else {
c4 = 0xFFFF & (int) tableToUnicodeSpaces[from + 4];
if (c4 == 0x3000) {
characterCount = 4;
} else {
throw new Exception(
"Character count is too large for from=0x"
+ Integer.toHexString(from));
}
}
}
}
if (characterCount == 1) {
newTable[next] = (char) c0;
from += 2;
} else if (characterCount == 2) {
newTable[next] = (char) 0xD800;
// add to surrogate table
if (surrogateTable == null) {
surrogateTable = new char[65536][];
}
char[] pair = new char[2];
surrogateTable[next] = pair;
pair[0] = (char) (0xFFFF & (int) c0);
pair[1] = (char) (0xFFFF & (int) c1);
from += 3;
} else if (characterCount == 3) { /* Character count must be 3 */
newTable[next] = (char) 0xD801;
// add to triple table
if (tripletTable == null) {
tripletTable = new char[65536][];
}
char[] triple = new char[3];
tripletTable[next] = triple;
triple[0] = (char) (0xFFFF & (int) c0);
triple[1] = (char) (0xFFFF & (int) c1);
triple[2] = (char) (0xFFFF & (int) c2);
from += 4;
} else if (characterCount == 4) { /* character count must be 4 */
newTable[next] = (char) 0xD802;
// add to triple table
if (quadTable == null) {
quadTable = new char[65536][];
}
char[] quad = new char[4];
quadTable[next] = quad;
quad[0] = (char) (0xFFFF & (int) c0);
quad[1] = (char) (0xFFFF & (int) c1);
quad[2] = (char) (0xFFFF & (int) c2);
quad[3] = (char) (0xFFFF & (int) c3);
from += 5;
} else {
throw new Exception(
"Character count is invalid " + characterCount);
}
next++;
} /* while */
/* Fill out the rest of the table */
while (next < 65536) {
newTable[next] = (char) 0xFFFD;
next++;
}
}
tableToUnicode = newTable;
} else if (tableToUnicode.length == 0) {
// Create a tableToUnicode from tableToUnicodeSpaces
int len = tableToUnicodeSpaces.length / 2;
tableToUnicode = new char[len];
for (int i = 0; i < len; i++) {
tableToUnicode[i] = tableToUnicodeSpaces[i * 2];
}
}
System.out.println(" Size: " + tableToEbcdic.length);
// If a mixed CCSID, then we need to fix the tableToEbcdic
// Convert the table to a byte array
// Go through table and process SI/I)
if (doubleByteFormat == NLSTableDownload.MIXED_BYTE_FROM_CCSID) {
char[] newTableToEbcdic = new char[65536];
byte[] oldTableToEbcdic = new byte[tableToEbcdic.length * 2];
for (int i = 0; i < tableToEbcdic.length; i++) {
oldTableToEbcdic[2 * i] = (byte) (0xFF & (tableToEbcdic[i] >> 8));
oldTableToEbcdic[2 * i + 1] = (byte) (tableToEbcdic[i] & 0xFF);
}
boolean singleByte = true;
int newTableOffset = 0;
for (int i = 0; i < oldTableToEbcdic.length; i++) {
int b = 0xFF & oldTableToEbcdic[i];
while ((i > 0x0F) && (i < oldTableToEbcdic.length)
&& ((b == 0x0E) || (b == 0x0F))) {
if (b == 0x0E) {
singleByte = false;
i++;
} else {
singleByte = true;
i++;
}
if (i < oldTableToEbcdic.length) {
b = 0xFF & oldTableToEbcdic[i];
}
}
if (i < oldTableToEbcdic.length) {
if (singleByte) {
if (newTableOffset < newTableToEbcdic.length) {
newTableToEbcdic[newTableOffset] = (char) b;
newTableOffset++;
}
} else {
i++;
if (i < oldTableToEbcdic.length) {
int c = 0xFF & oldTableToEbcdic[i];
if (newTableOffset < newTableToEbcdic.length) {
if (newTableOffset == 0x431) {
newTableToEbcdic[newTableOffset] = (char) ((b << 8) + c);
newTableOffset++;
} else {
newTableToEbcdic[newTableOffset] = (char) ((b << 8) + c);
newTableOffset++;
}
}
}
}
}
} /* for i */
while (newTableOffset < newTableToEbcdic.length) {
newTableToEbcdic[newTableOffset] = (char) 0xFEFE;
newTableOffset++;
}
tableToEbcdic = newTableToEbcdic;
} /* If Mixed byte */
sys.disconnectAllServices();
} catch (Exception e) {
e.printStackTrace(System.out);
}
// Do any necessary fixup
if (ccsid == 290) {
tableToUnicode[0xE1] = '\u20ac';
char toEbcdic = tableToEbcdic[0x20ac / 2];
toEbcdic = (char) ((0xE1 << 8) | (toEbcdic & 0xFF));
tableToEbcdic[0x20ac / 2] = toEbcdic;
}
// Verify the mapping
verifyRoundTrip(tableToUnicode, tableToEbcdic, ebcdicIsDBCS, ccsid,
surrogateTable, tripletTable, quadTable);
System.out.println("****************************************");
System.out.println("Verify round 2 ");
System.out.println("****************************************");
verifyRoundTrip(tableToUnicode, tableToEbcdic, ebcdicIsDBCS, ccsid,
surrogateTable, tripletTable, quadTable);
// Compress the ccsid table
if (ebcdicIsDBCS) {
if (compress_) {
System.out
.println("Compressing " + ccsid + "->13488 conversion table...");
char[] arr = compress(tableToUnicode);
System.out
.println("Old compression length: " + arr.length + " characters.");
char[] temparr = compressBetter(tableToUnicode);
System.out.println(
"New compression length: " + temparr.length + " characters.");
if (temparr.length > arr.length) {
System.out
.println("WARNING: New algorithm WORSE than old algorithm!");
}
System.out.println("Verifying compressed table...");
arr = decompressBetter(temparr);
if (arr.length != tableToUnicode.length) {
System.out.println("Verification failed, lengths not equal: "
+ arr.length + " != " + tableToUnicode.length);
int c = 0;
while (c < arr.length && arr[c] == tableToUnicode[c])
++c;
System.out.println("First mismatch at index " + c + ": "
+ (int) arr[c] + " != " + (int) tableToUnicode[c]);
} else {
boolean bad = false;
for (int c = 0; c < arr.length; ++c) {
if (arr[c] != tableToUnicode[c]) {
bad = true;
System.out.println(c + ": " + Integer.toHexString((int) arr[c])
+ " != " + Integer.toHexString((int) tableToUnicode[c]));
}
}
if (bad) {
System.out.println("Mismatches found in table.");
} else {
tableToUnicode = temparr;
System.out.println("Table verified.");
}
}
}
}
int fileCcsid = ccsid;
if (doubleByteFormat == NLSTableDownload.MIXED_BYTE_FROM_CCSID) {
fileCcsid = ccsid + 1100000;
System.out
.println("Create file using " + fileCcsid + " since MIXED CCSID ");
}
// Write out the ccsid table
StringBuffer surrogateInitStringBuffer = new StringBuffer();
try {
String fName = "ConvTable" + fileCcsid + ".java";
FileWriter f = new FileWriter(fName);
writeHeader(f, fileCcsid, sys.getSystemName());
if (ascii_) {
f.write(
"class ConvTable" + fileCcsid + " extends ConvTableAsciiMap\n{\n");
} else if (bidi_) {
f.write(
"class ConvTable" + fileCcsid + " extends ConvTableBidiMap\n{\n");
} else if (ebcdicIsDBCS) {
f.write(
"class ConvTable" + fileCcsid + " extends ConvTableDoubleMap\n{\n");
} else {
f.write(
"class ConvTable" + fileCcsid + " extends ConvTableSingleMap\n{\n");
}
f.write(" private static char[] toUnicodeArray_; \n");
f.write(
" private static final String copyright = \"Copyright (C) 1997-2016 International Business Machines Corporation and others.\";\n");
f.write(" // toUnicode_ length is " + tableToUnicode.length + "\n");
f.write(" private static final String toUnicode_ = \n");
System.out.print("Writing table for conversion from " + ccsid
+ " to 13488... to " + fName + "\n");
writeTable(f, tableToUnicode, 0, tableToUnicode.length);
f.write("\n");
f.write("\n");
// Write out the surrogateTable if it exists
int surrogateLength = 0;
if (surrogateTable != null) {
f.write("\n");
for (int i = 0; i < surrogateTable.length; i++) {
char[] pair = surrogateTable[i];
if (pair != null) {
surrogateLength++;
}
} /* for i */
int surrogateCount = 0;
char[][] compressedSurrogateTable = new char[surrogateLength][];
for (int i = 0; i < surrogateTable.length; i++) {
char[] pair = surrogateTable[i];
if (pair != null) {
char[] triplet = new char[3];
triplet[0] = (char) i;
triplet[1] = pair[0];
triplet[2] = pair[1];
compressedSurrogateTable[surrogateCount] = triplet;
surrogateCount++;
}
} /* for i */
f.write(
" // Number of surrogateMappings is " + surrogateLength + "\n");
if (surrogateLength < MAX_SURROGATE_LENGTH) {
f.write(
" private static final char[][] toUnicodeSurrogateMappings = { \n");
System.out.print("Writing surrogate table for conversion from "
+ ccsid + " to 1200... to " + fName + "\n");
for (int i = 0; i < compressedSurrogateTable.length; i++) {
char[] triplet = compressedSurrogateTable[i];
if (triplet != null) {
f.write("{'" + formattedChar((char) triplet[0]) + "','"
+ formattedChar(triplet[1]) + "','"
+ formattedChar(triplet[2]) + "'},\n");
}
} /* for i */
f.write("};\n");
f.write("\n");
f.write("\n");
} else {
// We must break into pieces
f.write(
" private static char[][] toUnicodeSurrogateMappings = new char["
+ surrogateLength + "][];\n");
int startIndex = 0;
while (startIndex < surrogateLength) {
f.write(" private static void initToUnicodeSurrogateMappings"
+ startIndex + "() { \n");
f.write(" char[][] toUnicodeSurrogateMappingsPiece = {\n");
for (int i = 0; i < MAX_SURROGATE_LENGTH
&& (i + startIndex < surrogateLength); i++) {
char[] triplet = compressedSurrogateTable[startIndex + i];
if (triplet != null) {
f.write(" {'" + formattedChar((char) (triplet[0])) + "','"
+ formattedChar(triplet[1]) + "','"
+ formattedChar(triplet[2]) + "'},\n");
}
} /* for i */
f.write(" };\n");
f.write(
" for (int j = 0; j < toUnicodeSurrogateMappingsPiece.length ; j++) {\n");
f.write(" toUnicodeSurrogateMappings[" + startIndex
+ "+j]= new char[3];\n");
f.write(" toUnicodeSurrogateMappings[" + startIndex
+ "+j][0] = toUnicodeSurrogateMappingsPiece[j][0];\n");
f.write(" toUnicodeSurrogateMappings[" + startIndex
+ "+j][1] = toUnicodeSurrogateMappingsPiece[j][1];\n");
f.write(" toUnicodeSurrogateMappings[" + startIndex
+ "+j][2] = toUnicodeSurrogateMappingsPiece[j][2];\n");
f.write(" }\n");
f.write(" }\n");
f.write("\n");
surrogateInitStringBuffer.append(
" initToUnicodeSurrogateMappings" + startIndex + "();\n");
startIndex += MAX_SURROGATE_LENGTH;
}
}
} /* if surrogate table */
// Write out the tripleTable if it exists
int tripletLength = 0;
if (tripletTable != null) {
f.write("\n");
for (int i = 0; i < tripletTable.length; i++) {
char[] pair = tripletTable[i];
if (pair != null) {
tripletLength++;
}
} /* for i */
int tripletCount = 0;
char[][] compressedTripletTable = new char[tripletLength][];
for (int i = 0; i < tripletTable.length; i++) {
char[] triplet = tripletTable[i];
if (triplet != null) {
char[] entry = new char[4];
entry[0] = (char) i;
entry[1] = triplet[0];
entry[2] = triplet[1];
entry[3] = triplet[2];
compressedTripletTable[tripletCount] = entry;
tripletCount++;
}
} /* for i */
f.write(" // Number of tripletMappings is " + tripletLength + "\n");
if (tripletLength < MAX_SURROGATE_LENGTH) {
f.write(
" private static final char[][] toUnicodeTripletMappings = { \n");
System.out.print("Writing triplet table for conversion from " + ccsid
+ " to 13488... to " + fName + "\n");
for (int i = 0; i < compressedTripletTable.length; i++) {
char[] entry = compressedTripletTable[i];
if (entry != null) {
f.write("{'" + formattedChar((char) entry[0]) + "','"
+ formattedChar(entry[1]) + "','" + formattedChar(entry[2])
+ "','" + formattedChar(entry[3]) + "'},\n");
}
} /* for i */
f.write("};\n");
f.write("\n");
f.write("\n");
} else {
// We must break into pieces
f.write(
" private static char[][] toUnicodeTripletMappings = new char["
+ tripletLength + "][];\n");
int startIndex = 0;
while (startIndex < tripletLength) {
f.write(" private static void initToUnicodeTripletMappings"
+ startIndex + "() { \n");
f.write(" char[][] toUnicodeTripletMappingsPiece = {\n");
for (int i = 0; i < MAX_SURROGATE_LENGTH
&& (i + startIndex < tripletLength); i++) {
char[] entry = compressedTripletTable[startIndex + i];
if (entry != null) {
f.write(" {'" + formattedChar((char) (entry[0])) + "','"
+ formattedChar(entry[1]) + "','" + formattedChar(entry[2])
+ "','" + formattedChar(entry[3]) + "'},\n");
}
} /* for i */
f.write(" };\n");
f.write(
" for (int j = 0; j < toUnicodeTripletMappingsPiece.length ; j++) {\n");
f.write(" toUnicodeTripletMappings[" + startIndex
+ "+j]= new char[3];\n");
f.write(" toUnicodeTripletMappings[" + startIndex
+ "+j][0] = toUnicodeTripletMappingsPiece[j][0];\n");
f.write(" toUnicodeTripletMappings[" + startIndex
+ "+j][1] = toUnicodeTripletMappingsPiece[j][1];\n");
f.write(" toUnicodeTripletMappings[" + startIndex
+ "+j][2] = toUnicodeTripletMappingsPiece[j][2];\n");
f.write(" }\n");
f.write(" }\n");
f.write("\n");
surrogateInitStringBuffer.append(
" initToUnicodeTripletMappings" + startIndex + "();\n");
startIndex += MAX_SURROGATE_LENGTH;
}
}
} /* if triplet table */
// Write out the quadTable if it exists
int quadLength = 0;
if (quadTable != null) {
f.write("\n");
for (int i = 0; i < quadTable.length; i++) {
char[] pair = quadTable[i];
if (pair != null) {
quadLength++;
}
} /* for i */
int quadCount = 0;
char[][] compressedquadTable = new char[quadLength][];
for (int i = 0; i < quadTable.length; i++) {
char[] quad = quadTable[i];
if (quad != null) {
char[] entry = new char[5];
entry[0] = (char) i;
entry[1] = quad[0];
entry[2] = quad[1];
entry[3] = quad[2];
entry[4] = quad[3];
compressedquadTable[quadCount] = entry;
quadCount++;
}
} /* for i */
f.write(" // Number of quadMappings is " + quadLength + "\n");
if (quadLength < MAX_SURROGATE_LENGTH) {
f.write(
" private static final char[][] toUnicodeQuadMappings = { \n");
System.out.print("Writing quad table for conversion from " + ccsid
+ " to 1200... to " + fName + "\n");
for (int i = 0; i < compressedquadTable.length; i++) {
char[] entry = compressedquadTable[i];
if (entry != null) {
f.write("{'" + formattedChar((char) entry[0]) + "','"
+ formattedChar(entry[1]) + "','" + formattedChar(entry[2])
+ "','" + formattedChar(entry[3]) + "','"
+ formattedChar(entry[4]) + "'},\n");
}
} /* for i */
f.write("};\n");
f.write("\n");
f.write("\n");
} else {
// We must break into pieces
f.write(" private static char[][] toUnicodeQuadMappings = new char["
+ quadLength + "][];\n");
int startIndex = 0;
while (startIndex < quadLength) {
f.write(" private static void initToUnicodeQuadMappings"
+ startIndex + "() { \n");
f.write(" char[][] toUnicodeQuadMappingsPiece = {\n");
for (int i = 0; i < MAX_SURROGATE_LENGTH
&& (i + startIndex < quadLength); i++) {
char[] entry = compressedquadTable[startIndex + i];
if (entry != null) {
f.write(" {'" + formattedChar((char) (entry[0])) + "','"
+ formattedChar(entry[1]) + "','" + formattedChar(entry[2])
+ "','" + formattedChar(entry[3]) + "','"
+ formattedChar(entry[4]) + "'},\n");
}
} /* for i */
f.write(" };\n");
f.write(
" for (int j = 0; j < toUnicodeQuadMappingsPiece.length ; j++) {\n");
f.write(" toUnicodeQuadMappings[" + startIndex
+ "+j]= new char[3];\n");
f.write(" toUnicodeQuadMappings[" + startIndex
+ "+j][0] = toUnicodeQuadMappingsPiece[j][0];\n");
f.write(" toUnicodeQuadMappings[" + startIndex
+ "+j][1] = toUnicodeQuadMappingsPiece[j][1];\n");
f.write(" toUnicodeQuadMappings[" + startIndex
+ "+j][2] = toUnicodeQuadMappingsPiece[j][2];\n");
f.write(" toUnicodeQuadMappings[" + startIndex
+ "+j][3] = toUnicodeQuadMappingsPiece[j][3];\n");
f.write(" }\n");
f.write(" }\n");
f.write("\n");
surrogateInitStringBuffer
.append(" initToUnicodeQuadMappings" + startIndex + "();\n");
startIndex += MAX_SURROGATE_LENGTH;
}
}
} /* if quad table */
f.close();
} catch (Exception e) {
e.printStackTrace();
}
// Compress the Unicode table
if (compress_) {
System.out
.println("Compressing 13488->" + ccsid + " conversion table...");
char[] arr = compress(tableToEbcdic);
System.out
.println("Old compression length: " + arr.length + " characters.");
char[] temparr = compressBetter(tableToEbcdic);
System.out.println(
"New compression length: " + temparr.length + " characters.");
if (temparr.length > arr.length) {
System.out.println("WARNING: New algorithm WORSE than old algorithm!");
}
System.out.println("Verifying compressed table...");
arr = decompressBetter(temparr);
if (arr.length != tableToEbcdic.length) {
System.out.println("Verification failed, lengths not equal: "
+ arr.length + " != " + tableToEbcdic.length);
int c = 0;
while (c < arr.length && arr[c] == tableToEbcdic[c])
++c;
System.out.println("First mismatch at index " + c + ": " + (int) arr[c]
+ " != " + (int) tableToEbcdic[c]);
tableToEbcdic = temparr;
} else {
boolean bad = false;
for (int c = 0; c < arr.length; ++c) {
if (arr[c] != tableToEbcdic[c]) {
bad = true;
System.out.println(c + ": " + Integer.toHexString((int) arr[c])
+ " != " + Integer.toHexString((int) tableToEbcdic[c]));
}
}
if (bad) {
System.out.println("Mismatches found in table.");
} else {
tableToEbcdic = temparr;
System.out.println("Table verified.");
}
}
}
// Write out the Unicode table
try {
String fName = "ConvTable" + fileCcsid + ".java";
FileWriter f = new FileWriter(fName, true);
System.out.print("Writing table for conversion from 13488 to " + ccsid
+ "... to " + fName + "\n");
f.write(" private static char[] fromUnicodeArray_; \n");
f.write(" // fromUnicode length = " + tableToEbcdic.length + "\n");
if (tableToEbcdic.length < MAX_TO_EBCDIC_LENGTH) {
f.write(" private static final String fromUnicode_ = \n");
writeTable(f, tableToEbcdic, 0, tableToEbcdic.length);
f.write("\n");
} else {
int oneFourth = tableToEbcdic.length / 4;
f.write(" private static final String fromUnicode0_ = \n");
writeTable(f, tableToEbcdic, 0, oneFourth);
f.write("\n");
f.write(" private static final String fromUnicode1_ = \n");
writeTable(f, tableToEbcdic, oneFourth, oneFourth);
f.write("\n");
f.write(" private static final String fromUnicode2_ = \n");
writeTable(f, tableToEbcdic, 2 * oneFourth, oneFourth);
f.write("\n");
f.write(" private static final String fromUnicode3_ = \n");
writeTable(f, tableToEbcdic, 3 * oneFourth,
tableToEbcdic.length - 3 * oneFourth);
f.write("\n");
}
f.write(" static {\n");
f.write(" toUnicodeArray_ = toUnicode_.toCharArray();\n");
if (tableToEbcdic.length < MAX_TO_EBCDIC_LENGTH) {
f.write(" fromUnicodeArray_ = fromUnicode_.toCharArray();\n");
} else {
/*
* Note: recent compilers try to optimized and add
* fromUnicode0_+fromUnicode1_ to the constant pool
*/
/* Using sb.append disables this optimization */
f.write(" StringBuffer sb = new StringBuffer(); \n");
f.write(" sb.append(fromUnicode0_); \n");
f.write(" sb.append(fromUnicode1_); \n");
f.write(" sb.append(fromUnicode2_); \n");
f.write(" sb.append(fromUnicode3_); \n");
f.write(" fromUnicodeArray_ = sb.toString().toCharArray();\n");
}
f.write(surrogateInitStringBuffer.toString());
f.write(" }\n");
f.write("\n ConvTable" + fileCcsid + "()\n {\n");
f.write(" super(" + fileCcsid + ", ");
f.write("toUnicodeArray_, ");
if (surrogateTable != null) {
f.write("fromUnicodeArray_,");
if (quadTable != null) {
f.write(
"toUnicodeSurrogateMappings,toUnicodeTripletMappings,toUnicodeQuadMappings);\n");
} else if (tripletTable != null) {
f.write("toUnicodeSurrogateMappings,toUnicodeTripletMappings);\n");
} else {
f.write("toUnicodeSurrogateMappings,null);\n");
}
} else {
f.write("fromUnicodeArray_);\n");
}
f.write(" }\n\n");
f.write("\n ConvTable" + fileCcsid + "(int ccsid)\n {\n");
f.write(" super(ccsid, ");
f.write("toUnicodeArray_, ");
if (surrogateTable != null) {
f.write("fromUnicodeArray_,");
if (quadTable != null) {
f.write(
"toUnicodeSurrogateMappings,toUnicodeTripletMappings,toUnicodeQuadMappings);\n");
} else if (tripletTable != null) {
f.write("toUnicodeSurrogateMappings,toUnicodeTripletMappings);\n");
} else {
f.write("toUnicodeSurrogateMappings,null);\n");
}
} else {
f.write("fromUnicodeArray_);\n");
}
f.write(" }\n}\n");
f.close();
} catch (Exception e) {
e.printStackTrace();
}
System.out.print("Done.\n");
}
private static void writeTable(FileWriter f, char[] table, int start,
int length) throws IOException {
for (int i = start; i < start + length; i = i + 16) {
if (showOffsets_) {
f.write("/* " + Integer.toHexString(i) + " */ \"");
} else {
f.write(" \"");
}
for (int j = 0; j < 16 && (i + j) < start + length; ++j) {
int num = (int) table[i + j]; // these each contain 2 single
// byte chars, but we write it
// like this to save space
if (num == 0x0008)
f.write("\\b");
else if (num == 0x0009)
f.write("\\t");
// else if(num == 0x000A) f.write("\\r");
else if (num == 0x000A)
f.write("\\n");
else if (num == 0x000C)
f.write("\\f");
// else if(num == 0x000D) f.write("\\n");
else if (num == 0x000D)
f.write("\\r");
else if (num == 0x0022)
f.write("\\\"");
else if (num == 0x0027)
f.write("\\'");
else if (num == 0x005C)
f.write("\\\\");
else {
String s = "\\u";
if (num < 16)
s += "0";
if (num < 256)
s += "0";
if (num < 4096)
s += "0";
s += Integer.toHexString(num).toUpperCase();
f.write(s);
}
if (codePointPerLine_) {
if (j < 15) {
if (showOffsets_) {
f.write("\" +\n/* " + Integer.toHexString(i + j + 1) + " */ \"");
} else {
f.write("\" +\n \"");
}
}
}
}
if (i + 16 < start + length)
f.write("\" +\n");
else
f.write("\";\n");
}
}
private static boolean verifyRoundTrip(char[] tableToUnicode,
char[] tableToEbcdic, boolean ebcdicIsDBCS, int ccsid,
char[][] surrogateTable, char[][] tripletTable, char[][] quadTable) {
String ebcdicPrefix = "X";
if (ebcdicIsDBCS) {
ebcdicPrefix = "GX";
}
// Make sure substitution is correct for Unicode to ebcdic
if (!ebcdicIsDBCS) {
int piece = 0xFFFF & tableToEbcdic[0x1a / 2];
if ((piece >> 8) != 0x3f) {
System.out.println("Fixing sub char in tableToEbcdic == sub was 0x"
+ Integer.toHexString(piece >> 8));
piece = (0x3f << 8) | (0xFF & piece);
tableToEbcdic[0x1a / 2] = (char) piece;
}
}
System.out.println("Checking round trip from EBCDIC for CCSID " + ccsid);
boolean passed = true;
StringBuffer sb1 = new StringBuffer();
StringBuffer sb2 = new StringBuffer();
StringBuffer sb3 = new StringBuffer();
char[] char1Buffer = new char[1];
char[] char1Buffer2 = new char[1];
char unicodeChars[];
for (int i = 0; i < tableToUnicode.length; i++) {
char1Buffer[0] = tableToUnicode[i];
if ((char1Buffer[0] == 0xd800) && (surrogateTable != null)) {
unicodeChars = surrogateTable[i];
} else if ((char1Buffer[0] == 0xd801) && (tripletTable != null)) {
unicodeChars = tripletTable[i];
} else if ((char1Buffer[0] == 0xd802) && (quadTable != null)) {
unicodeChars = quadTable[i];
} else {
unicodeChars = char1Buffer;
}
if (unicodeChars[0] != 0xfffd) {
int ebcdicChar = 0;
if (ebcdicIsDBCS) {
if (unicodeChars.length == 1) {
ebcdicChar = 0xFFFF & tableToEbcdic[unicodeChars[0]];
} else if (unicodeChars.length == 2) {
ebcdicChar = findEbcdicSurrogate(surrogateTable, unicodeChars);
} else if (unicodeChars.length == 3) {
ebcdicChar = findEbcdicTriplet(tripletTable, unicodeChars);
} else if (unicodeChars.length == 4) {
ebcdicChar = findEbcdicQuad(quadTable, unicodeChars);
}
} else {
int piece = 0xFFFF & tableToEbcdic[unicodeChars[0] / 2];
if (unicodeChars[0] % 2 == 0) {
ebcdicChar = piece >> 8;
} else {
ebcdicChar = piece & 0xFF;
}
}
if (i != ebcdicChar) {
if ((unicodeChars[0] != 0x1a) && (unicodeChars[0] != 0xd800)
&& (unicodeChars[0] != 0xd801)
&& ((ebcdicChar == 0xFEFE) || (ebcdicChar == 0x3F))) {
sb1.append("Fixing up EBCDIC RoundTrip Failure " + ebcdicPrefix
+ "'" + Integer.toHexString(i) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChars[0]) + "'" + " -> "
+ ebcdicPrefix + "'" + Integer.toHexString(ebcdicChar) + "'\n");
if (ebcdicIsDBCS) {
tableToEbcdic[unicodeChars[0]] = (char) i;
} else {
int piece = 0xFFFF & tableToEbcdic[unicodeChars[0] / 2];
if (unicodeChars[0] % 2 == 0) {
piece = (i << 8) | (piece & 0x00FF);
} else {
piece = (piece & 0xFF00) | i;
}
tableToEbcdic[unicodeChars[0] / 2] = (char) piece;
}
passed = false;
} else {
char[] unicodeChars2;
try {
char1Buffer2[0] = tableToUnicode[ebcdicChar];
if (char1Buffer2[0] == 0xd800) {
unicodeChars2 = surrogateTable[ebcdicChar];
} else if (char1Buffer2[0] == 0xd801) {
unicodeChars2 = tripletTable[ebcdicChar];
} else {
unicodeChars2 = char1Buffer2;
}
} catch (ArrayIndexOutOfBoundsException e) {
System.out.println("ERROR.. ArrayIndexOutOfBounds");
System.out.println(
"ebcdicChar=0x" + Integer.toHexString((int) ebcdicChar));
System.out.println("i=" + i);
System.out.println("unicodeChar=0x"
+ Integer.toHexString((int) unicodeChars[0]));
throw e;
}
boolean matches = true;
if (unicodeChars2.length != unicodeChars.length) {
matches = false;
} else {
for (int j = 0; j < unicodeChars.length; j++) {
if (unicodeChars[j] != unicodeChars2[j]) {
matches = false;
}
}
}
if (matches) {
sb2.append("Secondary EBCDIC mapping " + ebcdicPrefix + "'"
+ Integer.toHexString(i) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars.length; j++) {
sb2.append(Integer.toHexString(unicodeChars[j]) + ".");
}
sb2.append("'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars2.length; j++) {
sb2.append(Integer.toHexString(unicodeChars2[j]) + ".");
}
sb2.append("'\n");
} else {
sb3.append("EBCDIC RoundTrip Failure 2 " + ebcdicPrefix + "'"
+ Integer.toHexString(i) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars.length; j++) {
sb3.append(Integer.toHexString(unicodeChars[j]) + ".");
}
sb3.append("'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars2.length; j++) {
sb3.append(Integer.toHexString(unicodeChars2[j]) + ".");
}
sb3.append("'\n");
passed = false;
}
}
}
}
}
System.out.println(sb2);
System.out.println(sb1);
System.out.println(sb3);
sb1.setLength(0);
sb2.setLength(0);
sb3.setLength(0);
for (int i = 0; i < tableToEbcdic.length; i++) {
int ebcdicChar;
if (ebcdicIsDBCS) {
ebcdicChar = 0xFFFF & tableToEbcdic[i];
} else {
int piece = 0xFFFF & tableToEbcdic[i / 2];
if (i % 2 == 0) {
ebcdicChar = piece >> 8;
} else {
ebcdicChar = piece & 0xFF;
}
}
if ((ebcdicChar != 0xfefe) && (ebcdicChar != 0x3f)) {
char1Buffer[0] = tableToUnicode[ebcdicChar];
if (char1Buffer[0] == 0xd800) {
unicodeChars = surrogateTable[ebcdicChar];
} else if (char1Buffer[0] == 0xd801) {
unicodeChars = tripletTable[ebcdicChar];
} else {
unicodeChars = char1Buffer;
}
if (i != unicodeChars[0]) {
if (unicodeChars[0] == 0xFFFD) {
sb1.append("Unicode RoundTrip Failure UX'" + Integer.toHexString(i)
+ "'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars.length; j++) {
sb1.append(Integer.toHexString(unicodeChars[j]) + ".");
}
sb1.append("'\n");
if (tableToUnicode[ebcdicChar] == 0xfffd) {
tableToUnicode[ebcdicChar] = (char) i;
char1Buffer[0] = tableToUnicode[ebcdicChar];
unicodeChars = char1Buffer;
sb1.append("Fixed up ................ UX'"
+ Integer.toHexString(i) + "'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars.length; j++) {
sb1.append(Integer.toHexString(unicodeChars[j]) + ".");
}
sb1.append("'\n");
}
passed = false;
} else {
int ebcdicChar2 = 0;
if (ebcdicIsDBCS) {
if (unicodeChars.length == 1) {
ebcdicChar2 = 0xFFFF & tableToEbcdic[unicodeChars[0]];
} else if (unicodeChars.length == 2) {
ebcdicChar2 = findEbcdicSurrogate(surrogateTable, unicodeChars);
} else if (unicodeChars.length == 3) {
ebcdicChar2 = findEbcdicTriplet(tripletTable, unicodeChars);
}
} else {
int piece = 0xFFFF & tableToEbcdic[unicodeChars[0] / 2];
if (unicodeChars[0] % 2 == 0) {
ebcdicChar2 = piece >> 8;
} else {
ebcdicChar2 = piece & 0xFF;
}
}
if (ebcdicChar2 == ebcdicChar) {
sb2.append("Secondary Unicode mapping UX'"
+ Integer.toHexString(i) + "'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars.length; j++) {
sb2.append(Integer.toHexString(unicodeChars[j]) + ".");
}
sb2.append("'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar2) + "'\n");
} else {
sb3.append("Unicode RoundTrip Failure 2 UX'"
+ Integer.toHexString(i) + "'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'");
for (int j = 0; j < unicodeChars.length; j++) {
sb3.append(Integer.toHexString(unicodeChars[j]) + ".");
}
sb3.append("'" + " -> " + ebcdicPrefix + "'"
+ Integer.toHexString(ebcdicChar2) + "'\n");
passed = false;
}
}
}
}
}
System.out.println(sb2);
System.out.println(sb1);
System.out.println(sb3);
return passed;
}
private static int findEbcdicQuad(char[][] quadTable, char[] unicodeChars) {
for (int i = 0; i < quadTable.length; i++) {
if (quadTable[i] != null) {
if ((quadTable[i][0] == unicodeChars[0])
&& (quadTable[i][1] == unicodeChars[1])
&& (quadTable[i][2] == unicodeChars[2])
&& (quadTable[i][3] == unicodeChars[3])) {
return i;
}
}
}
return 0;
}
private static int findEbcdicTriplet(char[][] tripletTable,
char[] unicodeChars) {
for (int i = 0; i < tripletTable.length; i++) {
if (tripletTable[i] != null) {
if ((tripletTable[i][0] == unicodeChars[0])
&& (tripletTable[i][1] == unicodeChars[1])
&& (tripletTable[i][2] == unicodeChars[2])) {
return i;
}
}
}
return 0;
}
private static int findEbcdicSurrogate(char[][] surrogateTable,
char[] unicodeChars) {
for (int i = 0; i < surrogateTable.length; i++) {
if (surrogateTable[i] != null) {
if ((surrogateTable[i][0] == unicodeChars[0])
&& (surrogateTable[i][1] == unicodeChars[1])) {
return i;
}
}
}
return 0;
}
private static final char repSig = '\uFFFF'; // compression indication
// character
private static final char cic_ = repSig;
private static final char rampSig = '\uFFFE'; // ramp indication character
private static final char ric_ = rampSig;
private static final char hbSig = '\u0000'; // high-byte compression
// indication character
private static final char pad = '\u0000'; // pad character
static int repeatCheck(char[] arr, int startingIndex) {
int index = startingIndex + 1;
while (index < arr.length && arr[index] == arr[index - 1]) {
++index;
}
return (index - startingIndex);
}
static final int rampCheck(char[] arr, int startingIndex) {
int index = startingIndex + 1;
while (index < arr.length && arr[index] == arr[index - 1] + 1) {
++index;
}
return (index - startingIndex);
}
static int hbCheck(char[] arr, int startingIndex) {
int index = startingIndex + 1;
while (index < arr.length) {
// check for repeat
// for 6 repeated chars, we'd need either 3 hb-compressed chars or 3
// repeatsig chars, so it's a toss up
if (repeatCheck(arr, index) > 6)
return (index - startingIndex); // at this point though, it's better to
// stop and do the repeat
// check for ramp, same reason
if (rampCheck(arr, index) > 6)
return (index - startingIndex);
// OK, finally check for hb
if ((arr[index] & 0xFF00) != (arr[index - 1] & 0xFF00))
return (index - startingIndex);
++index;
}
return (index - startingIndex);
}
static int numRepeats;
static int numRamps;
static int hbRepeats;
static int charRepeats;
// This is the new way - 05/04/2000.
static char[] compressBetter(char[] arr) {
numRepeats = 0;
numRamps = 0;
hbRepeats = 0;
charRepeats = 0;
// This uses the "correct" compression scheme from my invention disclosure
// It also employs high-byte compression, something that I did not include
// in my disclosure.
StringBuffer buf = new StringBuffer();
for (int i = 0; i < arr.length; ++i) {
int repNum = repeatCheck(arr, i);
if (repNum > 3) // had enough repeats
{
numRepeats++;
buf.append(repSig);
buf.append((char) repNum);
buf.append(arr[i]);
i += repNum - 1;
} else {
int rampNum = rampCheck(arr, i);
if (rampNum > 3) // had enough in the ramp
{
numRamps++;
buf.append(rampSig);
buf.append((char) rampNum);
buf.append(arr[i]);
i += rampNum - 1;
} else {
int hbNum = hbCheck(arr, i);
--hbNum; // don't include the first char, since we always append it.
if (hbNum >= 6) {
// System.out.print("HBNUM is "+Integer.toHexString((int)hbNum)+";
// ");
hbRepeats++;
// pattern is this: ss ss nn nn hh tt xx xx xx xx ...
// where ss ss is hbSig
// nn nn is hbNum
// hh tt is the first char (hh is the repeated high byte)
// xx is the lower byte of the next char in the sequence
// xx repeats hbNum/2 times so that
// hbNum is the total number of repeated db chars in the ciphertext,
// not including the first char.
// Note that there may be, in actuality, hbNum*2 +1 chars in the
// cleartext that fit into the
// conversion, but since we'd have to fill out the last char with an
// empty byte, there's no point
// in doing it anyway. Besides, it might be able to be compressed
// via another scheme with itself as
// the starting character.
// int start = i;
buf.append(hbSig);
if ((0xFFFFFFFFl & hbNum) % 2 == 1) // odd number
{
--hbNum; // no point in doing the last char
}
// System.out.println("Appending
// "+Integer.toHexString((int)((char)(hbNum/2))));
buf.append((char) (hbNum / 2)); // hbNum is always even, so this
// comes out.
// System.out.print("hb comp: "+Integer.toHexString(hbNum)+": ");
// for (int b=0; b>> 8));
char c2 = (char) (highByteMask + (0x00FF & both));
buf.append(c1);
buf.append(c2);
// System.out.print(Integer.toHexString((int)c1)+"
// "+Integer.toHexString((int)c2)+" ");
}
// System.out.println(Integer.toHexString((int)arr[i+hbNum]));
i = i + hbNum - 1;
}
} else {
buf.append(arr[i]);
charRepeats++;
}
}
System.out.println("Decompression stats: " + numRepeats + " repeats, "
+ numRamps + " ramps, " + hbRepeats + " highbytes, " + charRepeats
+ " regular.");
numRepeats = 0;
numRamps = 0;
hbRepeats = 0;
charRepeats = 0;
return buf.toString().toCharArray();
}
// This is the old way
static char[] compress(char[] arr) {
if (arr.length < 3)
return arr;
StringBuffer buf = new StringBuffer();
char oldold = arr[0];
char old = arr[1];
int count = 0;
boolean inCompression = false; // this flags if we are repeating the same
// character
boolean inRamp = false; // this flags if each subsequent characters is the
// previous character + 1
for (int i = 2; i < arr.length; ++i) {
if (!inCompression && !inRamp) {
if (arr[i] == old && arr[i] == oldold) // Check for repeating
{
inCompression = true;
buf.append(cic_);
buf.append(oldold);
count = 3;
} else if (arr[i] == old + 1 && arr[i] == oldold + 2) // Check for ramp
{
inRamp = true;
buf.append(ric_);
buf.append(oldold);
} else if (oldold == cic_) // Check for duplicate cic
{
buf.append(cic_);
} else if (oldold == ric_) // Check for duplicate ric
{
buf.append(ric_);
} else // Just copy it normal
{
buf.append(oldold);
}
oldold = old;
old = arr[i];
} else if (inCompression) {
if (arr[i] == old && arr[i] == oldold) // Still repeating?
{
++count;
oldold = old;
old = arr[i];
} else // Not repeating anymore
{
inCompression = false;
char c = (char) count;
if (count == 0x0008)
c = '\b';
else if (count == 0x0009)
c = '\t';
// else if(count == 0x000A) c = '\r'; // Had trouble with 0x0A and
// 0x0D getting messed up.
else if (count == 0x000A)
c = '\n'; // Had trouble with 0x0A and 0x0D getting messed up.
else if (count == 0x000C)
c = '\f';
// else if(count == 0x000D) c = '\n';
else if (count == 0x000D)
c = '\r';
else if (count == 0x0022)
c = '\"';
else if (count == 0x0027)
c = '\'';
else if (count == 0x005C)
c = '\\';
buf.append(c);
oldold = arr[i++]; // yes this is right... think about it.
if (i < arr.length) {
old = arr[i];
} else {
old = 0;
}
}
} else { // must be in ramp
if (arr[i] == old + 1 && arr[i] == oldold + 2) {
oldold = old;
old = arr[i];
} else {
inRamp = false;
buf.append(old);
oldold = arr[i++];
old = arr[i];
}
}
}
if (inCompression) {
char c = (char) count;
if (count == 0x0008)
c = '\b';
else if (count == 0x0009)
c = '\t';
else if (count == 0x000A)
c = '\n';
else if (count == 0x000C)
c = '\f';
// else if(count == 0x000D) c = '\n';
else if (count == 0x000D)
c = '\r';
else if (count == 0x0022)
c = '\"';
else if (count == 0x0027)
c = '\'';
else if (count == 0x005C)
c = '\\';
buf.append(c);
}
if (inRamp) {
buf.append(old);
}
return buf.toString().toCharArray();
}
static void writeHeader(FileWriter f, int ccsid, String system)
throws Exception {
int dotIndex = system.indexOf('.');
if (dotIndex > 0) {
system = system.substring(0, dotIndex);
}
Date currentDate = new Date();
// Look up the version dynamically
Class copyrightClass = Copyright.class;
Field field = copyrightClass.getField("version");
String jtopenVersion = (String) field.get(null);
f.write(
"///////////////////////////////////////////////////////////////////////////////\n");
f.write("//\n");
f.write("// JTOpen (IBM Toolbox for Java - OSS version)\n");
f.write("//\n");
f.write("// Filename: ConvTable" + ccsid + ".java\n");
f.write("//\n");
f.write(
"// The source code contained herein is licensed under the IBM Public License\n");
f.write(
"// Version 1.0, which has been approved by the Open Source Initiative.\n");
f.write(
"// Copyright (C) 1997-2016 International Business Machines Corporation and\n");
f.write("// others. All rights reserved.\n");
f.write("//\n");
f.write("// Generated " + currentDate + " from " + system + "\n");
StringBuffer sb = new StringBuffer();
if (compress_ == false)
sb.append(" -nocompress");
if (ascii_ == true)
sb.append(" -ascii");
if (bidi_ == true)
sb.append(" -bidi");
if (showOffsets_ == true)
sb.append(" -showOffsets");
if (codePointPerLine_ == true)
sb.append(" -codePointPerLine");
if (useJdbc_ == true)
sb.append(" -useJdbc");
if (sb.length() > 0) {
f.write("// Generation Options:" + sb.toString() + "\n");
}
f.write("// Using " + jtopenVersion + "\n");
f.write(
"///////////////////////////////////////////////////////////////////////////////\n\n");
f.write("package com.ibm.as400.access;\n\n");
}
//
// Methods for using JDBC to handle the translation tables
//
static boolean jdbcIsDBCS(Connection connection, int ccsid)
throws SQLException {
boolean isDBCS;
Statement stmt = connection.createStatement();
try {
stmt.executeUpdate("CREATE TABLE QTEMP.GENERATE" + ccsid
+ "(C1 VARCHAR(80) CCSID " + ccsid + ")");
isDBCS = false;
} catch (SQLException sqlex) {
int sqlcode = sqlex.getErrorCode();
if (sqlcode == -189) { // CCSID not valid
stmt.executeUpdate("CREATE TABLE QTEMP.GENERATE" + ccsid
+ "(C1 VARGRAPHIC(80) CCSID " + ccsid + ")");
isDBCS = true;
} else {
throw sqlex;
}
}
stmt.close();
return isDBCS;
}
private static char[] jdbcToEbcdic(Connection connection, int ccsid)
throws Exception {
if (ccsid > 1000000) {
ccsid = ccsid - 1000000;
}
PreparedStatement ps = connection.prepareStatement(
"select cast(CAST(CAST(? AS DBCLOB(1M) CCSID 1200) AS CLOB(1M) CCSID "
+ ccsid + ") as BLOB(1M)) from sysibm.sysdummy1");
char[] allChar65536 = new char[65536];
for (int i = 0; i < 0xD800; i++) {
allChar65536[i] = (char) i;
}
// Fill in the surrogate range with substitution characters
for (int i = 0xD800; i < 0xF900; i++) {
allChar65536[i] = '\u001a';
}
for (int i = 0xF900; i < 0x10000; i++) {
allChar65536[i] = (char) i;
}
Clob clob = ((AS400JDBCConnection) connection).createClob();
clob.setString(1, new String(allChar65536));
ps.setClob(1, clob);
ResultSet rs = ps.executeQuery();
rs.next();
byte[] byteAnswer = rs.getBytes(1);
if (byteAnswer.length != 65536) {
// We must have shift.out shift in combination that we don't use for
// single byte conversions
// Remove them.
byteAnswer = removeDoubleByteEbcdic(byteAnswer);
}
rs.close();
ps.close();
char[] answer = new char[byteAnswer.length / 2];
for (int i = 0; i < byteAnswer.length; i += 2) {
answer[i
/ 2] = (char) ((byteAnswer[i] << 8) | (0xFF & byteAnswer[i + 1]));
}
return answer;
}
private static byte[] removeDoubleByteEbcdic(byte[] inBytes)
throws Exception {
byte[] outBytes = new byte[65536];
// Just copy the control characters
System.arraycopy(inBytes, 0, outBytes, 0, 0x20);
int toIndex = 0x20;
boolean singleByte = true;
for (int i = 0x20; i < inBytes.length; i++) {
byte b = inBytes[i];
if (singleByte) {
if (b == 0x0E) {
singleByte = false;
} else if (b == 0x0F) {
throw new Exception("Illegal 0x0f found in singleByte mode");
} else {
if (toIndex >= outBytes.length) {
throw new Exception("ERROR: toIndexInvalid");
}
outBytes[toIndex] = b;
toIndex++;
}
} else {
if (b == 0x0F) {
singleByte = true;
} else if (b == 0x0e) {
throw new Exception("Illegal 0x0e found in doubleByte mode");
} else {
if (toIndex >= outBytes.length) {
throw new Exception("ERROR: toIndexInvalid");
}
outBytes[toIndex] = 0x3f;
toIndex++;
i++; // Skip extra DB charater
}
}
}
if (toIndex != 65536) {
throw new Exception("To index is " + toIndex + " should be 65536");
}
return outBytes;
}
private static char[] removeSingleByteEbcdic(byte[] inBytes,
boolean mixedCcsid) throws Exception {
char[] outChars = new char[65536];
boolean singleByte = false;
if (mixedCcsid)
singleByte = true;
int toIndex = 0x0;
for (int i = 0x0; i < inBytes.length; i++) {
byte b = inBytes[i];
if (singleByte) {
if (b == 0x0E) {
singleByte = false;
} else if (b == 0x0F) {
throw new Exception("Illegal 0x0f found in singleByte mode");
} else {
outChars[toIndex] = '\uFEFE';
toIndex++;
}
} else {
if (b == 0x0F && mixedCcsid) {
singleByte = true;
} else if (b == 0x0e && mixedCcsid) {
throw new Exception("Illegal 0x0e found in doubleByte mode");
} else {
outChars[toIndex] = (char) ((b << 8) | (0xFF & inBytes[i + 1]));
toIndex++;
i++;
}
}
}
if (toIndex != 65536) {
throw new Exception("To index is " + toIndex + " should be 65536");
}
return outChars;
}
private static char[] removeSingleByteEbcdicAndSpaces(byte[] inBytes,
boolean mixedCcsid) throws Exception {
char[] outChars = new char[65536];
boolean singleByte = false;
if (mixedCcsid)
singleByte = true;
int toIndex = 0x0;
for (int i = 0x0; i < inBytes.length; i++) {
byte b = inBytes[i];
if (singleByte) {
if (b == 0x0E) {
singleByte = false;
} else if (b == 0x0F) {
throw new Exception("Illegal 0x0f found in singleByte mode");
} else {
outChars[toIndex] = '\uFEFE';
toIndex++;
// Skip over next space if there
if (inBytes[i+1] == 0x40) {
i++;
}
}
} else {
if (b == 0x0F && mixedCcsid) {
singleByte = true;
// Skip over the next space if there
if (inBytes[i+1] == 0x40) {
i++;
}
} else if (b == 0x0e && mixedCcsid) {
throw new Exception("Illegal 0x0e found in doubleByte mode");
} else {
outChars[toIndex] = (char) ((b << 8) | (0xFF & inBytes[i + 1]));
toIndex++;
i++;
}
}
}
if (toIndex != 65536) {
throw new Exception("To index is " + toIndex + " should be 65536");
}
return outChars;
}
private static char[] jdbcToUnicode(Connection connection, int ccsid)
throws SQLException {
if (ccsid > 1000000) {
ccsid = ccsid - 1000000;
}
PreparedStatement ps = connection.prepareStatement(
"select cast(CAST(CAST(? AS VARCHAR(256) FOR BIT DATA) AS VARCHAR(256) CCSID "
+ ccsid + ") as VARGRAPHIC(256) CCSID 1200) from sysibm.sysdummy1");
byte[] all256 = new byte[256];
for (int i = 0; i < 256; i++) {
// For 0x0E and 0x0F, they do not translate correctly for mixed CCSID
// We will fix them up below.
if (i == 0x0E) {
all256[i] = (byte) 0x3F;
} else if (i == 0x0F) {
all256[i] = (byte) 0x3F;
} else {
all256[i] = (byte) i;
}
}
ps.setBytes(1, all256);
ResultSet rs = ps.executeQuery();
rs.next();
String answer = rs.getString(1);
rs.close();
ps.close();
char[] charAnswer = answer.toCharArray();
// Fix 0x0e/ 0x0F
charAnswer[0x0e] = '\u000e';
charAnswer[0x0f] = '\u000f';
return charAnswer;
}
private static char[] jdbcToEbcdicDBCS(Connection connection, int ccsid)
throws Exception {
boolean mixedCcsid = false;
if (ccsid > 2000000) {
ccsid = ccsid - 2000000;
mixedCcsid = true;
}
PreparedStatement ps;
try {
ps = connection.prepareStatement(
"select cast(CAST(CAST(? AS DBCLOB(1M) CCSID 1200) AS DBCLOB(1M) CCSID "
+ ccsid + ") as BLOB(1M)) from sysibm.sysdummy1");
} catch (SQLException sqlex) {
String message = sqlex.toString();
// If DBCLOB is not valid, try clob
if (message.indexOf("SQL0189") >= 0) {
ps = connection.prepareStatement(
"select cast(CAST(CAST(? AS DBCLOB(1M) CCSID 1200) AS CLOB(1M) CCSID "
+ ccsid + ") as BLOB(1M)) from sysibm.sysdummy1");
} else {
throw sqlex;
}
}
char[] answer;
// Workaround for CCSID 1399 bug
if (ccsid != 1399) {
char[] allChar65536 = new char[65536];
for (int i = 0; i < 0xD800; i++) {
if (i <= 0x80) {
allChar65536[i] = (char) '\u001a';
} else {
allChar65536[i] = (char) i;
}
}
// Fill in the surrogate range with double width substitution characters
for (int i = 0xD800; i < 0xE000; i++) {
allChar65536[i] = '\ufffd';
}
for (int i = 0xE000; i < 0x10000; i++) {
// Don't translate Variation selectors
if (i >= 0xFE00 && i <= 0xFF0F) {
allChar65536[i] = '\ufffd';
} else {
allChar65536[i] = (char) i;
}
}
Clob clob = ((AS400JDBCConnection) connection).createClob();
clob.setString(1, new String(allChar65536));
ps.setClob(1, clob);
ResultSet rs = ps.executeQuery();
rs.next();
byte[] byteAnswer = rs.getBytes(1);
rs.close();
ps.close();
if (byteAnswer.length != 2 * 65536) {
// We must have shift.out shift.in combinations.
// We need to remove the single bytes.
answer = removeSingleByteEbcdic(byteAnswer, mixedCcsid);
} else {
answer = new char[byteAnswer.length / 2];
for (int i = 0; i < byteAnswer.length; i += 2) {
answer[i
/ 2] = (char) ((byteAnswer[i] << 8) | (0xFF & byteAnswer[i + 1]));
}
}
} else {
// CCSID 1399 path - Put spaces between to fix problem
char[] allChar65536 = new char[65536 * 2];
for (int i = 0; i < 0xD800; i++) {
if (i <= 0x80) {
allChar65536[2*i] = (char) '\u001a';
} else {
allChar65536[2*i] = (char) i;
}
allChar65536[2*i+1]=' ';
}
// Fill in the surrogate range with double width substitution characters
for (int i = 0xD800; i < 0xE000; i++) {
allChar65536[2*i] = '\ufffd';
allChar65536[2*i+1]=' ';
}
for (int i = 0xE000; i < 0x10000; i++) {
// Don't translate Variation selectors
if (i >= 0xFE00 && i <= 0xFF0F) {
allChar65536[2*i] = '\ufffd';
} else {
allChar65536[2*i] = (char) i;
}
allChar65536[2*i+1]=' ';
}
Clob clob = ((AS400JDBCConnection) connection).createClob();
clob.setString(1, new String(allChar65536));
ps.setClob(1, clob);
ResultSet rs = ps.executeQuery();
rs.next();
byte[] byteAnswer = rs.getBytes(1);
rs.close();
ps.close();
answer = removeSingleByteEbcdicAndSpaces(byteAnswer, mixedCcsid);
}
return answer;
}
private static char[] jdbcToUnicodeSpacesDBCS(Connection connection,
int ccsid) throws SQLException {
// The database doesn't allow CLOB CCSID 65535 or BLOB to be converted to
// CLOB / DBCLOB. We'll need to process this in pieces.
// The database doesn't allow VARCHAR CCSID 65535 to be convert to GRAPHIC.
// Need to figure out how to handle that also.
// Create a huge SQL statement with the necessary literals.
// For this to work, the job must be changed to the specified CCSID if mixed
// or the associated CCSID if no double byte
boolean mixed = false;
// Start at 0x00 and handle 8192 double byte characters at at time
// int BLOCKSIZE = 8192;
int BLOCKSIZE = 1024;
String sql = "select cast(INTERPRET(CAST(? AS CHAR(" + (BLOCKSIZE * 4)
+ ") FOR BIT DATA) AS GRAPHIC(" + (BLOCKSIZE * 2) + ") CCSID " + ccsid
+ ") as VARGRAPHIC(8200) CCSID 1200) from sysibm.sysdummy1";
if (ccsid > 2000000) {
ccsid = ccsid - 2000000;
mixed = true;
sql = "select cast(CAST(CAST(? AS VARCHAR(16390) FOR BIT DATA) AS VARCHAR(16390) CCSID "
+ ccsid + ") as VARGRAPHIC(8200) CCSID 1200) from sysibm.sysdummy1";
}
PreparedStatement ps = connection.prepareStatement(sql);
int OUTERLOOP = 65536 / BLOCKSIZE;
byte[] piece8192;
if (mixed) {
piece8192 = new byte[BLOCKSIZE * 4 + 2];
} else {
piece8192 = new byte[BLOCKSIZE * 4];
}
int offset = 0;
if (mixed) {
piece8192[0] = 0x0E;
piece8192[BLOCKSIZE * 4 + 1] = 0x0F;
offset = 1;
}
StringBuffer sb = new StringBuffer();
int cp;
for (int i = 0; i < OUTERLOOP; i++) {
for (int j = 0; j < BLOCKSIZE; j++) {
cp = i * BLOCKSIZE + j;
// Filter out all the 0E/0F for mixed CCSIDS
// Filter out the lower 0x100 for mixed CCSIDS
if (mixed) {
if (cp < 0x100 || (cp / 256) == 0x0E || (cp / 256) == 0x0F
|| (cp % 256) == 0x0E || (cp % 256) == 0x0F) {
cp = 0xFEFE;
}
}
piece8192[offset + 4 * j] = (byte) (cp / 256);
piece8192[offset + 4 * j + 1] = (byte) cp;
// Separate the translated characters with double byte spaces.
piece8192[offset + 4 * j + 2] = (byte) 0x40;
piece8192[offset + 4 * j + 3] = (byte) 0x40;
} /* for j */
if (offset == 1) {
piece8192[BLOCKSIZE * 4 + 1] = 0x0F;
}
ps.setBytes(1, piece8192);
ResultSet rs = ps.executeQuery();
rs.next();
String answer = rs.getString(1);
if (answer == null) {
System.out.println(
"ERROR: got null processing block " + i + " of size " + BLOCKSIZE);
System.out.println("INPUT BYTES: = " + dumpBytes(" ", piece8192));
} else if (answer.length() != BLOCKSIZE) {
// This is OK since there may be surrogate pairs which will be handled
// later
//
// System.out.println("ERROR: got size = "+answer.length()+" processing
// block "+i+" of size "+BLOCKSIZE);
// System.out.println("INPUT BYTES: = "+dumpBytes(" ",piece8192));
// System.out.println("OUTPUT STRING: = "+dumpUnicodeString(" ",
// answer));
}
sb.append(answer);
rs.close();
} /* for i */
ps.close();
return sb.toString().toCharArray();
} /* jdbcToUnicodeDBCS */
private static String dumpUnicodeString(String pad, String data) {
StringBuffer sb = new StringBuffer();
char[] charArray = data.toCharArray();
for (int i = 0; i < charArray.length; i++) {
sb.append(pad);
int value = 0xffff & charArray[i];
if (value < 0x10)
sb.append("0");
if (value < 0x100)
sb.append("0");
if (value < 0x1000)
sb.append("0");
sb.append(Integer.toHexString(value));
}
return sb.toString();
}
private static String dumpBytes(String pad, byte[] block) {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < block.length; i++) {
sb.append(pad);
int value = 0xff & block[i];
if (value < 0x10)
sb.append("0");
sb.append(Integer.toHexString(value));
}
return sb.toString();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy