///////////////////////////////////////////////////////////////////////////////
//
// JTOpen (IBM Toolbox for Java - OSS version)
//
// Filename: GenerateConverterTable.java
//
// The source code contained herein is licensed under the IBM Public License
// Version 1.0, which has been approved by the Open Source Initiative.
// Copyright (C) 1997-2016 International Business Machines Corporation and
// others. All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////
package com.ibm.as400.access;
import java.io.*;
import java.lang.reflect.Field;
import java.util.*;
public class GenerateConverterTable
{
private static final String copyright = "Copyright (C) 1997-2016 International Business Machines Corporation and others.";
static AS400 sys = null;
static boolean compress_ = true; // Compress the conversion table
// Note: turn this off for debugging purposes
static boolean codePointPerLine_ = false; // Should only 1 code point be printed per line
static boolean ascii_ = false; // Indicates if listed ccsids are ascii tables or not
static boolean bidi_ = false; // Indicates if listed ccsids are bidi tables or not
// Note: bidi_ and ascii_ cannot both be true
static boolean showOffsets_ = false; // Indicates if the offsets should be printed in the tables
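// For example, a run against a hypothetical host (the system name, user ID, and password below are
// placeholders) that generates uncompressed tables for CCSIDs 37 and 1399 might look like:
//   java com.ibm.as400.access.GenerateConverterTable mysystem myuserid mypassword -nocompress 37 1399
// Note that the option flags are only recognized in the fixed order shown in the usage string.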
public static void main(String[] args)
{
if (args.length < 4)
{
System.out.println("Usage: java com.ibm.as400.access.GenerateConverterTable system uid pwd [-nocompress] [-ascii] [-bidi] [-showOffsets] [-codePointPerLine] ccsid [ccsid2] [ccsid3] [ccsid4] ...");
System.exit(0);
}
try
{
sys = new AS400(args[0], args[1], args[2]);
sys.connectService(AS400.CENTRAL);
}
catch (Exception e)
{
e.printStackTrace();
System.exit(0);
}
int start = 3;
if (args[start].equals("-nocompress"))
{
compress_ = false;
++start;
}
if (args[start].equals("-ascii"))
{
ascii_ = true;
++start;
}
if (args[start].equals("-bidi"))
{
bidi_ = true;
++start;
}
if (args[start].equals("-showOffsets"))
{
showOffsets_ = true;
++start;
}
if (args[start].equals("-codePointPerLine"))
{
codePointPerLine_ = true;
++start;
}
for (int i=start; i<args.length; ++i)
{
go(Integer.parseInt(args[i]));
}
}

static void go(int ccsid)
{
char[] tableToUnicode = new char[0];
char[] tableToEbcdic = new char[0];
char[][] surrogateTable = null;
boolean table1IsDBCS = false;
try
{
AS400ImplRemote impl = (AS400ImplRemote)sys.getImpl();
NLSTableDownload down = new NLSTableDownload(impl);
down.connect();
if (ccsid == 1089) // The server cannot do 1089->13488->1089; use 61952 instead, since it would be the same anyway.
{
System.out.println("Special case for ccsid 1089.");
System.out.println("Retrieving "+ccsid+"->61952 table...");
tableToUnicode = down.download(ccsid, 61952, false);
}
else
{
System.out.println("Retrieving "+ccsid+"->13488 table...");
tableToUnicode = down.download(ccsid, 13488, false);
}
if (tableToUnicode == null || tableToUnicode.length == 0)
{
System.out.println(ccsid+" must be double-byte. Performing secondary retrieve of "+ccsid+"->1200 table...");
table1IsDBCS = true;
down.disconnect();
down.connect();
tableToUnicode = down.download(ccsid, 1200, true);
}
System.out.println(" Size: "+tableToUnicode.length);
if (tableToUnicode.length > 65536) {
System.out.println("Size is > 65536. Fixing table");
int next = 0;
int from = 0;
char[] newTable = new char[65536];
while (from < tableToUnicode.length && next < 65536) {
int c = 0xFFFF & (int) tableToUnicode[from];
if (next > 0xECAA && next <= 0xECD0) {
System.out.println("Next=0x"+Integer.toHexString(next)+" to="+Integer.toHexString(c));
}
int nextchar = 0;
if (from +1 < tableToUnicode.length) {
nextchar = 0xFFFF & (int) tableToUnicode[from+1];
}
if (
// in surrogate range
((c >= 0xD800) && (c <=0xDFFF)) ||
// Uses combining character
(nextchar == 0x309A) ||
(c != 0xFFfd && nextchar == 0x300) ||
(c != 0xffd && c != 0x300 && nextchar == 0x301) ||
// Weird cases..
(c == 0x2e5 && nextchar == 0x2e9) ||
(c == 0x2e9 && nextchar == 0x2e5)) {
// Mark as surrogate
newTable[next]=(char) 0xD800;
// add to surrogate table
if (surrogateTable == null) {
surrogateTable = new char[65536][];
}
char[] pair = new char[2];
surrogateTable[next] = pair;
pair[0] = (char) (0xFFFF & (int) tableToUnicode[from]);
pair[1] = (char) (0xFFFF & (int) tableToUnicode[from+1]);
/* System.out.println("Warning: Sub at offset "+Integer.toHexString(next)+" for "+Integer.toHexString(0xFFFF & (int) table1[from])+" "+Integer.toHexString(0xFFFF & (int) table1[from+1])); */
from +=2;
} else {
newTable[next]=(char) c;
from++;
}
next++;
}
tableToUnicode = newTable;
}
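// To illustrate the fix-up above with made-up data: if the raw download holds the surrogate pair
// 0xD840 0xDC00 at code point 0x4141, then newTable[0x4141] is set to the marker 0xD800 and
// surrogateTable[0x4141] = { 0xD840, 0xDC00 }, so the marker means "look this code point up in
// the surrogate table".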
down.disconnect();
down.connect();
if (ccsid == 1089)
{
System.out.println("Special case for ccsid 1089.");
System.out.println("Retrieving 61952->"+ccsid+" table...");
tableToEbcdic = down.download(61952, ccsid, true);
}
else
{
/* Use 1200 instead of 13488 */
System.out.println("Retrieving 1200->"+ccsid+" table...");
tableToEbcdic = down.download(1200, ccsid, true);
}
System.out.println(" Size: "+tableToEbcdic.length);
sys.disconnectAllServices();
}
catch (Exception e)
{
e.printStackTrace();
}
// Verify the mapping
verifyRoundTrip(tableToUnicode, tableToEbcdic, table1IsDBCS);
System.out.println("****************************************");
System.out.println("Verify round 2 ");
System.out.println("****************************************");
verifyRoundTrip(tableToUnicode, tableToEbcdic, table1IsDBCS);
// Compress the ccsid table
if (table1IsDBCS)
{
if (compress_)
{
System.out.println("Compressing "+ccsid+"->13488 conversion table...");
char[] arr = compress(tableToUnicode);
System.out.println("Old compression length: "+arr.length+" characters.");
char[] temparr = compressBetter(tableToUnicode);
System.out.println("New compression length: "+temparr.length+" characters.");
if (temparr.length > arr.length)
{
System.out.println("WARNING: New algorithm WORSE than old algorithm!");
}
System.out.println("Verifying compressed table...");
arr = decompressBetter(temparr);
if (arr.length != tableToUnicode.length)
{
System.out.println("Verification failed, lengths not equal: "+arr.length+" != "+tableToUnicode.length);
int c = 0;
while (c < arr.length && arr[c] == tableToUnicode[c]) ++c;
System.out.println("First mismatch at index "+c+": "+(int)arr[c]+" != "+(int)tableToUnicode[c]);
}
else
{
boolean bad = false;
for (int c=0; c"+ccsid+" conversion table...");
char[] arr = compress(tableToEbcdic);
System.out.println("Old compression length: "+arr.length+" characters.");
char[] temparr = compressBetter(tableToEbcdic);
System.out.println("New compression length: "+temparr.length+" characters.");
if (temparr.length > arr.length)
{
System.out.println("WARNING: New algorithm WORSE than old algorithm!");
}
System.out.println("Verifying compressed table...");
arr = decompressBetter(temparr);
if (arr.length != tableToEbcdic.length)
{
System.out.println("Verification failed, lengths not equal: "+arr.length+" != "+tableToEbcdic.length);
int c = 0;
while (c < arr.length && arr[c] == tableToEbcdic[c]) ++c;
System.out.println("First mismatch at index "+c+": "+(int)arr[c]+" != "+(int)tableToEbcdic[c]);
tableToEbcdic = temparr;
}
else
{
boolean bad = false;
for (int c=0; c<arr.length; ++c)
{
if (arr[c] != tableToEbcdic[c]) bad = true;
}
if (bad)
{
System.out.println("Verification failed, decompressed table does not match the original table.");
}
else
{
System.out.println("Table verified.");
tableToEbcdic = temparr;
}
}
}
// (The generated ConvTable source for this ccsid is then written out to a .java file.)
}

static boolean verifyRoundTrip(char[] tableToUnicode, char[] tableToEbcdic, boolean table1IsDBCS)
{
boolean passed = true;
StringBuffer sb1 = new StringBuffer();
StringBuffer sb2 = new StringBuffer();
StringBuffer sb3 = new StringBuffer();
for (int i = 0; i < tableToUnicode.length; i++) {
int unicodeChar = 0xFFFF & tableToUnicode[i];
if (unicodeChar != 0xFFFD) {
int ebcdicChar = 0xFFFF & tableToEbcdic[unicodeChar];
if (i != ebcdicChar) {
if (ebcdicChar == 0x3f || ebcdicChar == 0xfefe) {
sb1.append("EBCDIC RoundTrip Failure GX'"+Integer.toHexString(i)+"'"+
" -> UX'"+Integer.toHexString(unicodeChar)+"'"+
" -> GX'"+Integer.toHexString(ebcdicChar)+"'\n");
tableToEbcdic[unicodeChar] = (char) i;
passed = false;
} else {
if (ebcdicChar >= tableToUnicode.length) {
sb2.append("Invalid ebcdic char "+Integer.toHexString(ebcdicChar)+" unicodeChar="+Integer.toHexString(unicodeChar)+ " originalEbcdic="+Integer.toHexString(i)+"\n");
passed =false;
} else {
int unicodeChar2 = 0xFFFF & tableToUnicode[ebcdicChar];
if (unicodeChar2 == unicodeChar) {
sb2.append("Secondary EBCDIC mapping GX'"
+ Integer.toHexString(i) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar2) + "'\n");
} else {
sb3.append("EBCDIC RoundTrip Failure2 GX'"
+ Integer.toHexString(i) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar2) + "'\n");
passed = false;
}
}
}
}
}
}
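// In these messages GX'..' is an EBCDIC code point and UX'..' is a Unicode code point; for
// CCSID 37, for example, a clean round trip would read GX'40' -> UX'20' -> GX'40'
// (EBCDIC space to U+0020 and back).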
System.out.println(sb2);
System.out.println(sb1);
System.out.println(sb3);
sb1.setLength(0);
sb2.setLength(0);
sb3.setLength(0);
for (int i = 0; i < tableToEbcdic.length; i++) {
int ebcdicChar = 0xFFFF & tableToEbcdic[i];
if (ebcdicChar != 0xfefe && ebcdicChar != 0x3f ) {
if (ebcdicChar >= tableToUnicode.length) {
sb1.append("Unicode RoundTrip Failure UX'"
+ Integer.toHexString(i) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar) + "' -> IndxOutOfBounds\n");
} else {
int unicodeChar = 0xFFFF & tableToUnicode[ebcdicChar];
if (i != unicodeChar) {
if (unicodeChar == 0xFFFD) {
sb1.append("Unicode RoundTrip Failure UX'"
+ Integer.toHexString(i) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar) + "'\n");
passed = false;
} else {
int ebcdicChar2 = 0xFFFF & tableToEbcdic[unicodeChar];
if (ebcdicChar2 == ebcdicChar) {
sb2.append("Secondary Unicode mapping UX'"
+ Integer.toHexString(i) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar2) + "'\n");
} else {
sb3.append("Unicode RoundTrip Failure2 UX'"
+ Integer.toHexString(i) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
+ Integer.toHexString(unicodeChar) + "'" + " -> GX'"
+ Integer.toHexString(ebcdicChar2) + "'\n");
passed = false;
}
}
}
}
}
}
System.out.println(sb2);
System.out.println(sb1);
System.out.println(sb3);
return passed;
}
private static final char repSig = '\uFFFF'; // compression indication character
private static final char cic_ = repSig;
private static final char rampSig = '\uFFFE'; // ramp indication character
private static final char ric_ = rampSig;
private static final char hbSig = '\u0000'; // high-byte compression indication character
private static final char pad = '\u0000'; // pad character
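// As a rough illustration of how these markers are used by compressBetter() below: a run of six
// 'A' (0x0041) characters is written as { repSig, 0x0006, 0x0041 }, and the ascending ramp
// 0x0030..0x0035 is written as { rampSig, 0x0006, 0x0030 }.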
static int repeatCheck(char[] arr, int startingIndex)
{
int index = startingIndex+1;
while (index < arr.length && arr[index] == arr[index-1])
{
++index;
}
return(index-startingIndex);
}
static final int rampCheck(char[] arr, int startingIndex)
{
int index = startingIndex+1;
while (index < arr.length && arr[index] == arr[index-1]+1)
{
++index;
}
return(index-startingIndex);
}
static int hbCheck(char[] arr, int startingIndex)
{
int index = startingIndex+1;
while (index < arr.length)
{
// check for repeat
// for 6 repeated chars, we'd need either 3 hb-compressed chars or 3 repeatsig chars, so it's a toss up
if (repeatCheck(arr, index) > 6) return(index-startingIndex); // at this point though, it's better to stop and do the repeat
// check for ramp, same reason
if (rampCheck(arr, index) > 6) return(index-startingIndex);
// OK, finally check for hb
if ((arr[index] & 0xFF00) != (arr[index-1] & 0xFF00)) return(index-startingIndex);
++index;
}
return(index-startingIndex);
}
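// For instance (hypothetical input): with arr = { 5, 5, 5, 5, 9 }, repeatCheck(arr, 0) returns 4;
// with arr = { 3, 4, 5, 6, 9 }, rampCheck(arr, 0) returns 4; and hbCheck() keeps scanning while
// the high byte stays the same and no repeat or ramp of more than 6 characters begins.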
static int numRepeats;
static int numRamps;
static int hbRepeats;
static int charRepeats;
// This is the new way - 05/04/2000.
static char[] compressBetter(char[] arr)
{
numRepeats = 0;
numRamps = 0;
hbRepeats = 0;
charRepeats = 0;
// This uses the "correct" compression scheme from my invention disclosure
// It also employs high-byte compression, something that I did not include in my disclosure.
StringBuffer buf = new StringBuffer();
for (int i=0; i<arr.length; ++i)
{
int repNum = repeatCheck(arr, i);
if (repNum > 3) // had enough repeats
{
numRepeats++;
buf.append(repSig);
buf.append((char)repNum);
buf.append(arr[i]);
i += repNum-1;
}
else
{
int rampNum = rampCheck(arr, i);
if (rampNum > 3) // had enough in the ramp
{
numRamps++;
buf.append(rampSig);
buf.append((char)rampNum);
buf.append(arr[i]);
i += rampNum-1;
}
else
{
int hbNum = hbCheck(arr, i);
--hbNum; // don't include the first char, since we always append it.
if (hbNum >= 6)
{
// System.out.print("HBNUM is "+Integer.toHexString((int)hbNum)+"; ");
hbRepeats++;
// pattern is this: ss ss nn nn hh tt xx xx xx xx ...
// where ss ss is hbSig
// nn nn is hbNum
// hh tt is the first char (hh is the repeated high byte)
// xx is the lower byte of the next char in the sequence
// xx repeats hbNum/2 times so that
// hbNum is the total number of repeated db chars in the ciphertext, not including the first char.
// Note that there may be, in actuality, hbNum*2 +1 chars in the cleartext that fit into the
// conversion, but since we'd have to fill out the last char with an empty byte, there's no point
// in doing it anyway. Besides, it might be able to be compressed via another scheme with itself as
// the starting character.
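// A concrete example of that layout, consistent with how decompressBetter() unpacks it: the seven
// characters 0x4E00 0x4E01 0x4E02 0x4E03 0x4E04 0x4E05 0x4E06 (hbNum = 6 once the first char is
// excluded) would be written as hbSig, 0x0003, 0x4E00, 0x0102, 0x0304, 0x0506; the shared high
// byte 0x4E is kept only in the first character.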
// int start = i;
buf.append(hbSig);
if (hbNum % 2 == 1) // odd number
{
--hbNum; // no point in doing the last char
}
// System.out.println("Appending "+Integer.toHexString((int)((char)(hbNum/2))));
buf.append((char)(hbNum/2)); // hbNum is always even, so this comes out.
// System.out.print("hb comp: "+Integer.toHexString(hbNum)+": ");
// for (int b=0; b<hbNum; ++b) System.out.print(Integer.toHexString((int)arr[i+b])+" ");
buf.append(arr[i]); // the first char; it carries the repeated high byte
for (int j=0; j<hbNum; j+=2)
{
// pack the low bytes of the next two chars into a single char
char both = (char)(((0x00FF & arr[i+j+1]) << 8) + (0x00FF & arr[i+j+2]));
buf.append(both);
}
i += hbNum;
}
else
{
buf.append(arr[i]);
charRepeats++;
}
}
}
}
System.out.println("Compression stats: "+numRepeats+" repeats, "+numRamps+" ramps, "+hbRepeats+" highbytes, "+charRepeats+" regular.");
numRepeats = 0;
numRamps = 0;
hbRepeats = 0;
charRepeats = 0;
return buf.toString().toCharArray();
}

// Reverses compressBetter(); used above to verify the compressed tables.
static char[] decompressBetter(char[] arr)
{
StringBuffer buf = new StringBuffer();
for (int i=0; i<arr.length; ++i)
{
if (arr[i] == repSig)
{
if (arr[i+1] == pad)
{ // a literal repSig character
buf.append(repSig);
++i;
}
else
{
numRepeats++;
int repNum = 0xFFFF & arr[i+1];
char repChar = arr[i+2];
for (int j=0; j<repNum; ++j) buf.append(repChar);
i += 2;
}
}
else if (arr[i] == rampSig)
{
if (arr[i+1] == pad)
{ // a literal rampSig character
buf.append(rampSig);
++i;
}
else
{
numRamps++;
int rampNum = 0xFFFF & arr[i+1];
char rampStart = arr[i+2];
for (int j=0; j<rampNum; ++j) buf.append((char)(rampStart+j));
i += 2;
}
}
else if (arr[i] == hbSig)
{
if (arr[i+1] == pad)
{ // a literal hbSig character
buf.append(hbSig);
++i;
}
else
{
hbRepeats++;
int hbNum = 0xFFFF & arr[++i];
char firstChar = arr[++i];
char highByteMask = (char)(0xFF00 & firstChar);
buf.append(firstChar);
++i;
for (int j=0; j<hbNum; ++j)
{
char both = arr[i+j];
char c1 = (char)(highByteMask + (both >>> 8));
char c2 = (char)(highByteMask + (0x00FF & both));
buf.append(c1);
buf.append(c2);
// System.out.print(Integer.toHexString((int)c1)+" "+Integer.toHexString((int)c2)+" ");
}
// System.out.println(Integer.toHexString((int)arr[i+hbNum]));
i = i + hbNum - 1;
}
}
else
{
buf.append(arr[i]);
charRepeats++;
}
}
System.out.println("Decompression stats: "+numRepeats+" repeats, "+numRamps+" ramps, "+hbRepeats+" highbytes, "+charRepeats+" regular.");
numRepeats = 0;
numRamps = 0;
hbRepeats = 0;
charRepeats = 0;
return buf.toString().toCharArray();
}
// This is the old way
static char[] compress(char[] arr)
{
if (arr.length < 3) return arr;
StringBuffer buf = new StringBuffer();
char oldold = arr[0];
char old = arr[1];
int count = 0;
boolean inCompression = false; // this flags if we are repeating the same character
boolean inRamp = false; // this flags if each subsequent character is the previous character + 1
for (int i=2; i<arr.length; ++i)
{
// ... (old-format encoding loop: tracks runs and ramps using oldold, old, count, inCompression, and inRamp)
}
return buf.toString().toCharArray();
}

// Writes the standard JTOpen header into a generated ConvTable source file.
static void writeHeader(Writer f, int ccsid, String system) throws Exception
{
// Strip any domain qualifier off of the system name.
int dotIndex = system.indexOf('.');
if (dotIndex > 0) {
system=system.substring(0,dotIndex);
}
Date currentDate = new Date();
// Look up the version dynamically
Class copyrightClass = Copyright.class;
Field field = copyrightClass.getField("version");
String jtopenVersion = (String) field.get(null);
f.write("///////////////////////////////////////////////////////////////////////////////\n");
f.write("//\n");
f.write("// JTOpen (IBM Toolbox for Java - OSS version)\n");
f.write("//\n");
f.write("// Filename: ConvTable"+ccsid+".java\n");
f.write("//\n");
f.write("// The source code contained herein is licensed under the IBM Public License\n");
f.write("// Version 1.0, which has been approved by the Open Source Initiative.\n");
f.write("// Copyright (C) 1997-2016 International Business Machines Corporation and\n");
f.write("// others. All rights reserved.\n");
f.write("//\n");
f.write("// Generated "+currentDate+" from "+system+"\n");
f.write("// Using "+jtopenVersion+"\n");
f.write("///////////////////////////////////////////////////////////////////////////////\n\n");
f.write("package com.ibm.as400.access;\n\n");
}
}