src.com.ibm.as400.access.GenerateConverterTable Maven / Gradle / Ivy

Go to download
///////////////////////////////////////////////////////////////////////////////
//
// JTOpen (IBM Toolbox for Java - OSS version)
//
// Filename:  GenerateConverterTable.java
//
// The source code contained herein is licensed under the IBM Public License
// Version 1.0, which has been approved by the Open Source Initiative.
// Copyright (C) 1997-2016 International Business Machines Corporation and
// others.  All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////

package com.ibm.as400.access;
import java.io.*;
import java.lang.reflect.Field;
import java.util.*;

public class GenerateConverterTable
{
  // Copyright notice kept as a string constant.
  private static final String copyright = "Copyright (C) 1997-2016 International Business Machines Corporation and others.";
  // Connection to the system; created in main() from the system, uid and pwd arguments.
  static AS400 sys = null;

  static boolean compress_ = true; // Compress the conversion table
                                   // Note: turn this off for debugging purposes
                                   // (main() clears it when -nocompress is given)

  static boolean codePointPerLine_ = false; // Should only 1 code point be printed per line (set by -codePointPerLine)
  static boolean ascii_ = false; // Indicates if listed ccsids are ascii tables or not (set by -ascii)

  static boolean bidi_ = false; // Indicates if listed ccsids are bidi tables or not (set by -bidi)
  // Note: bidi_ and ascii_ cannot both be true
  // NOTE(review): the option parsing in main() does not reject -ascii together with -bidi.

  static boolean showOffsets_ = false;  // Indicates if the offsets should be printed in the tables (set by -showOffsets)
  public static void main(String[] args)
  {
    if (args.length < 4)
    {
      System.out.println("Usage: java com.ibm.as400.access.GenerateConverterTable system uid pwd [-nocompress] [-ascii] [-bidi] [-showOffsets] [-codePointPerLine] ccsid [ccsid2] [ccsid3] [ccsid4] ...");
      System.exit(0);
    }

    try
    {
      sys = new AS400(args[0], args[1], args[2]);
      sys.connectService(AS400.CENTRAL);
    }
    catch (Exception e)
    {
      e.printStackTrace();
      System.exit(0);
    }

    int start = 3;
    if (args[start].equals("-nocompress"))
    {
      compress_ = false;
      ++start;
    }
    if (args[start].equals("-ascii"))
    {
      ascii_ = true;
      ++start;
    }
    if (args[start].equals("-bidi"))
    {
      bidi_ = true;
      ++start;
    }

    if (args[start].equals("-showOffsets"))
    {
      showOffsets_ = true;
      ++start;
    }

    if (args[start].equals("-codePointPerLine"))
    {
      codePointPerLine_ = true;
      ++start;
    }

    for (int i=start; i13488->1089; use 61952 instead, since it would be the same anyway.
      {
        System.out.println("Special case for ccsid 1089.");
        System.out.println("Retrieving "+ccsid+"->61952 table...");
        tableToUnicode = down.download(ccsid, 61952, false);
      }
      else
      {
        System.out.println("Retrieving "+ccsid+"->13488 table...");
        tableToUnicode = down.download(ccsid, 13488, false);
      }
      if (tableToUnicode == null || tableToUnicode.length == 0)
      {
        System.out.println(ccsid+" must be double-byte. Performing secondary retrieve of "+ccsid+"->1200 table...");
        table1IsDBCS = true;
        down.disconnect();
        down.connect();
        tableToUnicode = down.download(ccsid, 1200, true);
      }

      System.out.println("  Size: "+tableToUnicode.length);
      if (tableToUnicode.length > 65536) {
	  System.out.println("Size is > 65536.  Fixing table");
	  int next = 0; 
	  int from = 0; 
	  char[] newTable = new char[65536];
	  while (from < tableToUnicode.length && next < 65536) {

	    
	    int c = 0xFFFF & (int) tableToUnicode[from];
      if (next > 0xECAA && next <= 0xECD0) {
        System.out.println("Next=0x"+Integer.toHexString(next)+" to="+Integer.toHexString(c));
      }

	    int nextchar = 0; 
	      if (from +1 < tableToUnicode.length) {
	        nextchar =  0xFFFF & (int) tableToUnicode[from+1];
	      }
	      
	      if (
	          // in surrogate range
	          ((c >= 0xD800) && (c <=0xDFFF)) ||
	          // Uses combining character
	          (nextchar == 0x309A) ||
	          (c != 0xFFfd && nextchar == 0x300) || 
	          (c != 0xffd && c != 0x300 && nextchar == 0x301) ||
	          // Weird cases.. 
	          (c == 0x2e5 && nextchar == 0x2e9) ||
	          (c == 0x2e9 && nextchar == 0x2e5)) {
		  // Mark as surrogate
      newTable[next]=(char) 0xD800;
      
		  // add to surrogate table
		  if (surrogateTable == null) {
		      surrogateTable = new char[65536][]; 
		  }
		  char[] pair = new char[2];
		  surrogateTable[next] = pair; 
		  pair[0] = (char) (0xFFFF & (int) tableToUnicode[from]);
		  pair[1] = (char) (0xFFFF & (int) tableToUnicode[from+1]);
		  /* System.out.println("Warning: Sub at offset "+Integer.toHexString(next)+" for "+Integer.toHexString(0xFFFF & (int) table1[from])+" "+Integer.toHexString(0xFFFF & (int) table1[from+1]));  */ 
		  from +=2;
	      } else {
		  newTable[next]=(char) c; 
		  from++; 
	      }
	      next++; 
	  }
	  tableToUnicode = newTable; 

      } 
      down.disconnect();
      down.connect();
      if (ccsid == 1089)
      {
        System.out.println("Special case for ccsid 1089.");
        System.out.println("Retrieving 61952->"+ccsid+" table...");
        tableToEbcdic = down.download(61952, ccsid, true);
      }
      else
      {
	  /* Use 1200 instead of 13488 */ 
        System.out.println("Retrieving 1200->"+ccsid+" table...");
        tableToEbcdic = down.download(1200, ccsid, true);
      }
      System.out.println("  Size: "+tableToEbcdic.length);

      sys.disconnectAllServices();
    }
    catch (Exception e)
    {
      e.printStackTrace();
    }

    
    // Verify the mapping 
    verifyRoundTrip(tableToUnicode, tableToEbcdic, table1IsDBCS); 
    
    System.out.println("****************************************");
    System.out.println("Verify round 2 "); 
    System.out.println("****************************************");
    verifyRoundTrip(tableToUnicode, tableToEbcdic, table1IsDBCS); 
    
    // Compress the ccsid table
    if (table1IsDBCS)
    {
      if (compress_)
      {
        System.out.println("Compressing "+ccsid+"->13488 conversion table...");
        char[] arr = compress(tableToUnicode);
        System.out.println("Old compression length: "+arr.length+" characters.");
        char[] temparr = compressBetter(tableToUnicode);
        System.out.println("New compression length: "+temparr.length+" characters.");
        if (temparr.length > arr.length)
        {
          System.out.println("WARNING: New algorithm WORSE than old algorithm!");
        }
        System.out.println("Verifying compressed table...");
        arr = decompressBetter(temparr);
        if (arr.length != tableToUnicode.length)
        {
          System.out.println("Verification failed, lengths not equal: "+arr.length+" != "+tableToUnicode.length);
          int c = 0;
          while (c < arr.length && arr[c] == tableToUnicode[c]) ++c;
          System.out.println("First mismatch at index "+c+": "+(int)arr[c]+" != "+(int)tableToUnicode[c]);
        }
        else
        {
          boolean bad = false;
          for (int c=0; c"+ccsid+" conversion table...");
      char[] arr = compress(tableToEbcdic);
      System.out.println("Old compression length: "+arr.length+" characters.");
      char[] temparr = compressBetter(tableToEbcdic);
      System.out.println("New compression length: "+temparr.length+" characters.");
      if (temparr.length > arr.length)
      {
        System.out.println("WARNING: New algorithm WORSE than old algorithm!");
      }
      System.out.println("Verifying compressed table...");
      arr = decompressBetter(temparr);
      if (arr.length != tableToEbcdic.length)
      {
        System.out.println("Verification failed, lengths not equal: "+arr.length+" != "+tableToEbcdic.length);
        int c = 0;
        while (c < arr.length && arr[c] == tableToEbcdic[c]) ++c;
        System.out.println("First mismatch at index "+c+": "+(int)arr[c]+" != "+(int)tableToEbcdic[c]);
        tableToEbcdic = temparr;
      }
      else
      {
        boolean bad = false;
        for (int c=0; c UX'"+Integer.toHexString(unicodeChar)+"'"+
                             " -> GX'"+Integer.toHexString(ebcdicChar)+"'\n");
            tableToEbcdic[unicodeChar] = (char) i;
            passed = false;
          } else {
            if (ebcdicChar >= tableToUnicode.length) {
              sb2.append("Invalid ebcdic char "+Integer.toHexString(ebcdicChar)+" unicodeChar="+Integer.toHexString(unicodeChar)+ " originalEbcdic="+Integer.toHexString(i)+"\n");
              passed =false; 
            } else {
              
              int unicodeChar2 = 0xFFFF & tableToUnicode[ebcdicChar];
              if (unicodeChar2 == unicodeChar) {
                sb2.append("Secondary EBCDIC mapping GX'"
                    + Integer.toHexString(i) + "'" + " -> UX'"
                    + Integer.toHexString(unicodeChar) + "'" + " -> GX'"
                    + Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
                    + Integer.toHexString(unicodeChar2) + "'\n");

              } else {
                sb3.append("EBCDIC RoundTrip Failure2 GX'"
                    + Integer.toHexString(i) + "'" + " -> UX'"
                    + Integer.toHexString(unicodeChar) + "'" + " -> GX'"
                    + Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
                    + Integer.toHexString(unicodeChar2) + "'\n");
                passed = false;

              }
            }
          }
        }
      }
    }
    System.out.println(sb2); 
    System.out.println(sb1); 
    System.out.println(sb3); 
    
    sb1.setLength(0); 
    sb2.setLength(0); 
    sb3.setLength(0); 
    
    for (int i = 0; i < tableToEbcdic.length; i++) {
      int ebcdicChar = 0xFFFF & tableToEbcdic[i];
      if (ebcdicChar != 0xfefe && ebcdicChar != 0x3f ) {
        if (ebcdicChar > tableToUnicode.length) {
          sb1.append("Unicode RoundTrip Failure UX'"
              + Integer.toHexString(i) + "'" + " -> GX'"
              + Integer.toHexString(ebcdicChar) + "' -> IndxOutOfBounds\n");

        } else {
          int unicodeChar = 0xFFFF & tableToUnicode[ebcdicChar];
          if (i != unicodeChar) {
            if (unicodeChar == 0xFFFD) {
              sb1.append("Unicode RoundTrip Failure UX'"
                  + Integer.toHexString(i) + "'" + " -> GX'"
                  + Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
                  + Integer.toHexString(unicodeChar) + "'\n");
              passed = false;
            } else {
              int ebcdicChar2 = 0xFFFF & tableToEbcdic[unicodeChar];
              if (ebcdicChar2 == ebcdicChar) {
                sb2.append("Secondary Unicode mapping UX'"
                    + Integer.toHexString(i) + "'" + " -> GX'"
                    + Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
                    + Integer.toHexString(unicodeChar) + "'" + " -> GX'"
                    + Integer.toHexString(ebcdicChar2) + "'\n");

              } else {
                sb3.append("Unicode RoundTrip Failure2 UX'"
                    + Integer.toHexString(i) + "'" + " -> GX'"
                    + Integer.toHexString(ebcdicChar) + "'" + " -> UX'"
                    + Integer.toHexString(unicodeChar) + "'" + " -> GX'"
                    + Integer.toHexString(ebcdicChar2) + "'\n");
                passed = false;

              }

            }
          }
        }
      }
    }
    System.out.println(sb2); 
    System.out.println(sb1); 
    System.out.println(sb3); 
    
    return passed;  
    
  }






  // Marker characters used by the compression scheme. compressBetter() appends
  // repSig, rampSig and hbSig to its output to introduce a compressed sequence.
  private static final char repSig = '\uFFFF'; // compression indication character (introduces a repeated-character run)
  private static final char cic_ = repSig; // alias for repSig

  private static final char rampSig = '\uFFFE'; // ramp indication character (introduces a run where each char is the previous + 1)
  private static final char ric_ = rampSig; // alias for rampSig

  private static final char hbSig = '\u0000'; // high-byte compression indication character
  private static final char pad = '\u0000'; // pad character (same value as hbSig)



  /**
   * Measures the run of identical characters that begins at {@code startingIndex}.
   * The character at {@code startingIndex} always counts, so the result is at least 1
   * (this also holds when {@code startingIndex} is the last element or lies past the end).
   *
   * @param arr            the table being scanned
   * @param startingIndex  index of the first character of the run
   * @return the number of consecutive equal characters starting at {@code startingIndex}
   */
  static int repeatCheck(char[] arr, int startingIndex)
  {
    int runLength = 1;
    // Each element is compared with its immediate predecessor; stop at the first change.
    for (int pos = startingIndex + 1; pos < arr.length && arr[pos] == arr[pos - 1]; ++pos)
    {
      ++runLength;
    }
    return runLength;
  }


  /**
   * Measures the "ramp" that begins at {@code startingIndex}: a run in which every
   * character is exactly one greater than the character before it.
   * The character at {@code startingIndex} always counts, so the result is at least 1.
   * The comparison is done in {@code int} arithmetic, so a ramp never continues
   * from {@code '\uFFFF'} to {@code '\u0000'}.
   *
   * @param arr            the table being scanned
   * @param startingIndex  index of the first character of the ramp
   * @return the number of consecutive ascending-by-one characters starting at {@code startingIndex}
   */
  static final int rampCheck(char[] arr, int startingIndex)
  {
    int rampLength = 1;
    // Continue while the step from the previous element to this one is exactly +1.
    for (int pos = startingIndex + 1; pos < arr.length && arr[pos] - arr[pos - 1] == 1; ++pos)
    {
      ++rampLength;
    }
    return rampLength;
  }


  /**
   * Measures how many characters, starting at {@code startingIndex}, can be grouped
   * for high-byte compression. Scanning begins at the element after
   * {@code startingIndex} and stops at the first position where either
   * <ul>
   *   <li>a repeat run longer than 6 begins (as reported by {@code repeatCheck}),</li>
   *   <li>a ramp longer than 6 begins (as reported by {@code rampCheck}), or</li>
   *   <li>the high byte differs from that of the preceding character.</li>
   * </ul>
   * The character at {@code startingIndex} always counts, so the result is at least 1.
   *
   * @param arr            the table being scanned
   * @param startingIndex  index of the first character of the candidate sequence
   * @return the distance from {@code startingIndex} to the position where scanning stopped
   */
  static int hbCheck(char[] arr, int startingIndex)
  {
    int pos = startingIndex + 1;
    for (; pos < arr.length; ++pos)
    {
      // For 6 repeated chars we would need either 3 hb-compressed chars or 3 repeat-signal
      // chars, so it is a toss up; beyond 6 it is better to stop here and let the
      // repeat (or, by the same reasoning, the ramp) encoding take over.
      boolean longRepeatAhead = repeatCheck(arr, pos) > 6;
      if (longRepeatAhead || rampCheck(arr, pos) > 6)
      {
        break;
      }

      // Finally the high-byte test itself: any differing bit in the upper byte ends the sequence.
      int highByteDiff = (arr[pos] ^ arr[pos - 1]) & 0xFF00;
      if (highByteDiff != 0)
      {
        break;
      }
    }
    return pos - startingIndex;
  }


  // Statistics counters maintained by compressBetter(): it zeroes them on entry,
  // increments one per encoded sequence, prints them, and zeroes them again before returning.
  static int numRepeats;   // sequences encoded with repSig
  static int numRamps;     // sequences encoded with rampSig
  static int hbRepeats;    // sequences encoded with hbSig
  static int charRepeats;  // characters appended without compression

  // This is the new way - 05/04/2000.
  static char[] compressBetter(char[] arr)
  {
    numRepeats = 0;
    numRamps = 0;
    hbRepeats = 0;
    charRepeats = 0;

    // This uses the "correct" compression scheme from my invention disclosure
    // It also employs high-byte compression, something that I did not include in my disclosure.
    StringBuffer buf = new StringBuffer();

    for (int i=0; i 3) // had enough repeats
      {
        numRepeats++;
        buf.append(repSig);
        buf.append((char)repNum);
        buf.append(arr[i]);
        i += repNum-1;
      }
      else
      {
        int rampNum = rampCheck(arr, i);
        if (rampNum > 3) // had enough in the ramp
        {
          numRamps++;
          buf.append(rampSig);
          buf.append((char)rampNum);
          buf.append(arr[i]);
          i += rampNum-1;
        }
        else
        {
          int hbNum = hbCheck(arr, i);
          --hbNum; // don't include the first char, since we always append it.
          if (hbNum >= 6)
          {
//            System.out.print("HBNUM is "+Integer.toHexString((int)hbNum)+"; ");
            hbRepeats++;
            // pattern is this: ss ss nn nn hh tt xx xx xx xx ...
            // where ss ss is hbSig
            //       nn nn is hbNum
            //       hh tt is the first char (hh is the repeated high byte)
            //       xx is the lower byte of the next char in the sequence
            //       xx repeats hbNum/2 times so that
            //       hbNum is the total number of repeated db chars in the ciphertext, not including the first char.
            //       Note that there may be, in actuality, hbNum*2 +1 chars in the cleartext that fit into the
            //       conversion, but since we'd have to fill out the last char with an empty byte, there's no point
            //       in doing it anyway. Besides, it might be able to be compressed via another scheme with itself as
            //       the starting character.
            // int start = i;
            buf.append(hbSig);
            if (hbNum % 2 == 1) // odd number
            {
              --hbNum; // no point in doing the last char
            }
//            System.out.println("Appending "+Integer.toHexString((int)((char)(hbNum/2))));
            buf.append((char)(hbNum/2)); // hbNum is always even, so this comes out.
//            System.out.print("hb comp: "+Integer.toHexString(hbNum)+": ");
//            for (int b=0; b>> 8));
            char c2 = (char)(highByteMask + (0x00FF & both));
            buf.append(c1);
            buf.append(c2);
//            System.out.print(Integer.toHexString((int)c1)+" "+Integer.toHexString((int)c2)+" ");
          }
//          System.out.println(Integer.toHexString((int)arr[i+hbNum]));
          i = i + hbNum - 1;
        }
      }
      else
      {
        buf.append(arr[i]);
        charRepeats++;
      }
    }
    System.out.println("Decompression stats: "+numRepeats+" repeats, "+numRamps+" ramps, "+hbRepeats+" highbytes, "+charRepeats+" regular.");
    numRepeats = 0;
    numRamps = 0;
    hbRepeats = 0;
    charRepeats = 0;
    return buf.toString().toCharArray();
  }


  // This is the old way
  static char[] compress(char[] arr)
  {

    if (arr.length < 3) return arr;
    StringBuffer buf = new StringBuffer();
    char oldold = arr[0];
    char old = arr[1];
    int count = 0;
    boolean inCompression = false; // this flags if we are repeating the same character
    boolean inRamp = false; // this flags if each subsequent characters is the previous character + 1

    for (int i=2; i 0) {
      system=system.substring(0,dotIndex); 
    }
    Date currentDate = new Date(); 
    // Look up the version dynamically
    Class copyrightClass = Copyright.class; 
    Field field = copyrightClass.getField("version"); 
    String jtopenVersion = (String) field.get(null); 
    
    f.write("///////////////////////////////////////////////////////////////////////////////\n");
    f.write("//\n");
    f.write("// JTOpen (IBM Toolbox for Java - OSS version)\n");
    f.write("//\n");
    f.write("// Filename:  ConvTable"+ccsid+".java\n");
    f.write("//\n");
    f.write("// The source code contained herein is licensed under the IBM Public License\n");
    f.write("// Version 1.0, which has been approved by the Open Source Initiative.\n");
    f.write("// Copyright (C) 1997-2016 International Business Machines Corporation and\n");
    f.write("// others.  All rights reserved.\n");
    f.write("//\n");
    f.write("// Generated "+currentDate+" from "+system+"\n");
    f.write("// Using "+jtopenVersion+"\n"); 
    f.write("///////////////////////////////////////////////////////////////////////////////\n\n");
    f.write("package com.ibm.as400.access;\n\n");
  }

}