 
                        
        
                        
// com.ibm.as400.access.BidiConvert Maven / Gradle / Ivy
// Show all versions of jt400 Show documentation
///////////////////////////////////////////////////////////////////////////////
//
// JTOpen (IBM Toolbox for Java - OSS version)
//
// Filename:  BidiConvert.java
//
// The source code contained herein is licensed under the IBM Public License
// Version 1.0, which has been approved by the Open Source Initiative.
// Copyright (C) 2006 International Business Machines Corporation and
// others.  All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////
package com.ibm.as400.access;
import java.util.StringTokenizer;
import java.nio.CharBuffer;
/**
 *  BidiConvert is a convenience class to facilitate invocation of the
 *  Bidi layout transformations by converter code in the JVM.
 *  Its public entry points are the overloads of toUnicode(), toSbcs(),
 *  isBidiData() and the method isBidiEnv().
 *
 *  Method toUnicode() must be called  by the ByteToChar converters
 *  after converting the SBCS byte data into Unicode.
 *
 *  Method toSbcs() must be called by the CharToByte converters before
 *  converting the Unicode string data into byte data.
 *
 *  The bidiParms string handled by this class is either null, the literal
 *  "NO", or a list of parts of the form "X(YYYY)" separated by commas,
 *  where the recognized prefixes are "U(" (attributes of the Unicode side),
 *  "S(" (attributes of the SBCS side) and "C(" (encodings, separated by
 *  semicolons, to which the transformations apply).
 *
 *  An instance caches the last encoding/bidiParms pair together with the
 *  flag sets built from it in plain instance fields; nothing in this class
 *  synchronizes access to that state.
 */
public class BidiConvert
{
    /**
     *  Default Bidi parms for Unicode string
     */
    static final char[] DEFAULT_U = new char[] {'I', 'L', 'Y', 'N', 'N', 'U', 'A', 'A', 'A', 'A'};

    /**
     *  Default Bidi parms for SBCS string
     */
    static final char[] DEFAULT_S = new char[] {'V', 'L', 'N', 'S', 'N', 'U', 'A', 'A', 'A', 'A'};

    /**
     *  Character for default option in Bidi parms
     */
    static final char DEFCHAR = '-';

    // The 5 following members are used to cache the parms and result of
    // isMatchingEncoding(), because it will typically be called several times
    // with the same data, from within isBidiData() and from within toSbcs()
    // or toUnicode().  The two flag sets are dropped whenever the cached
    // pair changes and are rebuilt lazily from the new bidiParms.
    private String lastEncoding;
    private String lastBidiParms;
    private boolean lastMatching;
    private BidiFlagSet bfs_U;
    private BidiFlagSet bfs_S;

    /**
     *  Null-safe string equality with an identity fast path.
     *  @param  a   First string, may be null.
     *  @param  b   Second string, may be null.
     *  @return true if both are null or both have the same content.
     */
    private static boolean sameString(String a, String b)
    {
        return (a == b) || ((a != null) && a.equals(b));
    }

    /**
     *  This method isolates a part with format "X(YYYY)" within a string, where "X"
     *  is a one-letter prefix followed by a left parenthesis, and "YYYY" is an
     *  arbitrary string.  The string is split on ',' and ')' and the first
     *  piece starting with the prefix is used.
     *  @param  str         The string to scan.
     *  @param  prefix      The one-letter prefix with the left parenthesis.
     *  @return the string within parentheses following the prefix, or an
     *          empty string if no part starts with the prefix.
     */
    private static String getPart(String str, String prefix)
    {
        StringTokenizer stok = new StringTokenizer(str, ",)");
        while (stok.hasMoreTokens())
        {
            String part = stok.nextToken();
            if (part.startsWith(prefix))
            {
                // Strip exactly the prefix rather than a hard-coded 2 chars.
                return part.substring(prefix.length());
            }
        }
        return "";
    }

    /**
     *  This method isolates a part with format "X(YYYY)" within a string, where "X"
     *  is a one-letter prefix followed by a left parenthesis, and "YYYY" is an
     *  arbitrary string.  It returns it as a char array, completed to the
     *  maximum number of elements from a default array.  Positions that are
     *  missing from the part, or that hold DEFCHAR, take the default value;
     *  characters beyond the length of the default array are ignored.
     *  @param  str         The string to scan.
     *  @param  prefix      The one-letter prefix with the left parenthesis.
     *  @param  defChars    The default char array
     *  @return a new char array with the same length as defChars.
     */
    private static char[] getChars(String str, String prefix, char[] defChars)
    {
        String part = getPart(str, prefix);
        int lenDef = defChars.length;
        int len = Math.min(part.length(), lenDef);
        char[] ans = new char[lenDef];
        part.getChars(0, len, ans, 0);
        for (int i = 0; i < lenDef; i++)
        {
            if ((i >= len) || (DEFCHAR == ans[i]))
            {
                ans[i] = defChars[i];
            }
        }
        return ans;
    }

    /**
     *  This method checks if the encoding of the conversion is included in a
     *  list of encodings within bidiParms.  If positive, or if bidiParms
     *  contains no list of encodings, return true; otherwise return false.
     *  The answer is cached; when the encoding/bidiParms pair differs from
     *  the cached one, the cached flag sets are discarded as well.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes (must not be null).
     *  @return true if encoding appears in bidiParms, or if bidiParms ignores encodings.
     */
    private boolean isMatchingEncoding(String encoding, String bidiParms)
    {
        // Compare by content (with an identity shortcut) so that equal
        // strings held in distinct objects still hit the cache.
        if (sameString(encoding, lastEncoding) && sameString(bidiParms, lastBidiParms))
        {
            return lastMatching;
        }
        lastEncoding = encoding;
        lastBidiParms = bidiParms;
        bfs_U = null;
        bfs_S = null;

        String part = getPart(bidiParms, "C(");
        if (part.equals(""))
        {
            lastMatching = true;
            return true;
        }
        StringTokenizer stok = new StringTokenizer(part, ";");
        while (stok.hasMoreTokens())
        {
            if (stok.nextToken().equals(encoding))
            {
                lastMatching = true;
                return true;
            }
        }
        lastMatching = false;
        return false;
    }

    /**
     *  This method tells whether an orientation flag is one of the RTL values.
     *  @param  bf  The orientation flag to test.
     *  @return     true for ORIENTATION_RTL or ORIENTATION_CONTEXT_RTL.
     */
    private static boolean isRtl(BidiFlag bf)
    {
        return (BidiFlag.ORIENTATION_RTL == bf) || (BidiFlag.ORIENTATION_CONTEXT_RTL == bf);
    }

    /**
     *  This method checks the orientation if RTL in either the U or S part of Bidi parms.
     *  The flag sets are built on first use and kept in the instance cache;
     *  the S part is only examined when the U part is not RTL.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @return             true if either orientation is RTL.
     */
    private boolean isFlagRtl(String bidiParms)
    {
        if (null == bfs_U)
        {
            bfs_U = new BidiFlagSet(getChars(bidiParms, "U(", DEFAULT_U));
        }
        if (isRtl(bfs_U.getOrientation()))
        {
            return true;
        }
        if (null == bfs_S)
        {
            bfs_S = new BidiFlagSet(getChars(bidiParms, "S(", DEFAULT_S));
        }
        return isRtl(bfs_S.getOrientation());
    }

    /**
     *  This method checks if a char array contains R or AL characters.
     *  @param  input       The input char array containing text in Unicode.
     *  @param  inStart     Offset in input array of the first char to check.
     *  @param  inEnd       Offset just past the last char to check (exclusive).
     *  @return             true if the string needs Bidi transformation.
     */
    private static boolean isBidiData(char[] input, int inStart, int inEnd)
    {
        for (int i = inStart; i < inEnd; i++)
        {
            char c = input[i];
            // Hebrew, Arabic, Syriac, Thaana
            if ((c >= 0x0590) && (c <= 0x07BF))
            {
                return true;
            }
            // Hebrew Presentation Forms and Arabic Presentation Forms-A
            if ((c >= 0xFB1D) && (c <= 0xFDFF))
            {
                return true;
            }
            // Arabic Presentation Forms-B
            if ((c >= 0xFE70) && (c <= 0xFEFC))
            {
                return true;
            }
            // RLM, RLE, RLO
            if ((c == 0x200F) || (c == 0x202B) || (c == 0x202E))
            {
                return true;
            }
        }
        return false;
    }

    /**
     *  This method checks if the environment allows Bidi transformations for the current encoding.
     *  The answer is false for encoding "Cp850", for a null bidiParms, for
     *  bidiParms equal to "NO", and when bidiParms lists encodings that do
     *  not include the given one.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes (may be null).
     *  @param  encoding    The encoding of the data outside the JVM.  A null
     *                      value is accepted and never matches a listed encoding.
     *  @return             true if the environment allows Bidi transformations for the current encoding.
     */
    public boolean isBidiEnv(String bidiParms, String encoding)
    {
        // Constant-first comparison: a null encoding must not raise an NPE.
        if ("Cp850".equals(encoding))
        {
            return false;
        }
        Mlog.putLine( "isBidiEnv " +  bidiParms + " encoding=" + encoding );
        if (bidiParms == null)
        {
            return Mlog.logReturn( false );
        }
        if (bidiParms.equals("NO"))
        {
            return Mlog.logReturn( false );
        }
        if (!isMatchingEncoding(encoding, bidiParms))
        {
            return Mlog.logReturn( false );
        }
        return Mlog.logReturn( true );
    }

    /**
     *  This method checks if a char array needs Bidi transformation.
     *  It answers false when the environment does not allow Bidi
     *  transformations or the range is empty, true when either orientation
     *  in bidiParms is RTL, and otherwise scans the range for RTL characters.
     *  @param  input       The input char array containing text in Unicode.
     *  @param  inStart     Offset in input array of the first char to check.
     *  @param  inEnd       Offset just past the last char to check (exclusive).
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return             true if the string needs Bidi transformation.
     */
    public boolean isBidiData(char[] input, int inStart, int inEnd, String bidiParms, String encoding)
    {
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return false;
        }
        Mlog.putLine( "isBidiData_char[]  inStart=" + inStart + "  inEnd=" + inEnd );
        Mlog.putHexLine( input, inStart, inEnd, true );
        if (inEnd <= inStart)
        {
            return Mlog.logReturn( false );
        }
        if (isFlagRtl(bidiParms))
        {
            return Mlog.logReturn( true );
        }
        return Mlog.logReturn( isBidiData( input, inStart, inEnd ) );
    }

    /**
     *  This method checks if a string needs Bidi transformation.
     *  It answers false when the environment does not allow Bidi
     *  transformations or the string is empty, true when either orientation
     *  in bidiParms is RTL, and otherwise scans the string for RTL characters.
     *  @param  input       The input string.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return             true if the string needs Bidi transformation.
     */
    public boolean isBidiData(String input, String bidiParms, String encoding)
    {
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return false;
        }
        Mlog.putLine( "isBidiData_String" );
        Mlog.putHexLine( input, true );
        if (input.length() <= 0)
        {
            return Mlog.logReturn( false );
        }
        if (isFlagRtl(bidiParms))
        {
            return Mlog.logReturn( true );
        }
        char[] text = input.toCharArray();
        return Mlog.logReturn( isBidiData( text, 0, text.length ) );
    }

    /**
     *  This method checks if a CharBuffer needs Bidi transformation.
     *  Only the chars between the buffer's position and limit are examined,
     *  and the buffer's position is the same on return as on entry.
     *  @param  input       The input buffer.
     *  @param  UseBuffer   flag to prevent use of char array in CharBuffer.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return             true if the buffer needs Bidi transformation.
     */
    public boolean isBidiData(CharBuffer input, boolean UseBuffer, String bidiParms, String encoding)
    {
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return false;
        }
        Mlog.putLine( "isBidiData_CharBuffer  UseBuffer=" + UseBuffer +
                      "  position=" + input.position() + "  limit=" + input.limit() );
        Mlog.putHexLine( input.toString(), true );
        if (!input.hasRemaining())
        {
            return Mlog.logReturn( false );
        }
        if (isFlagRtl(bidiParms))
        {
            return Mlog.logReturn( true );
        }
        if (input.hasArray() && !UseBuffer)
        {
            // Scan the backing array directly; buffer indexes are shifted
            // by arrayOffset() within that array.
            int offset = input.arrayOffset();
            boolean found = isBidiData( input.array(), offset + input.position(), offset + input.limit() );
            return Mlog.logReturn( found );
        }
        // We need to save the buffer state, but don't want to use mark() since
        // the caller may have set his own mark.
        int oldPos = input.position();
        char[] one = new char[1];
        boolean flag = false;
        while (input.hasRemaining())
        {
            one[0] = input.get();
            if (isBidiData( one, 0, 1 ))
            {
                flag = true;
                break;
            }
        }
        // Reset the buffer to its original state
        input.position( oldPos );
        return Mlog.logReturn( flag );
    }

    /**
     *  This method creates a char array from a subset of another char array.
     *  @param  input       The input char array.
     *  @param  inStart     Offset in input array of the first char to copy.
     *  @param  inEnd       Offset just past the last char to copy (exclusive).
     *  @return             A new char array containing characters from inStart up to, but not including, inEnd.
     */
    private static char[] subchars(char[] input, int inStart, int inEnd)
    {
        int len = inEnd - inStart;
        char[] result = new char[len];
        System.arraycopy( input, inStart, result, 0, len );
        return result;
    }

    /**
     *  This method copies the chars between the position and the limit of a
     *  buffer into a new array, leaving the buffer's position as it was.
     *  @param  input       The buffer to read from.
     *  @return             A new char array holding the remaining chars of the buffer.
     */
    private static char[] copyRemaining(CharBuffer input)
    {
        // We need to save the buffer state, but don't want to use mark() since
        // the caller may have set his own mark.
        int oldPos = input.position();
        // Since the Bidi layout code is geared towards char arrays, we have no
        // option but to extract the data for the buffer into an array
        char[] text = new char[input.remaining()];
        input.get( text );
        // Reset the buffer to its original state
        input.position( oldPos );
        return text;
    }

    /**
     *  This method converts a char array from the Bidi attributes used outside the
     *  JVM to those used inside the JVM.  The source text is described by the
     *  "S(" attributes and the transformation targets the "U(" attributes.
     *  @param  input       The input char array.
     *  @param  inStart     Offset in input array of the first char to convert.
     *  @param  inEnd       Offset just past the last char to convert (exclusive).
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @return The data array of the BidiText produced by the transformation.
     */
    private char[] toUnicode(char[] input, int inStart, int inEnd, String bidiParms)
    {
        char[] charsU = getChars(bidiParms, "U(", DEFAULT_U);
        char[] charsS = getChars(bidiParms, "S(", DEFAULT_S);

        BidiTransform bdx = new BidiTransform();
        if (null == bfs_U)
        {
            bfs_U = new BidiFlagSet(charsU);
        }
        bdx.flags = bfs_U;
        // Element 5 of the target attributes selects round-trip mode when 'R'.
        bdx.roundTrip = ('R' == charsU[5]);
        bdx.options = new ArabicOptionSet(charsU);

        if (null == bfs_S)
        {
            bfs_S = new BidiFlagSet(charsS);
        }
        BidiText src = new BidiText( bfs_S );
        src.setCharsRef( input, inStart, inEnd - inStart );
        BidiText dst = src.transform(bdx);
        return dst.data;
    }

    /**
     *  This method converts a char array from the Bidi attributes used outside the
     *  JVM to those used inside the JVM.  When the environment does not allow
     *  Bidi transformations, a plain copy of the requested range is returned.
     *  @param  input       The input char array.
     *  @param  inStart     Offset in input array of the first char to convert.
     *  @param  inEnd       Offset just past the last char to convert (exclusive).
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return The input char array transformed to the layout specified by bidiParms.
     */
    public char[] toUnicode(char[] input, int inStart, int inEnd, String bidiParms, String encoding)
    {
        Mlog.printStackTrace( "toUnicode_char[]") ;
        Mlog.putLine( "toUnicode_char[]  inStart=" + inStart + "  inEnd=" + inEnd );
        Mlog.putHexLine( input, inStart, inEnd, true );
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return Mlog.logReturn( subchars( input, inStart, inEnd) );
        }
        return Mlog.logReturn( toUnicode( input, inStart, inEnd, bidiParms ) );
    }

    /**
     *  This method converts a string from the Bidi attributes used outside the
     *  JVM to those used inside the JVM.  When the environment does not allow
     *  Bidi transformations, the input string itself is returned.
     *  @param  input       The input string.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return The input string transformed to the layout specified by bidiParms.
     */
    public String toUnicode(String input, String bidiParms, String encoding)
    {
        Mlog.printStackTrace( "toUnicode_String" );
        Mlog.putLine( "toUnicode_String input:" );
        Mlog.putHexLine( input, true );
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return Mlog.logReturn( input );
        }
        // The transformed char array is logged before being wrapped in a String.
        return new String( Mlog.logReturn( toUnicode(input.toCharArray(), 0, input.length(), bidiParms ) ) );
    }

    /**
     *  This method converts a CharBuffer from the Bidi attributes used outside the
     *  JVM to those used inside the JVM.  When the environment does not allow
     *  Bidi transformations, the input buffer itself is returned; otherwise
     *  the result is a new buffer and the input's position is unchanged.
     *  @param  input       The input buffer.
     *  @param  UseBuffer   flag to prevent use of char array in CharBuffer.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return another CharBuffer with its data input string transformed to the layout specified by bidiParms.
     */
    public CharBuffer toUnicode(CharBuffer input, boolean UseBuffer, String bidiParms, String encoding)
    {
        Mlog.printStackTrace( "toUnicode_CharBuffer" );
        Mlog.putLine( "toUnicode_CharBuffer  UseBuffer=" + UseBuffer +
                      "  position=" + input.position() + "  limit=" + input.limit() );
        Mlog.putHexLine( input.toString(), true );
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return Mlog.logReturn( input );
        }
        char[] text;
        if (input.hasArray() && !UseBuffer)
        {
            // Work on the backing array; buffer indexes are shifted by arrayOffset().
            int offset = input.arrayOffset();
            text = toUnicode( input.array(), offset + input.position(), offset + input.limit(), bidiParms );
            Mlog.putLine( "after array: position=" + input.position() + "  limit=" + input.limit() );
            return Mlog.logReturn( CharBuffer.wrap( text ) );
        }
        text = copyRemaining( input );
        text = toUnicode( text, 0, text.length, bidiParms );
        Mlog.putLine( "after buffer: position=" + input.position() + "  limit=" + input.limit() );
        return Mlog.logReturn( CharBuffer.wrap( text ) );
    }

    /**
     *  This method converts a char array from the Bidi attributes used inside the
     *  JVM to those used outside the JVM.  The source text is described by the
     *  "U(" attributes and the transformation targets the "S(" attributes.
     *  @param  input       The input char array.
     *  @param  inStart     Offset in input array of the first char to convert.
     *  @param  inEnd       Offset just past the last char to convert (exclusive).
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @return The data array of the BidiText produced by the transformation.
     */
    private char[] toSbcs(char[] input, int inStart, int inEnd, String bidiParms)
    {
        char[] charsU = getChars(bidiParms, "U(", DEFAULT_U);
        char[] charsS = getChars(bidiParms, "S(", DEFAULT_S);

        BidiTransform bdx = new BidiTransform();
        if (null == bfs_S)
        {
            bfs_S = new BidiFlagSet(charsS);
        }
        bdx.flags = bfs_S;
        // Element 5 of the target attributes selects round-trip mode when 'R'.
        bdx.roundTrip = ('R' == charsS[5]);
        bdx.options = new ArabicOptionSet(charsS);

        if (null == bfs_U)
        {
            bfs_U = new BidiFlagSet(charsU);
        }
        BidiText src = new BidiText( bfs_U );
        src.setCharsRef( input, inStart, inEnd - inStart );
        BidiText dst = src.transform(bdx);
        return dst.data;
    }

    /**
     *  This method converts a char array from the Bidi attributes used inside the
     *  JVM to those used outside the JVM.  When the environment does not allow
     *  Bidi transformations, a plain copy of the requested range is returned.
     *  @param  input       The input char array.
     *  @param  inStart     Offset in input array of the first char to convert.
     *  @param  inEnd       Offset just past the last char to convert (exclusive).
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return The input char array transformed to the layout specified by bidiParms.
     */
    public char[] toSbcs(char[] input, int inStart, int inEnd, String bidiParms, String encoding)
    {
        Mlog.printStackTrace( "toSbcs_char[]" );
        Mlog.putLine( "toSbcs_char[]  inStart=" + inStart + "  inEnd=" + inEnd );
        Mlog.putHexLine( input, inStart, inEnd, true );
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return Mlog.logReturn( subchars( input, inStart, inEnd) );
        }
        return Mlog.logReturn( toSbcs( input, inStart, inEnd, bidiParms ) );
    }

    /**
     *  This method converts a string from the Bidi attributes used inside the
     *  JVM to those used outside the JVM.  When the environment does not allow
     *  Bidi transformations, the input string itself is returned.
     *  @param  input       The input string.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return The input string transformed to the layout specified by bidiParms.
     */
    public String toSbcs(String input, String bidiParms, String encoding)
    {
        // NOTE(review): unlike toUnicode(String, ...), the "input:" label is
        // passed to printStackTrace() and there is no separate putLine();
        // kept as is - confirm whether this is intended.
        Mlog.printStackTrace( "toSbcs_String input:" );
        Mlog.putHexLine( input, true );
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return Mlog.logReturn( input );
        }
        return Mlog.logReturn( new String( toSbcs( input.toCharArray(), 0, input.length(), bidiParms ) ) );
    }

    /**
     *  This method converts a CharBuffer from the Bidi attributes used inside the
     *  JVM to those used outside the JVM.  When the environment does not allow
     *  Bidi transformations, the input buffer itself is returned; otherwise
     *  the result is a new buffer and the input's position is unchanged.
     *  @param  input       The input buffer.
     *  @param  UseBuffer   flag to prevent use of char array in CharBuffer.
     *  @param  bidiParms   The environment variable containing the specifications of the Bidi attributes.
     *  @param  encoding    The encoding of the data outside the JVM.
     *  @return another CharBuffer with its data input string transformed to the layout specified by bidiParms.
     */
    public CharBuffer toSbcs(CharBuffer input, boolean UseBuffer, String bidiParms, String encoding)
    {
        Mlog.printStackTrace( "toSbcs_CharBuffer" );
        Mlog.putLine( "toSbcs_CharBuffer  UseBuffer=" + UseBuffer +
                      "  position=" + input.position() + "  limit=" + input.limit() );
        Mlog.putHexLine( input.toString(), true );
        if (!isBidiEnv( bidiParms, encoding ))
        {
            return Mlog.logReturn( input );
        }
        char[] text;
        if (input.hasArray() && !UseBuffer)
        {
            // Work on the backing array; buffer indexes are shifted by arrayOffset().
            int offset = input.arrayOffset();
            text = toSbcs( input.array(), offset + input.position(), offset + input.limit(), bidiParms );
            Mlog.putLine( "after array: position=" + input.position() + "  limit=" + input.limit() );
            return Mlog.logReturn( CharBuffer.wrap( text ) );
        }
        text = copyRemaining( input );
        text = toSbcs( text, 0, text.length, bidiParms );
        Mlog.putLine( "after buffer: position=" + input.position() + "  limit=" + input.limit() );
        return Mlog.logReturn( CharBuffer.wrap( text ) );
    }
}