com.adobe.agl.charset.CharsetEncoderICU Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
There is a newer version: 2024.11.18751.20241128T090041Z-241100
/**
 *******************************************************************************
 * Copyright (C) 2006-2008, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 *******************************************************************************
 */
/*
 * File: CharsetEncoderICU.java
 * ************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * ___________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/

package com.adobe.agl.charset;

import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

import com.adobe.agl.text.UTF16;

/**
 * An abstract class that provides framework methods of decoding operations for concrete
 * subclasses. 
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
public abstract class CharsetEncoderICU extends CharsetEncoder {

    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    byte[] errorBuffer = new byte[30];

    int errorBufferLength = 0;

    /** these are for encodeLoopICU */
    int fromUnicodeStatus;

    int fromUChar32;

    boolean useSubChar1;

    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */

    char[] preFromUArray = new char[EXT_MAX_UCHARS];

    int preFromUBegin;

    int preFromULength; /* negative: replay */

    char[] invalidUCharBuffer = new char[2];

    int invalidUCharLength;

    Object fromUContext;

    /*
     * Construcs a new encoder for the given charset
     * 
     * @param cs
     *            for which the decoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }

    /**
     * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
     * that will convert a Unicode codepoint sequence to a byte sequence, but
     * the encoded byte sequence will round trip convert to a different
     * Unicode codepoint sequence.
     * @return true if the converter uses fallback, false otherwise.
     * @stable ICU 3.8
     */
    public boolean isFallbackUsed() {
        return useFallback;
    }

    /**
     * Sets whether this Encoder can use fallbacks?
     * @param usesFallback true if the user wants the converter to take
     *  advantage of the fallback mapping, false otherwise.
     * @stable ICU 3.8
     */
    public void setFallbackUsed(boolean usesFallback) {
        useFallback = usesFallback;
    }

   
    /**
     * Sets fromUContext used in callbacks.
     * 
     * @param newContext Object
     * @exception IllegalArgumentException
     * @draft ICU 4.0
     * @provisional This API might change or be removed in a future release.
     */
    public final void setFromUContext(Object newContext) {
        fromUContext = newContext;
    }

    private static final CharBuffer EMPTY = CharBuffer.allocate(0);


    /**
     * Resets the from Unicode mode of converter
     * @stable ICU 3.6
     */
    protected void implReset() {
        errorBufferLength = 0;
        fromUnicodeStatus = 0;
        fromUChar32 = 0;
        fromUnicodeReset();
    }

    private void fromUnicodeReset() {
        preFromUBegin = 0;
        preFromUFirstCP = UConverterConstants.U_SENTINEL;
        preFromULength = 0;
    }

    /*
     * Implements ICU semantics of buffer management
     * @param source
     * @param target
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);


    /*
     * Ascertains if a given Unicode code point (32bit value for handling surrogates)
     * can be converted to the target encoding. If the caller wants to test if a
     * surrogate pair can be converted to target encoding then the
     * responsibility of assembling the int value lies with the caller.
     * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
     *      *  while(i
     * or
     *      *  String src = new String(mySource);
     *  int i,codepoint;
     *  boolean passed = false;
     *  while(i0xfff)? 2:1;
     *      if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
     *          passed = false;
     *      }
     *  }
     * 
     *
     * @param codepoint Unicode code point as int value
     * @return true if a character can be converted
     */
    /* TODO This is different from Java's canEncode(char) API.
     * ICU's API should implement getUnicodeSet,
     * and override canEncode(char) which queries getUnicodeSet.
     * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
     */
    /*public boolean canEncode(int codepoint) {
        return true;
    }*/
    /**
     * @param codepoint Unicode code point as int array
     * @return true if the input sequence can be converted
     */
    public boolean canEncode(int[] codepoint) {
	return true;
    }
    /**
     * Overrides super class method
     * @stable ICU 3.6 
     */
    public boolean isLegalReplacement(byte[] repl) {
        return true;
    }

    /*
     * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
     * @param cnv
     * @param bytesArray
     * @param bytesBegin
     * @param bytesLength
     * @param out
     * @param offsets
     * @param sourceIndex
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
            byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
            IntBuffer offsets, int sourceIndex) {

        //write bytes
        int obl = bytesLength;
        CoderResult cr = CoderResult.UNDERFLOW;
        int bytesLimit = bytesBegin + bytesLength;
        try {
            for (; bytesBegin < bytesLimit;) {
                out.put(bytesArray[bytesBegin]);
                bytesBegin++;
            }
            // success 
            bytesLength = 0;
        } catch (BufferOverflowException ex) {
            cr = CoderResult.OVERFLOW;
        }

        if (offsets != null) {
            while (obl > bytesLength) {
                offsets.put(sourceIndex);
                --obl;
            }
        }
        //write overflow 
        cnv.errorBufferLength = bytesLimit - bytesBegin;
        if (cnv.errorBufferLength > 0) {
            int index = 0;
            while (bytesBegin < bytesLimit) {
                cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
            }
            cr = CoderResult.OVERFLOW;
        }
        return cr;
    }

    /*
     * Returns the number of chars held in the converter's internal state
     * because more input is needed for completing the conversion. This function is 
     * useful for mapping semantics of ICU's converter interface to those of iconv,
     * and this information is not needed for normal conversion.
     * @return The number of chars in the state. -1 if an error is encountered.
     */
    /*public*/int fromUCountPending() {
        if (preFromULength > 0) {
            return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
        } else if (preFromULength < 0) {
            return -preFromULength;
        } else if (fromUChar32 > 0) {
            return 1;
        } else if (preFromUFirstCP > 0) {
            return UTF16.getCharCount(preFromUFirstCP);
        }
        return 0;
    }

    /**
     * 
     * @param source
     */
    private final void setSourcePosition(CharBuffer source) {

        // ok was there input held in the previous invocation of encodeLoop 
        // that resulted in output in this invocation?
        source.position(source.position() - fromUCountPending());
    }

    /*
     * Write the codepage substitution character.
     * Subclasses to override this method.
     * For stateful converters, it is typically necessary to handle this
     * specificially for the converter in order to properly maintain the state.
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
            ByteBuffer target, IntBuffer offsets) {
        CharsetICU cs = (CharsetICU) encoder.charset();
        byte[] sub = encoder.replacement();
        if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
            return CharsetEncoderICU.fromUWriteBytes(encoder,
                    new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
                            .position());
        } else {
            return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
                    sub.length, target, offsets, source.position());
        }
    }

}