All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.agl.charset.CharsetEncoderICU Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/**
 *******************************************************************************
 * Copyright (C) 2006-2008, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 *******************************************************************************
 */
/*
 * File: CharsetEncoderICU.java
 * ************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * ___________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/

package com.adobe.agl.charset;

import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

import com.adobe.agl.text.UTF16;

/**
 * An abstract class that provides framework methods of decoding operations for concrete
 * subclasses. 
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
public abstract class CharsetEncoderICU extends CharsetEncoder {

    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    byte[] errorBuffer = new byte[30];

    int errorBufferLength = 0;

    /** these are for encodeLoopICU */
    int fromUnicodeStatus;

    int fromUChar32;

    boolean useSubChar1;

    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */

    char[] preFromUArray = new char[EXT_MAX_UCHARS];

    int preFromUBegin;

    int preFromULength; /* negative: replay */

    char[] invalidUCharBuffer = new char[2];

    int invalidUCharLength;

    Object fromUContext;

    /*
     * Construcs a new encoder for the given charset
     * 
     * @param cs
     *            for which the decoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }

    /**
     * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
     * that will convert a Unicode codepoint sequence to a byte sequence, but
     * the encoded byte sequence will round trip convert to a different
     * Unicode codepoint sequence.
     * @return true if the converter uses fallback, false otherwise.
     * @stable ICU 3.8
     */
    public boolean isFallbackUsed() {
        return useFallback;
    }

    /**
     * Sets whether this Encoder can use fallbacks?
     * @param usesFallback true if the user wants the converter to take
     *  advantage of the fallback mapping, false otherwise.
     * @stable ICU 3.8
     */
    public void setFallbackUsed(boolean usesFallback) {
        useFallback = usesFallback;
    }

   
    /**
     * Sets fromUContext used in callbacks.
     * 
     * @param newContext Object
     * @exception IllegalArgumentException
     * @draft ICU 4.0
     * @provisional This API might change or be removed in a future release.
     */
    public final void setFromUContext(Object newContext) {
        fromUContext = newContext;
    }

    private static final CharBuffer EMPTY = CharBuffer.allocate(0);


    /**
     * Resets the from Unicode mode of converter
     * @stable ICU 3.6
     */
    protected void implReset() {
        errorBufferLength = 0;
        fromUnicodeStatus = 0;
        fromUChar32 = 0;
        fromUnicodeReset();
    }

    private void fromUnicodeReset() {
        preFromUBegin = 0;
        preFromUFirstCP = UConverterConstants.U_SENTINEL;
        preFromULength = 0;
    }

    /*
     * Implements ICU semantics of buffer management
     * @param source
     * @param target
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);


    /*
     * Ascertains if a given Unicode code point (32bit value for handling surrogates)
     * can be converted to the target encoding. If the caller wants to test if a
     * surrogate pair can be converted to target encoding then the
     * responsibility of assembling the int value lies with the caller.
     * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
     * 
     *  while(i
     * or
     * 
     *  String src = new String(mySource);
     *  int i,codepoint;
     *  boolean passed = false;
     *  while(i0xfff)? 2:1;
     *      if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
     *          passed = false;
     *      }
     *  }
     * 
* * @param codepoint Unicode code point as int value * @return true if a character can be converted */ /* TODO This is different from Java's canEncode(char) API. * ICU's API should implement getUnicodeSet, * and override canEncode(char) which queries getUnicodeSet. * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C. */ /*public boolean canEncode(int codepoint) { return true; }*/ /** * @param codepoint Unicode code point as int array * @return true if the input sequence can be converted */ public boolean canEncode(int[] codepoint) { return true; } /** * Overrides super class method * @stable ICU 3.6 */ public boolean isLegalReplacement(byte[] repl) { return true; } /* * Writes out the specified output bytes to the target byte buffer or to converter internal buffers. * @param cnv * @param bytesArray * @param bytesBegin * @param bytesLength * @param out * @param offsets * @param sourceIndex * @return A CoderResult object that contains the error result when an error occurs. */ static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv, byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out, IntBuffer offsets, int sourceIndex) { //write bytes int obl = bytesLength; CoderResult cr = CoderResult.UNDERFLOW; int bytesLimit = bytesBegin + bytesLength; try { for (; bytesBegin < bytesLimit;) { out.put(bytesArray[bytesBegin]); bytesBegin++; } // success bytesLength = 0; } catch (BufferOverflowException ex) { cr = CoderResult.OVERFLOW; } if (offsets != null) { while (obl > bytesLength) { offsets.put(sourceIndex); --obl; } } //write overflow cnv.errorBufferLength = bytesLimit - bytesBegin; if (cnv.errorBufferLength > 0) { int index = 0; while (bytesBegin < bytesLimit) { cnv.errorBuffer[index++] = bytesArray[bytesBegin++]; } cr = CoderResult.OVERFLOW; } return cr; } /* * Returns the number of chars held in the converter's internal state * because more input is needed for completing the conversion. This function is * useful for mapping semantics of ICU's converter interface to those of iconv, * and this information is not needed for normal conversion. * @return The number of chars in the state. -1 if an error is encountered. */ /*public*/int fromUCountPending() { if (preFromULength > 0) { return UTF16.getCharCount(preFromUFirstCP) + preFromULength; } else if (preFromULength < 0) { return -preFromULength; } else if (fromUChar32 > 0) { return 1; } else if (preFromUFirstCP > 0) { return UTF16.getCharCount(preFromUFirstCP); } return 0; } /** * * @param source */ private final void setSourcePosition(CharBuffer source) { // ok was there input held in the previous invocation of encodeLoop // that resulted in output in this invocation? source.position(source.position() - fromUCountPending()); } /* * Write the codepage substitution character. * Subclasses to override this method. * For stateful converters, it is typically necessary to handle this * specificially for the converter in order to properly maintain the state. * @param source The input character buffer * @param target The output byte buffer * @param offsets * @return A CoderResult object that contains the error result when an error occurs. */ CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target, IntBuffer offsets) { CharsetICU cs = (CharsetICU) encoder.charset(); byte[] sub = encoder.replacement(); if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) { return CharsetEncoderICU.fromUWriteBytes(encoder, new byte[] { cs.subChar1 }, 0, 1, target, offsets, source .position()); } else { return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0, sub.length, target, offsets, source.position()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy