com.adobe.agl.charset.CharsetEncoderICU Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
/**
*******************************************************************************
* Copyright (C) 2006-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
/*
* File: CharsetEncoderICU.java
* ************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
* Copyright 2012 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.adobe.agl.charset;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import com.adobe.agl.text.UTF16;
/**
 * An abstract class that provides framework methods of encoding operations for concrete
 * subclasses.
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
public abstract class CharsetEncoderICU extends CharsetEncoder {
    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    /* output bytes that did not fit into the target buffer; filled by fromUWriteBytes */
    byte[] errorBuffer = new byte[30];
    /* number of valid bytes currently stored in errorBuffer */
    int errorBufferLength = 0;

    /** these are for encodeLoopICU */
    int fromUnicodeStatus;
    /* a value greater than 0 is reported as one pending char by fromUCountPending */
    int fromUChar32;
    boolean useSubChar1;
    /* read and written through isFallbackUsed / setFallbackUsed */
    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */
    char[] preFromUArray = new char[EXT_MAX_UCHARS];
    int preFromUBegin;
    int preFromULength; /* negative: replay */

    /* cbFromUWriteSub inspects the first element to choose the substitution bytes */
    char[] invalidUCharBuffer = new char[2];
    int invalidUCharLength;

    /* opaque callback context, assigned by setFromUContext */
    Object fromUContext;

    /*
     * Constructs a new encoder for the given charset.
     * The average bytes-per-char value handed to the superclass is computed as
     * (cs.minBytesPerChar + cs.maxBytesPerChar) / 2.
     *
     * @param cs
     *            the charset for which the encoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }

    /**
     * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
     * that will convert a Unicode codepoint sequence to a byte sequence, but
     * the encoded byte sequence will round trip convert to a different
     * Unicode codepoint sequence.
     * @return true if the converter uses fallback, false otherwise.
     * @stable ICU 3.8
     */
    public boolean isFallbackUsed() {
        return useFallback;
    }

    /**
     * Sets whether this Encoder can use fallbacks.
     * @param usesFallback true if the user wants the converter to take
     * advantage of the fallback mapping, false otherwise.
     * @stable ICU 3.8
     */
    public void setFallbackUsed(boolean usesFallback) {
        useFallback = usesFallback;
    }

    /**
     * Sets fromUContext used in callbacks. The value is stored as given; no
     * validation is performed.
     *
     * @param newContext Object
     * @draft ICU 4.0
     * @provisional This API might change or be removed in a future release.
     */
    public final void setFromUContext(Object newContext) {
        fromUContext = newContext;
    }

    /* NOTE(review): not referenced anywhere inside this class. */
    private static final CharBuffer EMPTY = CharBuffer.allocate(0);

    /**
     * Resets the from Unicode mode of converter: clears the pending overflow
     * bytes, the conversion status, the pending code point and the partial-match
     * state.
     * @stable ICU 3.6
     */
    @Override
    protected void implReset() {
        errorBufferLength = 0;
        fromUnicodeStatus = 0;
        fromUChar32 = 0;
        fromUnicodeReset();
    }

    /* Clears the partial-match (preFromU*) state. */
    private void fromUnicodeReset() {
        preFromUBegin = 0;
        preFromUFirstCP = UConverterConstants.U_SENTINEL;
        preFromULength = 0;
    }

    /*
     * Implements ICU semantics of buffer management
     * @param source
     * @param target
     * @param offsets
     * @param flush presumably true when no further input will follow - confirm against subclasses
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);

    /*
     * Ascertains if a given Unicode code point (32bit value for handling surrogates)
     * can be converted to the target encoding. If the caller wants to test if a
     * surrogate pair can be converted to target encoding then the
     * responsibility of assembling the int value lies with the caller.
     * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
     *
     *  char[] mySource = ...;
     *  boolean passed = true;
     *  int i = 0;
     *  while (i < mySource.length) {
     *      int codepoint = UTF16.charAt(mySource, 0, mySource.length, i);
     *      if (!((CharsetEncoderICU) myConv).canEncode(codepoint)) {
     *          passed = false;
     *      }
     *      i += UTF16.getCharCount(codepoint);
     *  }
     *
     * or
     *
     *  String src = new String(mySource);
     *  boolean passed = true;
     *  int i = 0;
     *  while (i < src.length()) {
     *      int codepoint = UTF16.charAt(src, i);
     *      i += UTF16.getCharCount(codepoint);
     *      if (!((CharsetEncoderICU) myConv).canEncode(codepoint)) {
     *          passed = false;
     *      }
     *  }
     *
     * @param codepoint Unicode code point as int value
     * @return true if a character can be converted
     */
    /* TODO This is different from Java's canEncode(char) API.
     * ICU's API should implement getUnicodeSet,
     * and override canEncode(char) which queries getUnicodeSet.
     * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
     */
    /*public boolean canEncode(int codepoint) {
        return true;
    }*/

    /**
     * Reports whether the given code point sequence can be converted.
     * The current implementation performs no check and always answers true.
     *
     * @param codepoint Unicode code point as int array
     * @return true if the input sequence can be converted
     */
    public boolean canEncode(int[] codepoint) {
        return true;
    }

    /**
     * Overrides super class method. Every replacement byte sequence is accepted
     * without validation.
     *
     * @param repl the proposed replacement bytes
     * @return always true
     * @stable ICU 3.6
     */
    @Override
    public boolean isLegalReplacement(byte[] repl) {
        return true;
    }

    /*
     * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
     * Bytes are copied to the target while it has room; whatever does not fit is
     * stored in cnv.errorBuffer and cnv.errorBufferLength is set to that count.
     * When offsets is non-null, one entry (sourceIndex) is recorded for every byte
     * that actually reached the target, including the case where only part of the
     * sequence fit.
     *
     * @param cnv the encoder whose overflow buffer receives the bytes that do not fit
     * @param bytesArray array holding the bytes to write
     * @param bytesBegin index of the first byte to write
     * @param bytesLength number of bytes to write
     * @param out the target byte buffer
     * @param offsets optional offsets buffer, may be null
     * @param sourceIndex the source index recorded for each written byte
     * @return CoderResult.UNDERFLOW when every byte was written to the target,
     *         CoderResult.OVERFLOW when some bytes had to be kept in the overflow buffer.
     */
    static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
            byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
            IntBuffer offsets, int sourceIndex) {
        final int bytesLimit = bytesBegin + bytesLength;
        int next = bytesBegin;

        // write bytes while the target can take them
        int written = 0;
        while (next < bytesLimit && out.hasRemaining()) {
            out.put(bytesArray[next++]);
            written++;
        }

        // keep offsets in step with the target: one entry per byte written
        if (offsets != null) {
            for (int i = 0; i < written; i++) {
                offsets.put(sourceIndex);
            }
        }

        // write overflow
        CoderResult cr = CoderResult.UNDERFLOW;
        cnv.errorBufferLength = bytesLimit - next;
        if (cnv.errorBufferLength > 0) {
            System.arraycopy(bytesArray, next, cnv.errorBuffer, 0,
                    cnv.errorBufferLength);
            cr = CoderResult.OVERFLOW;
        }
        return cr;
    }

    /*
     * Returns the number of chars held in the converter's internal state
     * because more input is needed for completing the conversion. This function is
     * useful for mapping semantics of ICU's converter interface to those of iconv,
     * and this information is not needed for normal conversion.
     * @return The number of chars in the state, 0 if nothing is pending.
     */
    /*public*/int fromUCountPending() {
        if (preFromULength > 0) {
            // partial match: first code point plus the buffered chars
            return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
        } else if (preFromULength < 0) {
            // replay: the negated length is the number of chars
            return -preFromULength;
        } else if (fromUChar32 > 0) {
            return 1;
        } else if (preFromUFirstCP > 0) {
            return UTF16.getCharCount(preFromUFirstCP);
        }
        return 0;
    }

    /**
     * Moves the source position back by the number of chars still pending in
     * the converter state (see fromUCountPending).
     *
     * @param source the input buffer whose position is adjusted
     */
    private final void setSourcePosition(CharBuffer source) {
        // ok was there input held in the previous invocation of encodeLoop
        // that resulted in output in this invocation?
        source.position(source.position() - fromUCountPending());
    }

    /*
     * Write the codepage substitution character.
     * Subclasses to override this method.
     * For stateful converters, it is typically necessary to handle this
     * specifically for the converter in order to properly maintain the state.
     * The charset's subChar1 byte is written when it is non-zero and the first
     * invalid char of the encoder is at most 0xff; otherwise the encoder's
     * replacement bytes are written.
     *
     * @param encoder the encoder whose charset, replacement and invalid-char state are used
     * @param source The input character buffer; its position is recorded as the offset
     * @param target The output byte buffer
     * @param offsets optional offsets buffer, may be null
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
            ByteBuffer target, IntBuffer offsets) {
        CharsetICU cs = (CharsetICU) encoder.charset();
        byte[] sub = encoder.replacement();
        if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
            return CharsetEncoderICU.fromUWriteBytes(encoder,
                    new byte[] { cs.subChar1 }, 0, 1, target, offsets,
                    source.position());
        } else {
            return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
                    sub.length, target, offsets, source.position());
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy