All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.fop.afp.fonts.CharactersetEncoder Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.afp.fonts;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;

/**
 * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a
 * specified format.
 * <p>
 * Every use of the wrapped {@link CharsetEncoder} is serialized on the encoder instance, because a
 * {@code CharsetEncoder} is not safe for use by multiple concurrent threads.
 */
public abstract class CharactersetEncoder {

    private final CharsetEncoder encoder;

    /**
     * Creates an encoder for the named charset. Characters that cannot be mapped are replaced by
     * the charset's replacement bytes instead of causing a failure.
     *
     * @param encoding the name of the charset to encode to
     */
    private CharactersetEncoder(String encoding) {
        this.encoder = Charset.forName(encoding).newEncoder();
        this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    /**
     * Tells whether or not this encoder can encode the given character.
     *
     * @param c the character
     * @return true if, and only if, this encoder can encode the given character
     * @throws IllegalStateException - If an encoding operation is already in progress
     */
    final boolean canEncode(char c) {
        // Same lock as encode(): canEncode uses the shared encoder's internal state as well.
        synchronized (encoder) {
            return encoder.canEncode(c);
        }
    }

    /**
     * Encodes a character sequence to a byte array.
     *
     * @param chars the character sequence
     * @return the encoded character sequence
     * @throws CharacterCodingException if the encoding operation fails
     */
    final EncodedChars encode(CharSequence chars) throws CharacterCodingException {
        ByteBuffer bb;
        // encode method is not thread safe
        synchronized (encoder) {
            bb = encoder.encode(CharBuffer.wrap(chars));
        }
        // The backing array can only be handed over as-is when the buffer content starts at
        // index 0 of that array. The array may still be longer than the encoded data, so the
        // limit is passed along as the number of valid bytes.
        if (bb.hasArray() && bb.arrayOffset() == 0) {
            return getEncodedChars(bb.array(), bb.limit());
        }
        bb.rewind();
        byte[] bytes = new byte[bb.remaining()];
        bb.get(bytes);
        return getEncodedChars(bytes, bytes.length);
    }

    /**
     * Wraps the raw encoder output into an {@link EncodedChars} instance.
     *
     * @param byteArray the array holding the encoded bytes, starting at index 0; it may be longer
     *                  than {@code length}
     * @param length the number of valid encoded bytes in {@code byteArray}
     * @return the wrapped encoded bytes
     */
    abstract EncodedChars getEncodedChars(byte[] byteArray, int length);

    /**
     * Encodes chars into a format specified by encoding.
     *
     * @param chars the character sequence
     * @param encoding the encoding type
     * @return encoded data
     * @throws CharacterCodingException if encoding fails
     */
    public static EncodedChars encodeSBCS(CharSequence chars, String encoding)
            throws CharacterCodingException {
        CharactersetEncoder encoder = CharacterSetType.SINGLE_BYTE.getEncoder(encoding);
        return encoder.encode(chars);
    }

    /**
     * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character
     * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character
     * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators
     * are removed from the sequence of bytes. These are only used in Line Data.
     */
    static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder {
        EbcdicDoubleByteLineDataEncoder(String encoding) {
            super(encoding);
        }

        /**
         * Wraps the encoded bytes. When the data starts with 0x0E and ends with 0x0F, those two
         * bytes are excluded from the result.
         */
        @Override
        EncodedChars getEncodedChars(byte[] byteArray, int length) {
            // At least two bytes are needed for a leading 0x0E and a trailing 0x0F; the guard
            // also keeps empty input from indexing outside the array.
            if (length >= 2 && byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) {
                return new EncodedChars(byteArray, 1, length - 2, true);
            }
            return new EncodedChars(byteArray, 0, length, true);
        }
    }

    /**
     * The default encoder is used for encoding IBM format SBCS (single byte character sets), this
     * is the primary format for most Latin character sets. This can also be used for Unicode
     * double-byte character sets (DBCS).
     */
    static final class DefaultEncoder extends CharactersetEncoder {
        private final boolean isDBCS;

        DefaultEncoder(String encoding, boolean isDBCS) {
            super(encoding);
            this.isDBCS = isDBCS;
        }

        /**
         * Wraps the first {@code length} bytes of the array without any further processing.
         */
        @Override
        EncodedChars getEncodedChars(byte[] byteArray, int length) {
            // Honour length: the array may be larger than the encoded data.
            return new EncodedChars(byteArray, 0, length, isDBCS);
        }
    }

    /**
     * A container for encoded character bytes. It exposes the region of {@code length} bytes that
     * starts at {@code offset} in the backing array; bytes outside that region are never written
     * or returned.
     */
    // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked
    public static class EncodedChars {

        private final byte[] bytes;
        private final int offset;
        private final int length;
        private final boolean isDBCS;

        /**
         * @param bytes the backing array (not copied)
         * @param offset index of the first encoded byte in {@code bytes}
         * @param length number of encoded bytes
         * @param isDBCS whether the bytes represent double byte characters
         * @throws IllegalArgumentException if the region does not lie within {@code bytes}
         */
        private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) {
            // Written as a subtraction so that large values cannot overflow int.
            if (offset < 0 || length < 0 || length > bytes.length - offset) {
                throw new IllegalArgumentException("Invalid region: offset=" + offset
                        + ", length=" + length + ", array length=" + bytes.length);
            }
            this.bytes = bytes;
            this.offset = offset;
            this.length = length;
            this.isDBCS = isDBCS;
        }

        /**
         * write length bytes from offset to the output stream
         *
         * @param out output to write the bytes to
         * @param offset the offset of the first byte to write, relative to the start of the
         *               encoded bytes
         * @param length the number of bytes to write
         * @throws IOException if an I/O error occurs
         * @throws IllegalArgumentException if the requested range is not within the encoded bytes
         */
        public void writeTo(OutputStream out, int offset, int length) throws IOException {
            // Validate against the logical length, not the backing array: the array may hold
            // bytes before and after the region this object represents.
            if (offset < 0 || length < 0 || length > this.length - offset) {
                throw new IllegalArgumentException("Invalid range: offset=" + offset
                        + ", length=" + length + ", available=" + this.length);
            }
            out.write(bytes, this.offset + offset, length);
        }

        /**
         * The number of containing bytes.
         *
         * @return the length
         */
        public int getLength() {
            return length;
        }

        /**
         * Indicates whether or not the EncodedChars object wraps double byte characters.
         *
         * @return true if the wrapped characters are double byte (DBCSs)
         */
        public boolean isDBCS() {
            return isDBCS;
        }

        /**
         * The bytes
         *
         * @return a copy of the encoded bytes; its length equals {@link #getLength()}
         */
        public byte[] getBytes() {
            // return copy just in case
            byte[] copy = new byte[length];
            System.arraycopy(bytes, offset, copy, 0, length);
            return copy;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy