All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.freeutils.charset.EscapedByteLookupCharset Maven / Gradle / Ivy

/*
 *  Copyright © 2005-2015 Amichai Rothman
 *
 *  This file is part of JCharset - the Java Charset package.
 *
 *  JCharset is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  JCharset is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with JCharset.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  For additional info see http://www.freeutils.net/source/jcharset/
 */

package net.freeutils.charset;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

/**
 * The <b>EscapedByteLookupCharset</b> class handles the encoding and
 * decoding of simple charsets where the byte-to-char conversion
 * is performed using a simple lookup table, with the addition of a special
 * escape byte, such that the single byte following it is converted using
 * an alternate lookup table.
 *
 * @author  Amichai Rothman
 * @since   2007-03-26
 */
public abstract class EscapedByteLookupCharset extends Charset {

    /**
     * Regular byte-to-char table, indexed by the unsigned byte value.
     * An entry of -1 marks a byte that has no corresponding character.
     */
    int[] BYTE_TO_CHAR;

    /**
     * Byte-to-char table used for the single byte that follows the escape
     * byte, indexed by the unsigned byte value. An entry of -1 marks a byte
     * that has no corresponding character.
     */
    int[] BYTE_TO_CHAR_ESCAPED;

    /**
     * Regular char-to-byte table: the first index is the high byte of the
     * char, the second index is its low byte. A row may be null, and an
     * entry of -1 marks a char that has no regular byte.
     */
    int[][] CHAR_TO_BYTE;

    /**
     * Char-to-byte table for chars that are written as the escape byte
     * followed by the looked-up byte. Same layout as {@link #CHAR_TO_BYTE}.
     */
    int[][] CHAR_TO_BYTE_ESCAPED;

    /** The escape byte that switches the following byte to the escaped tables. */
    byte ESCAPE;

    /**
     * Initializes a new charset with the given canonical name and alias
     * set, and byte-to-char/char-to-byte lookup tables.
     *
     * @param canonicalName the canonical name of this charset
     * @param aliases an array of this charset's aliases, or null if it has no aliases
     * @param escape the special escape byte value
     * @param byteToChar a byte-to-char conversion table for this charset
     * @param byteToCharEscaped a byte-to-char conversion table for this charset
     *        for the escaped characters
     * @param charToByte a char-to-byte conversion table for this charset. It can
     *        be generated on-the-fly by calling
     *        {@link ByteLookupCharset#createInverseLookupTable
     *        createInverseLookupTable(byteToChar)}.
     * @param charToByteEscaped a char-to-byte conversion table for this charset
     *        for the escaped characters
     * @throws java.nio.charset.IllegalCharsetNameException
     *         if the canonical name or any of the aliases are illegal
     */
    protected EscapedByteLookupCharset(String canonicalName, String[] aliases,
            byte escape, int[] byteToChar, int[] byteToCharEscaped,
            int[][] charToByte, int[][] charToByteEscaped) {
        super(canonicalName, aliases);
        ESCAPE = escape;
        BYTE_TO_CHAR = byteToChar;
        CHAR_TO_BYTE = charToByte;
        BYTE_TO_CHAR_ESCAPED = byteToCharEscaped;
        CHAR_TO_BYTE_ESCAPED = charToByteEscaped;
    }

    /**
     * Tells whether or not this charset contains the given charset.
     *
     * <p>A charset <i>C</i> is said to <i>contain</i> a charset <i>D</i> if,
     * and only if, every character representable in <i>D</i> is also
     * representable in <i>C</i>. If this relationship holds then it is
     * guaranteed that every string that can be encoded in <i>D</i> can also be
     * encoded in <i>C</i> without performing any replacements.
     *
     * <p>That <i>C</i> contains <i>D</i> does not imply that each character
     * representable in <i>C</i> by a particular byte sequence is represented
     * in <i>D</i> by the same byte sequence, although sometimes this is the
     * case.
     *
     * <p>Every charset contains itself.
     *
     * <p>This method computes an approximation of the containment relation:
     * If it returns <tt>true</tt> then the given charset is known to be
     * contained by this charset; if it returns <tt>false</tt>, however, then
     * it is not necessarily the case that the given charset is not contained
     * in this charset. This implementation returns <tt>true</tt> exactly when
     * the given charset is an instance of this charset's runtime class.
     *
     * @param cs the charset to test
     * @return <tt>true</tt> if, and only if, the given charset
     *         is contained in this charset
     */
    @Override
    public boolean contains(Charset cs) {
        return this.getClass().isInstance(cs);
    }

    /**
     * Constructs a new decoder for this charset.
     *
     * @return a new decoder for this charset
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * Constructs a new encoder for this charset.
     *
     * @return a new encoder for this charset
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * The <b>Encoder</b> inner class handles the encoding of the
     * charset using the lookup tables.
     */
    protected class Encoder extends CharsetEncoder {

        /**
         * Constructs an Encoder that produces on average one byte,
         * and at most two bytes (escape byte plus value), per character.
         *
         * @param cs the charset to which this encoder belongs
         */
        protected Encoder(Charset cs) {
            super(cs, 1f, 2f);
        }

        /**
         * Constructs an Encoder.
         *
         * @param cs the charset to which this encoder belongs
         * @param averageBytesPerChar a positive float value indicating the expected
         *        number of bytes that will be produced for each input character
         * @param maxBytesPerChar a positive float value indicating the maximum
         *        number of bytes that will be produced for each input character
         */
        protected Encoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar) {
            super(cs, averageBytesPerChar, maxBytesPerChar);
        }

        /**
         * Encodes one or more characters into one or more bytes.
         *
         * <p>Whenever a result other than underflow is returned, the input
         * position is left on the character that could not be processed.
         *
         * @param in the input character buffer
         * @param out the output byte buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            int remaining = in.remaining();
            while (remaining-- > 0) {
                // make sure we have room for at least one output byte
                if (out.remaining() < 1) {
                    return CoderResult.OVERFLOW;
                }
                // get next char
                int c = in.get();
                // look for corresponding regular byte
                int[] table = CHAR_TO_BYTE[c >> 8];
                int b = table == null ? -1 : table[c & 0xFF];
                if (b == -1) {
                    // look for corresponding escaped byte
                    table = CHAR_TO_BYTE_ESCAPED[c >> 8];
                    b = table == null ? -1 : table[c & 0xFF];
                    if (b == -1) {
                        // there's no regular nor escaped byte - it's unmappable
                        in.position(in.position() - 1); // unread the char
                        return CoderResult.unmappableForLength(1);
                    }
                    // it's an escapable char, which needs room for two bytes
                    if (out.remaining() < 2) {
                        in.position(in.position() - 1); // unread the char
                        return CoderResult.OVERFLOW;
                    }
                    // write the escape byte (output byte will follow)
                    out.put(ESCAPE);
                }
                // write the output byte
                out.put((byte) (b & 0xFF));
            }
            // no more input available
            return CoderResult.UNDERFLOW;
        }
    }

    /**
     * The <b>Decoder</b> inner class handles the decoding of the
     * charset using the inverse lookup tables.
     */
    protected class Decoder extends CharsetDecoder {

        /**
         * Constructs a Decoder that produces on average, and at most,
         * one character per input byte.
         *
         * @param cs the charset to which this decoder belongs
         */
        protected Decoder(Charset cs) {
            super(cs, 1f, 1f);
        }

        /**
         * Constructs a Decoder.
         *
         * @param cs the charset to which this decoder belongs
         * @param averageCharsPerByte a positive float value indicating the expected
         *        number of characters that will be produced for each input byte
         * @param maxCharsPerByte a positive float value indicating the maximum
         *        number of characters that will be produced for each input byte
         */
        protected Decoder(Charset cs, float averageCharsPerByte, float maxCharsPerByte) {
            super(cs, averageCharsPerByte, maxCharsPerByte);
        }

        /**
         * Decodes one or more bytes into one or more characters.
         *
         * <p>Whenever a result other than underflow at the end of the input
         * is returned, the input position is left at the first byte of the
         * sequence that could not be processed. In particular, an escape byte
         * followed by a byte that has no escaped mapping is reported as a
         * malformed sequence of length 2 starting at the escape byte, so that
         * the escape byte is never silently dropped.
         *
         * @param in the input byte buffer
         * @param out the output character buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            int remaining = in.remaining();
            while (remaining-- > 0) {
                // make sure we have room for output
                if (out.remaining() < 1) {
                    return CoderResult.OVERFLOW;
                }
                // get next byte
                int b = in.get();
                int consumed = 1; // number of bytes read for the current sequence
                int c;
                if (b == ESCAPE) {
                    // it's the escape byte - make sure we have the next byte
                    if (remaining-- == 0) {
                        in.position(in.position() - 1); // unread the escape byte
                        return CoderResult.UNDERFLOW;
                    }
                    // get next byte
                    b = in.get();
                    consumed = 2;
                    // look for corresponding escaped char
                    c = BYTE_TO_CHAR_ESCAPED[b & 0xFF];
                } else {
                    // look for corresponding regular char
                    c = BYTE_TO_CHAR[b & 0xFF];
                }
                if (c == -1) {
                    // there's no regular nor escaped char - it's malformed;
                    // unread the whole sequence (including the escape byte, if any)
                    // so the reported position and length cover all of it
                    in.position(in.position() - consumed);
                    return CoderResult.malformedForLength(consumed);
                }
                // write the output char
                out.put((char) c);
            }
            // no more input available
            return CoderResult.UNDERFLOW;
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy