All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
net.freeutils.charset.PackedGSMCharset Maven / Gradle / Ivy
/*
* Copyright © 2005-2015 Amichai Rothman
*
* This file is part of JCharset - the Java Charset package.
*
* JCharset is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* JCharset is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
*
* For additional info see http://www.freeutils.net/source/jcharset/
*/
package net.freeutils.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
/**
* The PackedGSMCharset class handles the encoding and decoding of the
* GSM default encoding charset, with packing as per GSM 03.38 spec.
*
* The encoding and decoding are based on the mapping at
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
*
* @author Amichai Rothman
* @since 2007-03-20
*/
public class PackedGSMCharset extends GSMCharset {
/** Capacity, in bytes, of the intermediate ByteBuffer that each Encoder and Decoder allocates. */
static final int BUFFER_SIZE = 256;
/**
 * Creates a packed GSM charset, handing every argument on unchanged to the
 * superclass constructor.
 *
 * @param canonicalName the charset's canonical name
 * @param aliases the charset's aliases, or null if there are none
 * @param byteToChar lookup table mapping bytes to chars
 * @param byteToCharEscaped lookup table mapping bytes to chars for the
 *        escaped characters
 * @param charToByte lookup table mapping chars to bytes; it may be built
 *        on the fly by calling createInverseLookupTable(byteToChar)
 * @param charToByteEscaped lookup table mapping chars to bytes for the
 *        escaped characters
 * @throws java.nio.charset.IllegalCharsetNameException
 *         if the canonical name or any of the aliases is illegal
 */
protected PackedGSMCharset(
        String canonicalName,
        String[] aliases,
        int[] byteToChar,
        int[] byteToCharEscaped,
        int[][] charToByte,
        int[][] charToByteEscaped) {
    super(
        canonicalName,
        aliases,
        byteToChar,
        byteToCharEscaped,
        charToByte,
        charToByteEscaped);
}
/**
 * Creates a fresh {@link Decoder} bound to this charset.
 *
 * @return a newly constructed decoder for this charset
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder decoder = new Decoder(this);
    return decoder;
}
/**
 * Creates a fresh {@link Encoder} bound to this charset.
 *
 * @return a newly constructed encoder for this charset
 */
public CharsetEncoder newEncoder() {
    final CharsetEncoder encoder = new Encoder(this);
    return encoder;
}
/**
* The Encoder inner class handles the encoding of the
* Packed GSM default encoding charset.
*/
protected class Encoder extends GSMCharset.Encoder {
// bit offset within 'current' at which the next 7-bit value is written;
// pack() advances it by 7 modulo 8 after every value
int bitpos;
// partially filled output byte that has not been written to the output yet
byte current;
// intermediate buffer: receives the output of GSMCharset.Encoder.encodeLoop
// and is then drained by pack()
ByteBuffer buf;
/**
 * Creates an encoder with an empty intermediate buffer and cleared
 * packing state.
 *
 * @param cs the charset this encoder belongs to
 */
protected Encoder(Charset cs) {
    // presumably (average, maximum) bytes per char, as in CharsetEncoder's
    // constructor - confirm against GSMCharset.Encoder
    super(cs, 7 / 8f, 2f);
    this.buf = ByteBuffer.allocate(BUFFER_SIZE);
    this.implReset();
}
/**
 * Returns this encoder to its initial state: no pending partial byte,
 * bit offset zero, and an intermediate buffer with nothing left to read.
 */
protected void implReset() {
    this.current = 0;
    this.bitpos = 0;
    // a limit of zero makes the buffer report no remaining data
    this.buf.limit(0);
}
/**
 * Flushes this encoder: packs whatever is still waiting in the
 * intermediate buffer and then writes the final, partially filled
 * byte if there is one.
 *
 * @param out the output byte buffer
 *
 * @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
 *         {@link CoderResult#OVERFLOW}
 */
protected CoderResult implFlush(ByteBuffer out) {
    // drain the intermediate buffer first
    final CoderResult packed = pack(buf, out);
    // no leftover bits: the packing result is the final answer
    if (bitpos == 0) {
        return packed;
    }
    // leftover bits exist: emit them if there is room for one more byte
    if (out.hasRemaining()) {
        out.put(current);
        return packed;
    }
    return CoderResult.OVERFLOW;
}
/**
 * Encodes characters into packed bytes. Each round first packs any data
 * left in the intermediate buffer into the output, then refills the
 * intermediate buffer by running the superclass encoder on the input.
 *
 * @param in the input character buffer
 * @param out the output byte buffer
 * @return a coder-result object describing the reason for termination
 */
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    for (;;) {
        // push previously encoded data to the output; give up if it is full
        if (buf.hasRemaining() && pack(buf, out) == CoderResult.OVERFLOW) {
            return CoderResult.OVERFLOW;
        }
        // refill the intermediate buffer from the input
        buf.clear();
        final CoderResult encoded = super.encodeLoop(in, buf);
        buf.flip();
        // finished when encoding failed or produced nothing new
        if (encoded.isError() || !buf.hasRemaining()) {
            return encoded;
        }
    }
}
/**
 * Packs the low 7 bits of every input byte into consecutive output bits,
 * carrying the unfinished output byte and its bit offset across calls in
 * the encoder's state.
 *
 * @param in the input byte buffer
 * @param out the output byte buffer
 * @return {@link CoderResult#UNDERFLOW} once all input has been consumed,
 *         or {@link CoderResult#OVERFLOW} if the output has no room left
 *         while input is still pending
 */
protected CoderResult pack(ByteBuffer in, ByteBuffer out) {
    while (in.hasRemaining()) {
        if (!out.hasRemaining()) {
            return CoderResult.OVERFLOW;
        }
        final int septet = in.get() & 0x7F; // top bit is discarded
        // low part of the septet goes into the free bits of the pending byte
        current |= (byte) (septet << bitpos);
        if (bitpos >= 2) {
            // the septet did not fit: emit the completed byte and start the
            // next one with the bits that spilled over
            out.put(current);
            current = (byte) (septet >> (8 - bitpos));
        }
        bitpos = (bitpos + 7) % 8;
        if (bitpos == 0) {
            // the septet ended exactly on a byte boundary
            out.put(current);
            current = 0;
        }
    }
    return CoderResult.UNDERFLOW;
}
}
/**
* The Decoder inner class handles the decoding of the
* Packed GSM default encoding charset.
*/
protected class Decoder extends GSMCharset.Decoder {
// bit offset within 'current' at which the next 7-bit value starts;
// unpack() advances it by 7 modulo 8 after every value
int bitpos;
// the packed input byte most recently read by unpack()
byte current;
// value 'current' held before the latest read; decodeLoop restores it
// when it undoes the last unpacked byte
byte prev;
// running count of unpacked bytes, maintained by decodeLoop and
// checked by implFlush for being a multiple of 8
int unpackedCount;
// intermediate buffer: filled by unpack() and then read by
// GSMCharset.Decoder.decodeLoop
ByteBuffer buf;
/**
 * Creates a decoder with an empty intermediate buffer and cleared
 * unpacking state.
 *
 * @param cs the charset this decoder belongs to
 */
protected Decoder(Charset cs) {
    // presumably (average, maximum) chars per byte, as in CharsetDecoder's
    // constructor - confirm against GSMCharset.Decoder
    super(cs, 8 / 7f, 2f);
    this.buf = ByteBuffer.allocate(BUFFER_SIZE);
    this.implReset();
}
/**
 * Returns this decoder to its initial state: bit offset zero, no
 * remembered input bytes, a zero unpacked-byte count, and an intermediate
 * buffer with nothing left to read.
 */
protected void implReset() {
    this.unpackedCount = 0;
    this.prev = 0;
    this.current = 0;
    this.bitpos = 0;
    // a limit of zero makes the buffer report no remaining data
    this.buf.limit(0);
}
/**
 * Flushes this decoder, dropping a trailing '@' from the output when the
 * number of unpacked bytes is a non-zero multiple of 8.
 *
 * If this decoder has unpacked nothing, the output buffer is left
 * untouched, so a '@' that was already in the caller's buffer is never
 * removed.
 *
 * @param out the output character buffer
 *
 * @return always {@link CoderResult#UNDERFLOW}
 */
protected CoderResult implFlush(CharBuffer out) {
    // this fixes an ambiguity bug in the specs
    // where the last of 8 packed bytes is 0
    // and it's impossible to distinguish whether it is a
    // trailing '@' character (which is mapped to 0)
    // or extra zero-bit padding for 7 actual data bytes.
    //
    // we opt for the latter, since it's far more likely,
    // at the cost of losing a trailing '@' character
    // in strings whose unpacked size modulo 8 is 0,
    // and whose last character is '@'.
    //
    // an application that wishes to handle this rare case
    // properly must disambiguate this case externally, such
    // as by obtaining the original string length, and
    // appending the trailing '@' if the length
    // shows that there is one character missing.
    //
    // the count must be non-zero: with nothing unpacked, any '@' at the
    // end of the buffer was not produced by this decoder
    if (unpackedCount > 0 && unpackedCount % 8 == 0) {
        int pos = out.position();
        if (pos > 0 && out.get(pos - 1) == '@') {
            out.position(pos - 1);
        }
    }
    return CoderResult.UNDERFLOW;
}
/**
* Decodes one or more bytes into one or more characters.
*
* Each round unpacks packed input into the intermediate buffer and then
* passes that buffer to the superclass decodeLoop. The loop repeats for as
* long as the superclass consumes the whole buffer without error.
*
* @param in the input byte buffer
* @param out the output character buffer
* @return the unpack result if nothing could be unpacked, otherwise the
* result of the superclass decodeLoop once it leaves data in the
* intermediate buffer or reports an error
*/
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
while (true) {
// unpack input data into buffer
unpackedCount -= buf.remaining(); // it will be counted again after unpack
buf.compact(); // move data to beginning and prepare to write more
CoderResult unpackResult = unpack(in, buf);
buf.flip(); // prepare to read
if (!buf.hasRemaining())
return unpackResult; // underflow
unpackedCount += buf.remaining();
// decode buffered unpacked data to output
CoderResult decodeResult = super.decodeLoop(buf, out);
// handle out of output space and buffer still has data in it
if (buf.hasRemaining() || decodeResult.isError()) {
if (decodeResult.isUnderflow()) { // last byte is escape byte
// if there's more input or at least another unpacked byte
// (the 8th doesn't require reading from input), just continue
if (in.hasRemaining() || unpackResult.isOverflow())
continue;
// otherwise we really need more input, so undo the last byte
// (escape sequence which was cut in middle) so caller can
// properly handle malformed input if there is no more input
// NOTE(review): if the undone byte was the 8th of a group (unpacked
// from 'current' without reading input), this still rewinds 'in' by
// one and restores 'prev' - confirm that is intended
in.position(in.position() - 1); // unread the byte
bitpos = (bitpos + 9) % 8; // undo its unpacking too (unpack added 7; 7 + 9 = 16 = 0 mod 8)
current = prev;
buf.limit(buf.position()); // drop the undone byte from the intermediate buffer
unpackedCount--;
}
return decodeResult;
}
}
}
/**
 * Unpacks packed input into one 7-bit value per output byte, carrying the
 * current input byte and bit offset across calls in the decoder's state.
 *
 * @param in the input byte buffer
 * @param out the output byte buffer
 * @return {@link CoderResult#OVERFLOW} once the output is full, or
 *         {@link CoderResult#UNDERFLOW} if more input is needed to
 *         produce the next value
 */
protected CoderResult unpack(ByteBuffer in, ByteBuffer out) {
    while (out.hasRemaining()) {
        // at bit offset 1 the next value lies entirely inside 'current',
        // so only the other offsets need fresh input
        if (bitpos != 1 && !in.hasRemaining()) {
            return CoderResult.UNDERFLOW;
        }
        if (bitpos == 0) {
            prev = current;
            current = in.get();
        }
        // bits available in the current byte, top bit removed
        int septet = ((current & 0xFF) >> bitpos) & 0x7F;
        if (bitpos >= 2) {
            // the value continues in the next input byte
            prev = current;
            current = in.get();
            septet |= (current << (8 - bitpos)) & 0x7F;
        }
        bitpos = (bitpos + 7) % 8;
        out.put((byte) septet);
    }
    return CoderResult.OVERFLOW;
}
}
/**
 * Expands packed data into one 7-bit value per output byte.
 *
 * This is a standalone utility; the Charset implementation itself
 * does not call it.
 *
 * The result holds (in.length * 8) / 7 values, except that a final zero
 * value is dropped when that count is a non-zero multiple of 8 (see the
 * comment in the body).
 *
 * @param in the packed input bytes
 * @return the unpacked output bytes
 */
public static byte[] unpack(byte[] in) {
    final int septetCount = (in.length * 8) / 7;
    final byte[] septets = new byte[septetCount];
    int src = 0;   // index of the input byte being read
    int shift = 0; // bit offset of the next value within in[src]
    for (int dst = 0; dst < septetCount; dst++) {
        // bits available in the current input byte, top bit removed
        int value = ((in[src] & 0xFF) >> shift) & 0x7F;
        if (shift >= 2) {
            // the value continues in the next input byte
            src++;
            value |= (in[src] << (8 - shift)) & 0x7F;
        }
        septets[dst] = (byte) value;
        shift = (shift + 7) % 8;
        if (shift == 0) {
            src++;
        }
    }
    // The spec is ambiguous when the last of 8 unpacked values is 0:
    // it may be a trailing '@' character (which is mapped to 0) or
    // zero-bit padding after 7 real values. Padding is far more
    // likely, so the value is dropped, at the cost of losing a trailing
    // '@' in strings whose unpacked size is a multiple of 8.
    //
    // A caller that must handle that rare case has to disambiguate it
    // externally, e.g. by knowing the original string length and
    // appending the missing '@'.
    final boolean endsWithPadding =
            septetCount > 0 && septetCount % 8 == 0 && septets[septetCount - 1] == 0;
    if (!endsWithPadding) {
        return septets;
    }
    final byte[] trimmed = new byte[septetCount - 1];
    System.arraycopy(septets, 0, trimmed, 0, trimmed.length);
    return trimmed;
}
/**
 * Packs the low 7 bits of every input byte into consecutive output bits,
 * least significant bit first. Unused bits of the last output byte are
 * left zero.
 *
 * This is an external utility method and is not used
 * internally by the Charset implementation.
 *
 * @param in the input bytes; the top bit of each is ignored
 * @return the packed output bytes, ceil(in.length * 7 / 8) in number
 */
public static byte[] pack(byte[] in) {
    final int len = in.length;
    // Exact integer ceiling of len * 7 / 8. Float arithmetic rounds once
    // len * 7 exceeds 2^24 and can yield an array one byte too short;
    // long arithmetic also avoids int overflow for very large inputs.
    final byte[] out = new byte[(int) ((len * 7L + 7) / 8)];
    int current = 0; // index of the output byte being filled
    int bitpos = 0;  // bit offset of the next value within out[current]
    for (int i = 0; i < len; i++) {
        byte b = (byte) (in[i] & 0x7F); // remove top bit
        // assign first half of partial bits
        out[current] |= (byte) ((b & 0xFF) << bitpos);
        // assign second half of partial bits (if exist)
        if (bitpos >= 2) {
            out[++current] |= (b >> (8 - bitpos));
        }
        bitpos = (bitpos + 7) % 8;
        if (bitpos == 0) {
            current++;
        }
    }
    return out;
}
}