All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.antiaction.common.json.JSONEncoding Maven / Gradle / Ivy

/*
 * JSON library.
 * Copyright 2012-2013 Antiaction (http://antiaction.com/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.antiaction.common.json;

import java.io.IOException;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;

/**
 * Determines which UTF encoding was used to encode a JSON text by inspecting
 * the first 4 bytes of the encoded text, and hands out the matching
 * decoder/encoder for an encoding identifier.
 * <p>
 * Instances are obtained through {@link #getJSONEncoding()}, which keeps one
 * instance per calling thread in a <code>ThreadLocal</code>. The decoders and
 * encoders are cached in plain, unsynchronized instance fields, so the class
 * is thread safe only to the point that the same instance is always returned
 * to the same thread.
 * <pre>
 * BOM
 * 00 00 FE FF  UTF-32, big-endian
 * FF FE 00 00  UTF-32, little-endian
 * FE FF        UTF-16, big-endian
 * FF FE        UTF-16, little-endian
 * EF BB BF     UTF-8
 * Data
 * xx xx xx xx  UTF-8
 * 00 xx 00 xx  UTF-16BE
 * xx 00 xx 00  UTF-16LE
 * 00 00 00 xx  UTF-32BE
 * xx 00 00 00  UTF-32LE
 * </pre>
 *
 * @author Nicholas
 * Created on 08/10/2012
 */
public class JSONEncoding {

	/** Error encoding identifier. */
	public static final int E_UNKNOWN = -1;

	/** UTF-8 encoding identifier. (xx xx xx xx) */
	public static final int E_UTF8 = 0;

	/** UTF-16BE encoding identifier. (00 xx 00 xx) */
	public static final int E_UTF16BE = 1;

	/** UTF-16LE encoding identifier. (xx 00 xx 00) */
	public static final int E_UTF16LE = 2;

	/** UTF-32BE encoding identifier. (00 00 00 xx) */
	public static final int E_UTF32BE = 3;

	/** UTF-32LE encoding identifier. (xx 00 00 00) */
	public static final int E_UTF32LE = 4;

	/**
	 * Manages one JSONEncoding instance for each calling thread.
	 */
	private static final ThreadLocal<JSONEncoding> JSONEncodingTL =
		new ThreadLocal<JSONEncoding>() {
		@Override
		protected JSONEncoding initialValue() {
			return new JSONEncoding();
		}
	};

	/** Cached UTF-8 decoder. */
	private JSONDecoder utf8_decoder;

	/** Cached UTF-16BE decoder. */
	private JSONDecoder utf16be_decoder;

	/** Cached UTF-16LE decoder. */
	private JSONDecoder utf16le_decoder;

	/** Cached UTF-8 encoder. */
	private JSONEncoder utf8_encoder;

	/** Cached UTF-16BE encoder. */
	private JSONEncoder utf16be_encoder;

	/** Cached UTF-16LE encoder. */
	private JSONEncoder utf16le_encoder;

	/**
	 * Internal constructor used by the ThreadLocal class.
	 */
	private JSONEncoding() {
	}

	/**
	 * Returns a JSONEncoding instance for the current thread.
	 * @return a JSONEncoding instance for the current thread.
	 */
	public static JSONEncoding getJSONEncoding() {
		return JSONEncodingTL.get();
	}

	/**
	 * Determine the UTF encoding used based on the first 4 bytes of the JSON text.
	 * Up to 4 bytes are read from the stream. A recognized byte order mark is
	 * consumed; every other byte that was read is pushed back, so the stream
	 * must have room to unread up to 4 bytes.
	 * Fewer than 2 available bytes, or a zero/non-zero byte pattern that matches
	 * none of the known layouts, yields <code>E_UNKNOWN</code> with all read
	 * bytes pushed back.
	 * @param in JSON text InputStream
	 * @return identified encoding or error
	 * @throws IOException if an i/o error occurs while determining the encoding used
	 */
	public static int encoding(PushbackInputStream in) throws IOException {
		byte[] bytes = new byte[ 4 ];
		int len = 0;
		int read;
		// A single read may deliver fewer bytes than requested, so keep
		// reading until the buffer is full or the stream ends.
		while ( len < bytes.length
				&& ( read = in.read( bytes, len, bytes.length - len ) ) != -1 ) {
			len += read;
		}
		int encoding = E_UNKNOWN;
		// Number of leading BOM bytes which must not be pushed back.
		int bomLen = 0;
		if ( len >= 2 ) {
			int b0 = bytes[ 0 ] & 255;
			int b1 = bytes[ 1 ] & 255;
			int b2 = ( len > 2 ) ? bytes[ 2 ] & 255 : 0;
			int b3 = ( len > 3 ) ? bytes[ 3 ] & 255 : 0;
			// Bit i of the mask is set when byte i is non-zero.
			int mask = 0;
			for ( int i = 0; i < len; ++i ) {
				if ( bytes[ i ] != 0 ) {
					mask |= 1 << i;
				}
			}
			boolean utf8Bom = ( len > 2 ) && b0 == 0xEF && b1 == 0xBB && b2 == 0xBF;
			boolean utf16beBom = b0 == 0xFE && b1 == 0xFF;
			boolean utf16leBom = b0 == 0xFF && b1 == 0xFE;
			switch ( len ) {
			case 2:
				// Only "xx xx" is accepted: a bare UTF-16 BOM or 2 UTF-8 bytes.
				if ( mask == 0x03 ) {
					if ( utf16beBom ) {
						encoding = E_UTF16BE;
						bomLen = 2;
					}
					else if ( utf16leBom ) {
						encoding = E_UTF16LE;
						bomLen = 2;
					}
					else {
						encoding = E_UTF8;
					}
				}
				break;
			case 3:
				// Only "xx xx xx" is accepted: UTF-8, optionally just the BOM.
				if ( mask == 0x07 ) {
					encoding = E_UTF8;
					if ( utf8Bom ) {
						bomLen = 3;
					}
				}
				break;
			default:
				// Patterns below are written in stream order: b0 b1 b2 b3.
				switch ( mask ) {
				case 0x0F:
					// xx xx xx xx
					encoding = E_UTF8;
					if ( utf8Bom ) {
						bomLen = 3;
					}
					break;
				case 0x0C:
					// 00 00 xx xx, only valid as the UTF-32BE BOM.
					if ( b2 == 0xFE && b3 == 0xFF ) {
						encoding = E_UTF32BE;
						bomLen = 4;
					}
					break;
				case 0x0B:
					// xx xx 00 xx, only valid as UTF-16BE BOM + first character.
					if ( utf16beBom ) {
						encoding = E_UTF16BE;
						bomLen = 2;
					}
					break;
				case 0x0A:
					// 00 xx 00 xx
					encoding = E_UTF16BE;
					break;
				case 0x08:
					// 00 00 00 xx
					encoding = E_UTF32BE;
					break;
				case 0x07:
					// xx xx xx 00, only valid as UTF-16LE BOM + first character.
					if ( utf16leBom ) {
						encoding = E_UTF16LE;
						bomLen = 2;
					}
					break;
				case 0x05:
					// xx 00 xx 00
					encoding = E_UTF16LE;
					break;
				case 0x03:
					// xx xx 00 00, only valid as the UTF-32LE BOM.
					if ( utf16leBom ) {
						encoding = E_UTF32LE;
						bomLen = 4;
					}
					break;
				case 0x01:
					// xx 00 00 00
					encoding = E_UTF32LE;
					break;
				default:
					// Every other pattern matches no known layout.
					break;
				}
				break;
			}
		}
		int remaining = len - bomLen;
		if ( remaining > 0 ) {
			in.unread( bytes, bomLen, remaining );
		}
		return encoding;
	}

	/**
	 * Returns a JSON Decoder based on the encoding identifier.
	 * The decoder is created on first request and then cached in this instance.
	 * @param encoding encoding identifier
	 * @return a JSON Decoder based on the encoding identifier.
	 * @throws IllegalArgumentException if the identifier is not UTF-8,
	 * UTF-16BE or UTF-16LE
	 */
	public JSONDecoder getJSONDecoder(int encoding) {
		switch ( encoding ) {
		case E_UTF8:
			if ( utf8_decoder == null ) {
				utf8_decoder = new JSONDecoderCharset( Charset.forName( "UTF-8" ) );
			}
			return utf8_decoder;
		case E_UTF16BE:
			if ( utf16be_decoder == null ) {
				utf16be_decoder = new JSONDecoderCharset( Charset.forName( "UTF-16BE" ) );
			}
			return utf16be_decoder;
		case E_UTF16LE:
			if ( utf16le_decoder == null ) {
				utf16le_decoder = new JSONDecoderCharset( Charset.forName( "UTF-16LE" ) );
			}
			return utf16le_decoder;
		// UTF-32 can be identified by encoding() but no decoder is provided for it.
		case E_UTF32BE:
		case E_UTF32LE:
		case E_UNKNOWN:
		default:
			throw new IllegalArgumentException( "Unsupported encoding!" );
		}
	}

	/**
	 * Returns a JSON Encoder based on the encoding identifier.
	 * The encoder is created on first request and then cached in this instance.
	 * @param encoding encoding identifier
	 * @return a JSON Encoder based on the encoding identifier
	 * @throws IllegalArgumentException if the identifier is not UTF-8,
	 * UTF-16BE or UTF-16LE
	 */
	public JSONEncoder getJSONEncoder(int encoding) {
		switch ( encoding ) {
		case E_UTF8:
			if ( utf8_encoder == null ) {
				utf8_encoder = new JSONEncoderCharset( Charset.forName( "UTF-8" ) );
			}
			return utf8_encoder;
		case E_UTF16BE:
			if ( utf16be_encoder == null ) {
				utf16be_encoder = new JSONEncoderCharset( Charset.forName( "UTF-16BE" ) );
			}
			return utf16be_encoder;
		case E_UTF16LE:
			if ( utf16le_encoder == null ) {
				utf16le_encoder = new JSONEncoderCharset( Charset.forName( "UTF-16LE" ) );
			}
			return utf16le_encoder;
		// UTF-32 can be identified by encoding() but no encoder is provided for it.
		case E_UTF32BE:
		case E_UTF32LE:
		case E_UNKNOWN:
		default:
			throw new IllegalArgumentException( "Unsupported encoding!" );
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy