All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.cocoon.components.serializers.encoding.CompiledCharset Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cocoon.components.serializers.encoding;

/**
 * A charset whose set of representable characters is stored as a bit table
 * holding one bit for every possible Java <code>char</code> value.
 *
 * <p>
 * The table is either supplied ready-made to the constructor, or built by
 * calling {@link #compile()}, which queries the subclass-provided
 * {@link #compile(char)} method once for every character.
 * </p>
 *
 * @version $Id: CompiledCharset.java 587752 2007-10-24 02:47:02Z vgritsenko $
 */
public abstract class CompiledCharset extends AbstractCharset {

    /**
     * Required length, in bytes, of the encoding table: one bit for each of
     * the 65536 possible <code>char</code> values, i.e. 8192 bytes.
     */
    private static final int TABLE_SIZE = (Character.MAX_VALUE + 1) >> 3;

    /** The encoding table of this Charset. */
    protected byte[] encoding;

    /**
     * Create a new instance of this <code>CompiledCharset</code> with an
     * empty (all bits cleared) encoding table.
     *
     * <p>
     * After construction, the {@link #compile()} method will have to be
     * called for proper operation of this Charset.
     * </p>
     *
     * @param name This Charset name.
     * @param aliases This Charset alias names.
     * @throws NullPointerException If one of the arguments is <b>null</b>
     *         (presumably raised by the superclass constructor, which is not
     *         visible from this class).
     */
    protected CompiledCharset(String name, String[] aliases) {
        super(name, aliases);
        // A newly allocated array is already zero-filled, so every character
        // starts out as "not representable" without an explicit clearing loop.
        this.encoding = new byte[TABLE_SIZE];
    }

    /**
     * Create a new instance of this <code>CompiledCharset</code> using a
     * precomputed encoding table.
     *
     * <p>
     * The encoding table passed to this constructor needs to be 8192 bytes
     * long, or (in other words) must contain exactly 65536 bits.
     * </p>
     *
     * <p>
     * As in the Java Programming Language a <code>char</code> can assume
     * values between 0 (zero) and 65535 (inclusive), each bit in the
     * specified array refers to a specific <code>char</code> value.
     * </p>
     *
     * <p>
     * When this specific bit is set to 1 (one or <b>true</b>) we assume that
     * the charset can encode the given character, while when the bit is set
     * to 0 (zero or <b>false</b>), the character cannot be represented using
     * this Charset.
     * </p>
     *
     * <p>
     * For example, the <b>US-ASCII</b> Charset can represent only Java
     * characters between 0 (zero) and 127 (inclusive), therefore the
     * specified byte array will contain only 128 <b>true</b> bits.
     * </p>
     *
     * <p>
     * To check if a character can be encoded by this Charset, given
     * &quot;<code>c</code>&quot; as the character to verify, one can write
     * this simple formula:
     * </p>
     *
     * <pre>
     * ((encoding[c &gt;&gt; 3] &amp; (1 &lt;&lt; (c &amp; 0x07))) != 0)
     * </pre>
     *
     * <p>
     * If the result of the masking operation is 0 (zero) the bit was set to
     * zero, and therefore &quot;<code>c</code>&quot; cannot be represented in
     * this Charset, while if the result is different from 0 (zero) the
     * character &quot;<code>c</code>&quot; can actually be represented by
     * this Charset.
     * </p>
     *
     * <p>
     * The supplied array is stored by reference, not copied: later changes
     * made to it by the caller are visible to this instance.
     * </p>
     *
     * @param name This Charset name.
     * @param aliases This Charset alias names.
     * @param encoding This Charset encoding table as specified above.
     * @throws NullPointerException If <code>encoding</code> is <b>null</b>
     *         (null names or aliases are presumably rejected by the
     *         superclass constructor, which is not visible from this class).
     * @throws IllegalArgumentException If the length of the encoding table
     *         is not 8192 precisely.
     */
    protected CompiledCharset(String name, String[] aliases, byte[] encoding)
    throws NullPointerException, IllegalArgumentException {
        super(name, aliases);
        if (encoding == null) {
            throw new NullPointerException("Invalid table");
        }
        if (encoding.length != TABLE_SIZE) {
            throw new IllegalArgumentException("Invalid encoding table size: "
                    + "current length is " + encoding.length
                    + ", required " + TABLE_SIZE + ".");
        }
        this.encoding = encoding;
    }

    /**
     * Check if the specified character is representable by this specific
     * Charset instance.
     *
     * @param c The character to check.
     * @return <b>true</b> if the bit associated with <code>c</code> is set in
     *         the encoding table, <b>false</b> otherwise.
     */
    public boolean allows(char c) {
        // The byte read from the table is sign-extended to an int before the
        // AND, but the mask only ever selects one of the low eight bits, so
        // the result is either zero or exactly the mask value.
        return (this.encoding[c >> 3] & (1 << (c & 0x07))) != 0;
    }

    /**
     * Compile the encoding table of this <code>CompiledCharset</code>.
     *
     * <p>
     * This method will invoke the {@link #compile(char)} method for any
     * possible value of a Java character (65536 times, from 0, zero, to
     * 65535 inclusive), building the encoding table of the characters this
     * Charset can successfully represent.
     * </p>
     *
     * <p>
     * Bits are only ever set, never cleared: bits already present in the
     * table before this call are preserved.
     * </p>
     */
    protected final void compile() {
        // The counter is an int: a char counter could never exceed
        // Character.MAX_VALUE and the loop would not terminate.
        for (int x = 0; x <= Character.MAX_VALUE; x++) {
            if (this.compile((char) x)) {
                int pos = x >> 3;
                this.encoding[pos] = (byte) (this.encoding[pos] | (1 << (x & 0x07)));
            }
        }
    }

    /**
     * Return true or false whether this encoding can encode the specified
     * character or not.
     *
     * <p>
     * This method is equivalent to the {@link #allows(char)} method, but it
     * will be called upon construction of the encoding table.
     * </p>
     *
     * @param c The character to check.
     * @return <b>true</b> if the character can be encoded, <b>false</b>
     *         otherwise.
     */
    protected abstract boolean compile(char c);
}