// org.apache.cocoon.components.serializers.encoding.CompiledCharset (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.components.serializers.encoding;
/**
 * A charset whose set of encodable characters is stored in a bit table
 * (the "encoding table").
 *
 * <p>The table holds one bit for every possible Java <code>char</code>
 * value (65536 bits, packed into 8192 bytes). The bit for character
 * <code>c</code> lives in byte <code>c &gt;&gt; 3</code> at bit position
 * <code>c &amp; 0x07</code>. The table is either supplied ready-made to the
 * three-argument constructor, or built by calling {@link #compile()} after
 * using the two-argument constructor.</p>
 *
 * @version $Id: CompiledCharset.java 587752 2007-10-24 02:47:02Z vgritsenko $
 */
public abstract class CompiledCharset extends AbstractCharset {

    /**
     * Required length in bytes of the encoding table: 65536 possible
     * <code>char</code> values, one bit each, eight bits per byte.
     */
    private static final int TABLE_SIZE = 8192;

    /** The encoding table of this <code>Charset</code>. */
    protected byte[] encoding;

    /**
     * Create a new instance of this <code>CompiledCharset</code> with an
     * empty (all bits cleared) encoding table.
     *
     * <p>After construction, the {@link #compile()} method will have to
     * be called for proper operation of this <code>Charset</code>.</p>
     *
     * @param name This <code>Charset</code> name.
     * @param aliases This <code>Charset</code> alias names.
     * @throws NullPointerException If one of the arguments is null. The
     *         arguments are only forwarded to the superclass constructor,
     *         so this is presumably enforced there (TODO confirm against
     *         <code>AbstractCharset</code>).
     */
    protected CompiledCharset(String name, String[] aliases) {
        super(name, aliases);
        // A freshly allocated byte array is already filled with zeros, so no
        // explicit clearing pass is needed: every character starts out as
        // "not encodable" until compile() sets its bit.
        this.encoding = new byte[TABLE_SIZE];
    }

    /**
     * Create a new instance of this <code>CompiledCharset</code> using a
     * ready-made encoding table.
     *
     * <p>The encoding table passed to this constructor needs to be 8192
     * bytes long, or (in other words) must contain exactly 65536 bits.</p>
     *
     * <p>As in the Java Programming Language a <code>char</code> can assume
     * values between 0 (zero) and 65535 (inclusive), each bit in the
     * specified array refers to a specific <code>char</code> value.</p>
     *
     * <p>When this specific bit is set to 1 (one or true) we assume that the
     * charset can encode the given character, while when the bit is
     * set to 0 (zero or false), the character cannot be represented using
     * this <code>Charset</code>.</p>
     *
     * <p>For example, a charset able to represent only the Java characters
     * between 0 (zero) and 255 (inclusive), such as ISO-8859-1, is described
     * by a table containing exactly 256 true bits; US-ASCII, which covers
     * only 0 (zero) to 127 (inclusive), by one containing 128 true bits.</p>
     *
     * <p>To check if a character can be encoded by this
     * <code>Charset</code>, given "<code>c</code>" as the character to
     * verify, one can write this simple formula:</p>
     *
     * <pre>
     * ((encoding[c &gt;&gt; 3] &amp; (1 &lt;&lt; (c &amp; 0x07))) != 0)
     * </pre>
     *
     * <p>If the result of the masking operation is 0 (zero) the bit was set
     * to zero, and therefore "<code>c</code>" cannot be represented in this
     * <code>Charset</code>, while if the result is different from 0 (zero)
     * the character "<code>c</code>" can actually be represented by this
     * <code>Charset</code>.</p>
     *
     * <p>The supplied array is stored as-is (it is not copied), so later
     * modifications made to it by the caller are visible to this
     * instance.</p>
     *
     * @param name This <code>Charset</code> name.
     * @param aliases This <code>Charset</code> alias names.
     * @param encoding This <code>Charset</code> encoding table as specified
     *                 above.
     * @throws NullPointerException If the encoding table is null. The name
     *         and aliases are forwarded to the superclass constructor, which
     *         is presumably where null values for them are rejected (TODO
     *         confirm against <code>AbstractCharset</code>).
     * @throws IllegalArgumentException If the length of the encoding table
     *                                  is not 8192 precisely.
     */
    protected CompiledCharset(String name, String[] aliases, byte[] encoding)
    throws NullPointerException, IllegalArgumentException {
        super(name, aliases);
        if (encoding == null) {
            throw new NullPointerException("Invalid table");
        }
        if (encoding.length != TABLE_SIZE) {
            throw new IllegalArgumentException("Invalid encoding table size: "
                + "current length is " + encoding.length + ", required "
                + TABLE_SIZE + ".");
        }
        this.encoding = encoding;
    }

    /**
     * Check if the specified character is representable by this specific
     * <code>Charset</code> instance.
     *
     * @param c The character to check.
     * @return <code>true</code> if the bit associated with <code>c</code> in
     *         the encoding table is set, <code>false</code> otherwise.
     */
    public boolean allows(char c) {
        // Byte index is c / 8, bit index is c % 8. The byte is promoted to
        // int (with sign extension) before the AND, but the mask never
        // exceeds 0x80, so only the low eight bits can survive and the
        // result is non-zero exactly when the character's bit is set.
        int mask = 1 << (c & 0x07);
        return (this.encoding[c >> 3] & mask) != 0;
    }

    /**
     * Compile the encoding table of this <code>CompiledCharset</code>.
     *
     * <p>This method will invoke the {@link #compile(char)} method for any
     * possible value of a Java character (65536 times, from 0, zero, to
     * 65535 inclusive), setting in the encoding table the bit of every
     * character for which that method returns <code>true</code>.</p>
     *
     * <p>Bits are only ever set, never cleared: bits already set in the
     * table before this call remain set.</p>
     */
    protected final void compile() {
        // The counter is an int on purpose: a char counter would wrap around
        // to 0 after Character.MAX_VALUE and the loop would never terminate.
        for (int x = 0; x <= Character.MAX_VALUE; x++) {
            if (this.compile((char) x)) {
                int pos = x >> 3;
                this.encoding[pos] = (byte) (this.encoding[pos] | (1 << (x & 0x07)));
            }
        }
    }

    /**
     * Return true or false whether this encoding can encode the specified
     * character or not.
     *
     * <p>This method is equivalent to the {@link #allows(char)} method, but
     * it will be called upon construction of the encoding table.</p>
     *
     * @param c The character to check.
     * @return <code>true</code> if the character can be encoded by this
     *         <code>Charset</code>, <code>false</code> otherwise.
     */
    protected abstract boolean compile(char c);
}