org.enhydra.xml.io.Encodings Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of xmlc-all-runtime
Enhydra XMLC compiler.
The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: Encodings.java,v 1.2 2005/01/26 08:29:24 jkjome Exp $
 */
package org.enhydra.xml.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.StringTokenizer;

// FIXME: The information here is limited, need to find a canonical
// source of names and encoding sizes, etc.  Create an XML file describing
// it.

/**
 * Class that manages information about encodings.
 */
public final class Encodings {
    /**
     * Some standard character set constants. Using the standard names.
     */
    public static final String ISO_8859_1 = "ISO_8859-1";
    public static final String US_ASCII = "US-ASCII";
    public static final String SHIFT_JIS = "Shift_JIS";

    /**
     * Resource name of file containing character set information.
     */
    private final String CHAR_SET_TABLE
        = "org/enhydra/xml/io/character-sets.tbl";

    /**
     * String use for null encoding in file.
     */
    private final String NULL_STR = "null";

    /**
     * Singleton instance.
     */
    private static Encodings fSingleton;

    /**
     * Singleton empty list of aliases.
     */
    private static final String[] fEmptyAliases = new String[0];

    /**
     * Table of character sets.  The table is keyed by upper-cased encoding
     * names and aliases, containing references to CharacterSet objects.
     */
    private final HashMap fCharSets = new HashMap();
    
    private static final Class CHARSET_CONSTRUCTOR_TYPES[] = {
        String.class, Integer.TYPE, String.class, String[].class
    };

    /**
     * Generate an error about parsing an entry
     */
    private void parseError(String msg,
                            String line) {
        throw new XMLIOError(msg + "; parsing line in " + CHAR_SET_TABLE
                             + "\"" + line + "\"");
    }

    /**
     * Parse the next entry from the file
     */
    private CharacterSet parseCharSetEntry(String line) {
        StringTokenizer tokens = new StringTokenizer(line);
        int numTokens = tokens.countTokens();
        if (numTokens < 4) {
            parseError("must have at least 4 entries", line);
        }

        String className = tokens.nextToken();
        String name = tokens.nextToken();
        Integer charSetSize = null;
        try {
            charSetSize = new Integer(tokens.nextToken());
        } catch (NumberFormatException nfe) {
            parseError("invalid character set size", line);
        }
        String mimePreferred = tokens.nextToken();;
        if (mimePreferred.equals(NULL_STR)) {
            mimePreferred = null;
        }
        String[] aliases = fEmptyAliases;

        if (numTokens > 4) {
            aliases = new String[numTokens - 4];
            int idx = 0;
            while (tokens.hasMoreTokens()) {
                aliases[idx++] = tokens.nextToken();
            }
        }
        
        // Instantiate the appropriate charset class.
        try {
            Class clazz = Class.forName(className);
            Constructor ctr = clazz.getConstructor(CHARSET_CONSTRUCTOR_TYPES);
            Object params[] = { name, charSetSize, mimePreferred, aliases };
            return (CharacterSet)ctr.newInstance(params);
        } catch (Exception e) {
            parseError("Exception loading character set class '" + className + "': " +
            e.toString(), line);
        }
        // Should never be reached, but must be here to make the compiler happy
        throw new IllegalStateException("Ooops. This should never happen. Please debug");        
    }

    /**
     * Add an entry to the mapping table.
     */
    private void addEntry(CharacterSet charSet) {
        fCharSets.put(charSet.getName().toUpperCase().intern(), charSet);
        
        String[] aliases = charSet.getAliases();
        int len = (aliases == null) ? 0 : aliases.length;
        for (int idx = 0; idx < len; idx++) {
            fCharSets.put(aliases[idx].toUpperCase().intern(), charSet);
        }
    }

    /**
     * Parse the character set file.
     */
    private void parseCharacterSetTable(BufferedReader in) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            addEntry(parseCharSetEntry(line));
        }
    }

    /**
     * Parse the character set file.
     */
    private void parseCharacterSetTable() {
        ClassLoader loader = Encodings.class.getClassLoader();
        if (loader == null) {
            // Works around a JDeveloper JVM bug.
            loader = ClassLoader.getSystemClassLoader();
        }
        InputStream inStream = loader.getResourceAsStream(CHAR_SET_TABLE);
        if (inStream == null) {
            throw new XMLIOError("can't find \"" + CHAR_SET_TABLE + "\" on class path");
        }
        try {
            try {
                BufferedReader in = new BufferedReader(new InputStreamReader(inStream));
                parseCharacterSetTable(in);
            } finally {
                inStream.close();
            }
        } catch (IOException except) {
            throw new XMLIOError("error reading " + CHAR_SET_TABLE);
        }
    }

    /**
     * Disallow external instantiation.
     */
    private Encodings() {
        parseCharacterSetTable();
    }

    /**
     * Get the entry for an encoding name or null.
     */
    public CharacterSet getCharacterSet(String encoding) {
        return (CharacterSet)fCharSets.get(encoding.toUpperCase());
    }

    /**
     * Determine if an encoding is a valid encoding.
     */
    public boolean isValid(String encoding) {
        return getCharacterSet(encoding) != null;
    }
    
    /**
     * Get the encoding name, given any encoding or alias, or null
     * if unknown encoding.
     */
    public String getName(String encoding) {
        CharacterSet charSet = getCharacterSet(encoding);
        if (charSet == null) {
            return null;
        } else {
            return charSet.getName();
        }
    }

    /**
     * Get the preferred MIME encoding, or null if unknown encoding.
     */
    public String getMIMEPreferred(String encoding) {
        CharacterSet charSet = getCharacterSet(encoding);
        if (charSet == null) {
            return null;
        } else {
            return charSet.getMIMEPreferred();
        }
    }

    /**
     * Get the encoding aliases, given any encoding or alias,
     * or null if unknown encoding.
     */
    public String[] getAliases(String encoding) {
        CharacterSet charSet = getCharacterSet(encoding);
        if (charSet == null) {
            return null;
        } else {
            return charSet.getAliases();
        }
    }

    /**
     * Get the maximum value of an unicode character in an encoding.
     * This checks for 7 and 8 bit encodings; everything else is considered
     * to require 16 bits.
     */
    public int getMaxCharacterValue(String encoding) {
        CharacterSet charSet = getCharacterSet(encoding);
        if (charSet == null) {
            return 0xFFFF;  // assume the best
        } else {
            return charSet.getMaxCharValue();
        }
    }

    /**
     * Determine if two encoding names represent the same encoding.
     * Handles nulls, with two nulls being considered the same encoding.
     */
    public boolean sameEncodings(String encoding1,
                                 String encoding2) {
        if ((encoding1 == null) && (encoding2 == null)) {
            return true;
        } else if ((encoding1 == null) || (encoding2 == null)) {
            return false;
        } else {
            return (getCharacterSet(encoding1) == getCharacterSet(encoding2));
        }
    }

    /**
     * Return string representation for debugging.
     */
    public String toString() {
        StringBuffer buf = new StringBuffer(4096); // larger than default
        Iterator values = fCharSets.values().iterator();
        while (values.hasNext()) {
            buf.append(values.next().toString());
            buf.append('\n');
        }
        return buf.toString();
    }

    /**
     * Get the singleton instance of this class.
     */
    public static Encodings getEncodings() {
        if (fSingleton == null) {
            fSingleton = new Encodings();
        }
        return fSingleton;
    }
}