All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.enhydra.xml.io.MkEncodingsTable Maven / Gradle / Ivy

The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: MkEncodingsTable.java,v 1.2 2005/01/26 08:29:24 jkjome Exp $
 */
package org.enhydra.xml.io;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;

//FIXME: Next step is to convert the generated file to XML.

/**
 * Generate a file contain character encodings by parsing
 * the IANA Charset Registry, obtained from:
 * 
* * ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets *
* This is a development-time tool, with special hacks to make up for * various java encoding names not in the table. * The resulting file has the format: *
 * name bits mime-name alias1 alias2 ...
 * 
* Where bits is 7, 8 or 16 and mime-name can be "null" if there is none * defined. This file will be converted to XML in a future release. */ public final class MkEncodingsTable { /** * Resource name of IANA Charset Registry file. */ private final String CHAR_SET_REGISTRY = "character-sets"; /** * Character set table that is created */ private final String CHAR_SET_TABLE = "character-sets.tbl"; /** * Labels in registry file. */ private final String REG_NAME_FIELD = "Name:"; private final String REG_ALIAS_FIELD = "Alias:"; //FIXME: these encodings lists are not complete.. /** * 7 bit encoding names. */ private static final String[] ENCODINGS_7BIT = { "ANSI_X3.4-1968", "T.61-7bit" }; /** * 8 bit encoding names. */ private static final String[] ENCODINGS_8BIT = { "T.61-8bit", "UNKNOWN-8BIT", "PC8-Danish-Norwegian", "PC8-Turkish", "ISO_8859-1:1987", "ISO_8859-2:1987", "ISO_8859-3:1988", "ISO_8859-4:1988", "ISO_8859-6:1987", "ISO_8859-6-E", "ISO_8859-6-I", "ISO_8859-7:1987", "ISO_8859-8:1988", "ISO_8859-8-E", "ISO_8859-8-I", "ISO_8859-5:1988", "ISO_8859-9:1989", "ISO_8859-supp", "ISO-8859-10", "ISO-8859-15", "ISO-8859-1-Windows-3.0-Latin-1", "ISO-8859-1-Windows-3.1-Latin-1", "ISO-8859-2-Windows-Latin-2", "ISO-8859-9-Windows-Latin-5", "latin-greek", "Latin-greek-1" }; /** * Tables of known 7 & 8 bit encodings. */ private static final HashSet f7BitEncodings = new HashSet(); private static final HashSet f8BitEncodings = new HashSet(); /** * Pattern indicating the preferred MIME name. */ private static final String REG_MIME_PREFERRED = "preferred MIME name"; /** * Table of aliases to add. */ private static final String[][] HACKED_ALIASES = { {"UTF-8", "UTF8"}, {"ANSI_X3.4-1968", "646"}, }; /** * Table of prefix conversions. An alias is created for names/aliases * that match the first prefix, with the second subsitutued. 
*/ private static final String[][] HACKED_PREFIXES = { {"windows-", "Cp"}, {"ISO_8859-", "ISO8859-"}, {"ISO-8859-", "ISO8859_"} }; /** * Extra entries to output, with no other hacks available to get them. */ private static String[] EXTRA_ENTRIES = { "UnicodeBig 16 null", "UnicodeBigUnmarked 16 null", "UnicodeLittle 16 null", "UnicodeLittleUnmarked 16 null", "UTF-16 16 null UTF16" }; /** * Class initializer. */ static { for (int idx = 0; idx < ENCODINGS_7BIT.length; idx++) { f7BitEncodings.add(ENCODINGS_7BIT[idx]); } for (int idx = 0; idx < ENCODINGS_8BIT.length; idx++) { f8BitEncodings.add(ENCODINGS_8BIT[idx]); } } /** * Generate an error about parsing a line in registry. */ private void ianaParseError(String msg, String line) { throw new XMLIOError(msg + "; parsing line in " + CHAR_SET_REGISTRY + "\"" + line + "\""); } /** * Get the encoding size. Returning 7, 8, or 16. This makes a guess * based on some encoded knowledge. If not known, returns 16. */ private int getCharSize(String encoding) { if (f7BitEncodings.contains(encoding)) { return 7; } else if (f8BitEncodings.contains(encoding)) { return 8; } else { return 16; } } /** * Extract a encoding name out of a Name: or Alias: line. Returns * null if empty. */ private String parseName(String line) { int len = line.length(); // Get next char after index. int startIdx = line.indexOf(':'); if (startIdx < 0) { ianaParseError("no `:' found", line); } startIdx++; // Skip spaces while ((startIdx < len) && (line.charAt(startIdx) == ' ')) { startIdx++; } // Find end int endIdx = startIdx; while ((endIdx < len) && (line.charAt(endIdx) != ' ')) { endIdx++; } if (endIdx <= startIdx) { return null; } else { return line.substring(startIdx, endIdx).intern(); } } /** * Determine if a line contains the preferred MIME encoding. */ private boolean isMimePreferredEntry(String line) { return (line.indexOf(REG_MIME_PREFERRED) >= 0); } /** * Add a alias to the list of aliases, if its not null or not already * there. 
*/ private void addAlias(ArrayList aliases, String alias) { if ((alias != null) && !aliases.contains(alias)) { aliases.add(alias); } } /** * Do special hacked mapping of name/aliases to other aliases. This * handles alisas not in registry */ private void makeHackedAliases(ArrayList aliases, String name) { // Hacks based on alias. for (int idx = 0; idx < HACKED_ALIASES.length; idx++) { String[] mapping = HACKED_ALIASES[idx]; if (name.equals(mapping[0])) { addAlias(aliases, mapping[1]); } } // Hacks based on prefix. for (int idx = 0; idx < HACKED_PREFIXES.length; idx++) { String[] mapping = HACKED_PREFIXES[idx]; if (name.startsWith(mapping[0])) { addAlias(aliases, mapping[1] + name.substring(mapping[0].length())); } } } /** * Scan the input stream for the next encoding entry and parse that * entry and write a record. */ private boolean parseCharSetEntry(BufferedReader in, PrintWriter out) throws IOException { ArrayList aliases = new ArrayList(); String mimePreferred = null; String line = null; // Scan for next Name: entry while ((line = in.readLine()) != null) { if (line.startsWith(REG_NAME_FIELD)) { break; } } if (line == null) { return false; // EOF } String name = parseName(line); if (name == null) { ianaParseError("no name parsed", line); } if (isMimePreferredEntry(line)){ mimePreferred = name; } // Handle stuff missing from registry makeHackedAliases(aliases, name); // Parse Alias: entries, scanning until a blank line or EOF. 
while (((line = in.readLine()) != null) && (line.trim().length() > 0)) { if (line.startsWith(REG_ALIAS_FIELD)) { String alias = parseName(line); if (alias != null) { addAlias(aliases, alias); makeHackedAliases(aliases, alias); if (isMimePreferredEntry(line)){ mimePreferred = alias; } } } } // output entry out.print(name); out.print(' '); out.print(getCharSize(name)); out.print(' '); out.print(mimePreferred); int len = aliases.size(); for (int idx = 0; idx < len; idx++) { out.print(' '); out.print(aliases.get(idx)); } out.println(); return true; } /** * Parse the registry file. */ private void parseIanaRegistry(BufferedReader in, PrintWriter out) throws IOException { while (parseCharSetEntry(in, out)) { // Looping till eof } } /** * Parse the registry file. */ private void parseIanaRegistry() throws IOException { BufferedReader in = new BufferedReader(new FileReader(CHAR_SET_REGISTRY)); PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(CHAR_SET_TABLE))); parseIanaRegistry(in, out); for (int i = 0; i < EXTRA_ENTRIES.length; i++) { out.println(EXTRA_ENTRIES[i]); } out.close(); in.close(); } /** * Entry */ public static void main(String[] args) throws IOException { new MkEncodingsTable().parseIanaRegistry(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy