All downloads are free. Search and download functionality uses the official Maven repository.

info.freelibrary.marc4j.converter.impl.ReverseCodeTableGenerator Maven / Gradle / Ivy


package info.freelibrary.marc4j.converter.impl;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Vector;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

/**
 * Invoked at build time to generate a java source file (named ReverseCodeTableGenerated.java) which when compiled will
 * extend the ReverseCodeTable abstract class (primarily through switch statements) and which can be used by the
 * UnicodeToAnsel converter which will produce the same results as the object ReverseCodeTableHash.
* The following routines are only used in the code generation process, and are not available to be called from within * an application that uses MARC4J.
* The routines generated for converting unicode characters to MARC8 multibyte characters are split into several * routines to workaround a limitation in java that a method can only contain 64k of code when it is compiled. * * @author Robert Haschart * @author Kevin S. Clarke */ public class ReverseCodeTableGenerator { private ReverseCodeTableGenerator() { } /** * The main class for the reverse code table generator. * * @param args * @throws FileNotFoundException */ public static void main(final String[] args) throws FileNotFoundException { Hashtable> charsets = null; try { final SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); factory.setValidating(false); final SAXParser saxParser = factory.newSAXParser(); final XMLReader rdr = saxParser.getXMLReader(); final InputSource src = new InputSource(ReverseCodeTableHandler.class.getResourceAsStream("resources/codetables.xml")); final ReverseCodeTableHandler saxUms = new ReverseCodeTableHandler(); rdr.setContentHandler(saxUms); rdr.parse(src); charsets = saxUms.getCharSets(); final Vector combining = saxUms.getCombiningChars(); final Object charsetsKeys[] = charsets.keySet().toArray(); Arrays.sort(charsetsKeys); if (args.length > 0) { final PrintStream outStream = new PrintStream(new File(args[0])); dumpTablesAsSwitchStatement(combining, charsets, outStream); } else { dumpTablesAsSwitchStatement(combining, charsets, System.out); } } catch (final FileNotFoundException details) { throw details; // exec-maven-plugin will tell us what went wrong } catch (final Exception details) { details.printStackTrace(System.out); System.err.println("Exception: " + details); } } private static void dumpTablesAsSwitchStatement(final Vector combining, final Hashtable> charsets, final PrintStream out) { out.println("package info.freelibrary.marc4j.converter.impl;"); out.println(""); out.println("/**"); out.println(" * An implementation of ReverseCodeTable that is used in converting Unicode"); 
out.println(" * data to MARC8 data, that doesn't rely on any data files or resources or"); out.println(" * data structures"); out.println(" *"); out.println(" * Warning: This file is generated by running the main routine in the file"); out.println(" * ReverseCodeTableHandler.java"); out.println(" *"); out.println(" * Warning: Do not edit this file, or all edits will be lost at the next"); out.println(" * build."); out.println(" */"); out.println(""); out.println("import java.util.Hashtable;"); out.println(""); out.println("public class ReverseCodeTableGenerated extends ReverseCodeTable {"); out.println(""); out.println(" /**"); out.println(" * Returns true if supplied character is combining; else, false."); out.println(" *"); out.println(" * @param c"); out.println(" * @return True if supplied character is combining; else, false"); out.println(" */"); out.println(" public boolean isCombining(final Character c) {"); out.println(" switch ((int)c.charValue()) {"); final Character combineArray[] = combining.toArray(new Character[0]); Arrays.sort(combineArray); Character prevc = null; for (int index = 0; index < combineArray.length; index++) { final Character c = combineArray[index]; if (!c.equals(prevc)) { final String hex = Integer.toHexString(c.charValue()); out.println(" case 0x" + hex + ":"); } prevc = c; } out.println(" return true;"); out.println(" default:"); out.println(" return false;"); out.println(" }"); out.println(" }"); out.println(""); out.println(" /**"); out.println(" * Returns the character hashtable."); out.println(" *"); out.println(" * @param c"); out.println(" * @return The character hashtable"); out.println(" */"); out.println(" public Hashtable getCharTable(final Character c) {"); out.println(" final String resultStr1 = getCharTableCharSet(c);"); out.println(" final String resultStr2 = getCharTableCharString(c);"); out.println(" if (resultStr2 == null) {"); out.println(" return null;"); out.println(" }"); out.println(" final int htSize = 
resultStr1.length();"); out.println(" final Hashtable result = new Hashtable(htSize);"); out.println(" final String res2[] = resultStr2.split(\" \");"); out.println(" for (int index = 0; index < resultStr1.length(); index++) {"); out.println(" final Integer intChar = new Integer(resultStr1.charAt(index));"); out.println(" result.put(intChar, deHexify(res2[(res2.length == 1) ? 0 : index]));"); out.println(" }"); out.println(" return result;"); out.println(" }"); out.println(""); final Character charsetsKeys[] = charsets.keySet().toArray(new Character[0]); Arrays.sort(charsetsKeys); final StringBuilder buffer = new StringBuilder(); out.println(" private String getCharTableCharSet(final Character c) {"); out.println(" final int cVal = (int)c.charValue();"); out.println(" switch(cVal) {"); for (int sel = 0; sel < charsetsKeys.length; sel++) { final Hashtable table = charsets.get(charsetsKeys[sel]); final Object tableKeys[] = table.keySet().toArray(); Arrays.sort(tableKeys); final StringBuilder sb = new StringBuilder(); for (int index = 0; index < tableKeys.length; index++) { sb.append((char) ((Integer) tableKeys[index]).intValue()); } final String charset = sb.toString().trim(); if (!charset.equals("1")) { final int intChar = charsetsKeys[sel].charValue(); final String intCharHex = Integer.toHexString(intChar); out.println(" case 0x" + intCharHex + ":"); out.println(" return \"" + charset + "\";"); } } out.println(" }"); out.println(" return \"1\";"); out.println(" }"); dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 0, 3500); dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 3500, 7000); dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 7000, 10500); dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 10500, 14000); dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 14000, charsetsKeys.length); out.println(" private String getCharTableCharString(final Character c) {"); 
out.println(" final int cVal = (int)c.charValue();"); out.println(buffer.toString()); out.println(" return null;"); out.println(" }"); out.println("}"); } static private void dumpPartialCharTableCharString(final PrintStream out, final StringBuilder buffer, final Object charsetsKeys[], final Hashtable> charsets, final int startOffset, final int endOffset) { final String startByteStr = "0x" + Integer.toHexString((((Character) charsetsKeys[startOffset]).charValue())); final String endByteStr = "0x" + Integer.toHexString((((Character) charsetsKeys[endOffset - 1]).charValue())); buffer.append(" if (cVal >= " + startByteStr + " && cVal <= " + endByteStr + ") {\n return getCharTableCharString_" + startByteStr + "_" + endByteStr + "(c);\n }\n"); out.println(" private String getCharTableCharString_" + startByteStr + "_" + endByteStr + "(final Character c) {"); out.println(" switch ((int)c.charValue()) {"); for (int sel = startOffset; sel < charsetsKeys.length && sel < endOffset; sel++) { final Hashtable table = charsets.get(charsetsKeys[sel]); final Object tableKeys[] = table.keySet().toArray(); Arrays.sort(tableKeys); final StringBuilder sb1 = new StringBuilder(); final StringBuilder sb2 = new StringBuilder(); boolean useSB1 = false; char prevcharArray[] = null; for (int index = 0; index < tableKeys.length; index++) { final Object value = table.get(tableKeys[index]); final char valarray[] = (char[]) value; sb1.append(hexify(valarray)); if (index == 0) { sb2.append(hexify(valarray)); } if (index > 0 && valarray.length == 1 && prevcharArray != null && prevcharArray.length == 1 && valarray[0] != prevcharArray[0]) { useSB1 = true; } sb1.append(" "); prevcharArray = valarray; } final String returnVal = useSB1 ? 
sb1.toString().trim() : sb2.toString().trim(); final int intChar = ((Character) charsetsKeys[sel]).charValue(); final String hex = Integer.toHexString(intChar); out.println(" case 0x" + hex + ":"); out.println(" return \"" + returnVal + "\";"); } out.println(" default:"); out.println(" return null;"); out.println(" }"); out.println(" }"); out.println(""); } /** * Utility function for translating an array of characters to a two character hex string of the character values. * * @param aValArray The array of characters to encode * @return A string representation of the hex code */ private static String hexify(final char[] aValArray) { String result = ""; for (int index = 0; index < aValArray.length; index++) { result += Integer.toHexString(aValArray[index]); } return result; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy