![JAR search and dependency download from the Maven repository](/logo.png)
info.freelibrary.marc4j.converter.impl.ReverseCodeTableGenerator Maven / Gradle / Ivy
package info.freelibrary.marc4j.converter.impl;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Vector;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
/**
* Invoked at build time to generate a Java source file (named ReverseCodeTableGenerated.java) which, when compiled,
* extends the ReverseCodeTable abstract class (primarily through switch statements). The generated class can be used
* by the UnicodeToAnsel converter and produces the same results as the ReverseCodeTableHash object.
* The following routines are only used in the code generation process, and are not available to be called from within
* an application that uses MARC4J.
* The routines generated for converting Unicode characters to MARC8 multibyte characters are split into several
* routines to work around a limitation in Java that a method can only contain 64k of code when it is compiled.
*
* @author Robert Haschart
* @author Kevin S. Clarke
*/
public class ReverseCodeTableGenerator {
private ReverseCodeTableGenerator() {
}
/**
* The main class for the reverse code table generator.
*
* @param args
* @throws FileNotFoundException
*/
public static void main(final String[] args) throws FileNotFoundException {
Hashtable> charsets = null;
try {
final SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(false);
final SAXParser saxParser = factory.newSAXParser();
final XMLReader rdr = saxParser.getXMLReader();
final InputSource src =
new InputSource(ReverseCodeTableHandler.class.getResourceAsStream("resources/codetables.xml"));
final ReverseCodeTableHandler saxUms = new ReverseCodeTableHandler();
rdr.setContentHandler(saxUms);
rdr.parse(src);
charsets = saxUms.getCharSets();
final Vector combining = saxUms.getCombiningChars();
final Object charsetsKeys[] = charsets.keySet().toArray();
Arrays.sort(charsetsKeys);
if (args.length > 0) {
final PrintStream outStream = new PrintStream(new File(args[0]));
dumpTablesAsSwitchStatement(combining, charsets, outStream);
} else {
dumpTablesAsSwitchStatement(combining, charsets, System.out);
}
} catch (final FileNotFoundException details) {
throw details; // exec-maven-plugin will tell us what went wrong
} catch (final Exception details) {
details.printStackTrace(System.out);
System.err.println("Exception: " + details);
}
}
private static void dumpTablesAsSwitchStatement(final Vector combining,
final Hashtable> charsets, final PrintStream out) {
out.println("package info.freelibrary.marc4j.converter.impl;");
out.println("");
out.println("/**");
out.println(" * An implementation of ReverseCodeTable that is used in converting Unicode");
out.println(" * data to MARC8 data, that doesn't rely on any data files or resources or");
out.println(" * data structures");
out.println(" *");
out.println(" * Warning: This file is generated by running the main routine in the file");
out.println(" * ReverseCodeTableHandler.java");
out.println(" *");
out.println(" * Warning: Do not edit this file, or all edits will be lost at the next");
out.println(" * build.");
out.println(" */");
out.println("");
out.println("import java.util.Hashtable;");
out.println("");
out.println("public class ReverseCodeTableGenerated extends ReverseCodeTable {");
out.println("");
out.println(" /**");
out.println(" * Returns true if supplied character is combining; else, false.");
out.println(" *");
out.println(" * @param c");
out.println(" * @return True if supplied character is combining; else, false");
out.println(" */");
out.println(" public boolean isCombining(final Character c) {");
out.println(" switch ((int)c.charValue()) {");
final Character combineArray[] = combining.toArray(new Character[0]);
Arrays.sort(combineArray);
Character prevc = null;
for (int index = 0; index < combineArray.length; index++) {
final Character c = combineArray[index];
if (!c.equals(prevc)) {
final String hex = Integer.toHexString(c.charValue());
out.println(" case 0x" + hex + ":");
}
prevc = c;
}
out.println(" return true;");
out.println(" default:");
out.println(" return false;");
out.println(" }");
out.println(" }");
out.println("");
out.println(" /**");
out.println(" * Returns the character hashtable.");
out.println(" *");
out.println(" * @param c");
out.println(" * @return The character hashtable");
out.println(" */");
out.println(" public Hashtable getCharTable(final Character c) {");
out.println(" final String resultStr1 = getCharTableCharSet(c);");
out.println(" final String resultStr2 = getCharTableCharString(c);");
out.println(" if (resultStr2 == null) {");
out.println(" return null;");
out.println(" }");
out.println(" final int htSize = resultStr1.length();");
out.println(" final Hashtable result = new Hashtable(htSize);");
out.println(" final String res2[] = resultStr2.split(\" \");");
out.println(" for (int index = 0; index < resultStr1.length(); index++) {");
out.println(" final Integer intChar = new Integer(resultStr1.charAt(index));");
out.println(" result.put(intChar, deHexify(res2[(res2.length == 1) ? 0 : index]));");
out.println(" }");
out.println(" return result;");
out.println(" }");
out.println("");
final Character charsetsKeys[] = charsets.keySet().toArray(new Character[0]);
Arrays.sort(charsetsKeys);
final StringBuilder buffer = new StringBuilder();
out.println(" private String getCharTableCharSet(final Character c) {");
out.println(" final int cVal = (int)c.charValue();");
out.println(" switch(cVal) {");
for (int sel = 0; sel < charsetsKeys.length; sel++) {
final Hashtable table = charsets.get(charsetsKeys[sel]);
final Object tableKeys[] = table.keySet().toArray();
Arrays.sort(tableKeys);
final StringBuilder sb = new StringBuilder();
for (int index = 0; index < tableKeys.length; index++) {
sb.append((char) ((Integer) tableKeys[index]).intValue());
}
final String charset = sb.toString().trim();
if (!charset.equals("1")) {
final int intChar = charsetsKeys[sel].charValue();
final String intCharHex = Integer.toHexString(intChar);
out.println(" case 0x" + intCharHex + ":");
out.println(" return \"" + charset + "\";");
}
}
out.println(" }");
out.println(" return \"1\";");
out.println(" }");
dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 0, 3500);
dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 3500, 7000);
dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 7000, 10500);
dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 10500, 14000);
dumpPartialCharTableCharString(out, buffer, charsetsKeys, charsets, 14000, charsetsKeys.length);
out.println(" private String getCharTableCharString(final Character c) {");
out.println(" final int cVal = (int)c.charValue();");
out.println(buffer.toString());
out.println(" return null;");
out.println(" }");
out.println("}");
}
static private void dumpPartialCharTableCharString(final PrintStream out, final StringBuilder buffer,
final Object charsetsKeys[], final Hashtable> charsets,
final int startOffset, final int endOffset) {
final String startByteStr = "0x" + Integer.toHexString((((Character) charsetsKeys[startOffset]).charValue()));
final String endByteStr = "0x" + Integer.toHexString((((Character) charsetsKeys[endOffset - 1]).charValue()));
buffer.append(" if (cVal >= " + startByteStr + " && cVal <= " + endByteStr +
") {\n return getCharTableCharString_" + startByteStr + "_" + endByteStr +
"(c);\n }\n");
out.println(" private String getCharTableCharString_" + startByteStr + "_" + endByteStr +
"(final Character c) {");
out.println(" switch ((int)c.charValue()) {");
for (int sel = startOffset; sel < charsetsKeys.length && sel < endOffset; sel++) {
final Hashtable table = charsets.get(charsetsKeys[sel]);
final Object tableKeys[] = table.keySet().toArray();
Arrays.sort(tableKeys);
final StringBuilder sb1 = new StringBuilder();
final StringBuilder sb2 = new StringBuilder();
boolean useSB1 = false;
char prevcharArray[] = null;
for (int index = 0; index < tableKeys.length; index++) {
final Object value = table.get(tableKeys[index]);
final char valarray[] = (char[]) value;
sb1.append(hexify(valarray));
if (index == 0) {
sb2.append(hexify(valarray));
}
if (index > 0 && valarray.length == 1 && prevcharArray != null && prevcharArray.length == 1 &&
valarray[0] != prevcharArray[0]) {
useSB1 = true;
}
sb1.append(" ");
prevcharArray = valarray;
}
final String returnVal = useSB1 ? sb1.toString().trim() : sb2.toString().trim();
final int intChar = ((Character) charsetsKeys[sel]).charValue();
final String hex = Integer.toHexString(intChar);
out.println(" case 0x" + hex + ":");
out.println(" return \"" + returnVal + "\";");
}
out.println(" default:");
out.println(" return null;");
out.println(" }");
out.println(" }");
out.println("");
}
/**
* Utility function for translating an array of characters to a two character hex string of the character values.
*
* @param aValArray The array of characters to encode
* @return A string representation of the hex code
*/
private static String hexify(final char[] aValArray) {
String result = "";
for (int index = 0; index < aValArray.length; index++) {
result += Integer.toHexString(aValArray[index]);
}
return result;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy