All downloads are free. The search and download functionality uses the official Maven repository.

org.daisy.braille.utils.pef.TextHandler Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * Braille Utils (C) 2010-2011 Daisy Consortium
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package org.daisy.braille.utils.pef;

import org.daisy.dotify.api.table.BrailleConverter;
import org.daisy.dotify.api.table.Table;
import org.daisy.dotify.api.table.TableCatalogService;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Provides a handler for reading text and writing a PEF-file.
 *
 * @author Joel Håkansson
 */
public class TextHandler {
    /**
     * Defines a date format (yyyy-MM-dd).
     */
    public static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should contain the title of the publication.
     */
    public static final String KEY_TITLE = "title";
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should contain the author of the publication.
     */
    public static final String KEY_AUTHOR = "author";
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should contain the identifier for the publication.
     */
    public static final String KEY_IDENTIFIER = "identifier";
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should match the table to use.
     */
    public static final String KEY_MODE = "mode";
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should contain the language of the publication.
     */
    public static final String KEY_LANGUAGE = "language";
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should be "true" for duplex or "false" for simplex.
     */
    public static final String KEY_DUPLEX = "duplex";
    /**
     * Key for parseTextFile setting,
     * corresponding settings value should be a string containing a valid date on the form yyyy-MM-dd.
     */
    public static final String KEY_DATE = "date";

    private static final Logger LOGGER = Logger.getLogger(TextHandler.class.getCanonicalName());

    private final File input;
    private final File output;
    private final TableCatalogService factory;
    private final String title;
    private final String author;
    private final String language;
    private final String identifier;
    private final BrailleConverter converter;
    private final boolean duplex;
    private Date date;

    private int maxRows;
    private int maxCols;

    /**
     * Provides a Builder for TextHandler.
     */
    public static class Builder {
        // required params
        private final File input;
        private final File output;
        private final TableCatalogService factory;

        // optional params
        private String title = "";
        private String author = "";
        private String language = "";
        private String identifier = "";
        private String converterId = null;
        private boolean duplex = true;
        private Date date = new Date();

        /**
         * Creates a new TextHandler builder. A random identifier on the form
         * {@code AUTO_ID_nnnnnnnnn} (nine zero-padded digits) is assigned by default;
         * it can be replaced using {@link #identifier(String)}.
         *
         * @param input   the input
         * @param output  the output
         * @param factory the table catalog
         */
        public Builder(File input, File output, TableCatalogService factory) {
            this.input = input;
            this.output = output;
            this.factory = factory;
            // Truncate instead of rounding: Math.round could produce 1000000000 (ten digits),
            // which does not fit the nine-digit field.
            String digits = Long.toString((long) (Math.random() * 1000000000));
            StringBuilder id = new StringBuilder("AUTO_ID_");
            for (int i = digits.length(); i < 9; i++) {
                id.append('0');
            }
            id.append(digits);
            this.identifier = id.toString();
        }

        /**
         * Sets options from a map. Each key must be one of the {@code KEY_*} constants
         * of {@link TextHandler}; the value is passed on to the corresponding setter.
         *
         * @param settings the settings.
         * @return returns this object
         * @throws IllegalArgumentException if a key is unknown or if a value doesn't meet the requirements.
         */
        public Builder options(Map<String, String> settings) {
            for (Map.Entry<String, String> entry : settings.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                if (KEY_TITLE.equals(key)) {
                    title(value);
                } else if (KEY_AUTHOR.equals(key)) {
                    author(value);
                } else if (KEY_IDENTIFIER.equals(key)) {
                    identifier(value);
                } else if (KEY_MODE.equals(key)) {
                    converterId(value);
                } else if (KEY_LANGUAGE.equals(key)) {
                    language(value);
                } else if (KEY_DUPLEX.equals(key)) {
                    if (value == null) {
                        throw new IllegalArgumentException("Null value not accepted.");
                    }
                    // Any value other than "true" (in any letter case) means simplex.
                    duplex("true".equalsIgnoreCase(value));
                } else if (KEY_DATE.equals(key)) {
                    if (value == null) {
                        throw new IllegalArgumentException("Null value not accepted.");
                    }
                    try {
                        date(DATE_FORMAT.parse(value));
                    } catch (ParseException e) {
                        throw new IllegalArgumentException(e);
                    }
                } else {
                    throw new IllegalArgumentException("Unknown option \"" + key + "\"");
                }
            }
            return this;
        }

        //init optional params here

        /**
         * Sets the title for publications created using TextHandlers created with this builder.
         *
         * @param value the title
         * @return returns this object
         * @throws IllegalArgumentException if value is null
         */
        public Builder title(String value) {
            if (value == null) {
                throw new IllegalArgumentException("Null value not accepted.");
            }
            title = value;
            return this;
        }

        /**
         * Sets the author for publications created using TextHandlers created with this builder.
         *
         * @param value the author
         * @return returns this object
         * @throws IllegalArgumentException if value is null
         */
        public Builder author(String value) {
            if (value == null) {
                throw new IllegalArgumentException("Null value not accepted.");
            }
            author = value;
            return this;
        }

        /**
         * Sets the language for publications created using TextHandlers created with this builder.
         *
         * @param value the language
         * @return returns this object
         * @throws IllegalArgumentException if value is null
         */
        public Builder language(String value) {
            if (value == null) {
                throw new IllegalArgumentException("Null value not accepted.");
            }
            language = value;
            return this;
        }

        /**
         * Sets the identifier for publications created using TextHandlers created with this builder.
         *
         * @param value the identifier
         * @return returns this object
         * @throws IllegalArgumentException if value is null
         */
        public Builder identifier(String value) {
            if (value == null) {
                throw new IllegalArgumentException("Null value not accepted.");
            }
            identifier = value;
            return this;
        }

        /**
         * Sets the converter identifier to be used when creating a TextHandler. See TableCatalog
         * for available values. If none is supplied (or if null is supplied), the table is
         * detected from the contents of the input file when the TextHandler is created.
         *
         * @param value the identifier for the converter
         * @return returns this object
         */
        public Builder converterId(String value) {
            converterId = value;
            return this;
        }

        /**
         * Sets the duplex property for publications created using TextHandlers created with this builder.
         *
         * @param value the duplex value
         * @return returns this object
         */
        public Builder duplex(boolean value) {
            duplex = value;
            return this;
        }

        /**
         * Sets the date for publications created using TextHandlers created with this builder.
         *
         * @param value the date to use
         * @return returns this object
         * @throws IllegalArgumentException if value is null
         */
        public Builder date(Date value) {
            if (value == null) {
                throw new IllegalArgumentException("Null value not accepted.");
            }
            date = value;
            return this;
        }

        /**
         * Builds a TextHandler using the settings of this Builder.
         *
         * @return returns a new TextHandler
         * @throws IOException             if an error occurs
         * @throws InputDetectionException if an error occurs
         */
        public TextHandler build() throws IOException, InputDetectionException {
            return new TextHandler(this);
        }

        /**
         * Parses the input file with the current settings.
         *
         * @throws InputDetectionException if an error occurs
         * @throws IOException             if an error occurs
         */
        public void parse() throws InputDetectionException, IOException {
            build().parse();
        }
    }

    /**
     * Creates a handler from the settings of the supplied builder. If the builder has no
     * converter identifier, the input file is opened and handed to a TextInputDetector;
     * exactly one candidate table must be found for construction to succeed.
     *
     * @param builder the builder holding the settings
     * @throws IOException              if the input file cannot be opened or closed
     * @throws InputDetectionException  if no table, or more than one table, matches the input
     * @throws IllegalArgumentException if the catalog yields no table for the configured identifier
     */
    private TextHandler(Builder builder) throws IOException, InputDetectionException {
        input = builder.input;
        output = builder.output;
        factory = builder.factory;
        title = builder.title;
        author = builder.author;
        language = builder.language;
        identifier = builder.identifier;
        if (builder.converterId == null) {
            List<Table> tableCandidates;
            FileInputStream is = new FileInputStream(input);
            try {
                TextInputDetector tid = new TextInputDetector(factory);
                tableCandidates = tid.detect(is);
            } finally {
                // The stream is only needed for detection; release it regardless of the outcome.
                is.close();
            }
            if (tableCandidates == null || tableCandidates.isEmpty()) {
                throw new InputDetectionException("Cannot detect table.");
            }
            if (tableCandidates.size() > 1) {
                // Ambiguous input: list every candidate so the caller can pick one explicitly.
                StringBuilder sb = new StringBuilder();
                boolean first = true;
                for (Table t : tableCandidates) {
                    if (!first) {
                        sb.append(", ");
                    } else {
                        first = false;
                    }
                    sb.append("'").append(t.getDisplayName()).append("'");
                }
                throw new InputDetectionException(
                    "Cannot choose a table automatically. Possible matches: " + sb.toString()
                );
            }
            Table detected = tableCandidates.get(0);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Using " + detected.getDisplayName());
            }
            converter = detected.newBrailleConverter();
        } else {
            Table table = factory.newTable(builder.converterId);
            // Guard in case the catalog hands back null for an identifier it does not know;
            // fail with a descriptive message rather than a bare NullPointerException.
            if (table == null) {
                throw new IllegalArgumentException(
                    "Cannot find table with identifier \"" + builder.converterId + "\""
                );
            }
            converter = table.newBrailleConverter();
        }
        duplex = builder.duplex;
        date = builder.date;
    }

    /**
     * Creates a new builder with the supplied parameters.
     *
     * @param input   the input file
     * @param output  the output file
     * @param factory the table catalog
     * @return returns a new builder
     */
    public static Builder with(File input, File output, TableCatalogService factory) {
        // Convenience entry point: forwards the three required arguments to the Builder constructor.
        final Builder builder = new Builder(input, output, factory);
        return builder;
    }

    /**
     * Parse using current settings.
     *
     * @throws IOException if an I/O error occurs
     */
    public void parse() throws IOException {
        if (date == null) {
            date = new Date();
        }
        FileInputStream is = new FileInputStream(input);
        PrintWriter pw = new PrintWriter(output, "utf-8");
        LineNumberReader lr = new LineNumberReader(new InputStreamReader(is, converter.getPreferredCharset()));
        // determine max rows/page and chars/row

        read(lr, null);

        // reset input
        is = new FileInputStream(input);
        lr = new LineNumberReader(new InputStreamReader(is, converter.getPreferredCharset()));

        pw.println("");
        pw.println("");
        pw.println("    ");
        pw.println("        ");
        if (!"".equals(title)) {
            pw.println("            " + title + "");
        }
        if (!"".equals(author)) {
            pw.println("            " + author + "");
        }
        if (!"".equals(language)) {
            pw.println("            " + language + "");
        }
        pw.println("            " + DATE_FORMAT.format(date) + "");
        pw.println("            application/x-pef+xml");
        if (!"".equals(identifier)) {
            pw.println("            " + identifier + "");
        }
        pw.println("        ");
        pw.println("    ");
        pw.println("    ");
        pw.println(
            "        "
        );
        pw.println("            
"); read(lr, pw); pw.println("
"); pw.println("
"); pw.println(" "); pw.println("
"); pw.flush(); pw.close(); } private void read(LineNumberReader lr, PrintWriter pw) throws IOException { maxRows = 0; maxCols = 0; int cRows = 0; boolean pageClosed = true; int eofIndex = -1; cRows++; String line = lr.readLine(); while (line != null) { eofIndex = line.indexOf(0x1A); if (eofIndex > -1) { line = line.substring(0, eofIndex); //remove trailing characters beyond eof-mark (CTRL+Z) } if ("\f".equals(line)) { // if line consists of a single form feed character. Just close the page (don't add rows yet). // if page is already closed, this is an empty page if (pageClosed) { if (pw != null) { pw.println(" "); } pageClosed = false; } if (pw != null) { pw.println(" "); } pageClosed = true; cRows--; // don't count this row if (cRows > maxRows) { maxRows = cRows; } cRows = 0; } else { String[] pieces = line.split("\\f", -1); //split on form feed int i = 1; for (String p : pieces) { if (i > 1) { // there were form feeds if (pw != null) { pw.println(" "); } pageClosed = true; cRows--; // don't count this row if (cRows > maxRows) { maxRows = cRows; } cRows = 0; } if (pageClosed) { if (pw != null) { pw.println(" "); } pageClosed = false; } if (p.length() > maxCols) { maxCols = p.length(); } // don't output if row contains form feeds and this segment equals "" if (!(pieces.length > 1 && (i == pieces.length || i == 1) && "".equals(p)) && pw != null) { pw.print(" "); pw.print(converter.toBraille(p)); pw.println(""); } i++; } } if (eofIndex > -1) { // End of file reached. Stop reading. line = null; } else { line = lr.readLine(); cRows++; } } lr.close(); if (!pageClosed) { if (pw != null) { pw.println(" "); } pageClosed = true; cRows--; // don't count this row if (cRows > maxRows) { maxRows = cRows; } cRows = 0; } } }