All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.daisy.braille.utils.pef.PEFFileSplitter Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
/*
 * Braille Utils (C) 2010-2011 Daisy Consortium
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package org.daisy.braille.utils.pef;

import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Stack;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;

/**
 * Splits a PEF-file into several single volume files. The main purpose is to
 * interact with software that operates on one volume at a time.
 *
 * <p>Every event that precedes the first {@code volume} element is treated as a
 * shared header and is copied to the beginning of each generated file. The
 * closing {@code body} tag and everything after it is treated as a shared
 * footer and is appended to each generated file.</p>
 *
 * @author Joel Håkansson
 */
public class PEFFileSplitter implements ErrorHandler {
    private static final Logger logger = Logger.getLogger(PEFFileSplitter.class.getCanonicalName());

    /**
     * Defines the default prefix for generated file names.
     */
    public static final String PREFIX = "volume-";

    /**
     * Defines the default postfix for generated file names.
     */
    public static final String POSTFIX = ".pef";

    private static final String PEF_NAMESPACE = "http://www.daisy.org/ns/2008/pef";
    private static final QName VOLUME_NAME = new QName(PEF_NAMESPACE, "volume");
    private static final QName BODY_NAME = new QName(PEF_NAMESPACE, "body");
    private static final QName DC_IDENTIFIER_NAME =
            new QName("http://purl.org/dc/elements/1.1/", "identifier");

    /**
     * The part of the input document that is currently being read.
     */
    enum State {
        HEADER,
        BODY,
        FOOTER
    }

    private final Predicate<URL> validator;

    /**
     * Creates a new PEFFileSplitter object.
     *
     * @param validator a PEF validator. A full validation is strongly recommended.
     * @throws NullPointerException if validator is null
     */
    public PEFFileSplitter(Predicate<URL> validator) {
        this.validator = Objects.requireNonNull(validator);
    }

    /**
     * Splits a PEF-file into several single volume PEF-files. The generated
     * files are named after the input file: the part of the name before the
     * last '.' followed by "-", the volume number and the original extension
     * (or ".pef" if the input name has no '.').
     *
     * @param input     input PEF-file
     * @param directory output directory
     * @return returns true if split was successful, false otherwise
     * @throws IllegalArgumentException if input is not a file
     */
    public boolean split(File input, File directory) {
        if (!input.isFile()) {
            throw new IllegalArgumentException("Input is not a file: " + input);
        }
        String inputName = input.getName();
        String inputExt = ".pef";
        int index = inputName.lastIndexOf('.');
        if (index >= 0) {
            // lastIndexOf never returns an index past the end, so substring is always safe here
            inputExt = inputName.substring(index);
            inputName = inputName.substring(0, index);
        }
        try (InputStream is = new FileInputStream(input)) {
            return split(is, directory, inputName + "-", inputExt);
        } catch (FileNotFoundException e) {
            logger.log(Level.SEVERE, "Could not open input file: " + input, e);
            return false;
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Could not close input file: " + input, e);
            return false;
        }
    }

    /**
     * Splits the PEF-document provided as an input stream into several single volume PEF-files using
     * the default file name pre- and postfix.
     *
     * @param is        the input stream to the PEF-document
     * @param directory the output directory
     * @return returns true if split was successful, false otherwise
     */
    public boolean split(InputStream is, File directory) {
        return split(is, directory, PREFIX, POSTFIX);
    }

    /**
     * Splits the PEF-document provided as an input stream into several single volume PEF-files using
     * the supplied file name pre- and postfix. Files are named
     * {@code prefix + volumeNumber + postfix}, with volume numbers starting at 1.
     * After all files have been written, each one is passed to the validator.
     *
     * <p>The input stream and all output files are closed before this method
     * returns, whether or not the split succeeded.</p>
     *
     * @param is        the input stream to the PEF-document
     * @param directory the output directory
     * @param prefix    the prefix to use
     * @param postfix   the postfix to use
     * @return returns true if split was successful, false otherwise
     */
    public boolean split(InputStream is, File directory, String prefix, String postfix) {
        directory.mkdirs();
        XMLInputFactory inFactory = XMLInputFactory.newInstance();
        inFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        inFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
        // NOTE(review): DTD processing and external entity resolution are enabled, which
        // exposes the parser to XXE if the input is untrusted. Kept as-is for backward
        // compatibility; consider disabling both if no caller depends on them.
        inFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.TRUE);
        inFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.TRUE);

        sendMessage("Splitting");
        // Kept in creation order so that files are validated in volume order.
        List<File> files = new ArrayList<>();
        List<FileOutputStream> streams = new ArrayList<>();
        List<XMLEventWriter> writers = new ArrayList<>();
        XMLEventReader reader = null;
        boolean written = false;
        try {
            XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
            XMLEventFactory eventFactory = XMLEventFactory.newInstance();
            reader = inFactory.createXMLEventReader(is);
            List<XMLEvent> header = new ArrayList<>();
            XMLEventWriter current = null;
            int volumeNumber = 0;
            State state = State.HEADER;
            while (reader.hasNext()) {
                XMLEvent event = reader.nextEvent();
                if (event.getEventType() == XMLStreamConstants.START_ELEMENT
                        && VOLUME_NAME.equals(event.asStartElement().getName())) {
                    // A new volume starts: open the next output file and replay the header into it.
                    state = State.BODY;
                    volumeNumber++;
                    File file = new File(directory, prefix + volumeNumber + postfix);
                    files.add(file);
                    FileOutputStream out = new FileOutputStream(file);
                    // Register the stream immediately so it is closed even if writer creation fails.
                    streams.add(out);
                    current = outputFactory.createXMLEventWriter(out, "UTF-8");
                    writers.add(current);
                    writeHeader(header, current, eventFactory, volumeNumber);
                } else if (event.getEventType() == XMLStreamConstants.END_ELEMENT
                        && BODY_NAME.equals(event.asEndElement().getName())) {
                    state = State.FOOTER;
                }
                switch (state) {
                    case HEADER:
                        // remember the event so it can be replayed for every volume
                        header.add(event);
                        break;
                    case BODY:
                        // state is only BODY after a volume has been opened, so current is set
                        current.add(event);
                        break;
                    case FOOTER:
                        // write footer to all files
                        for (XMLEventWriter w : writers) {
                            w.add(event);
                        }
                        break;
                }
            }
            written = true;
        } catch (IOException | XMLStreamException e) {
            logger.log(Level.SEVERE, "Failed to split PEF document", e);
        } finally {
            boolean closedCleanly = closeAll(reader, writers, streams, is);
            written = written && closedCleanly;
        }
        if (!written) {
            return false;
        }
        return validateResults(files);
    }

    /**
     * Writes the shared header events to a volume file, appending "-" and the
     * volume number to the text of each dc:identifier element.
     */
    private static void writeHeader(List<XMLEvent> header, XMLEventWriter writer,
            XMLEventFactory eventFactory, int volumeNumber) throws XMLStreamException {
        boolean inIdentifier = false;
        for (XMLEvent e : header) {
            int type = e.getEventType();
            if (type == XMLStreamConstants.START_ELEMENT
                    && DC_IDENTIFIER_NAME.equals(e.asStartElement().getName())) {
                inIdentifier = true;
                writer.add(e);
            } else if (inIdentifier && type == XMLStreamConstants.CHARACTERS) {
                inIdentifier = false;
                writer.add(eventFactory.createCharacters(e.asCharacters().getData() + "-" + volumeNumber));
            } else {
                if (type == XMLStreamConstants.END_ELEMENT) {
                    // An empty identifier must not cause the suffix to be appended
                    // to the text of some later, unrelated element.
                    inIdentifier = false;
                }
                writer.add(e);
            }
        }
    }

    /**
     * Closes the writers, output streams, reader and input stream, continuing past
     * individual failures. Returns false if a writer, output stream or the input
     * stream could not be closed.
     */
    private static boolean closeAll(XMLEventReader reader, List<XMLEventWriter> writers,
            List<FileOutputStream> streams, InputStream is) {
        boolean clean = true;
        // Writers first, so that buffered output reaches the streams before they are closed.
        for (XMLEventWriter w : writers) {
            try {
                w.close();
            } catch (XMLStreamException e) {
                logger.log(Level.WARNING, "Failed to close XML writer", e);
                clean = false;
            }
        }
        for (FileOutputStream s : streams) {
            try {
                s.close();
            } catch (IOException e) {
                logger.log(Level.WARNING, "Failed to close output file", e);
                clean = false;
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (XMLStreamException e) {
                // The output is already complete at this point; do not fail the split for this.
                logger.log(Level.FINE, "Failed to close XML reader", e);
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                logger.log(Level.WARNING, "Failed to close input stream", e);
                clean = false;
            }
        }
        return clean;
    }

    /**
     * Runs the validator on each generated file, in volume order, stopping at the
     * first file that fails.
     */
    private boolean validateResults(List<File> files) {
        sendMessage("Checking result for errors");
        for (File f : files) {
            sendMessage("Examining " + f.getName(), Level.FINE);
            boolean valid;
            try {
                valid = validator.test(f.toURI().toURL());
            } catch (IOException e) {
                // toURL() can throw MalformedURLException
                logger.log(Level.SEVERE, "Could not create a URL for result file: " + f, e);
                return false;
            }
            if (!valid) {
                sendMessage("Validation of result file failed: " + f.getName(), Level.SEVERE);
                return false;
            }
            sendMessage(f.getName() + " ok!", Level.FINE);
        }
        sendMessage("Done!");
        return true;
    }

    /**
     * Rethrows the supplied parse error wrapped in a SAXException.
     */
    @Override
    public void error(SAXParseException exception) throws SAXException {
        throw new SAXException(exception);
    }

    /**
     * Rethrows the supplied fatal parse error wrapped in a SAXException.
     */
    @Override
    public void fatalError(SAXParseException exception) throws SAXException {
        throw new SAXException(exception);
    }

    /**
     * Logs the supplied parse warning at INFO level.
     */
    @Override
    public void warning(SAXParseException exception) throws SAXException {
        sendMessage(exception.toString());
    }

    private void sendMessage(String msg) {
        sendMessage(msg, Level.INFO);
    }

    private void sendMessage(String msg, Level level) {
        logger.log(level, msg);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy