All downloads are free. The search and download features use the official Maven repository.

com.greenpepper.runner.dialect.DocDialect Maven / Gradle / Ivy

The newest version!
package com.greenpepper.runner.dialect;

import com.greenpepper.dialect.SpecificationDialect;
import com.greenpepper.dialect.SpecificationDialectException;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import com.greenpepper.shaded.org.slf4j.Logger;
import com.greenpepper.shaded.org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Specification dialect that turns a binary Microsoft Word document into an
 * HTML string using Apache POI's {@link WordToHtmlConverter}.
 *
 * <p>The converted DOM is serialized with the JAXP identity transformer and the
 * resulting markup is handed to {@code cleanUpTheHtml}, which is not defined in
 * this class (presumably inherited from {@code AbstractOpenDocumentDialect}).
 */
public class DocDialect extends AbstractOpenDocumentDialect {

    private static final Logger LOGGER = LoggerFactory.getLogger(DocDialect.class);

    /**
     * Encoding used both to serialize the HTML DOM to bytes and to decode those
     * bytes back into a {@link String}; the two must always agree.
     */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Converts the Word document read from {@code input} into HTML text.
     *
     * @param input stream positioned at the start of the Word document; this
     *              method does not close it explicitly
     * @return the serialized HTML after it has been passed through
     *         {@code cleanUpTheHtml}
     * @throws SpecificationDialectException if the document cannot be read, the
     *         XML document builder cannot be created, or the DOM cannot be
     *         serialized; the original exception is kept as the cause
     */
    @Override
    public String convert(InputStream input) throws SpecificationDialectException {
        LOGGER.debug("Converting content to XHTML");
        try {

            HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(input);

            // The converter fills in an initially empty DOM document.
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            DOMSource domSource = new DOMSource(htmlDocument);
            StreamResult streamResult = new StreamResult(out);

            // A transformer created without a stylesheet copies the source to
            // the result unchanged; it is used here purely as a serializer.
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer serializer = tf.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, OUTPUT_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(domSource, streamResult);

            // Decode with the same encoding the serializer wrote. Relying on the
            // platform default charset would corrupt non-ASCII characters on any
            // JVM whose default is not UTF-8. The UnsupportedEncodingException
            // this can declare is an IOException and is handled below.
            String result = cleanUpTheHtml(out.toString(OUTPUT_ENCODING));
            LOGGER.debug("result of the conversion:\n{}", result);
            return result;
        } catch (TransformerException e) {
            throw new SpecificationDialectException("unable to convert to XHTML", e);
        } catch (IOException e) {
            throw new SpecificationDialectException("unable to convert to XHTML", e);
        } catch (ParserConfigurationException e) {
            throw new SpecificationDialectException("unable to convert to XHTML", e);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy