org.docx4j.samples.DocxToXhtmlAndBack Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of docx4j-ImportXHTML Show documentation
docx4j-ImportXHTML converts XHTML to OpenXML WordML (docx) using docx4j
There is a newer version: 11.4.8
/*
 *  Copyright 2007-2008, Plutext Pty Ltd.
 *
 *  This file is part of docx4j.

    docx4j is licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

 */

package org.docx4j.samples;

import java.io.File;
import java.io.OutputStream;

import org.apache.commons.io.FileUtils;
import org.docx4j.Docx4J;
import org.docx4j.Docx4jProperties;
import org.docx4j.convert.in.xhtml.XHTMLImporterImpl;
import org.docx4j.convert.out.ConversionFeatures;
import org.docx4j.convert.out.html.AbstractHtmlExporter;
import org.docx4j.convert.out.html.AbstractHtmlExporter.HtmlSettings;
import org.docx4j.convert.out.html.HTMLExporterXslt;
import org.docx4j.convert.out.html.HtmlExporterNG2;
import org.docx4j.convert.out.html.SdtToListSdtTagHandler;
import org.docx4j.convert.out.html.SdtWriter;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart;

/**
 * docx to xhtml to docx again.
 * 
 * Useful for testing support for specific features.
 *
 */
public class DocxToXhtmlAndBack {

	static String dir;
	
	protected static String inputfilepath;	
	protected static String outputfilepath;
	
	// Config for non-command line version
	static {
	
		dir = System.getProperty("user.dir") + "/sample-docs/docx/";
//		dir = System.getProperty("user.dir") + "/";
    	inputfilepath = "sample-docxv2.docx";
//    	inputfilepath = System.getProperty("user.dir") + "/sample-docs/docx/tables.docx";
//    	inputfilepath = System.getProperty("user.dir") + "/images.docx";
		
	}

    public static void main(String[] args)
            throws Exception {
    	
    	// Images: provide correct baseURL
    	String baseURL = "file:///bvols/@git/repos/docx4j-ImportXHTML/sample-docs/docx/sample-docxv2.docx_files";    	
//    	Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML", true);
    	
    	
		try {
			getInputFilePath(args);
		} catch (IllegalArgumentException e) {
		}

		System.out.println(inputfilepath);
		WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(dir+inputfilepath));

		// XHTML export
		AbstractHtmlExporter exporter = new HtmlExporterNG2();
    	HtmlSettings htmlSettings = new HtmlSettings();
    	
    	htmlSettings.setWmlPackage(wordMLPackage);
    	
    	htmlSettings.setImageDirPath(dir + inputfilepath + "_files");
    	htmlSettings.setImageTargetUri(dir + inputfilepath + "_files");
    	
    	// list numbering:  depending on whether you want list numbering hardcoded, or done using .
		boolean nestLists = true;
    	if (nestLists) {
    		SdtWriter.registerTagHandler("HTML_ELEMENT", new SdtToListSdtTagHandler());
    	} else {
    		htmlSettings.getFeatures().remove(ConversionFeatures.PP_HTML_COLLECT_LISTS);
    	} // must do one or the other
    	
    	
    	String htmlFilePath = dir + "/DocxToXhtmlAndBack.html";
		OutputStream os = new java.io.FileOutputStream(htmlFilePath);

//		javax.xml.transform.stream.StreamResult result = new javax.xml.transform.stream.StreamResult(os);
//		exporter.html(wordMLPackage, result, htmlSettings);
//		os.flush();
//		os.close();

		
		Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_NONE);
		
		
		// XHTML to docx
        String stringFromFile = FileUtils.readFileToString(new File(htmlFilePath), "UTF-8");
		
        
		WordprocessingMLPackage docxOut = WordprocessingMLPackage.createPackage();
		NumberingDefinitionsPart ndp = new NumberingDefinitionsPart();
		docxOut.getMainDocumentPart().addTargetPart(ndp);
		ndp.unmarshalDefaultNumbering();	
		
        XHTMLImporterImpl XHTMLImporter = new XHTMLImporterImpl(docxOut);
        XHTMLImporter.setHyperlinkStyle("Hyperlink");
					
		docxOut.getMainDocumentPart().getContent().addAll( 
				XHTMLImporter.convert(stringFromFile, baseURL) );
				
		docxOut.save(new java.io.File(dir + "/DocxToXhtmlAndBack.docx") );

    }
    
	protected static void getInputFilePath(String[] args) throws IllegalArgumentException {

		if (args.length==0) throw new IllegalArgumentException("Input file arg missing");

		inputfilepath = args[0];
	}
	
	protected static void getOutputFilePath(String[] args) throws IllegalArgumentException {

		if (args.length<2) throw new IllegalArgumentException("Output file arg missing");

		outputfilepath = args[1];
	}	
    
}