All downloads are free. The search and download functionality uses the official Maven repository.

org.docx4j.template.xhtml.utils.XHTMLImporterUtils Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2018, hiwepy (https://github.com/hiwepy).
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.docx4j.template.xhtml.utils;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;

import org.docx4j.Docx4jProperties;
import org.docx4j.convert.in.xhtml.XHTMLImporterImpl;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.AltChunkType;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.template.Docx4jConstants;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;

/**
 * Static helper that imports a jsoup-parsed HTML {@link Document} into a
 * docx4j {@link WordprocessingMLPackage}.
 */
public class XHTMLImporterUtils {

	/**
	 * Imports the markup of {@code doc} into {@code wmlPackage}.
	 *
	 * <p>The output settings of {@code doc} are switched to XML syntax with
	 * XHTML entity escaping before the markup is serialized; this modifies the
	 * passed-in document's output settings.</p>
	 *
	 * <p>Two import strategies are supported:</p>
	 * <ul>
	 * <li>{@code altChunk == true}: the markup is encoded using the charset
	 * named by the {@code Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME}
	 * docx4j property (falling back to
	 * {@code Docx4jConstants.DEFAULT_CHARSETNAME}), added to the main document
	 * part as an XHTML alt chunk, and the package returned by
	 * {@code MainDocumentPart.convertAltChunks()} is handed back.</li>
	 * <li>{@code altChunk == false}: the markup is converted with
	 * {@link XHTMLImporterImpl}, using {@code doc.baseUri()} as the base URL,
	 * the resulting objects are appended to the main document part's content,
	 * and {@code wmlPackage} itself is returned.</li>
	 * </ul>
	 *
	 * @param wmlPackage the package that receives the imported content
	 * @param doc        the parsed HTML document to import
	 * @param fragment   {@code true} to import only the inner HTML of
	 *                   {@code doc.body()}, {@code false} to import the whole
	 *                   document markup
	 * @param altChunk   {@code true} to import through an alt chunk,
	 *                   {@code false} to convert directly with
	 *                   {@link XHTMLImporterImpl}
	 * @return the package returned by {@code convertAltChunks()} when
	 *         {@code altChunk} is {@code true}; otherwise {@code wmlPackage}
	 * @throws IOException     declared for callers; NOTE(review): presumably
	 *                         raised by the underlying docx4j calls - confirm
	 * @throws Docx4JException propagated from the docx4j calls made here
	 */
	public static WordprocessingMLPackage handle(WordprocessingMLPackage wmlPackage, Document doc,boolean fragment,boolean altChunk) throws IOException, Docx4JException {
		// Set the conversion mode: serialize as XML with XHTML escaping (i.e. emit XHTML).
		doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml);

		// Both strategies consume the same markup, so serialize it once.
		String xhtml = fragment ? doc.body().html() : doc.html();

		if (altChunk) {
			// Charset used to turn the markup into bytes for the alt chunk.
			String charsetName = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME);
			MainDocumentPart mainPart = wmlPackage.getMainDocumentPart();
			mainPart.addAltChunk(AltChunkType.Xhtml, xhtml.getBytes(Charset.forName(charsetName)));
			// Return the package produced by the alt chunk conversion.
			return mainPart.convertAltChunks();
		}

		// Create the HTML importer bound to the target package.
		XHTMLImporterImpl xhtmlImporter = new XHTMLImporterImpl(wmlPackage);
		// Convert the XHTML into objects usable by the package.
		List<Object> converted = xhtmlImporter.convert(xhtml, doc.baseUri());
		// Append the converted content to the main document part.
		wmlPackage.getMainDocumentPart().getContent().addAll(converted);
		// Return the original package.
		return wmlPackage;
	}

}