org.docx4j.model.datastorage.OpenDoPEHandler Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of docx4j Show documentation
docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.
There is a newer version: 6.1.2
Show newest version
/**
 *  Copyright 2010-2013, Plutext Pty Ltd.
 *   
 *  This file is part of docx4j.

    docx4j is licensed under the Apache License, Version 2.0 (the "License"); 
    you may not use this file except in compliance with the License. 

    You may obtain a copy of the License at 

        http://www.apache.org/licenses/LICENSE-2.0 

    Unless required by applicable law or agreed to in writing, software 
    distributed under the License is distributed on an "AS IS" BASIS, 
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
    See the License for the specific language governing permissions and 
    limitations under the License.

 **/
package org.docx4j.model.datastorage;

import static org.docx4j.model.datastorage.XPathEnhancerParser.enhanceXPath;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.bind.JAXBElement;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.finders.TcFinder;
import org.docx4j.jaxb.Context;
import org.docx4j.model.sdt.QueryString;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.CustomXmlPart;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.openpackaging.parts.WordprocessingML.FooterPart;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.openpackaging.parts.opendope.ComponentsPart;
import org.docx4j.openpackaging.parts.opendope.ConditionsPart;
import org.docx4j.openpackaging.parts.opendope.XPathsPart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.CTAltChunk;
import org.docx4j.wml.CTDataBinding;
import org.docx4j.wml.CTLock;
import org.docx4j.wml.CTSdtCell;
import org.docx4j.wml.CTSdtContentCell;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.P;
import org.docx4j.wml.PPr;
import org.docx4j.wml.SdtBlock;
import org.docx4j.wml.SdtElement;
import org.docx4j.wml.SdtPr;
import org.docx4j.wml.SectPr;
import org.docx4j.wml.Tag;
import org.docx4j.wml.Tc;
import org.docx4j.wml.TcPr;
import org.jvnet.jaxb2_commons.ppp.Child;
import org.opendope.conditions.Condition;
import org.w3c.dom.Node;

public class OpenDoPEHandler {

	private static Logger log = LoggerFactory.getLogger(OpenDoPEHandler.class);

	public OpenDoPEHandler(WordprocessingMLPackage wordMLPackage)
			throws Docx4JException {

		this.wordMLPackage = wordMLPackage;

		if (wordMLPackage.getMainDocumentPart().getXPathsPart() == null) {
			throw new Docx4JException("OpenDoPE XPaths part missing");
		} else {
			xPaths = wordMLPackage.getMainDocumentPart().getXPathsPart()
					.getJaxbElement();
			log.debug(XmlUtils.marshaltoString(xPaths, true, true));
		}
		if (wordMLPackage.getMainDocumentPart().getConditionsPart() != null) {
			conditions = wordMLPackage.getMainDocumentPart()
					.getConditionsPart().getJaxbElement();
			log.debug(XmlUtils.marshaltoString(conditions, true, true));
		}
		if (wordMLPackage.getMainDocumentPart().getComponentsPart() != null) {
			components = wordMLPackage.getMainDocumentPart()
					.getComponentsPart().getJaxbElement();
			log.debug(XmlUtils.marshaltoString(components, true, true));
		}

		shallowTraversor = new ShallowTraversor();
		shallowTraversor.wordMLPackage = wordMLPackage;
	}

	// The package being processed. Assigned in the constructor, and
	// reassigned in preprocess() with whatever fetchComponents returns.
	private WordprocessingMLPackage wordMLPackage;
	// Traversal callback which preprocess() applies to each part.
	private ShallowTraversor shallowTraversor;

	// Tag key names. The values are the literal strings looked for in
	// (or written to) a content control's tag.
	public final static String BINDING_ROLE_XPATH = "od:xpath";

	// Conditional content control, and (presumably) the marker recording
	// that its condition evaluated to false -- confirm against usages.
	public final static String BINDING_ROLE_CONDITIONAL = "od:condition";
	public final static String BINDING_RESULT_CONDITION_FALSE = "od:resultConditionFalse";

	// Repeating content control, and (presumably) the markers recording
	// a zero-instance repeat / a repeated instance -- confirm against usages.
	public final static String BINDING_ROLE_REPEAT = "od:repeat";
	public final static String BINDING_RESULT_RPTD_ZERO = "od:resultRepeatZero";
	public final static String BINDING_RESULT_RPTD = "od:rptd";
	
	// Repeat position condition (eg second last entry)
	public final static String BINDING_ROLE_RPT_POS_CON = "od:RptPosCon";  // see bind.xslt

	public final static String BINDING_ROLE_NARRATIVE = "od:narrative";

	// Component content control, plus the keys for its "continuous
	// before" / "continuous after" settings.
	public final static String BINDING_ROLE_COMPONENT = "od:component";
	public final static String BINDING_ROLE_COMPONENT_BEFORE = "od:continuousBefore";
	public final static String BINDING_ROLE_COMPONENT_AFTER = "od:continuousAfter";

	public final static String BINDING_CONTENTTYPE = "od:ContentType";
	public final static String BINDING_HANDLER = "od:Handler";
	public final static String BINDING_PROGID = "od:progid"; // eg =Word.Document
	/*
	 * --------------------------------------------------------------------------
	 * Pre-processing of content controls which have a tag containing
	 * "bindingrole"
	 */

	// Contents of the OpenDoPE parts, loaded by the constructor.
	// xPaths is always set (the constructor throws if the XPaths part is
	// missing); conditions and components stay null when the package has
	// no such part.
	private org.opendope.conditions.Conditions conditions;
	private org.opendope.xpaths.Xpaths xPaths;
	private org.opendope.components.Components components;

// TODO consider whether to reinstate.  User would need to choose between 	
//  conditional SDT removal, and reverting functionality.
	
//	private boolean removeSdtCellsOnFailedCondition;
//
//	/**
//	 * Configure, how the preprocessor handles conditions on table cells.
//	 *
//	 * If set to false, conditional SDT cells are replaced by empty
//	 * cells. This is the default behavior.
//	 *
//	 * If set to true, conditional SDT cells are removed entirely.
//	 * Note that the table geometry is not changed; hence this works better
//	 * without dynamic table widths / no global width settings.
//	 *
//	 * This affects all future calls on the {@link #preprocess} method for this
//     * instance.
//	 *
//	 * @param removeSdtCellsOnFailedCondition
//	 *            The new value for the cell removal flag.
//	 */
//	public void setRemoveSdtCellsOnFailedCondition(
//			boolean removeSdtCellsOnFailedCondition) {
//		this.removeSdtCellsOnFailedCondition = removeSdtCellsOnFailedCondition;
//	}

	/**
	 * Preprocess content controls which have tag
	 * "od:condition|od:repeat|od:component".
	 *
	 * It is "preprocess" in the sense that it is "pre" opening in Word
	 *
	 * The algorithm is as follows:
	 *
	 * Inject components first.
	 *
	 * Look at each top level SDT (ShallowTraversor). If it does not have a real
	 * data binding, it might have a bindingrole tag we need to process
	 * (processBindingRoleIfAny).
	 *
	 * Conditionals are easy.
	 *
	 * processRepeat method:
	 *
	 * - clones the sdt n times
	 *
	 * - invokes DeepTraversor which changes xpath binding on descendant sdts
	 * (both sdts with real bindings and sdts with bindingrole tags).
	 *
	 * It is not the job of DeepTraversor to expand out any other repeats it
	 * might encounter, or to resolve conditionals.
	 *
	 * Those things are done by ShallowTraversor, to which control returns, as
	 * it continues its traverse.
	 *
	 * The implementation of 13 Sept 2010 replaced the previous XPath based
	 * implementation, which did not support nested repeats. I've chosen to
	 * build this around TraversalUtil, instead of using XSLT, and this seems to
	 * have worked out nicely.
	 *
	 * The implementation of 10 October 2010 replaced the v1 conventions
	 * implementation with a v2 implementation. The main method in this class
	 * can convert v1 documents to v2. The v2 implementation is not yet
	 * complete. All v1 features are implemented, but not the new v2 stuff (eg
	 * complex conditions).
	 *
	 * The whole pass (traverse all parts, then fetch components for all
	 * parts) is repeated for as long as a pass fetched a component in any
	 * part.
	 *
	 * @return the preprocessed package. This is whatever the last
	 *         fetchComponents call returned, so it is not necessarily the
	 *         instance which was passed to the constructor.
	 * @throws Docx4JException
	 *             if traversal raises an InputIntegrityException (which is
	 *             wrapped), or if fetching components fails
	 */
	public WordprocessingMLPackage preprocess() throws Docx4JException {

		boolean componentFetchedThisPass;

		do {
			// A component can apply in both the main document part,
			// and in headers/footers. See further
			// http://forums.opendope.org/Support-components-in-headers-footers-tp2964174p2964174.html
			// A component added to the
			// main document part could add new headers/footers.
			// So we need to work out what parts to preprocess
			// here inside this do loop.
			// The declared element type is needed for the enhanced for
			// loops below; the unchecked conversion only arises while
			// getParts is declared with a raw return type.
			@SuppressWarnings("unchecked")
			Set<ContentAccessor> partList = getParts(wordMLPackage);

			// Process repeats and conditionals.
			try {
				for (ContentAccessor part : partList) {
					new TraversalUtil(part, shallowTraversor);
				}
			} catch (InputIntegrityException iie) { // RuntimeException
				throw new Docx4JException(iie.getMessage(), iie);
			}

			// Convert any sdt tagged as an od:component
			// to altChunk, and for MergeDocx users, to
			// real WordML.
			//
			// fetchComponents resets justGotAComponent at the start of
			// every call, so on its own the field only tells us about the
			// last part visited (and the set has no defined order).
			// Accumulate the flag so that a component fetched in ANY part
			// triggers another pass.
			componentFetchedThisPass = false;
			for (ContentAccessor part : partList) {
				wordMLPackage = fetchComponents(wordMLPackage, part);
				componentFetchedThisPass |= justGotAComponent;
			}

		} while (componentFetchedThisPass);
		// ie repeat the whole process if you got a component

		return wordMLPackage;
	}

	private boolean justGotAComponent = false;

	private static DocxFetcher docxFetcher;

	/**
	 * Returns the fetcher registered on this class.
	 *
	 * @return the class-wide DocxFetcher, or null if none has been set
	 */
	public static DocxFetcher getDocxFetcher() {
		return OpenDoPEHandler.docxFetcher;
	}

	/**
	 * Registers the fetcher on this class. The setting is static, so it
	 * is shared by every OpenDoPEHandler instance.
	 *
	 * @param docxFetcher
	 *            the fetcher to store; replaces any previous value
	 */
	public static void setDocxFetcher(final DocxFetcher docxFetcher) {
		OpenDoPEHandler.docxFetcher = docxFetcher;
	}

	/**
	 * Collects the parts of the package which are to be preprocessed: the
	 * main document part, plus every part which the main document part's
	 * relationships identify as a header or a footer.
	 *
	 * @param srcPackage
	 *            the package whose parts are wanted
	 * @return a new set (iteration order unspecified) containing the main
	 *         document part and its header and footer parts
	 */
	protected static Set<ContentAccessor> getParts(
			WordprocessingMLPackage srcPackage) {

		Set<ContentAccessor> partList = new HashSet<ContentAccessor>();

		partList.add(srcPackage.getMainDocumentPart());

		// Add headers/footers
		RelationshipsPart rp = srcPackage.getMainDocumentPart()
				.getRelationshipsPart();
		for (Relationship r : rp.getRelationships().getRelationship()) {

			// Compare from the constant's side, so a relationship with
			// no type is simply skipped instead of causing an NPE.
			if (Namespaces.HEADER.equals(r.getType())) {
				partList.add((HeaderPart) rp.getPart(r));
			} else if (Namespaces.FOOTER.equals(r.getType())) {
				partList.add((FooterPart) rp.getPart(r));
			}
		}

		return partList;
	}

	private WordprocessingMLPackage fetchComponents(
			WordprocessingMLPackage srcPackage, ContentAccessor contentAccessor)
			throws Docx4JException {

		// convert components to altChunk
		Map replacements = new HashMap();
		Integer index = 0;
		justGotAComponent = false;

		LinkedList continuousBeforeIndex = new LinkedList();
		List continuousBefore = new ArrayList();

		List continuousAfter = new ArrayList();

		for (Object block : contentAccessor.getContent()) {

			// Object ublock = XmlUtils.unwrap(block);
			if (block instanceof org.docx4j.wml.SdtBlock) {

				org.docx4j.wml.SdtBlock sdt = (org.docx4j.wml.SdtBlock) block;

				Tag tag = getSdtPr(sdt).getTag();

				if (tag == null) {
					List