/*
 * org.docx4j.model.datastorage.OpenDoPEHandlerComponents (from the docx4j-core artifact).
 *
 * docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.
 *
 * Note: this listing is not from the newest release; a newer version (11.4.11) exists.
 */
package org.docx4j.model.datastorage;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.xml.namespace.QName;

import org.docx4j.TraversalUtil;
import org.docx4j.TraversalUtil.CallbackImpl;
import org.docx4j.XmlUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.model.sdt.QueryString;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.CustomXmlDataStoragePart;
import org.docx4j.openpackaging.parts.CustomXmlPart;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.opendope.ComponentsPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.Body;
import org.docx4j.wml.CTAltChunk;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.Id;
import org.docx4j.wml.SdtElement;
import org.docx4j.wml.SdtPr;
import org.docx4j.wml.Tag;
import org.opendope.conditions.Condition;
import org.opendope.xpaths.Xpaths;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Process OpenDoPE components.
 * 
 * From 6.1, components can be at any level in the
 * Main Document Part 
 * content hierarchy where an SdtBlock is allowed.
 * 
 * From 6.1, components support the idea of an XPath 
 * context: where an XPath is not absolute, it will be
 * interpreted relative to context. Context can be
 * provided explicitly (the id of an XPath in the XPaths 
 * part).  If not provided explicitly, we can read it
 * from an enclosing repeat.  (We don't do that for 
 * conditions right now, since they are a bit more tricky).
 * 
 * In 6.1, the section break handling is missing.
 * TODO: consider whether it is needed.  May be useful
 * for top level components which add headers/footers?
 * 
 * @author jharrop
 */
public class OpenDoPEHandlerComponents {

	private static Logger log = LoggerFactory.getLogger(OpenDoPEHandlerComponents.class);
	
	// Docx4j 6.1: re-designed component processing model:
	// 1. components don't have to be at the top paragraph level of the content tree,
	// 2. they can use an XPath context
	// BUT:
	// 3. component processing is now done before condition/repeat processing
	// 4. component processing is not recursive anymore
	// 5. components typically use the "main" answer file
	
	// We only support components in the MainDocumentPart,
	// since MergeDocx can concatenate content in that part,
	// but not in other parts.
	
	// TODO: make this step optional
	
	// The package handed to the constructor. fetchComponents() walks its main
	// document part, and returns this same object whenever there is nothing to do.
	private WordprocessingMLPackage srcPackage;

	// Reset to false at the start of fetchComponents() and tested after the traversal.
	// Presumably set to true by FindComponentsTraversor when it meets a component
	// (the traversor body is outside this view -- confirm).
	protected boolean justGotAComponent = false;
	
	// JAXB content of the Components part, read in the constructor when that part
	// exists. Left null otherwise, or when the constructor returned early because
	// the XPaths part was missing.
	private org.opendope.components.Components components;
		
	// XPath id -> Xpaths.Xpath, built in the constructor. Stays null when the package
	// has no XPaths part, which fetchComponents() treats as "nothing to process".
	private Map xpathsMap = null;
	// Condition id -> Condition, built in the constructor. Stays null when there is
	// no Conditions part (or no XPaths part, since the constructor returns early then).
	private Map conditionsMap = null; 

	// Class-wide fetcher, read and written through getDocxFetcher()/setDocxFetcher().
	private static DocxFetcher docxFetcher;

	/**
	 * Returns the class-wide {@link DocxFetcher}.
	 * 
	 * @return the fetcher last passed to {@link #setDocxFetcher(DocxFetcher)},
	 *         or null if none has been set
	 */
	public static DocxFetcher getDocxFetcher() {
		return docxFetcher;
	}

	/**
	 * Sets the class-wide {@link DocxFetcher}.  The field is static, so the
	 * value is shared by every instance of this handler.
	 * 
	 * @param docxFetcher the fetcher to store; replaces any previous value
	 */
	public static void setDocxFetcher(DocxFetcher docxFetcher) {
		OpenDoPEHandlerComponents.docxFetcher = docxFetcher;
	}
	
	public OpenDoPEHandlerComponents(WordprocessingMLPackage wordMLPackage)
			throws Docx4JException {

		this.srcPackage = wordMLPackage;

		if (wordMLPackage.getMainDocumentPart().getXPathsPart() == null) {
			log.info("OpenDoPE XPaths part missing (ok if you are just processing w15 repeatingSection)");
			return;
			
		} else {
			org.opendope.xpaths.Xpaths xPaths = wordMLPackage.getMainDocumentPart().getXPathsPart().getJaxbElement();
            if(log.isDebugEnabled()) {
                log.debug(XmlUtils.marshaltoString(xPaths, true, true));
            }
			
			xpathsMap = new HashMap(2*xPaths.getXpath().size());
			
			for (Xpaths.Xpath xp : xPaths.getXpath() ) {
				
				if (xpathsMap.put(xp.getId(), xp)!=null) {
					log.error("Duplicates in XPaths part: " + xp.getId());
				}
				// TODO key should include storeItemID?
			}
			
		}

		if (wordMLPackage.getMainDocumentPart().getConditionsPart() != null) {
			org.opendope.conditions.Conditions conditions = wordMLPackage.getMainDocumentPart()
					.getConditionsPart().getJaxbElement();
            if(log.isDebugEnabled()) {
                log.debug(XmlUtils.marshaltoString(conditions, true, true));
            }
			
			conditionsMap = new HashMap(2*conditions.getCondition().size());
			
			for (Condition c : conditions.getCondition()) {
				if (conditionsMap.put(c.getId(), c)!=null) {
					log.error("Duplicates in Conditions part: " + c.getId());
				}
			}
		}
		
		if (wordMLPackage.getMainDocumentPart().getComponentsPart() != null) {
			components = wordMLPackage.getMainDocumentPart()
					.getComponentsPart().getJaxbElement();
            if(log.isDebugEnabled()) {
                log.debug(XmlUtils.marshaltoString(components, true, true));
            }
		}
	}	
	
	

	
	/**
	 * XPath context per altChunk: key is altChunkRel.getId(), value is the xpathId.
	 * 
	 * fetchComponents() passes this map as the last argument to the MergeDocx
	 * altChunk processor.  Nothing in the visible part of this class adds
	 * entries; presumably FindComponentsTraversor does (confirm).
	 */
	Map altChunkXPathContexts = new HashMap();
		// We'd need PartName to map to that, if we were to support parts other than MDP!
	
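	/**
	 * Component processing.
	 * 
	 * Walks the main document part looking for components and, if any were
	 * found, hands the package to the MergeDocx altChunk processor (which is
	 * located by reflection).
	 * 
	 * @return the package returned by altChunk processing; or the source
	 *         package itself when there is no XPaths part, no component was
	 *         found, or the MergeDocx extension is not available
	 * @throws Docx4JException if the data storage part cannot be handled,
	 *         or altChunk processing fails
	 */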
	public WordprocessingMLPackage fetchComponents()
			throws Docx4JException {

		// The constructor leaves xpathsMap null when there is no XPaths part;
		// in that case there is nothing to process.
		if (xpathsMap == null) {
			return srcPackage;
		}

		justGotAComponent = false;

		// Find component sdts in the main document part.  They are to become
		// altChunks and, for MergeDocx users, real WordML.
		ContentAccessor part = srcPackage.getMainDocumentPart();

		FindComponentsTraversor t = new FindComponentsTraversor();
		t.wordMLPackage = srcPackage;
		if (part instanceof MainDocumentPart /* which it will be in this release */) {
			// avoid invoking walkJAXBElements on a list,
			// (docx4j 6.1.0 doesn't know how to replace children there,
			//  which it needs to do if the component is a child of w:body)
			Body b = ((MainDocumentPart) part).getJaxbElement().getBody();
			t.walkJAXBElements(b);
		} else {
			t.walkJAXBElements(part.getContent());
		}

		if (!justGotAComponent) {
			return srcPackage;
		}

		// The answer data, keyed by the QName of its root element.
		Map<QName, CustomXmlPart> answerDomDocs = new HashMap<QName, CustomXmlPart>();
		CustomXmlPart data = CustomXmlDataStoragePartSelector.getCustomXmlDataStoragePart(srcPackage);
		if (data == null) {
			// Fail with the declared exception type, rather than an NPE
			// from data.getClass() below.
			throw new Docx4JException(
					"Components found, but the package has no CustomXmlDataStoragePart to resolve them against");
		}
		if (!(data instanceof CustomXmlDataStoragePart)) {
			throw new Docx4JException("TODO: handle " + data.getClass().getName());
		}
		Document doc = ((CustomXmlDataStoragePart) data).getData().getDocument();
		answerDomDocs.put(getQName(doc.getDocumentElement()), data);

		// process altChunk
		try {
			// Use reflection, so docx4j can be built
			// by users who don't have the Enterprise MergeDocx utility
			Class<?> processAltChunk = Class
					.forName("com.plutext.merge.altchunk.ProcessAltChunk");

			// Pick the 5-parameter process(..) overload.  As before, if several
			// methods match, the last one reported by getMethods() is used.
			Method processMethod = null;
			for (Method candidate : processAltChunk.getMethods()) {
				if (candidate.getName().equals("process")
						&& candidate.getParameterCount() == 5) {
					processMethod = candidate;
				}
			}
			if (processMethod == null) {
				throw new NoSuchMethodException(
						"com.plutext.merge.altchunk.ProcessAltChunk.process (5 parameters)");
			}

			// null receiver: the method is invoked as a static method
			return (WordprocessingMLPackage) processMethod.invoke(null,
					srcPackage,
					answerDomDocs,
					xpathsMap,
					conditionsMap,
					altChunkXPathContexts);

		} catch (ClassNotFoundException | NoSuchMethodException e) {
			// Degrade gracefully: without the extension, return the package as it stands
			extensionMissing(e);
			return srcPackage;
		} catch (Exception e) {
			throw new Docx4JException("Problem processing w:altChunk", e);
		}
	}
	
  /**
   * Builds the qualified name (namespace URI + local name) of an element.
   * 
   * @param el the element to name; must not be null
   * @return a QName made from the element's namespace URI and local name
   */
  private QName getQName(Element el) {
	    // Elements created with DOM Level 1 methods (or parsed without namespace
	    // awareness) report a null local name, which the QName constructor
	    // rejects; fall back to the tag name for those.
	    String localPart = el.getLocalName();
	    if (localPart == null) {
	        localPart = el.getTagName();
	    }
	    QName qname = new QName(el.getNamespaceURI(), localPart);
	    // Diagnostic output goes to the logger, not to stdout
	    log.debug("Data document root element: {}", qname);
	    return qname;
	}	

//	public void makeContinuous(SectPr sectPr) {
//
//		if (sectPr == null) {
//			log.warn("sectPr was null");
//			return;
//		}
//
//		SectPr.Type type = Context.getWmlObjectFactory().createSectPrType();
//		type.setVal("continuous");
//		sectPr.setType(type);
//
//		// columns, endnotes, footnotes, formprot, line numbers are OK
//
//		// null out certain page level section properties
//		sectPr.setBidi(null);
//		sectPr.setDocGrid(null);
//		sectPr.setPaperSrc(null);
//		sectPr.setPgBorders(null);
//		sectPr.setPgMar(null);
//		sectPr.setPgNumType(null);
//		sectPr.setPgSz(null);
//		sectPr.setPrinterSettings(null);
//		sectPr.setSectPrChange(null);
//		sectPr.setTitlePg(null);
//		sectPr.setVAlign(null);
//	}

	/**
	 * Reports, at error level, that the MergeDocx extension could not be used.
	 * The first log entry names the exception class and its message; the
	 * remaining entries are a fixed notice.
	 * 
	 * @param e the exception which revealed that the extension is unavailable
	 */
	public void extensionMissing(Exception e) {
		String what = e.getClass().getName() + ": " + e.getMessage();
		log.error("\n" + what + "\n");

		String[] notice = {
				"* You don't appear to have the MergeDocx paid extension,",
				"* which is necessary to merge docx, or process altChunk.",
				"* Purchases of this extension support the docx4j project.",
				"* Please visit www.plutext.com if you want to buy it."
		};
		for (String line : notice) {
			log.error(line);
		}
	}


	/**
	 * Proposes a part name for which <code>rp.getRel(..)</code> returns null.
	 * 
	 * The first candidate is <code>prefix + suffix</code>; after that a
	 * counter starting at 2 is inserted between the two
	 * (<code>prefix + 2 + suffix</code>, <code>prefix + 3 + suffix</code>, ...).
	 * 
	 * @param prefix leading portion of the part name
	 * @param suffix trailing portion of the part name
	 * @param rp the relationships part against which candidates are checked
	 * @return the first candidate for which rp.getRel returns null
	 * @throws InvalidFormatException if a candidate is rejected by the PartName constructor
	 */
	private PartName getNewPartName(String prefix, String suffix,
			RelationshipsPart rp) throws InvalidFormatException {

		// First choice carries no number at all
		PartName candidate = new PartName(prefix + suffix);

		int counter = 1;
		while (rp.getRel(candidate) != null) {
			counter++;
			candidate = new PartName(prefix + counter + suffix);
		}

		return candidate;

	}

	/**
	 * This traversor finds components, taking note of their XPath context.
	 * For now, that's their immediate repeat ancestor.
	 */
	private class FindComponentsTraversor extends CallbackImpl {

		WordprocessingMLPackage wordMLPackage;
				
	    private LinkedList repeatContext = new LinkedList();
		

		@Override
		public List