/*
 * org.docx4j.model.datastorage.OpenDoPEHandlerComponents (docx4j-core)
 *
 * docx4j is a library which helps you to work with the Office Open
 * XML file format as used in docx documents, pptx presentations,
 * and xlsx spreadsheets.
 */
package org.docx4j.model.datastorage;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.namespace.QName;
import org.docx4j.TraversalUtil;
import org.docx4j.TraversalUtil.CallbackImpl;
import org.docx4j.XmlUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.model.sdt.QueryString;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.CustomXmlDataStoragePart;
import org.docx4j.openpackaging.parts.CustomXmlPart;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.opendope.ComponentsPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.Body;
import org.docx4j.wml.CTAltChunk;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.Id;
import org.docx4j.wml.SdtElement;
import org.docx4j.wml.SdtPr;
import org.docx4j.wml.Tag;
import org.opendope.conditions.Condition;
import org.opendope.xpaths.Xpaths;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
* Process OpenDoPE components.
*
* From 6.1, components can be at any level in the
* Main Document Part
* content hierarchy where an SdtBlock is allowed.
*
* From 6.1, components support the idea of an XPath
* context: where an XPath is not absolute, it will be
* interpreted relative to context. Context can be
* provided explicitly (the id of an XPath in the XPaths
* part). If not provided explicitly, we can read it
* from an enclosing repeat. (We don't do that for
* conditions right now, since they are a bit more tricky).
*
* In 6.1, the section break handling is missing.
* TODO: consider whether it is needed. May be useful
* for top level components which add headers/footers?
*
* @author jharrop
*/
public class OpenDoPEHandlerComponents {
// SLF4J logger for this class.
private static Logger log = LoggerFactory.getLogger(OpenDoPEHandlerComponents.class);
// Docx4j 6.1: re-designed component processing model:
// 1. components don't have to be at the top paragraph level of the content tree,
// 2. they can use an XPath context
// BUT:
// 3. component processing is now done before condition/repeat processing
// 4. component processing is not recursive anymore
// 5. components typically use the "main" answer file
// We only support components in the MainDocumentPart,
// since MergeDocx can concatenate content in that part,
// but not in other parts.
// TODO: make this step optional
// The package handed to the constructor; fetchComponents() returns it as-is
// whenever there is nothing to process.
private WordprocessingMLPackage srcPackage;
// Reset to false at the start of fetchComponents() and tested after the traversal.
// NOTE(review): presumably set to true by FindComponentsTraversor when it finds
// a component; the assignment is not visible in this part of the file - confirm.
protected boolean justGotAComponent = false;
// Content of the Components part, loaded by the constructor when that part exists.
private org.opendope.components.Components components;
// Entries of the XPaths part keyed by their id. Remains null when the package
// has no XPaths part, which fetchComponents() treats as "nothing to do".
private Map xpathsMap = null;
// Entries of the Conditions part keyed by their id. Remains null when the package
// has no Conditions part (or no XPaths part, since the constructor returns early).
private Map conditionsMap = null;
// Class-wide (static) DocxFetcher, exposed through getDocxFetcher()/setDocxFetcher().
private static DocxFetcher docxFetcher;
/**
 * Returns the DocxFetcher currently registered on this class.
 *
 * @return the class-wide fetcher, or null if none has been set
 */
public static DocxFetcher getDocxFetcher() {
    return OpenDoPEHandlerComponents.docxFetcher;
}
/**
 * Registers the DocxFetcher to be used class-wide. The value is stored in a
 * static field, so it is shared by every instance of this handler.
 *
 * @param docxFetcher the fetcher to store; no validation is performed
 */
public static void setDocxFetcher(DocxFetcher docxFetcher) {
OpenDoPEHandlerComponents.docxFetcher = docxFetcher;
}
public OpenDoPEHandlerComponents(WordprocessingMLPackage wordMLPackage)
throws Docx4JException {
this.srcPackage = wordMLPackage;
if (wordMLPackage.getMainDocumentPart().getXPathsPart() == null) {
log.info("OpenDoPE XPaths part missing (ok if you are just processing w15 repeatingSection)");
return;
} else {
org.opendope.xpaths.Xpaths xPaths = wordMLPackage.getMainDocumentPart().getXPathsPart().getJaxbElement();
if(log.isDebugEnabled()) {
log.debug(XmlUtils.marshaltoString(xPaths, true, true));
}
xpathsMap = new HashMap(2*xPaths.getXpath().size());
for (Xpaths.Xpath xp : xPaths.getXpath() ) {
if (xpathsMap.put(xp.getId(), xp)!=null) {
log.error("Duplicates in XPaths part: " + xp.getId());
}
// TODO key should include storeItemID?
}
}
if (wordMLPackage.getMainDocumentPart().getConditionsPart() != null) {
org.opendope.conditions.Conditions conditions = wordMLPackage.getMainDocumentPart()
.getConditionsPart().getJaxbElement();
if(log.isDebugEnabled()) {
log.debug(XmlUtils.marshaltoString(conditions, true, true));
}
conditionsMap = new HashMap(2*conditions.getCondition().size());
for (Condition c : conditions.getCondition()) {
if (conditionsMap.put(c.getId(), c)!=null) {
log.error("Duplicates in Conditions part: " + c.getId());
}
}
}
if (wordMLPackage.getMainDocumentPart().getComponentsPart() != null) {
components = wordMLPackage.getMainDocumentPart()
.getComponentsPart().getJaxbElement();
if(log.isDebugEnabled()) {
log.debug(XmlUtils.marshaltoString(components, true, true));
}
}
}
/**
 * XPath contexts for altChunks; passed as the last argument to
 * ProcessAltChunk.process by fetchComponents().
 *
 * NOTE(review): per the original note ("altChunkRel.getId(), xpathId") the key
 * is presumably the altChunk relationship id and the value the id of the XPath
 * providing its context. It is populated outside the visible part of this
 * file - confirm.
 */
Map altChunkXPathContexts = new HashMap();
// We'd need PartName to map to that, if we were to support parts other than MDP!
/**
 * Component processing.
 *
 * Walks the main document part looking for components. If any were found,
 * the resulting altChunks are handed (via reflection) to the MergeDocx
 * extension's {@code com.plutext.merge.altchunk.ProcessAltChunk.process}.
 *
 * The source package is returned unchanged when the package has no XPaths
 * part, when no component was found, or when the MergeDocx extension (or a
 * suitable 5-argument {@code process} method) is not on the classpath.
 *
 * @return the package produced by the MergeDocx extension, or the source
 *         package in the cases described above
 * @throws Docx4JException if the answer data part is missing or of an
 *         unsupported type, or if altChunk processing fails
 */
public WordprocessingMLPackage fetchComponents()
        throws Docx4JException {

    // The constructor leaves xpathsMap null when there is no XPaths part.
    if (xpathsMap == null) {
        return srcPackage;
    }

    justGotAComponent = false;

    // Convert any component sdt to altChunk, and for MergeDocx users, to
    // real WordML.
    ContentAccessor part = srcPackage.getMainDocumentPart();

    FindComponentsTraversor t = new FindComponentsTraversor();
    t.wordMLPackage = srcPackage;

    if (part instanceof MainDocumentPart /* which it will be in this release */) {
        // avoid invoking walkJAXBElements on a list,
        // (docx4j 6.1.0 doesn't know how to replace children there,
        // which it needs to do if the component is a child of w:body)
        Body b = ((MainDocumentPart) part).getJaxbElement().getBody();
        t.walkJAXBElements(b);
    } else {
        t.walkJAXBElements(part.getContent());
    }

    if (!justGotAComponent) {
        return srcPackage;
    }

    // Answer data, keyed by the QName of its document element.
    Map<QName, CustomXmlPart> answerDomDocs = new HashMap<>();
    CustomXmlPart data = CustomXmlDataStoragePartSelector.getCustomXmlDataStoragePart(srcPackage);
    if (data == null) {
        // Guard in case the selector returns null: report it as a
        // Docx4JException rather than failing with a NullPointerException below.
        throw new Docx4JException("No custom XML data storage part found; cannot process components");
    }
    if (!(data instanceof CustomXmlDataStoragePart)) {
        throw new Docx4JException("TODO: handle " + data.getClass().getName());
    }
    Document doc = ((CustomXmlDataStoragePart) data).getData().getDocument();
    answerDomDocs.put(getQName(doc.getDocumentElement()), data);

    // process altChunk
    try {
        // Use reflection, so docx4j can be built
        // by users who don't have the Enterprise MergeDocx utility
        Class<?> documentBuilder = Class
                .forName("com.plutext.merge.altchunk.ProcessAltChunk");

        // Pick the public "process" method taking 5 parameters; if there
        // are several, the last one reported by getMethods() is used.
        Method processMethod = null;
        for (Method candidate : documentBuilder.getMethods()) {
            if (candidate.getName().equals("process")
                    && candidate.getParameterCount() == 5) {
                processMethod = candidate;
            }
        }
        if (processMethod == null) {
            throw new NoSuchMethodException(
                    "com.plutext.merge.altchunk.ProcessAltChunk.process with 5 parameters");
        }

        return (WordprocessingMLPackage) processMethod.invoke(null,
                srcPackage,
                answerDomDocs,
                xpathsMap,
                conditionsMap,
                altChunkXPathContexts);

    } catch (ClassNotFoundException | NoSuchMethodException e) {
        // Degrade gracefully: the extension is optional.
        extensionMissing(e);
        return srcPackage;
    } catch (Exception e) {
        throw new Docx4JException("Problem processing w:altChunk", e);
    }
}
/**
 * Builds the qualified name of a DOM element from its namespace URI and
 * local name.
 *
 * @param el the element to name
 * @return a QName made of the element's namespace URI and local name
 */
private QName getQName(Element el) {
    QName qname = new QName(el.getNamespaceURI(), el.getLocalName());
    // Library code should not write to stdout; report through the class logger.
    log.debug("{}", qname);
    return qname;
}
// public void makeContinuous(SectPr sectPr) {
//
// if (sectPr == null) {
// log.warn("sectPr was null");
// return;
// }
//
// SectPr.Type type = Context.getWmlObjectFactory().createSectPrType();
// type.setVal("continuous");
// sectPr.setType(type);
//
// // columns, endnotes, footnotes, formprot, line numbers are OK
//
// // null out certain page level section properties
// sectPr.setBidi(null);
// sectPr.setDocGrid(null);
// sectPr.setPaperSrc(null);
// sectPr.setPgBorders(null);
// sectPr.setPgMar(null);
// sectPr.setPgNumType(null);
// sectPr.setPgSz(null);
// sectPr.setPrinterSettings(null);
// sectPr.setSectPrChange(null);
// sectPr.setTitlePg(null);
// sectPr.setVAlign(null);
// }
/**
 * Logs, at error level, the exception that revealed the missing MergeDocx
 * extension, followed by a notice explaining what the extension is for.
 *
 * @param e the exception raised while looking up the extension
 */
public void extensionMissing(Exception e) {
    String cause = e.getClass().getName() + ": " + e.getMessage();
    String[] notice = {
        "\n" + cause + "\n",
        "* You don't appear to have the MergeDocx paid extension,",
        "* which is necessary to merge docx, or process altChunk.",
        "* Purchases of this extension support the docx4j project.",
        "* Please visit www.plutext.com if you want to buy it."
    };
    // One log call per line, in order.
    for (String line : notice) {
        log.error(line);
    }
}
/**
 * Finds a part name that has no relationship in the given relationships part.
 *
 * The first candidate is {@code prefix + suffix}; if that is taken, a counter
 * starting at 2 is inserted between prefix and suffix until a free name is found.
 *
 * @param prefix leading portion of the part name
 * @param suffix trailing portion of the part name
 * @param rp     relationships part consulted to see whether a name is in use
 * @return the first candidate for which {@code rp.getRel(...)} returns null
 * @throws InvalidFormatException if a candidate is not a valid part name
 */
private PartName getNewPartName(String prefix, String suffix,
        RelationshipsPart rp) throws InvalidFormatException {

    PartName candidate = new PartName(prefix + suffix);
    for (int counter = 2; rp.getRel(candidate) != null; counter++) {
        candidate = new PartName(prefix + counter + suffix);
    }
    return candidate;
}
/**
* This traversor finds components, taking note of their XPath context.
* For now, that's their immediate repeat ancestor.
*/
private class FindComponentsTraversor extends CallbackImpl {
WordprocessingMLPackage wordMLPackage;
private LinkedList repeatContext = new LinkedList();
@Override
public List