/*
 * org.docx4j.model.fields.FieldsPreprocessor Maven / Gradle / Ivy
 *
 * Show more of this group Show more artifacts with this name
 * Show all versions of docx4j-core Show documentation
 * docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.
 * There is a newer version: 11.4.11
 * Show newest version
 */
package org.docx4j.model.fields;

import org.docx4j.XmlUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.Docx4JRuntimeException;
import org.docx4j.openpackaging.parts.JaxbXmlPart;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.FldChar;
import org.docx4j.wml.P;
import org.docx4j.wml.ProofErr;
import org.docx4j.wml.R;
import org.docx4j.wml.STFldCharType;
import org.docx4j.wml.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.namespace.QName;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.stream.StreamSource;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * This class puts fields into a "canonical" representation
 * (see FieldRef for description).
 * 
 * It does this in 2 steps:
 * - step 1: use XSLT to convert simple fields into complex ones
 * - step 2: put all the instructions into a single run
 * 
 * Currently the canonicalisation is done at the paragraph level,
 * so it is not suitable for fields (such as TOC) which extend across paragraphs.
 * TOC will need to be regenerated (using Word) if touched by canonicalisation.
 * 
 * @author jharrop
 *
 */
public class FieldsPreprocessor {
	
	// SLF4J logger for this class.
	private static Logger log = LoggerFactory.getLogger(FieldsPreprocessor.class);		

    // Qualified name of the "instrText" element in the WordprocessingML main namespace.
    private final static QName _RInstrText_QNAME = new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", 
    		"instrText");
    // Qualified name of the "hyperlink" element in the WordprocessingML main namespace.
    private final static QName _PHyperlink_QNAME = new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", 
    		"hyperlink");
    
	
	/**
	 * Compiled form of FieldsSimpleToComplex.xslt, loaded once when this
	 * class is initialised.  Remains null if the stylesheet resource
	 * could not be read or compiled; the failure is logged rather than
	 * aborting class initialisation.
	 */
	static Templates xslt;			
	static {
		try {
			Source xsltSource = new StreamSource(
						org.docx4j.utils.ResourceUtils.getResource(
								"org/docx4j/model/fields/FieldsSimpleToComplex.xslt"));
			xslt = XmlUtils.getTransformerTemplate(xsltSource);
		} catch (IOException e) {
			// Report through the class logger (not stderr) so the failure
			// shows up wherever the application's logging is directed.
			log.error("Couldn't read org/docx4j/model/fields/FieldsSimpleToComplex.xslt", e);
		} catch (TransformerConfigurationException e) {
			log.error("Couldn't compile org/docx4j/model/fields/FieldsSimpleToComplex.xslt", e);
		}
		
	}
	
	/**
	 * Creates a preprocessor which keeps a reference to the supplied
	 * list (it is stored as-is, not copied).
	 *
	 * @param outermostFieldRefs the list assigned to the fieldRefs field
	 */
	private FieldsPreprocessor(List outermostFieldRefs) {
		fieldRefs = outermostFieldRefs;
	}

	/**
	 * Convert any w:fldSimple in this part to complex field.
	 *
	 * The part's JAXB content is marshalled to a DOM document, run through
	 * the FieldsSimpleToComplex stylesheet, and the unmarshalled result is
	 * set back on the part.
	 *
	 * @param part the part whose content is to be transformed; its JAXB
	 *             element is replaced by the transformation result
	 * @throws Docx4JException if the stylesheet is not available, or if
	 *             the transformation or the replacement of the part's
	 *             content fails
	 */
	public static void complexifyFields(JaxbXmlPart part) throws Docx4JException {

		if (xslt == null) {
			// The static initialiser does not abort class loading when the
			// stylesheet cannot be loaded, so xslt may be null here.  Fail
			// with an explicit message instead of an obscure error from
			// inside the transformation.
			throw new Docx4JException("Problems transforming fields",
					new IllegalStateException(
							"org/docx4j/model/fields/FieldsSimpleToComplex.xslt was not loaded"));
		}

		org.w3c.dom.Document doc = XmlUtils.marshaltoW3CDomDocument(
				part.getJaxbElement() );

		try {
			// Use constructor which takes Unmarshaller, rather than JAXBContext,
			// so we can set JaxbValidationEventHandler
			Unmarshaller u = Context.jc.createUnmarshaller();
			u.setEventHandler(new org.docx4j.jaxb.JaxbValidationEventHandler());
			javax.xml.bind.util.JAXBResult result = new javax.xml.bind.util.JAXBResult(u );

			org.docx4j.XmlUtils.transform(doc, xslt, null, result);

			part.setJaxbElement(result);
		} catch (Exception e) {
			// Wrap every failure so callers only need to handle Docx4JException.
			throw new Docx4JException("Problems transforming fields", e);
		}

	}
	
	
	/**
	 * Puts the field(s) in the supplied paragraph into a predictable
	 * ("canonical") form, adding a FieldRef object to the given list
	 * for each top level field encountered.
	 * 
	 * WARNING: do not use this method where a field in the P
	 * continues into a following P.
	 * 
	 * @param p the paragraph to canonicalise
	 * @param fieldRefs list to which a FieldRef is added per top level field
	 * @return the canonicalised paragraph (a newly created P)
	 */
	public static P canonicalise(P p, List fieldRefs) {
		/*
		 * The outcome looks something like this (illustrative sketch):
		 *
		 *   <w:p>
		 *       <w:r>
		 *           <w:fldChar w:fldCharType="begin"/>
		 *           <w:instrText xml:space="preserve"> DATE  </w:instrText>
		 *           <w:fldChar w:fldCharType="separate"/>
		 *       </w:r>
		 *       <w:r>
		 *           <w:t>4/12/2011</w:t>
		 *       </w:r>
		 *       <w:r>
		 *           <w:fldChar w:fldCharType="end"/>
		 *       </w:r>
		 *   </w:p>
		 *
		 * The content between begin and separate may be more complex
		 * than shown, and can include nested fields.
		 */
		return new FieldsPreprocessor(fieldRefs).canonicaliseInstance(p);
	}
	
	/**
	 * Instance-level worker behind the static canonicalise method.
	 *
	 * Creates a fresh paragraph which shares the paragraph properties
	 * object of the input, resets the per-call working state (the
	 * working run and the stack), and then delegates to handleContent
	 * to process the input paragraph's content.
	 *
	 * @param p the paragraph to process
	 * @return the newly created paragraph
	 */
	private P canonicaliseInstance(P p) {

		final P canonicalP = Context.getWmlObjectFactory().createP();
		canonicalP.setPPr(p.getPPr());

		// Start every call with a fresh working run and an empty stack.
		this.newR = Context.getWmlObjectFactory().createR();
		this.stack = new LinkedList();

		// Marshalling is only worth doing when it will actually be logged.
		if (log.isDebugEnabled()) {
			log.debug(XmlUtils.marshaltoString(p));
		}

		handleContent(p.getContent(), canonicalP);
		return canonicalP;
	}
	
	/**
	 * A list of FieldRef objects representing outermost fields
	 * only.
	 * 
	 * This is the list handed to the constructor; it is stored as-is,
	 * not copied.
	 */
	private List fieldRefs;
	
	
	// Re-created as an empty LinkedList at the start of canonicaliseInstance.
	// NOTE(review): presumably tracks nested fields during processing - confirm in handleContent.
	private LinkedList stack;
	// Initially null.
	// NOTE(review): presumably the field currently being processed - confirm in handleContent.
	private FieldRef currentField=null;
	
	// Working run; replaced with a freshly created R at the start of canonicaliseInstance.
	private R newR;
	
	private void handleContent(List