org.docx4j.model.datastorage.RemovalHandler Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of docx4j-core Show documentation
docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.
There is a newer version: 11.4.11
Show newest version
/*
 * Copyright 2011 Plutext Pty Ltd and individual contributors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.docx4j.model.datastorage;

import static org.docx4j.XmlUtils.marshaltoW3CDomDocument;
import static org.docx4j.XmlUtils.prepareJAXBResult;
import static org.docx4j.XmlUtils.transform;
import static org.docx4j.model.datastorage.RemovalHandler.Quantifier.ALL;
import static org.docx4j.model.datastorage.RemovalHandler.Quantifier.ALL_BUT_PLACEHOLDERS;
import static org.docx4j.model.datastorage.RemovalHandler.Quantifier.ALL_BUT_PLACEHOLDERS_CONTENT;
import static org.docx4j.model.datastorage.RemovalHandler.Quantifier.NAMED;

import java.util.HashMap;
import java.util.Map;

import javax.xml.bind.JAXBException;
import javax.xml.bind.util.JAXBResult;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.lang3.ArrayUtils;
import org.docx4j.Docx4jProperties;
import org.docx4j.XmlUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.JaxbXmlPart;
import org.docx4j.openpackaging.parts.WordprocessingML.FooterPart;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.utils.ResourceUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

/**
 * Tool to remove content controls (Structured Document Tags) from an OpenXML document part.
 * 
 * 
 * This tool removes SDTs tagged with a certain quantifier from the document part.
 * 
 * 
 * Restrictions:
 * 
 * 
 * 
 * As it does text processing and no real namespace qualification exist, the quantifiers must be prefixed exactly be
 * "od", not any other prefix.
 * 
 * In case of qualified removal (in effect not {@link Quantifier#ALL}), bindings containing more than one qualifier are
 * not supported, that is, when you tag od:repeat=/this&od:xpath=/that, the SDT is removed whenever you
 * specify to remove either repeat or bind tags. (multiple qualifiers are not recommended in any case!)
 * 
 *
 * @author Karsten Tinnefeld
 * @version $Revision: $ $Date: $
 */
public class RemovalHandler {

		private static Logger log = LoggerFactory.getLogger(RemovalHandler.class);		
	
    	static Templates removalTemplate;			

        /**
         * Initializes the removal handler.
         *
         * This tool is thread safe and should be reused, as initialization is
         * relatively expensive.
         */
        public RemovalHandler() {

    		try {
    			final Source xsltSource = new StreamSource(
    						ResourceUtils.getResourceViaProperty(
    								"docx4j.model.datastorage.RemovalHandler.xslt",
    								"org/docx4j/model/datastorage/RemovalHandler.xslt"));
    			removalTemplate = XmlUtils.getTransformerTemplate(xsltSource);

            } catch (Exception e) {
                    throw new IllegalStateException(
                                    "Error instantiating SDT removal stylesheet", e);
            }
        }

        
        /**
         * Removes Structured Document Tags from the main document part, headers, and footer, 
         * preserving their contents.
         *
         * @param wordMLPackage
         *            The docx package to modify (in situ).
         *            
         * @throws Docx4JException
         *             In case any transformation error occurs.
         * @since 6.1.0
         */
        public void removeSDTs(WordprocessingMLPackage wordMLPackage) throws Docx4JException {

        	removeSDTs(wordMLPackage, getQuantifier() , (String[])null);
        }
        
        /**
         * Removes Structured Document Tags from the main document part, headers, and footer, 
         * preserving their contents.
         *
         * In case key "empty" is specified, value bindings (xpath) are removed only
         * if they have void contents (e.g. the XML points nowhere).
         *
         * @param wordMLPackage
         *            The docx package to modify (in situ).
         * @param quantifier
         *            The quantifier regarding which kinds of parts are to be
         *            removed.
         * @param keys
         *            In case of {@link Quantifier#NAMED}, quantifier names. All
         *            strings except "xpath", "condition", "repeat", "empty" are
         *            ignored.
         * @throws Docx4JException
         *             In case any transformation error occurs.
         */
        public void removeSDTs(WordprocessingMLPackage wordMLPackage,
                        final Quantifier quantifier, final String... keys) throws Docx4JException {

                // A component can apply in both the main document part,
                // and in headers/footers. See further
                // http://forums.opendope.org/Support-components-in-headers-footers-tp2964174p2964174.html


                removeSDTs(wordMLPackage.getMainDocumentPart(), quantifier, keys);

                // Remove from headers/footers
                RelationshipsPart rp = wordMLPackage.getMainDocumentPart()
                                .getRelationshipsPart();
                for (Relationship r : rp.getRelationships().getRelationship()) {

                        if (r.getType().equals(Namespaces.HEADER)) {
                                removeSDTs((HeaderPart) rp.getPart(r), quantifier, keys);
                        } else if (r.getType().equals(Namespaces.FOOTER)) {
                                removeSDTs((FooterPart) rp.getPart(r), quantifier, keys);
                        }
                }
        }

        
        /**
         * Removes Structured Document Tags from a document part, preserving their
         * contents.
         *
         * In case key "empty" is specified, value bindings (xpath) are removed only
         * if they have void contents (e.g. the XML points nowhere).
         *
         * @param part
         *            The document part to modify (in situ).
         * @param quantifier
         *            The quantifier regarding which kinds of parts are to be
         *            removed.
         * @param keys
         *            In case of {@link Quantifier#NAMED}, quantifier names. All
         *            strings except "xpath", "condition", "repeat", "empty" are
         *            ignored.
         * @throws Docx4JException
         *             In case any transformation error occurs.
         */
        public void removeSDTs(final JaxbXmlPart part)
                        throws Docx4JException {

        	removeSDTs(part, getQuantifier() , (String[])null);
        }
        
        /**
         * Removes Structured Document Tags from a document part, preserving their
         * contents.
         *
         * In case key "empty" is specified, value bindings (xpath) are removed only
         * if they have void contents (e.g. the XML points nowhere).
         *
         * @param part
         *            The document part to modify (in situ).
         * @param quantifier
         *            The quantifier regarding which kinds of parts are to be
         *            removed.
         * @param keys
         *            In case of {@link Quantifier#NAMED}, quantifier names. All
         *            strings except "xpath", "condition", "repeat", "empty" are
         *            ignored.
         * @throws Docx4JException
         *             In case any transformation error occurs.
         */
        public void removeSDTs(final JaxbXmlPart part,
                        final Quantifier quantifier, final String... keys)
                        throws Docx4JException {

                final Map parameters = new HashMap();
                parameters.put("all", quantifier == ALL);
                parameters.put("all_but_placeholders", quantifier == ALL_BUT_PLACEHOLDERS);
                parameters.put("all_but_placeholders_content", quantifier == ALL_BUT_PLACEHOLDERS_CONTENT);
                if (quantifier == NAMED)
                        parameters.put("types", ArrayUtils.toString(keys));

                final Document partDOM = marshaltoW3CDomDocument(part.getJaxbElement());
                final JAXBResult result = prepareJAXBResult(Context.jc);

                transform(partDOM, removalTemplate, parameters, result);

                try {
                        part.setJaxbElement(result);

                } catch (JAXBException e) {
                        throw new Docx4JException(
                                        "Error unmarshalling document part for SDT removal", e);
                }
        }
        
        private Quantifier defaultQuantifier = null;
        private Quantifier getQuantifier() {
        	
        	if (defaultQuantifier!=null) return defaultQuantifier;
        	
        	String q = Docx4jProperties.getProperty("docx4j.model.datastorage.RemovalHandler.Quantifier", "ALL");
        	
        	if (q.equals("ALL")) {
        		defaultQuantifier = Quantifier.ALL;
        	} else if (q.equals("ALL_BUT_PLACEHOLDERS")) {
        		defaultQuantifier = Quantifier.ALL_BUT_PLACEHOLDERS;
        	} else if (q.equals("ALL_BUT_PLACEHOLDERS_CONTENT")) {
        		defaultQuantifier = Quantifier.ALL_BUT_PLACEHOLDERS_CONTENT;
        	} else if (q.equals("DEFAULT")) {
        		defaultQuantifier = Quantifier.DEFAULT;
        	} else if (q.equals("NAMED")) {
        		defaultQuantifier = Quantifier.NAMED;
        	} else {
        		log.warn("Unknown Quantifier property value: " + q);
        		defaultQuantifier = Quantifier.ALL;        		
        	}
        	
        	return defaultQuantifier;
        }

        /**
         * A quantifier specifying kinds of SDTs.
         */
        public static enum Quantifier {

                /**
                 * Every SDT shall be removed.  From 3.3.0, this really means all SDTs in the main document part.
                 * If an SDT does not contain any 'real' content, then remove that XML as well.
                 */
                ALL,

                /**
                 * Ordinarily, if an SDT contains XML but no real content, 
                 * that XML is also removed.  
                 * Choose this option if you want to keep placeholder XML (but remove the SDT).
                 * Currently, to be identified as a placeholder, it must use rStyle 'PlaceholderText'.
                 * @since 6.1.1 
                 */
                ALL_BUT_PLACEHOLDERS_CONTENT,
                
                /**
                 * Ordinarily, if an SDT contains XML but no real content, 
                 * that XML is also removed.  
                 * Choose this option if you want to keep placeholder XML (keeping the SDT as well).
                 * Currently, to be identified as a placeholder, it must use rStyle 'PlaceholderText'.
                 * @since 6.1.0 
                 */
                ALL_BUT_PLACEHOLDERS,
                
                /**
                 * The default SDTs shall be removed, that is, condition and repeat.
                 * 
                 * (If you want to remove xpaths, either use Quantifier.ALL, or pass key "xpath")
                 */
                DEFAULT,
                
                /**
                 * Named SDTs shall be removed, the names given separately.
                 */
                NAMED;                
        }
}