All downloads are free. The search and download features use the official Maven repository.

org.docx4j.convert.out.html.ListsToContentControls Maven / Gradle / Ivy

Go to download

docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.

There is a newer version: 11.4.11
Show newest version
package org.docx4j.convert.out.html;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import javax.xml.bind.JAXBElement;

import org.docx4j.TraversalUtil;
import org.docx4j.finders.SdtFinder;
import org.docx4j.finders.TcFinder;
import org.docx4j.model.PropertyResolver;
import org.docx4j.model.listnumbering.AbstractListNumberingDefinition;
import org.docx4j.model.listnumbering.ListLevel;
import org.docx4j.model.listnumbering.ListNumberingDefinition;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart;
import org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart;
import org.docx4j.wml.P;
import org.docx4j.wml.PPr;
import org.docx4j.wml.PPrBase.NumPr;
import org.docx4j.wml.SdtBlock;
import org.docx4j.wml.SdtContentBlock;
import org.docx4j.wml.SdtElement;
import org.docx4j.wml.SdtPr;
import org.docx4j.wml.Tag;
import org.docx4j.wml.Tbl;
import org.docx4j.wml.Tc;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Create list items in OL or UL (as appropriate).
 * 
 * We can't just use a LinkedList (stack) of list contexts,
 * which we push and pop, since we have to write complete
 * XML elements (as opposed to opening and closing tags).
 * 
 * So this means either extending org.docx4j.model.structure.jaxb
 * beyond sections, or some other approach, like wrapping 
 * list items in a content control.  Let's try that.
 * 
 * That's like org.docx4j.convert.out.common.preprocess.Containerization
 * 
 * So we have a 2 step process:
 * 
 * 1.  insert the content controls
 * 
 * 2.  use an SdtWriter to turn these into UL or OL.
 * 
 * This class does step 1.  
 * 
 * Step 2 is implemented by SdtToListSdtTagHandler;  it will only be used if you invoke
 * SdtWriter.registerTagHandler("HTML_ELEMENT", new SdtToListSdtTagHandler())
 * 
 * @author jharrop
 *
 */
public class ListsToContentControls {
	
	// Left public and non-final so existing external references keep working.
	public static Logger log = LoggerFactory.getLogger(ListsToContentControls.class);		
	
	/** Tag value written on a content control which wraps an ordered (numbered) list. */
	private static final String TAG_ORDERED_LIST = "HTML_ELEMENT=OL";
	
	/** Tag value written on a content control which wraps an unordered (bulleted) list. */
	private static final String TAG_UNORDERED_LIST = "HTML_ELEMENT=UL";
	
	/**
	 * Captures the parts of the package this class reads: the main document part,
	 * its numbering definitions part (may be null), its style definitions part,
	 * and its property resolver.
	 * 
	 * @param wmlPackage the package whose main document part will be processed
	 */
	public ListsToContentControls(WordprocessingMLPackage wmlPackage) {
		this.wmlPackage = wmlPackage;
		mainDocument = wmlPackage.getMainDocumentPart();
		this.ndp=mainDocument.getNumberingDefinitionsPart();
		stylesPart = mainDocument.getStyleDefinitionsPart();
		
		propertyResolver = mainDocument.getPropertyResolver();
	}
	
	private WordprocessingMLPackage wmlPackage;
	private MainDocumentPart mainDocument;
	private NumberingDefinitionsPart ndp;
	private StyleDefinitionsPart stylesPart;
	
	private PropertyResolver propertyResolver;
	
	/**
	 * The currently open list levels, innermost on top.  Reset at the start
	 * of each {@link #groupContent(List)} call, and emptied whenever a
	 * non-list element is encountered.
	 */
	private LinkedList<ListSpec> listStack = null;

	/**
	 * One open list level: the numbering instance (numId), the level (ilvl),
	 * and the content control which collects the items at that level.
	 */
	public static class ListSpec {
		
		ListSpec(BigInteger numId, BigInteger ilvl) {
			this.numId = numId;
			this.ilvl = ilvl;
		}
		
		BigInteger ilvl;
		BigInteger numId;
		
		SdtBlock sdtList = null;
		
	}
	
	/**
	 * Wrap runs of list paragraphs in the package's main document part in
	 * content controls tagged HTML_ELEMENT=OL or HTML_ELEMENT=UL.  Does
	 * nothing (beyond logging) if the main document part has no numbering
	 * definitions part.
	 * 
	 * @param wmlPackage the package to modify in place
	 */
	public static void process(WordprocessingMLPackage wmlPackage) {
		//TODO: Convert to visitor behaviour here like TraversalUtil.visit with onlyBody = false
		
		ListsToContentControls lc = new ListsToContentControls(wmlPackage);
		
		if (lc.ndp==null) {
			log.info("No NumberingDefinitionsPart, skipping");
			return;
		}
		lc.process();
	}
	
	/**
	 * Groups list paragraphs in three passes: the contents of existing
	 * content controls, then the contents of table cells, then the
	 * body level content.
	 */
	private void process() {
		
		///////////////////////////////////////////////
		// First, contents of existing content controls
		// .. find the content controls
		SdtFinder sdtFinder = new SdtFinder();
		new TraversalUtil(mainDocument.getContent(), sdtFinder);
		
		// .. loop through them
		for (SdtElement sdtEl : sdtFinder.getSdtList()) {
			if (sdtEl.getSdtContent() == null) {
				// nothing to group in a content control without content
				continue;
			}
			regroup(sdtEl.getSdtContent().getContent());
		}
		
		///////////////////////////////////////////////
		// Second, contents of table cells
		TcFinder tcFinder = new TcFinder();
		tcFinder.setTraverseTables(true);
		new TraversalUtil(mainDocument.getContent(), tcFinder);
		for (Tc tc : tcFinder.tcList) {
			regroup(tc.getContent());
		}
		
		///////////////////////////////////////////////
		// Third, body level content
		regroup(mainDocument.getContent());
	}
	
	/**
	 * Replace the elements of the given (live) content list with their
	 * grouped equivalent.
	 * 
	 * @param content the content list to rewrite in place
	 */
	private void regroup(List<Object> content) {
		// Build the replacement fully before touching the live list,
		// since groupContent iterates over it.
		List<Object> groupedContent = groupContent(content);
		content.clear();
		content.addAll(groupedContent);
	}
	
	/** Forget all open list levels, so the next list item starts a new list. */
	private void closeAllLists() {
		listStack.clear();
	}
	
	/**
	 * Give the content control an SdtPr whose tag says whether it
	 * represents an OL or a UL.
	 * 
	 * @param sdtList the content control to tag
	 * @param numId the numbering instance; must not be null
	 * @param ilvl the list level this content control represents
	 */
	private void setTag(SdtBlock sdtList, BigInteger numId, BigInteger ilvl) {
		
		SdtPr sdtPr = new SdtPr();
		Tag tag = new Tag();
		sdtPr.setTag(tag);
		
		sdtList.setSdtPr(sdtPr);
		
		// Bullets = UL.  Anything else, including a level we can't resolve, is OL.
		tag.setVal(isBulletLevel(numId, ilvl) ? TAG_UNORDERED_LIST : TAG_ORDERED_LIST);
	}
	
	/**
	 * Work out whether the given level of the given numbering instance is
	 * a bullet level.
	 * 
	 * @return true only if the level could be resolved and reports itself
	 *         as a bullet; false (ie default to OL) otherwise
	 */
	private boolean isBulletLevel(BigInteger numId, BigInteger ilvl) {
		
		ListNumberingDefinition lnd = ndp.getInstanceListDefinitions().get(numId.toString());
		if (lnd==null) {
			// Default to OL
			log.warn("Couldn't find instance list for numId " + numId);
			return false;
		}
		
		AbstractListNumberingDefinition ald = lnd.getAbstractListDefinition();
		if (ald==null) {
			// Default to OL
			log.warn("Couldn't find abstract list for instance list " + numId);
			return false;
		}
		
		ListLevel level = ald.getListLevels().get(ilvl.toString());
		if (level==null) {
			// Default to OL
			log.warn("Couldn't find level {} in instance list {}", ilvl, numId);
			return false;
		}
		
		return level.IsBullet();
	}
	
	/**
	 * Open list levels fromLevel..toLevel (inclusive) of the given
	 * numbering instance.  Each new level's content control is nested in
	 * the level currently on top of the stack, or added to resultElts if
	 * no level is open.
	 * 
	 * @return the innermost open level after the additions (null if none)
	 */
	private ListSpec openLevels(List<Object> resultElts, BigInteger numId, int fromLevel, int toLevel) {
		
		for (int i=fromLevel; i<=toLevel; i++) {
			
			log.debug("adding level {}", i);
			
			ListSpec listSpec = new ListSpec(numId, BigInteger.valueOf(i));
			listSpec.sdtList = new SdtBlock();
			// Tag according to the level being created, which is not
			// necessarily the level of the paragraph which triggered it.
			setTag(listSpec.sdtList, numId, listSpec.ilvl);
			
			listSpec.sdtList.setSdtContent(new SdtContentBlock());
			
			ListSpec parent = listStack.peek();
			if (parent==null) {
				resultElts.add(listSpec.sdtList);
			} else {
				parent.sdtList.getSdtContent().getContent().add(listSpec.sdtList);
			}
			listStack.push(listSpec);
		}
		return listStack.peek();
	}
	
	/**
	 * Produce a copy of the given content in which each run of
	 * consecutive list paragraphs is wrapped in (nested) content controls,
	 * one per list level.  The input list is not modified.
	 * 
	 * @param bodyElts block level content, possibly wrapped in JAXBElement
	 * @return a new list containing the grouped content; never null
	 */
	private List<Object> groupContent(List<Object> bodyElts) {
		
		// Reset state
		listStack = new LinkedList<ListSpec>();
		
		List<Object> resultElts = new ArrayList<Object>();
		
		for (Object o : bodyElts) {
			
			Object unwrapped = (o instanceof JAXBElement) ? ((JAXBElement<?>)o).getValue() : o;

			/*
			 * We can nest lists, but any time a bare table 
			 * or paragraph etc is encountered (ie anything not a list item),
			 * we'll finish the lists.
			 */
					
			if (unwrapped instanceof P) {
				
				P paragraph = (P)unwrapped;				
				PPr ppr = propertyResolver.getEffectivePPr(paragraph.getPPr());
				
				NumPr numPr = (ppr==null) ? null : ppr.getNumPr();
				
				if (numPr==null) {
					closeAllLists();
					resultElts.add(unwrapped);
					continue;
				}
				
				BigInteger numId = (numPr.getNumId()==null) ? null : numPr.getNumId().getVal();
				
				if (numId==null) {
					// Without a numId we can't tell which list this belongs to,
					// so treat it as a bare paragraph.
					log.error("TODO: encountered null numId!");
					closeAllLists();
					resultElts.add(unwrapped);
					continue;	
				}
				
				/* It is numbered.
				 * 
				 * Cases:
				 * 
				 * - no current list
				 * 
				 * - same list, same level
				 * 
				 * - same list, different level
				 * 
				 * - different list
				 * 
				 * 
				 * If a list item uses the same list but is a different
				 * level, we'll push/pop levels as appropriate.
				 * 
				 * This implies that when we start, we'll push levels
				 * to get to the right starting level.
				 * 
				 * If it's a different list, we'll pop all levels, and
				 * start again.
				 * 
				 * TODO: consider what styling to attach to the OL|UL.
				 * We should match the ImportXHTML behaviour.
				 * 
				 */
				
				BigInteger ilvl = (numPr.getIlvl()==null) ? null : numPr.getIlvl().getVal();
				if (ilvl==null || ilvl.signum()<0) {
					// absent (or nonsensical) level means the top level
					ilvl = BigInteger.ZERO;
				}
				log.debug("ilvl: {}", ilvl.intValue());
				
				ListSpec listSpec = listStack.peek();
				if (listSpec==null || !numId.equals(listSpec.numId)) {
					// new or different list
					log.debug("NEW LIST");
					
					// if it's a different list, pop all levels (no-op if none are open)
					closeAllLists();						
					
					// add appropriate levels
					listSpec = openLevels(resultElts, numId, 0, ilvl.intValue());
					
				} else {
					// same list
					log.debug("listSpec.ilvl.intValue():{}", listSpec.ilvl.intValue());
					
					int comparison = ilvl.compareTo(listSpec.ilvl);
					if (comparison==0) {
						// just add to it
						log.debug("same level");
					} else if (comparison>0) {
						// deeper, so add levels
						listSpec = openLevels(resultElts, numId, 
								listSpec.ilvl.intValue()+1, ilvl.intValue());
					} else {
						log.debug("must be pop...");
						// shallower, so pop levels.  The stack always holds
						// levels 0..current, so the target level is on it.
						while (listSpec!=null && listSpec.ilvl.compareTo(ilvl)>0) {
							log.debug("popping");
							listStack.pop();
							listSpec = listStack.peek();
							log.debug("popped!");
						}
					}
				} 
				
				listSpec.sdtList.getSdtContent().getContent().add(paragraph);
				
			} else if (unwrapped instanceof Tbl) {
				closeAllLists();
				resultElts.add(unwrapped);
				
			} else {
				log.warn("TODO: handle " + (unwrapped==null ? "null" : unwrapped.getClass().getName()));
				closeAllLists();
				resultElts.add(o);
			}
			
		}
		return resultElts;
	}	

}