All downloads are free. Search and download functionality uses the official Maven repository.

org.pptx4j.convert.in.xhtml.XHTMLtoPPTX Maven / Gradle / Ivy

There is a newer version: 11.4.8
Show newest version
/*
 *  This file is part of the docx4j-ImportXHTML library.
 *
 *  Copyright 2011-2013, Plutext Pty Ltd, and contributors.
 *  Portions contributed before 15 July 2013 formed part of docx4j 
 *  and were contributed under ASL v2 (a copy of which is incorporated
 *  herein by reference and applies to those portions). 
 *   
 *  This library as a whole is licensed under the GNU Lesser General 
 *  Public License as published by the Free Software Foundation; 
    version 2.1.
    
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library (see legals/LICENSE); if not, 
    see http://www.gnu.org/licenses/lgpl-2.1.html
    
 */
package org.pptx4j.convert.in.xhtml;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.bind.JAXBException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.docx4j.XmlUtils;
import org.docx4j.dml.CTGraphicalObjectFrameLocking;
import org.docx4j.dml.CTHyperlink;
import org.docx4j.dml.CTNonVisualDrawingProps;
import org.docx4j.dml.CTNonVisualGraphicFrameProperties;
import org.docx4j.dml.CTPoint2D;
import org.docx4j.dml.CTPositiveSize2D;
import org.docx4j.dml.CTRegularTextRun;
import org.docx4j.dml.CTTable;
import org.docx4j.dml.CTTableCell;
import org.docx4j.dml.CTTableCol;
import org.docx4j.dml.CTTableGrid;
import org.docx4j.dml.CTTableRow;
import org.docx4j.dml.CTTextCharacterProperties;
import org.docx4j.dml.CTTextLineBreak;
import org.docx4j.dml.CTTextParagraph;
import org.docx4j.dml.CTTransform2D;
import org.docx4j.dml.Graphic;
import org.docx4j.dml.GraphicData;
import org.docx4j.model.properties.Property;
import org.docx4j.model.properties.PropertyFactory;
import org.docx4j.model.properties.run.AbstractRunProperty;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.PresentationMLPackage;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.PresentationML.MainPresentationPart;
import org.docx4j.openpackaging.parts.PresentationML.SlideLayoutPart;
import org.docx4j.openpackaging.parts.PresentationML.SlidePart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.org.xhtmlrenderer.css.constants.CSSName;
import org.docx4j.org.xhtmlrenderer.css.constants.IdentValue;
import org.docx4j.org.xhtmlrenderer.css.style.CalculatedStyle;
import org.docx4j.org.xhtmlrenderer.css.style.DerivedValue;
import org.docx4j.org.xhtmlrenderer.css.style.FSDerivedValue;
import org.docx4j.org.xhtmlrenderer.newtable.TableBox;
import org.docx4j.org.xhtmlrenderer.newtable.TableCellBox;
import org.docx4j.org.xhtmlrenderer.newtable.TableRowBox;
import org.docx4j.org.xhtmlrenderer.newtable.TableSectionBox;
import org.docx4j.org.xhtmlrenderer.docx.DocxRenderer;
import org.docx4j.org.xhtmlrenderer.render.AnonymousBlockBox;
import org.docx4j.org.xhtmlrenderer.render.BlockBox;
import org.docx4j.org.xhtmlrenderer.render.Box;
import org.docx4j.org.xhtmlrenderer.render.InlineBox;
import org.docx4j.org.xhtmlrenderer.resource.XMLResource;
import org.docx4j.relationships.Relationship;
import org.pptx4j.pml.CTGraphicalObjectFrame;
import org.pptx4j.pml.CTGraphicalObjectFrameNonVisual;
import org.pptx4j.pml.Shape;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.css.CSSValue;
import org.xml.sax.InputSource;

public class XHTMLtoPPTX {
    
    // Shared factories used throughout this class to create DrawingML (a:*),
    // PresentationML (p:*) and package-relationship objects.
    private static final org.docx4j.dml.ObjectFactory DML_OBJECT_FACTORY = new org.docx4j.dml.ObjectFactory();
    private static final org.pptx4j.pml.ObjectFactory PML_OBJECT_FACTORY = org.pptx4j.jaxb.Context.getpmlObjectFactory();
    private static final org.docx4j.relationships.ObjectFactory RELATIONSHIPS_FACTORY = new org.docx4j.relationships.ObjectFactory();

    /**
     * XML template for the {@code p:sp} shape that holds converted paragraphs.
     * It is unmarshalled by {@code createParagraphShape}, which then appends
     * {@code a:p} elements to the shape's {@code p:txBody}.
     *
     * NOTE(review): in this copy of the file the markup had been stripped,
     * leaving whitespace-only literals that cannot be unmarshalled. The element
     * structure below is reconstructed from the surviving indentation and from
     * the requirement that the shape has a text body; confirm the attribute
     * values (id, name, placeholder type) against the upstream source.
     */
    private static final String PARAGRAPH_SHAPE =
                    "<p:sp xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\""
                    + " xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\""
                    + " xmlns:p=\"http://schemas.openxmlformats.org/presentationml/2006/main\">" +
                    "  <p:nvSpPr>" +
                    "    <p:cNvPr id=\"4\" name=\"Title 3\" />" +
                    "    <p:cNvSpPr>" +
                    "      <a:spLocks noGrp=\"1\" />" +
                    "    </p:cNvSpPr>" +
                    "    <p:nvPr>" +
                    "      <p:ph type=\"title\" />" +
                    "    </p:nvPr>" +
                    "  </p:nvSpPr>" +
                    "  <p:spPr />" +
                    "  <p:txBody>" +
                    "    <a:bodyPr />" +
                    "    <a:lstStyle />" +
                    "  </p:txBody>" +
                    "</p:sp>";

    private static final Logger LOG = LoggerFactory.getLogger(XHTMLtoPPTX.class);
        
    // Target package and the renderer whose box tree is traversed.
    private PresentationMLPackage presentationMLPackage;
    private DocxRenderer renderer;
    
    // Relationships part of the slide being populated; hyperlink relationships are added to it.
    private RelationshipsPart rp;
    // Looked up in the constructor; not read anywhere else in this class.
    private MainPresentationPart pp;
    private SlideLayoutPart layoutPart;
    
    /**
     * Creates an importer bound to a package, a target slide and a renderer.
     * Looks up the main presentation part and the first slide layout part by
     * their fixed part names, and keeps the slide's relationships part so
     * hyperlink relationships can be added to it later.
     *
     * @param pmlPackage package the slide belongs to
     * @param slidePart  slide whose relationships part receives new relationships
     * @param renderer   renderer holding the box tree to traverse
     * @throws Exception if a part name cannot be created
     */
    private XHTMLtoPPTX(PresentationMLPackage pmlPackage, SlidePart slidePart, DocxRenderer renderer) throws Exception{
        this.presentationMLPackage = pmlPackage;
        this.renderer = renderer;

        this.pp = (MainPresentationPart) this.presentationMLPackage.getParts().getParts()
                .get(new PartName("/ppt/presentation.xml"));
        this.layoutPart = (SlideLayoutPart) this.presentationMLPackage.getParts().getParts()
                .get(new PartName("/ppt/slideLayouts/slideLayout1.xml"));
        this.rp = slidePart.getRelationshipsPart();
    }

    /**
     * Converts the well formed XHTML contained in the string to a list of PML
     * objects: the XHTML is parsed and laid out, then the resulting box tree
     * is traversed.
     *
     * @param content               well formed XHTML to convert
     * @param baseUrl               base URL handed to the renderer together with the document
     * @param presentationMLPackage package that contains the slide
     * @param slidePart             slide whose relationships part receives hyperlink relationships
     * @return the objects produced while traversing the laid-out document
     * @throws Exception if the importer cannot be set up or the traversal fails
     */
    public static List convertSingleSlide(String content,  String baseUrl, 
    		PresentationMLPackage presentationMLPackage, SlidePart slidePart) throws Exception {
        XHTMLtoPPTX importer =
                new XHTMLtoPPTX(presentationMLPackage, slidePart, createRenderer(content, baseUrl));
        return importer.traverse();
    }

    /**
     * Parses the XHTML string into a DOM, hands it to a new renderer and runs
     * layout so that the box tree is available for traversal.
     *
     * @param content XHTML to parse
     * @param baseUrl base URL passed to the renderer with the document
     * @return the renderer after layout
     */
    private static DocxRenderer createRenderer(String content, String baseUrl) {
        DocxRenderer laidOut = new DocxRenderer();

        BufferedReader reader = new BufferedReader(new StringReader(content));
        Document parsed = XMLResource.load(new InputSource(reader)).getDocument();

        laidOut.setDocument(parsed, baseUrl);
        laidOut.layout();
        // Result is not used here; the call is kept exactly as in the original.
        laidOut.getRootBox().getLayer().getPages();

        return laidOut;
    }

    /**
     * Starts the traversal at the renderer's root box with fresh settings.
     *
     * @return the objects produced for the children of the root box
     */
    private List traverse() throws Docx4JException, FileNotFoundException, JAXBException {
        BlockBox root = renderer.getRootBox();
        TraversalSettings initialSettings = new TraversalSettings();
        return traverseChildren(root, initialSettings);
    }
    
    /**
     * Converts everything below a block box: first its child boxes, then its
     * inline content (if any). Inline results are therefore always appended
     * after the results of the child boxes.
     *
     * @param blockBox box whose children and inline content are converted
     * @param settings traversal state passed down to every child
     * @return flat list of all conversion results
     */
    private List traverseChildren(BlockBox blockBox, TraversalSettings settings) throws Docx4JException, JAXBException {

        List results = new ArrayList();

        for (Object child : blockBox.getChildren()) {
            Object outcome = traverseChild((Box) child, settings);
            results.addAll(traversalResultToList(outcome));
        }

        List inlineContent = blockBox.getInlineContent();
        if (inlineContent == null) {
            return results;
        }

        // TODO review: looks like inline content will get added at end
        for (Object inline : inlineContent) {
            Object outcome = tranverseInlineContent(inline, settings);
            results.addAll(traversalResultToList(outcome));
        }
        return results;
    }

    /**
     * Normalises a traversal result to a list: {@code null} becomes an empty
     * list, a list is returned as is (same instance), and any other object is
     * wrapped in a new single-element list.
     *
     * @param switchNode result of converting one box or inline item
     * @return the result as a list, never {@code null}
     */
    @SuppressWarnings("unchecked")
    private List traversalResultToList(Object switchNode) {
        if (switchNode instanceof List) {
            return (List) switchNode;
        }

        List wrapped = new ArrayList();
        if (switchNode != null) {
            wrapped.add(switchNode);
        }
        return wrapped;
    }
    
    /**
     * Dispatches a box to the handler for its type. The table checks come
     * before the generic {@link BlockBox} check, which is the fallback for
     * every other block box. Entering a table or a table cell clears the
     * current paragraph shape in the settings.
     *
     * @param box      box to convert
     * @param settings traversal state
     * @return the handler's result; an empty list for unsupported box types
     */
    private Object traverseChild(Box box, TraversalSettings settings) throws Docx4JException, JAXBException {
        LOG.info(box.getClass().getName());

        if (box instanceof TableBox) {
            settings.setParagraphShape(null);
            return processTable((TableBox) box, settings);
        }
        if (box instanceof TableSectionBox) {
            // no support for table section in pptx, skipping to children
            return traverseChildren((TableSectionBox) box, settings);
        }
        if (box instanceof TableRowBox) {
            return traverseTableRow((TableRowBox) box, settings);
        }
        if (box instanceof TableCellBox) {
            settings.setParagraphShape(null);
            return traverseTableCell((TableCellBox) box, settings);
        }
        if (box instanceof AnonymousBlockBox) {
            return processAnonymousBlockBox((AnonymousBlockBox) box, settings);
        }
        if (box instanceof BlockBox) {
            return traverseBlockBox((BlockBox) box, settings);
        }

        LOG.warn("TODO: " + box.getClass().getName() );
        return new ArrayList();
    }

    /**
     * Converts a table box into a graphic frame that wraps a DrawingML table.
     * Only {@link CTTableRow} results of the child traversal are kept; a
     * warning is logged when other results had to be dropped.
     *
     * @param tableBox table to convert
     * @param settings traversal state
     * @return the graphic frame containing the table
     */
    private Object processTable(TableBox tableBox, TraversalSettings settings) throws JAXBException, Docx4JException {
        CTTable table = DML_OBJECT_FACTORY.createCTTable();
        table.setTblGrid(createCtTableGrid(tableBox));

        CTGraphicalObjectFrame frame = createTableGraphicFrame();
        GraphicData frameData = frame.getGraphic().getGraphicData();
        frameData.getAny().add(DML_OBJECT_FACTORY.createTbl(table));

        // Convert the rows and attach those that came back as table rows.
        List converted = traverseChildren(tableBox, settings);
        List rows = filterList(converted, CTTableRow.class);
        table.getTr().addAll(rows);
        if (rows.size() != converted.size()) {
            LOG.warn("Some table data lost");
        }

        return frame;
    }
    
    /**
     * Builds the table grid with one column per effective column of the table
     * box. A column's width is the distance between two neighbouring column
     * positions multiplied by 10000.
     *
     * @param tableBox table whose column positions are read
     * @return the populated grid
     */
    private CTTableGrid createCtTableGrid(TableBox tableBox) {
        CTTableGrid grid = DML_OBJECT_FACTORY.createCTTableGrid();
        int[] positions = tableBox.getColumnPos();

        for (int left = 0; left + 1 <= tableBox.numEffCols(); left++) {
            int right = left + 1;
            CTTableCol column = DML_OBJECT_FACTORY.createCTTableCol();
            column.setW((positions[right] - positions[left]) * 10000);
            grid.getGridCol().add(column);
        }
        return grid;
    }

    /**
     * Creates the graphic frame that hosts a table: non-visual properties
     * (name "1", grouping locked), a transform with a fixed offset and extent,
     * and an empty graphic whose data is marked with the DrawingML table URI.
     * The caller adds the table itself to the graphic data.
     *
     * @return the prepared frame
     */
    private CTGraphicalObjectFrame createTableGraphicFrame() {
        // Non-visual properties.
        CTGraphicalObjectFrameLocking locks = DML_OBJECT_FACTORY.createCTGraphicalObjectFrameLocking();
        locks.setNoGrp(true);

        CTNonVisualGraphicFrameProperties frameProps = DML_OBJECT_FACTORY.createCTNonVisualGraphicFrameProperties();
        frameProps.setGraphicFrameLocks(locks);

        CTNonVisualDrawingProps drawingProps = DML_OBJECT_FACTORY.createCTNonVisualDrawingProps();
        drawingProps.setName("1");

        CTGraphicalObjectFrameNonVisual nonVisual = PML_OBJECT_FACTORY.createCTGraphicalObjectFrameNonVisual();
        nonVisual.setCNvPr(drawingProps);
        nonVisual.setCNvGraphicFramePr(frameProps);
        nonVisual.setNvPr(PML_OBJECT_FACTORY.createNvPr());

        // Fixed size and position of the frame.
        CTPositiveSize2D extent = DML_OBJECT_FACTORY.createCTPositiveSize2D();
        extent.setCx(6096000);
        extent.setCy(741680);

        CTPoint2D offset = DML_OBJECT_FACTORY.createCTPoint2D();
        offset.setX(1524000);
        offset.setY(1397000);

        CTTransform2D transform = DML_OBJECT_FACTORY.createCTTransform2D();
        transform.setExt(extent);
        transform.setOff(offset);

        // Graphic container for the table.
        GraphicData data = DML_OBJECT_FACTORY.createGraphicData();
        data.setUri("http://schemas.openxmlformats.org/drawingml/2006/table");

        Graphic tableGraphic = DML_OBJECT_FACTORY.createGraphic();
        tableGraphic.setGraphicData(data);

        CTGraphicalObjectFrame frame = PML_OBJECT_FACTORY.createCTGraphicalObjectFrame();
        frame.setNvGraphicFramePr(nonVisual);
        frame.setXfrm(transform);
        frame.setGraphic(tableGraphic);
        return frame;
    }

    /**
     * Converts a table row box into a DrawingML table row. The row height is
     * the box height multiplied by 10000. Only {@link CTTableCell} results of
     * the child traversal are kept; a warning is logged when other results
     * had to be dropped.
     *
     * @param tableRowBox row to convert
     * @param settings    traversal state
     * @return the populated table row
     */
    private CTTableRow traverseTableRow(TableRowBox tableRowBox, TraversalSettings settings) throws Docx4JException, JAXBException {
        CTTableRow row = DML_OBJECT_FACTORY.createCTTableRow();
        row.setH(tableRowBox.getHeight() * 10000);

        List converted = traverseChildren(tableRowBox, settings);
        List cells = filterList(converted, CTTableCell.class);
        row.getTc().addAll(cells);

        boolean somethingDropped = cells.size() != converted.size();
        if (somethingDropped) {
            LOG.warn("Some table row lost");
        }

        return row;
    }

    /**
     * Converts a table cell box into a DrawingML table cell. While the cell's
     * children are traversed the settings are flagged as "in table cell"; the
     * previous flag value is restored afterwards, even when the traversal
     * throws, so an enclosing cell keeps its state. Only
     * {@link CTTextParagraph} results are kept; a warning is logged when other
     * results had to be dropped.
     *
     * @param tableCellBox cell to convert
     * @param settings     traversal state
     * @return the table cell holding the converted paragraphs
     */
    private CTTableCell traverseTableCell(TableCellBox tableCellBox, TraversalSettings settings) throws JAXBException, Docx4JException {
        boolean wasInTableCell = settings.isInTableCell();
        settings.setInTableCell(true);
        List children;
        try {
            children = traverseChildren(tableCellBox, settings);
        } finally {
            // Restore rather than force false: this cell may be nested in another one.
            settings.setInTableCell(wasInTableCell);
        }

        List cellContent = filterList(children, CTTextParagraph.class);
        if(cellContent.size() != children.size()) {
            LOG.warn("Some table cell content lost");
        }

        return createTableCell(cellContent);
    }
    
    /**
     * Returns the elements of {@code input} that are instances of
     * {@code clazz}, in their original order. {@code null} elements are never
     * instances and are therefore dropped.
     *
     * @param input list to filter; not modified
     * @param clazz type of the elements to keep
     * @param <T>   element type of the result
     * @return a new list containing only the matching elements
     */
    private <T> List<T> filterList(List<?> input, Class<T> clazz) {
        List<T> result = new ArrayList<T>();
        for (Object o : input) {
            if (clazz.isInstance(o)) {
                // Class.cast is checked, so no unchecked-cast suppression is needed.
                result.add(clazz.cast(o));
            }
        }
        return result;
    }

    /**
     * Creates a table cell with a text body and adds the given paragraphs to
     * it. If no paragraph is supplied, a single empty one is added, because a
     * DrawingML text body must contain at least one {@code a:p}.
     *
     * NOTE(review): the markup of the template had been stripped from this
     * copy of the file (whitespace-only literals, which cannot be
     * unmarshalled); it is restored here from the surviving indentation and
     * from the {@code getTxBody()} call below. Confirm against upstream.
     *
     * @param children paragraphs to place in the cell
     * @return the populated table cell
     * @throws JAXBException if the cell template cannot be unmarshalled
     */
    private CTTableCell createTableCell(Collection children) throws JAXBException {
        String contents =
         "<a:tc xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">" +
         "  <a:txBody>" +
         "    <a:bodyPr />" +
         "    <a:lstStyle />" +
         "  </a:txBody>" +
         "</a:tc>";
        CTTableCell ctTableCell = (CTTableCell)XmlUtils.unmarshalString(contents, org.docx4j.jaxb.Context.jc, CTTableCell.class);
        ctTableCell.getTxBody().getP().addAll(children);
        if (ctTableCell.getTxBody().getP().isEmpty()) {
            ctTableCell.getTxBody().getP().add(DML_OBJECT_FACTORY.createCTTextParagraph());
        }
        return ctTableCell;
    }

    /**
     * Converts an anonymous block box. Inside a table cell the converted
     * children are returned unchanged. Otherwise the children are wrapped in a
     * paragraph, which starts a new paragraph shape or is appended to the
     * shape currently stored in the settings.
     *
     * Note: the shape from the settings is returned on every call, so the same
     * instance can be returned more than once during a traversal.
     *
     * @param anonymousBlockBox box to convert
     * @param settings          traversal state
     * @return the children (in a table cell) or the paragraph shape
     */
    private Object processAnonymousBlockBox(AnonymousBlockBox anonymousBlockBox, TraversalSettings settings) throws Docx4JException, JAXBException {
        List children = traverseChildren(anonymousBlockBox, settings);
        if (settings.isInTableCell()) {
            // No wrapping paragraph is built here; the children are handed back as they are.
            return children;
        }

        CTTextParagraph paragraph = createParagraph(children);
        Shape paragraphShape = settings.getParagraphShape();
        if (paragraphShape == null) {
            paragraphShape = createParagraphShape(paragraph);
            settings.setParagraphShape(paragraphShape);
        } else {
            // Add this a:p to existing p:txBody
            paragraphShape.getTxBody().getP().add(paragraph);
        }
        return paragraphShape;
    }

    /**
     * Converts a block box according to the element it was created for:
     * html, body and lists are descended into; paragraphs, headings and list
     * items become paragraphs. Boxes without an element and unsupported
     * elements yield an empty list.
     *
     * @param blockBox box to convert
     * @param settings traversal state
     * @return the conversion result for the box
     */
    private Object traverseBlockBox(BlockBox blockBox, TraversalSettings settings) throws Docx4JException, JAXBException {
        Element e = blockBox.getElement();
        if (e == null) {
            return new ArrayList();
        }

        if (isHtmlOrBody(e)) {
            return traverseChildren(blockBox, settings);
        }
        // TODO list numbering
        if (isParagraph(e) || isHeading(e) || isListItem(e)) {
            return processParagraph(blockBox, settings);
        }
        if (isList(e)) {
            return traverseChildren(blockBox, settings);
        }

        LOG.warn("TODO: " + e.getLocalName() );
        return new ArrayList();
    }
    
    /**
     * Converts one item of a block's inline content. Only {@link InlineBox}
     * items are handled; anything else is logged and skipped.
     *
     * @param o        inline content item
     * @param settings traversal state
     * @return the conversion result, or {@code null} for unsupported items
     */
    private Object tranverseInlineContent(Object o, TraversalSettings settings) {
        if (!(o instanceof InlineBox)) {
            LOG.debug("What to do with " + o.getClass().getName());
            return null;
        }
        return traverseInlineBoxContent((InlineBox) o, settings);
    }

    /**
     * Converts a paragraph-like block (paragraph, heading, list item). The
     * children are traversed with a copy of the settings in which the
     * "in table cell" flag is cleared; afterwards the CSS map of the original
     * settings is reset. Inside a table cell the bare paragraph is returned;
     * otherwise it starts a new paragraph shape or is appended to the shape
     * currently stored in the settings.
     *
     * Note: the shape from the settings is returned on every call, so the same
     * instance can be returned more than once during a traversal.
     *
     * @param blockBox box to convert
     * @param settings traversal state
     * @return the paragraph (in a table cell) or the paragraph shape
     */
    private Object processParagraph(BlockBox blockBox, TraversalSettings settings) throws JAXBException, Docx4JException {
        TraversalSettings childSettings = settings.clone();
        childSettings.setInTableCell(false);
        List runs = traverseChildren(blockBox, childSettings);
        settings.setCssMap(null);

        CTTextParagraph paragraph = createParagraph(runs);
        if (settings.isInTableCell()) {
            return paragraph;
        }

        Shape shape = settings.getParagraphShape();
        if (shape != null) {
            // Add this a:p to existing p:txBody
            shape.getTxBody().getP().add(paragraph);
            return shape;
        }

        shape = createParagraphShape(paragraph);
        settings.setParagraphShape(shape);
        return shape;
    }

    /**
     * Creates a text paragraph and adds all given objects to its run list.
     *
     * @param children objects to add; they are not checked or filtered here
     * @return the new paragraph
     */
    private CTTextParagraph createParagraph(List children) {
        CTTextParagraph result = DML_OBJECT_FACTORY.createCTTextParagraph();
        List runs = result.getEGTextRun();
        runs.addAll(children);
        return result;
    }

    /**
     * Unmarshals a new shape from the {@code PARAGRAPH_SHAPE} template and
     * puts the given paragraph into its text body.
     *
     * @param paragraph first paragraph of the new shape
     * @return the new shape
     * @throws JAXBException if the template cannot be unmarshalled
     */
    private Shape createParagraphShape(CTTextParagraph paragraph) throws JAXBException {
        Object unmarshalled = XmlUtils.unmarshalString(PARAGRAPH_SHAPE, org.pptx4j.jaxb.Context.jcPML, Shape.class);
        Shape shape = (Shape) unmarshalled;
        shape.getTxBody().getP().add(paragraph);
        return shape;
    }

    /**
     * Converts an inline box. Inside a table cell the converted content is
     * wrapped in a paragraph of its own; otherwise it is returned as is.
     * Boxes that produce no content yield {@code null} in both cases, so that
     * no paragraph containing a {@code null} run is ever created.
     *
     * @param inlineBox box to convert
     * @param settings  traversal state
     * @return the run or line break, a paragraph wrapping it, or {@code null}
     */
    private Object traverseInlineBoxContent(InlineBox inlineBox, TraversalSettings settings) {
        Object content = processInlineBoxContent(inlineBox, settings);
        if (content == null) {
            // Nothing was produced (e.g. an inline box without text); callers treat null as "no result".
            return null;
        }
        if(settings.isInTableCell()) {
            List contentList = new ArrayList();
            contentList.add(content);
            return createParagraph(contentList);
        }
        return content;
    }

    /**
     * Chooses the handler for an inline box: boxes belonging to an
     * {@code a} element are treated as hyperlinks; all others are handled
     * according to whether they carry a text node.
     *
     * @param inlineBox box to convert
     * @param settings  traversal state
     * @return the converted content, possibly {@code null}
     */
    private Object processInlineBoxContent(InlineBox inlineBox, TraversalSettings settings) {
        boolean hasElement = inlineBox.getElement() != null;
        if (hasElement && isHyperlink(inlineBox)) {
            return processHyperlink(inlineBox, settings);
        }

        return inlineBox.getTextNode() != null
                ? processRegularTextNode(inlineBox, settings)
                : processEmptyTextNode(inlineBox, settings);
    }
    
    /**
     * Converts an inline box that belongs to an {@code a} element. The link
     * target is stored in the settings when the element starts in this box and
     * cleared when it ends here, so every run created in between is given the
     * hyperlink. A box without a text node only updates that state and
     * produces no run; previously such a box caused a NullPointerException.
     *
     * @param inlineBox box whose element is an anchor
     * @param settings  traversal state carrying the current hyperlink
     * @return the run for the box's text, or {@code null} if it has no text node
     */
    private CTRegularTextRun processHyperlink(InlineBox inlineBox, TraversalSettings settings) {
        String hrefAttr = inlineBox.getElement().getAttribute("href");
        
        if(inlineBox.isStartsHere()) {
            settings.setHyperlink(hrefAttr);
        }
        
        CTRegularTextRun hyperlink = null;
        if (inlineBox.getTextNode() != null) {
            hyperlink = processRegularTextNode(inlineBox, settings);
        } else {
            LOG.debug("Hyperlink InlineBox has no text, so skipping");
        }
        
        if(inlineBox.isEndsHere()) {
            settings.setHyperlink(null);
        }
        
        return hyperlink;
    }
     
    /**
     * Handles an inline box that has no text node: a {@code br} becomes a
     * line break, anything else is skipped.
     *
     * @param inlineBox box without a text node
     * @param settings  traversal state (not used here)
     * @return a line break, or {@code null} if the box is skipped
     */
    private CTTextLineBreak processEmptyTextNode(InlineBox inlineBox, TraversalSettings settings) {
        if (!isLineBreak(inlineBox)) {
            LOG.debug("InlineBox has no text, so skipping");
            // TODO .. a span in a span? need to traverse?
            return null;
        }
        return DML_OBJECT_FACTORY.createCTTextLineBreak();
    }

    /**
     * Creates a text run from an inline box that has a text node. The box's
     * cascaded CSS properties are stored in the settings first and then used
     * to build the run properties.
     *
     * @param inlineBox box with a text node
     * @param settings  traversal state; its CSS map is replaced
     * @return the run carrying the text and its properties
     */
    private CTRegularTextRun processRegularTextNode(InlineBox inlineBox, TraversalSettings settings) {
        settings.setCssMap(getCascadedProperties(inlineBox.getStyle()));

        CTRegularTextRun textRun = DML_OBJECT_FACTORY.createCTRegularTextRun();
        String text = inlineBox.getTextNode().getTextContent();
        textRun.setT(text);

        CTTextCharacterProperties runProperties = createRunProperties(settings);
        textRun.setRPr(runProperties);
        return textRun;
    }

    /**
     * Builds run properties from the CSS map in the settings and, while a
     * hyperlink is being traversed, attaches a click hyperlink for it.
     *
     * @param settings traversal state providing the CSS map and hyperlink
     * @return the new run properties
     */
    private CTTextCharacterProperties createRunProperties(TraversalSettings settings) {
        CTTextCharacterProperties properties = DML_OBJECT_FACTORY.createCTTextCharacterProperties();
        addStylingProperties(properties, settings.getCssMap());

        if (!settings.isHyperlinkTraversal()) {
            return properties;
        }

        CTHyperlink link = createHyperlink(settings.getHyperlink());
        properties.setHlinkClick(link);
        return properties;
    }
    
    /**
     * Applies the CSS properties in the map to the given run properties. Each
     * entry is turned into a {@link Property}; entries for which no property
     * can be created are ignored, and properties that are not run properties
     * are only logged.
     *
     * @param rPr    run properties to modify
     * @param cssMap CSS property names and values; {@code null} is treated as
     *               empty, since the CSS map in the settings is reset to
     *               {@code null} elsewhere in this class
     */
    private void addStylingProperties(CTTextCharacterProperties rPr, Map<String, CSSValue> cssMap) {
        if (cssMap == null) {
            return;
        }
        for (Map.Entry<String, CSSValue> entry : cssMap.entrySet()) {
            Property p = PropertyFactory.createPropertyFromCssName(entry.getKey(), entry.getValue());
            if (p == null) {
                continue;
            }
            if (p instanceof AbstractRunProperty) {
                ((AbstractRunProperty)p).set(rPr);
            } else {
                LOG.debug("Unknown property " + p.getClass().getName());
            }
        }
    }

    /**
     * Creates a DrawingML hyperlink that points at a newly added external
     * relationship for the given URL.
     *
     * @param url link target
     * @return the hyperlink referencing the new relationship by id
     */
    private CTHyperlink createHyperlink(String url) {
        String relationshipId = addHyperlinkRelationship(url).getId();
        CTHyperlink link = DML_OBJECT_FACTORY.createCTHyperlink();
        link.setId(relationshipId);
        return link;
    }
    
    /**
     * Adds an external hyperlink relationship for the URL to the slide's
     * relationships part.
     *
     * @param url link target
     * @return the relationship, with its id assigned by the relationships part
     */
    private Relationship addHyperlinkRelationship(String url) {
        Relationship hyperlinkRel = RELATIONSHIPS_FACTORY.createRelationship();
        hyperlinkRel.setTargetMode("External");
        hyperlinkRel.setTarget(url);
        hyperlinkRel.setType(Namespaces.HYPERLINK);

        // addRelationship sets the rel's @Id
        rp.addRelationship(hyperlinkRel);

        return hyperlinkRel;
    }
    
    /**
     * Collects the CSS properties of a calculated style into a map keyed by
     * property name. Property names starting with {@code -fs} are left out.
     * Only values of type {@link DerivedValue} or {@link IdentValue} are
     * included; other values and missing values are logged and skipped.
     *
     * @param cs style to read
     * @return map from CSS property name to its value
     */
    private Map<String, CSSValue> getCascadedProperties(CalculatedStyle cs) {

        Map<String, CSSValue> cssMap = new HashMap<String, CSSValue>();

        FSDerivedValue[] derivedValues = cs.getDerivedValues();
        for (int i = 0; i < derivedValues.length; i++) {

            CSSName name = CSSName.getByID(i);
            String cssName = name.toString();

            if (cssName.startsWith("-fs")) continue;

            FSDerivedValue val = cs.valueByName(name); // walks parents as necessary to get the value

            // instanceof is false for null, so no separate null checks are needed.
            if (val instanceof DerivedValue) {
                cssMap.put(cssName, ((DerivedValue)val).getCSSPrimitiveValue());
            } else if (val instanceof IdentValue) {
                cssMap.put(cssName, ((IdentValue)val).getCSSPrimitiveValue());
            } else if (val != null) {
                LOG.debug("Skipping {} .. {}", cssName, val.getClass().getName());
            } else {
                LOG.debug("Skipping {} .. (null value)", cssName);
            }
        }

        return cssMap;

    }
    
    /**
     * Tells whether the inline box was created for a {@code br} element.
     * Boxes without an element are not line breaks; this method is reached
     * for such boxes, so the element must be checked for {@code null}.
     *
     * @param inlineBox box to inspect
     * @return {@code true} if the box's element is a {@code br}
     */
    private boolean isLineBreak(InlineBox inlineBox) {
        Element element = inlineBox.getElement();
        return element != null && "br".equals(element.getNodeName());
    }

    /**
     * Tells whether the inline box was created for an {@code a} element.
     * The box must have an element.
     */
    private boolean isHyperlink(InlineBox inlineBox) {
        String nodeName = inlineBox.getElement().getNodeName();
        return nodeName.equals("a");
    }
    
    /** Tells whether the element is {@code html} or {@code body}. */
    private boolean isHtmlOrBody(Element e) {
        String nodeName = e.getNodeName();
        if (nodeName.equals("html")) {
            return true;
        }
        return nodeName.equals("body");
    }

    /** Tells whether the element is a {@code p}. */
    private boolean isParagraph(Element e) {
        String nodeName = e.getNodeName();
        return nodeName.equals("p");
    }
    
    /**
     * Tells whether the element is an HTML heading, {@code h1} to {@code h6}.
     *
     * @param e element to inspect
     * @return {@code true} for any of the six heading elements
     */
    private boolean isHeading(Element e) {
        String nodeName = e.getNodeName();
        return "h1".equals(nodeName)
                || "h2".equals(nodeName)
                || "h3".equals(nodeName)
                || "h4".equals(nodeName)
                || "h5".equals(nodeName)
                || "h6".equals(nodeName);
    }
    
    /** Tells whether the element is an ordered ({@code ol}) or unordered ({@code ul}) list. */
    private boolean isList(Element e) {
        String nodeName = e.getNodeName();
        if (nodeName.equals("ol")) {
            return true;
        }
        return nodeName.equals("ul");
    }
    
    /** Tells whether the element is a list item ({@code li}). */
    private boolean isListItem(Element e) {
        String nodeName = e.getNodeName();
        return nodeName.equals("li");
    }

}