All downloads are FREE. Search and download functionality uses the official Maven repository.

org.docx4j.anon.DmlVmlAnalyzer Maven / Gradle / Ivy

package org.docx4j.anon;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.JAXBElement;

import org.docx4j.TraversalUtil.CallbackImpl;
import org.docx4j.XmlUtils;
import org.docx4j.dml.CTBlip;
import org.docx4j.dml.CTHyperlink;
import org.docx4j.dml.CTNonVisualDrawingProps;
import org.docx4j.dml.diagram.CTDataModel;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.parts.JaxbXmlPart;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageBmpPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageGifPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageJpegPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImagePngPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageTiffPart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
import org.docx4j.vml.CTImageData;
import org.docx4j.wml.CTObject;
import org.docx4j.wml.FldChar;
import org.docx4j.wml.Pict;
import org.docx4j.wml.SdtBlock;
import org.docx4j.wml.Text;
import org.jvnet.jaxb2_commons.ppp.Child;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Detects DrawingML or VML content which does anything more than
 * link to a safe image (i.e. one we've replaced).
 * 
 * @author jharrop
 *
 */
public class DmlVmlAnalyzer extends CallbackImpl {
	
	/** Class-wide logger; final so it cannot be reassigned after class initialisation. */
	private static final Logger log = LoggerFactory.getLogger(DmlVmlAnalyzer.class);
	
	/**
	 * The part currently being analysed. It is used to resolve relationship
	 * ids to their target parts, and to name the part in debug output.
	 */
	private JaxbXmlPart sourcePart;

	/**
	 * Sets the part whose content is about to be traversed.
	 *
	 * @param p the part against which relationship ids will be resolved
	 */
	public void setPart(JaxbXmlPart p) {
		this.sourcePart = p;
	}

	/**
	 * Objects we might not anonymise. Entries are heterogeneous: the
	 * traversal stores maths paragraph objects, class names and marshalled
	 * XML strings here.
	 * <p>
	 * Initialised eagerly so that a traversal started without a prior call
	 * to {@link #reinit()} does not fail with a NullPointerException.
	 */
	HashSet unsafeObjects = new HashSet();

	/**
	 * Objects it is interesting to note are present (for example VML
	 * shapetypes).
	 */
	HashSet inventoryObjects = new HashSet();

	/**
	 * The field instruction strings (values of instrText elements)
	 * encountered during traversal.
	 */
	HashSet fieldsPresent = new HashSet();
	
	
	/** Set to true once a VML shapetype or shape has been encountered. */
	boolean containsVML;
	
	/**
	 * Discards everything recorded so far, so this instance can be used
	 * for another traversal: the VML flag is cleared and each of the
	 * three result sets is replaced with a fresh, empty one.
	 */
	public void reinit() {
		containsVML = false;

		fieldsPresent = new HashSet();
		inventoryObjects = new HashSet();
		unsafeObjects = new HashSet();
	}
	
	/**
	 * Decides whether the children of the given object should be visited.
	 * Maths paragraphs are recorded as unsafe and not descended into,
	 * since no effort is made to alter formulae; everything else is
	 * traversed.
	 *
	 * @param o the object about to be descended into
	 * @return false for a CTOMathPara, true otherwise
	 */
	@Override
	public boolean shouldTraverse(Object o) {

		boolean isMathPara = o instanceof org.docx4j.math.CTOMathPara;
		if (isMathPara) {
			unsafeObjects.add(o);
		}
		return !isMathPara;
	}	
		

	/**
	 * Examines a single object visited during traversal.
	 * <p>
	 * The text of any <code>instrText</code> element is recorded in
	 * <code>fieldsPresent</code>; the title of any VML image data is
	 * overwritten with a placeholder; and the class name of any maths
	 * paragraph is added to <code>unsafeObjects</code>.
	 *
	 * @param o2 the visited object, possibly wrapped in a JAXBElement
	 * @return always null
	 */
	@Override
	public List apply(Object o2) {

		Object o = XmlUtils.unwrap(o2);

		// Record field instructions. The instanceof check on the unwrapped
		// value avoids a ClassCastException should an element named
		// instrText ever carry something other than Text.
		if (o2 instanceof JAXBElement
				&& ((JAXBElement<?>) o2).getName().getLocalPart().equals("instrText")
				&& o instanceof Text) {

			String instruction = ((Text) o).getValue();
			fieldsPresent.add(instruction);

			// Logged at debug level rather than printed: a field instruction
			// is document content, and an anonymiser should not write that
			// to standard output.
			log.debug("field instruction: {}", instruction);
		}

		if (o instanceof CTImageData) {

			CTImageData imageData = (CTImageData) o;

			// Overwrite the title with a placeholder
			imageData.setTitle("foo");

			String rId = imageData.getRelid();
			if (rId != null) {

				Part embeddedPart = sourcePart.getRelationshipsPart().getPart(rId);
				if (embeddedPart instanceof ImagePngPart
						|| embeddedPart instanceof ImageGifPart
						|| embeddedPart instanceof ImageJpegPart
						|| embeddedPart instanceof ImageBmpPart
						|| embeddedPart instanceof ImageTiffPart
						// Others treated as unsafe
						) {
					// We've handled this

				} else {
					// Unsafe, but noted elsewhere
				}
			}

		} else if (o instanceof org.docx4j.math.CTOMathPara) {

			unsafeObjects.add(o.getClass().getName());
		}

		return null;
	}

	
	/**
	 * Returns the objects which should be visited as the children of the
	 * given object, or null if there are none.
	 * <p>
	 * The argument is unwrapped first. A null argument is logged and yields
	 * null, as does a Text. A List is returned as-is; a ContentAccessor
	 * yields its content; an SdtElement yields the content of its
	 * sdtContent. For the DrawingML, VML, diagram and FldChar types handled
	 * below, an "artificial" list is assembled from the relevant child
	 * properties. Encountering a VML shapetype or shape sets containsVML,
	 * and a shapetype is also added to inventoryObjects.
	 * <p>
	 * NOTE(review): this listing of the method appears to have lost text in
	 * three places (marked below), so it will not compile as shown; restore
	 * those sections from the original source before relying on it.
	 *
	 * @param o2 the object whose children are wanted, possibly wrapped
	 * @return the children to traverse, or null
	 */
	public List getChildren(Object o2) {
					
		if (o2==null) {
			log.warn("null passed to getChildrenImpl");
			return null;
		}
		
		Object o = XmlUtils.unwrap(o2);

		log.debug("getting children of " + o.getClass().getName() );
		if (o instanceof org.docx4j.wml.Text) return null;
		
		// Short circuit for common elements
		if (o instanceof List) {
			// Handy if you have your own list of objects you wish to process
			return (List) o;
		} else if (o instanceof org.docx4j.wml.ContentAccessor) {
			return ((org.docx4j.wml.ContentAccessor) o).getContent();
		} else if (o instanceof org.docx4j.wml.SdtElement) {
			return ((org.docx4j.wml.SdtElement) o).getSdtContent().getContent();
			
		} else if (o instanceof org.docx4j.dml.wordprocessingDrawing.Anchor) {
			
			// Similar to wordprocessingDrawing.Inline below
			
			log.debug( sourcePart.getPartName().getName() + "\n"
					+ XmlUtils.marshaltoString(o, true, true, Context.jc, 
							"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "anchor", o.getClass()));
			
            org.docx4j.dml.wordprocessingDrawing.Anchor anchor = (org.docx4j.dml.wordprocessingDrawing.Anchor) o;
            List artificialList = new ArrayList();
            CTNonVisualDrawingProps drawingProps = anchor.getDocPr();
            if (drawingProps != null) {
                handleCTNonVisualDrawingProps(drawingProps, artificialList);
            }
            if (anchor.getGraphic() == null) {
    			log.warn("TODO: Handle case of no a:graphic: " + XmlUtils.marshaltoString(o, true, true, Context.jc, 
						"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "anchor", o.getClass()));
            } else {
                log.debug("found a:graphic");
                org.docx4j.dml.Graphic graphic = anchor.getGraphic();
                if (graphic.getGraphicData() == null) {
                	
        			log.warn("TODO: Handle case of no a:graphicData: " + XmlUtils.marshaltoString(o, true, true, Context.jc, "foo", "Inline", o.getClass()));    
                	
                } else {
                	List l = handleGraphicData(graphic.getGraphicData());
                	if (l!=null) {
                		artificialList.addAll(l);
                	}
                }
            }
            // An empty list is not returned; control falls through to the
            // reflection-based lookup at the end of the method instead.
            if (!artificialList.isEmpty())
                return artificialList;
            
        } else if (o instanceof org.docx4j.dml.wordprocessingDrawing.Inline) {
        	
        	// Done
			
			/*
				
				    
				    
				    
				    
				        
				    
				    
				        
                			 */
        	
            org.docx4j.dml.wordprocessingDrawing.Inline inline = (org.docx4j.dml.wordprocessingDrawing.Inline) o;
            
            List artificialList = new ArrayList();
            CTNonVisualDrawingProps drawingProps = inline.getDocPr();
            if (drawingProps != null) {
            	// NOTE(review): text appears to be missing from here on - the
            	// remainder of the Inline branch and the start of a following
            	// branch (one which declares dmlPic) are absent. TODO restore.
            	// handle 
              
              :
               artificialList = new ArrayList();
					artificialList.add(dmlPic.getBlipFill().getBlip());
					return artificialList;
			} else {
				return null;						
			}		
		} else if (o instanceof org.docx4j.dml.CTGvmlPicture) {  // Post 2.7.1
			
			log.warn("TODO: " + XmlUtils.marshaltoString(o));			
			
			// The only child of interest is the blip, when there is one
			org.docx4j.dml.CTGvmlPicture dmlPic = ((org.docx4j.dml.CTGvmlPicture)o);
			if (dmlPic.getBlipFill()!=null
					&& dmlPic.getBlipFill().getBlip()!=null) {
					log.debug("found DML Blip");
					List artificialList = new ArrayList();
					artificialList.add(dmlPic.getBlipFill().getBlip());
					return artificialList;
			} else {
				return null;						
			}		

		} else if (o instanceof org.docx4j.vml.CTShapetype ) {
			
			// NB, may not be triggered, depending on parent.
			
			/* eg
		          
		            
		            
		              
		              
              			 */
			
			containsVML = true;
			
			// Generally nothing sensitive here
			log.debug( XmlUtils.marshaltoString(o, true, true, Context.jc, 
					Namespaces.VML, "shapetype", o.getClass()));
			inventoryObjects.add(o);
			
			return null;
			
		} else if (o instanceof org.docx4j.vml.CTShape) {

			containsVML = true;
			
			log.debug(XmlUtils.marshaltoString(o));
			
//				return ((org.docx4j.vml.CTShape)o).getAny();
			// Copy the shape's child elements into a new list, still wrapped
			List artificialList = new ArrayList();
			for (JAXBElement j : ((org.docx4j.vml.CTShape)o).getPathOrFormulasOrHandles() ) {
//					System.out.println(XmlUtils.unwrap(j).getClass().getName() );
				artificialList.add(j);				
			}
			return artificialList;
			
		} else if (o instanceof CTDataModel) {
			
			log.warn("TODO: " + XmlUtils.marshaltoString(o));
			
			CTDataModel dataModel = (CTDataModel)o;
			List artificialList = new ArrayList();
			// We're going to create a list merging two children ..			
			artificialList.addAll(dataModel.getPtLst().getPt());
			artificialList.addAll(dataModel.getCxnLst().getCxn());			
			return artificialList;
			
		} else if (o instanceof org.docx4j.dml.diagram2008.CTDrawing) {
			
			log.warn("TODO: " + XmlUtils.marshaltoString(o));
			
			return ((org.docx4j.dml.diagram2008.CTDrawing)o).getSpTree().getSpOrGrpSp();
			
		} else if (o instanceof org.docx4j.vml.CTTextbox) {		
			
			// NOTE(review): the block comment opened just below is never
			// closed within this method in this listing, and the remainder
			// of this branch plus the start of a following branch (one which
			// declares ctObject) appear to be missing. TODO restore.
			// We anon inside 
			/*
					
						
							 artificialList = new ArrayList();
			artificialList.addAll(ctObject.getAnyAndAny());
			if (ctObject.getControl()!=null) {
				artificialList.add(ctObject.getControl() ); // CTControl
			}
			return artificialList;
			
		} else if (o instanceof org.docx4j.dml.CTGvmlGroupShape) {
			
			log.warn("TODO: " + XmlUtils.marshaltoString(o));
			
			return ((org.docx4j.dml.CTGvmlGroupShape)o).getTxSpOrSpOrCxnSp();
			
		} else if(o instanceof FldChar) {

			// Interesting to analyse fields; we record instrText above
			
			FldChar fldChar = ((FldChar)o);
			List artificialList = new ArrayList();
			artificialList.add(fldChar.getFldCharType());
			if(fldChar.getFfData() != null) {
				artificialList.add(fldChar.getFfData());
			}
			if(fldChar.getFldData() != null) {
				artificialList.add(fldChar.getFldData());
			}
			if(fldChar.getNumberingChange() != null) {
				artificialList.add(fldChar.getNumberingChange());
			}
			return artificialList;
		}

		// OK, what is this? Use reflection ..
		// This should work for things including w:drawing
		log.debug(".. looking for method which returns list "  );
		try {
			Method[] methods = o.getClass().getDeclaredMethods();
			// NOTE(review): the loop below is truncated in this listing (its
			// condition and most of its body are missing); presumably it
			// invokes a declared method which returns a List. TODO restore.
			for (int i = 0; i)m.invoke(o);					
				}
			}
			
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		log.debug(".. no list member");
		return null;
	}
		
	/**
	 * Works out which children of an a:graphicData need to be traversed.
	 * <p>
	 * If the graphic data holds a picture, that picture's non-visual
	 * drawing properties are processed first. When the picture has a blip,
	 * nothing further is traversed and null is returned. Otherwise the
	 * graphic data is recorded as unsafe (charts and other content live
	 * here) and its wildcard content is returned.
	 *
	 * @param graphicData the graphic data to examine
	 * @return null when the graphic data contains a picture with a blip;
	 *         otherwise the result of graphicData.getAny()
	 */
	private  List handleGraphicData(org.docx4j.dml.GraphicData graphicData) {

		// Receives any hyperlink found on the picture's non-visual
		// properties. Note that this list is not part of the return value.
		List hyperlinkSink = new ArrayList();
		if (graphicData.getPic() != null) {
			// GraphicData can have a hyperlink reference, which can be found this way
			CTNonVisualDrawingProps cNvPr = graphicData.getPic().getNvPicPr().getCNvPr();
			if (cNvPr != null) {
				handleCTNonVisualDrawingProps(cNvPr, hyperlinkSink);
			}
		}

		// It's not graphicData.getAny() we're typically interested in
		boolean hasBlip = graphicData.getPic() != null
				&& graphicData.getPic().getBlipFill() != null
				&& graphicData.getPic().getBlipFill().getBlip() != null;

		if (!hasBlip) {
			// Unsafe; charts and other stuff is in here
			addUnsafe(graphicData, "http://schemas.openxmlformats.org/drawingml/2006/main", "graphicData", org.docx4j.dml.GraphicData.class);
			return graphicData.getAny();
		}

		CTBlip blip = graphicData.getPic().getBlipFill().getBlip();

		if (blip.getLink() != null) {
			// Assume OK. Either it's on the public internet, or it's inaccessible.
			log.debug("blip contained a link .. assumed ok");
			return null;
		}

		if (blip.getEmbed() == null) {
			return null;
		}

		String rId = blip.getEmbed();
		Part target = sourcePart.getRelationshipsPart().getPart(rId);
		boolean recognisedImage = target instanceof ImagePngPart
				|| target instanceof ImageGifPart
				|| target instanceof ImageJpegPart
				|| target instanceof ImageBmpPart
				|| target instanceof ImageTiffPart;
		if (!recognisedImage) {
			// Others treated as unsafe, but noted elsewhere
		}
		// else: we've handled this

		return null;
	}

	/**
	 * Records an object as unsafe. For now, what is stored is the object's
	 * marshalled content, not the object itself.
	 *
	 * @param o the object to record
	 * @param uri namespace URI passed to the marshaller for the root element
	 * @param local local name passed to the marshaller for the root element
	 * @param declaredType declared type passed to the marshaller
	 */
	private void addUnsafe(Object o, 
			String uri, String local, Class declaredType)  {

		String marshalled = XmlUtils.marshaltoString(o, true, true, Context.jc,
				uri, local, declaredType);
		unsafeObjects.add(marshalled);
	}
	
//	private void addUnsafe(Object o) {
//		
//		// For now, we'll use marshalled content
//		unsafeObjects.add(XmlUtils.marshaltoString(o));
//	}
	
	 
	/**
	 * Clears the description on the given non-visual drawing properties
	 * and, since such properties can carry a hyperlink reference, adds any
	 * click hyperlink to the supplied list.
	 *
	 * @param drawingProps the properties to process; nothing is done if null
	 * @param artificialList the list to which the click hyperlink, if there
	 *        is one, is added
	 */
	private  void handleCTNonVisualDrawingProps(CTNonVisualDrawingProps drawingProps, List artificialList){

		if (drawingProps == null) {
			return;
		}

		// Drop the description; the name is probably ok, so it is left alone
		if (drawingProps.getDescr() != null) {
			drawingProps.setDescr(null);
		}

		CTHyperlink clickLink = drawingProps.getHlinkClick();
		if (clickLink != null) {
			artificialList.add(clickLink);
		}
	}	
	
    
}