All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
org.docx4j.anon.DmlVmlAnalyzer Maven / Gradle / Ivy
package org.docx4j.anon;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.JAXBElement;
import org.docx4j.TraversalUtil.CallbackImpl;
import org.docx4j.XmlUtils;
import org.docx4j.dml.CTBlip;
import org.docx4j.dml.CTHyperlink;
import org.docx4j.dml.CTNonVisualDrawingProps;
import org.docx4j.dml.diagram.CTDataModel;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.parts.JaxbXmlPart;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageBmpPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageGifPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageJpegPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImagePngPart;
import org.docx4j.openpackaging.parts.WordprocessingML.ImageTiffPart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
import org.docx4j.vml.CTImageData;
import org.docx4j.wml.CTObject;
import org.docx4j.wml.FldChar;
import org.docx4j.wml.Pict;
import org.docx4j.wml.SdtBlock;
import org.docx4j.wml.Text;
import org.jvnet.jaxb2_commons.ppp.Child;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This will detect DrawingML or VML which does anything more than
* link to a safe image (ie one we've replaced)
*
* @author jharrop
*
*/
public class DmlVmlAnalyzer extends CallbackImpl {
private static Logger log = LoggerFactory.getLogger(DmlVmlAnalyzer.class);
/** Part under analysis; its relationships part is used to resolve rIds to embedded parts. */
private JaxbXmlPart sourcePart;

/**
 * Sets the part whose content is about to be analysed.
 *
 * @param p the part whose relationships part is consulted when an rId is looked up
 */
public void setPart(JaxbXmlPart p) {
    sourcePart = p;
}
/**
* Objects we might not anonymise.
* Entries are heterogeneous: this class adds the CTOMathPara object itself
* (shouldTraverse), a class name (apply) and marshalled XML strings (addUnsafe).
* Null until reinit() has been called.
*/
HashSet unsafeObjects = null;
/**
* Objects it is interesting to note are present
* (getChildren adds VML shapetype objects here).
* Null until reinit() has been called.
*/
HashSet inventoryObjects = null;
/**
* Values of the instrText elements encountered by apply.
* Null until reinit() has been called.
*/
HashSet fieldsPresent = null;
/**
* Set to true by getChildren when a VML CTShapetype or CTShape is encountered;
* reset to false by reinit().
*/
boolean containsVML;
/**
 * Resets the analysis state: clears the VML flag and installs fresh, empty
 * sets for field instructions, inventory objects and unsafe objects.
 */
public void reinit() {
    containsVML = false;
    fieldsPresent = new HashSet();
    inventoryObjects = new HashSet();
    unsafeObjects = new HashSet();
}
/**
 * Decides whether traversal should descend into the given object.
 *
 * @param o the object about to be traversed
 * @return false for an OMath paragraph (which is also recorded in
 *         unsafeObjects), true for anything else
 */
@Override
public boolean shouldTraverse(Object o) {
    final boolean isMathPara = o instanceof org.docx4j.math.CTOMathPara;
    if (isMathPara) {
        // No effort is made to alter formulae, so note the object and stop here
        unsafeObjects.add(o);
    }
    return !isMathPara;
}
/**
 * Inspects a single traversed object.
 * <ul>
 * <li>For a JAXBElement named instrText, the field instruction text is
 *     recorded in fieldsPresent.</li>
 * <li>For VML image data, the title is overwritten with a placeholder and,
 *     where a relationship id is present, the target part is looked up.</li>
 * <li>For an OMath paragraph, its class name is added to unsafeObjects.</li>
 * </ul>
 *
 * @param o2 the traversed object, possibly wrapped in a JAXBElement
 * @return always null
 */
@Override
public List apply(Object o2) {
    if (o2 instanceof JAXBElement
            && "instrText".equals(((JAXBElement<?>) o2).getName().getLocalPart())) {
        // record field instruction
        Text instr = (Text) XmlUtils.unwrap(o2);
        fieldsPresent.add(instr.getValue());
        // Field instructions may carry document content, so they go to the
        // debug log rather than being printed to stdout.
        log.debug("instrText: {}", instr.getValue());
    }

    Object o = XmlUtils.unwrap(o2);
    if (o instanceof CTImageData) {
        CTImageData imageData = (CTImageData) o;
        // replace its title with a placeholder
        imageData.setTitle("foo");

        String rId = imageData.getRelid();
        if (rId != null) {
            Part embeddedPart = sourcePart.getRelationshipsPart().getPart(rId);
            if (embeddedPart instanceof ImagePngPart
                    || embeddedPart instanceof ImageGifPart
                    || embeddedPart instanceof ImageJpegPart
                    || embeddedPart instanceof ImageBmpPart
                    || embeddedPart instanceof ImageTiffPart
                    // Others treated as unsafe
                    ) {
                // We've handled this
            } else {
                // Unsafe, but noted elsewhere
            }
        }
    } else if (o instanceof org.docx4j.math.CTOMathPara) {
        unsafeObjects.add(o.getClass().getName());
    }
    return null;
}
/**
* Returns the children of the given object which should be traversed next.
* The object is unwrapped first. A wml Text yields null; a List, a
* ContentAccessor or an SdtElement yields its content directly; a number of
* DrawingML, VML and field types get a purpose-built list. Anything not
* handled by those branches falls through to a reflection-based lookup.
*
* NOTE(review): this copy of the method looks damaged by text extraction -
* the Inline branch, the CTShape loop header, the CTTextbox branch and the
* reflection loop are incomplete. Restore them from the upstream source
* before compiling; the code below is deliberately left untouched.
*
* @param o2 object (possibly a JAXBElement wrapper) whose children are wanted
* @return the children to traverse, or null where none are identified
*/
public List getChildren(Object o2) {
if (o2==null) {
log.warn("null passed to getChildrenImpl");
return null;
}
Object o = XmlUtils.unwrap(o2);
log.debug("getting children of " + o.getClass().getName() );
// Text is treated as having no children
if (o instanceof org.docx4j.wml.Text) return null;
// Short circuit for common elements
if (o instanceof List) {
// Handy if you have your own list of objects you wish to process
return (List) o;
} else if (o instanceof org.docx4j.wml.ContentAccessor) {
return ((org.docx4j.wml.ContentAccessor) o).getContent();
} else if (o instanceof org.docx4j.wml.SdtElement) {
return ((org.docx4j.wml.SdtElement) o).getSdtContent().getContent();
} else if (o instanceof org.docx4j.dml.wordprocessingDrawing.Anchor) {
// Similar to wordprocessingDrawing.Inline below
log.debug( sourcePart.getPartName().getName() + "\n"
+ XmlUtils.marshaltoString(o, true, true, Context.jc,
"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "anchor", o.getClass()));
org.docx4j.dml.wordprocessingDrawing.Anchor anchor = (org.docx4j.dml.wordprocessingDrawing.Anchor) o;
List artificialList = new ArrayList();
CTNonVisualDrawingProps drawingProps = anchor.getDocPr();
if (drawingProps != null) {
handleCTNonVisualDrawingProps(drawingProps, artificialList);
}
if (anchor.getGraphic() == null) {
log.warn("TODO: Handle case of no a:graphic: " + XmlUtils.marshaltoString(o, true, true, Context.jc,
"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "anchor", o.getClass()));
} else {
log.debug("found a:graphic");
org.docx4j.dml.Graphic graphic = anchor.getGraphic();
if (graphic.getGraphicData() == null) {
log.warn("TODO: Handle case of no a:graphicData: " + XmlUtils.marshaltoString(o, true, true, Context.jc, "foo", "Inline", o.getClass()));
} else {
List l = handleGraphicData(graphic.getGraphicData());
if (l!=null) {
artificialList.addAll(l);
}
}
}
// If nothing was collected, control falls through to the reflection-based lookup below
if (!artificialList.isEmpty())
return artificialList;
} else if (o instanceof org.docx4j.dml.wordprocessingDrawing.Inline) {
// Done
/*
*/
org.docx4j.dml.wordprocessingDrawing.Inline inline = (org.docx4j.dml.wordprocessingDrawing.Inline) o;
List artificialList = new ArrayList();
CTNonVisualDrawingProps drawingProps = inline.getDocPr();
if (drawingProps != null) {
// handle
:
artificialList = new ArrayList();
artificialList.add(dmlPic.getBlipFill().getBlip());
return artificialList;
} else {
return null;
}
} else if (o instanceof org.docx4j.dml.CTGvmlPicture) { // Post 2.7.1
log.warn("TODO: " + XmlUtils.marshaltoString(o));
org.docx4j.dml.CTGvmlPicture dmlPic = ((org.docx4j.dml.CTGvmlPicture)o);
if (dmlPic.getBlipFill()!=null
&& dmlPic.getBlipFill().getBlip()!=null) {
log.debug("found DML Blip");
List artificialList = new ArrayList();
artificialList.add(dmlPic.getBlipFill().getBlip());
return artificialList;
} else {
return null;
}
} else if (o instanceof org.docx4j.vml.CTShapetype ) {
// NB, may not be triggered, depending on parent.
/* eg
*/
containsVML = true;
// Generally nothing sensitive here
log.debug( XmlUtils.marshaltoString(o, true, true, Context.jc,
Namespaces.VML, "shapetype", o.getClass()));
inventoryObjects.add(o);
return null;
} else if (o instanceof org.docx4j.vml.CTShape) {
containsVML = true;
log.debug(XmlUtils.marshaltoString(o));
// return ((org.docx4j.vml.CTShape)o).getAny();
List artificialList = new ArrayList();
for (JAXBElement> j : ((org.docx4j.vml.CTShape)o).getPathOrFormulasOrHandles() ) {
// System.out.println(XmlUtils.unwrap(j).getClass().getName() );
artificialList.add(j);
}
return artificialList;
} else if (o instanceof CTDataModel) {
log.warn("TODO: " + XmlUtils.marshaltoString(o));
CTDataModel dataModel = (CTDataModel)o;
List artificialList = new ArrayList();
// We're going to create a list merging two children ..
artificialList.addAll(dataModel.getPtLst().getPt());
artificialList.addAll(dataModel.getCxnLst().getCxn());
return artificialList;
} else if (o instanceof org.docx4j.dml.diagram2008.CTDrawing) {
log.warn("TODO: " + XmlUtils.marshaltoString(o));
return ((org.docx4j.dml.diagram2008.CTDrawing)o).getSpTree().getSpOrGrpSp();
} else if (o instanceof org.docx4j.vml.CTTextbox) {
// We anon inside
/*
artificialList = new ArrayList();
artificialList.addAll(ctObject.getAnyAndAny());
if (ctObject.getControl()!=null) {
artificialList.add(ctObject.getControl() ); // CTControl
}
return artificialList;
} else if (o instanceof org.docx4j.dml.CTGvmlGroupShape) {
log.warn("TODO: " + XmlUtils.marshaltoString(o));
return ((org.docx4j.dml.CTGvmlGroupShape)o).getTxSpOrSpOrCxnSp();
} else if(o instanceof FldChar) {
// Interesting to analyse fields; we record instrText above
FldChar fldChar = ((FldChar)o);
List artificialList = new ArrayList();
artificialList.add(fldChar.getFldCharType());
if(fldChar.getFfData() != null) {
artificialList.add(fldChar.getFfData());
}
if(fldChar.getFldData() != null) {
artificialList.add(fldChar.getFldData());
}
if(fldChar.getNumberingChange() != null) {
artificialList.add(fldChar.getNumberingChange());
}
return artificialList;
}
// OK, what is this? Use reflection ..
// This should work for things including w:drawing
log.debug(".. looking for method which returns list " );
try {
Method[] methods = o.getClass().getDeclaredMethods();
for (int i = 0; i)m.invoke(o);
}
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
log.debug(".. no list member");
return null;
}
/**
 * Examines an a:graphicData element.
 * <p>
 * If it holds a picture, the picture's non-visual drawing properties (when
 * present) are passed to handleCTNonVisualDrawingProps. If the picture has
 * a blip, nothing further needs traversing and null is returned. Otherwise
 * the graphic data is recorded as unsafe (marshalled form) and its "any"
 * content is returned for traversal.
 *
 * @param graphicData the graphic data to examine; must not be null
 * @return null when the graphic data is a picture with a blip, otherwise
 *         the list returned by graphicData.getAny()
 */
private List handleGraphicData(org.docx4j.dml.GraphicData graphicData) {
    // NOTE(review): hyperlinks collected in this list are never returned to
    // the caller; presumably only the side effect on the drawing properties
    // is wanted here - confirm before changing.
    List tmpArtificialList = new ArrayList();

    // GraphicData can have a hyperlink reference, which can be found this way.
    // nvPicPr is checked as well, so a picture lacking it does not cause an NPE.
    if (graphicData.getPic() != null && graphicData.getPic().getNvPicPr() != null) {
        CTNonVisualDrawingProps picNonVisual = graphicData.getPic().getNvPicPr().getCNvPr();
        if (picNonVisual != null) {
            handleCTNonVisualDrawingProps(picNonVisual, tmpArtificialList);
        }
    }

    // Its not graphicData.getAny() we're typically interested in
    if (graphicData.getPic() != null
            && graphicData.getPic().getBlipFill() != null
            && graphicData.getPic().getBlipFill().getBlip() != null) {

        CTBlip blip = graphicData.getPic().getBlipFill().getBlip();
        if (blip.getLink() != null) {
            // Assume OK. Either its on the public internet, or its inaccessible.
            log.debug("blip contained a link .. assumed ok");
        } else if (blip.getEmbed() != null) {
            String rId = blip.getEmbed();
            Part embeddedPart = sourcePart.getRelationshipsPart().getPart(rId);
            if (embeddedPart instanceof ImagePngPart
                    || embeddedPart instanceof ImageGifPart
                    || embeddedPart instanceof ImageJpegPart
                    || embeddedPart instanceof ImageBmpPart
                    || embeddedPart instanceof ImageTiffPart
                    // Others treated as unsafe
                    ) {
                // We've handled this
            } else {
                // Unsafe, but noted elsewhere
            }
        }
        return null;
    }

    // Unsafe; Charts and other stuff is in here
    addUnsafe(graphicData, "http://schemas.openxmlformats.org/drawingml/2006/main", "graphicData", org.docx4j.dml.GraphicData.class);
    return graphicData.getAny();
}
/**
 * Records an object as unsafe by adding its marshalled XML to unsafeObjects.
 *
 * @param o            the object to record
 * @param uri          namespace URI passed to the marshaller
 * @param local        local element name passed to the marshaller
 * @param declaredType declared type passed to the marshaller
 */
private void addUnsafe(Object o,
        String uri, String local, Class declaredType) {
    // For now, the marshalled content stands in for the object
    String marshalled = XmlUtils.marshaltoString(o, true, true, Context.jc,
            uri, local, declaredType);
    unsafeObjects.add(marshalled);
}
// private void addUnsafe(Object o) {
//
// // For now, we'll use marshalled content
// unsafeObjects.add(XmlUtils.marshaltoString(o));
// }
/**
 * Processes non-visual drawing properties: a non-null description is set
 * to null, and a click hyperlink, if present, is appended to the supplied
 * list. Does nothing when drawingProps is null.
 *
 * @param drawingProps   the properties to process; may be null
 * @param artificialList list which receives the click hyperlink, if any
 */
private void handleCTNonVisualDrawingProps(CTNonVisualDrawingProps drawingProps, List artificialList){
    if (drawingProps == null) {
        return;
    }

    if (drawingProps.getDescr() != null) {
        drawingProps.setDescr(null);
    }

    // Name is probably ok

    CTHyperlink clickLink = drawingProps.getHlinkClick();
    if (clickLink != null) {
        artificialList.add(clickLink);
    }
}
}