Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
org.apache.poi.xwpf.converter.core.openxmlformats.OpenXMlFormatsVisitor Maven / Gradle / Ivy
package org.apache.poi.xwpf.converter.core.openxmlformats;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.converter.core.IImageExtractor;
import org.apache.poi.xwpf.converter.core.IMasterPageHandler;
import org.apache.poi.xwpf.converter.core.IXWPFMasterPage;
import org.apache.poi.xwpf.converter.core.ListItemContext;
import org.apache.poi.xwpf.converter.core.MasterPageManager;
import org.apache.poi.xwpf.converter.core.Options;
import org.apache.poi.xwpf.converter.core.styles.XWPFStylesDocument;
import org.apache.poi.xwpf.converter.core.utils.DxaUtil;
import org.apache.poi.xwpf.converter.core.utils.StringUtils;
import org.apache.poi.xwpf.converter.core.utils.XWPFRunHelper;
import org.apache.poi.xwpf.converter.core.utils.XWPFTableUtil;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlTokenSource;
import org.openxmlformats.schemas.drawingml.x2006.main.CTBlipFillProperties;
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObject;
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTPosH;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTPosV;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTWrapSquare;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromH;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STWrapText;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtrRef;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtCell;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentBlock;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTabs;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.FtrDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.HdrDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBrType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType;
/**
 * Base visitor over the low-level OpenXML (XMLBeans) model of a word processing document.
 * Subclasses implement the abstract {@code startVisit...}/{@code endVisit...}/{@code visit...}
 * hooks to produce their own output.
 * <p>
 * The type parameters are declared explicitly because the members of this class refer to
 * {@code T}, {@code O} and {@code E}; without the declaration none of them resolve.
 *
 * @param <T> type of the container objects handed from one visit step to the next
 * @param <O> options type; must extend {@link Options} because {@code options.getExtractor()} is used
 * @param <E> master page type received by {@code visitHeaderRef}/{@code visitFooterRef}
 */
public abstract class OpenXMlFormatsVisitor<T, O extends Options, E extends IXWPFMasterPage>
    implements IMasterPageHandler<E>
{
    /** Source of the document parts (main document, headers, footers, binary relations). */
    private final IOpenXMLFormatsPartProvider provider;

    /** Conversion options; also the source of the image extractor. */
    protected final O options;

    /** Main document part, obtained from the provider in the constructor. */
    private final CTDocument1 document;

    /** Master page manager created for {@link #document} with this visitor as handler. */
    private final MasterPageManager masterPageManager;

    /** Header currently being visited; {@code null} outside of a header visit. */
    private CTHdrFtr currentHeader;

    /** Reference of the header currently being visited; {@code null} outside of a header visit. */
    private CTHdrFtrRef currentHeaderRef;

    /** Footer currently being visited; {@code null} outside of a footer visit. */
    private CTHdrFtr currentFooter;

    /** Reference of the footer currently being visited; {@code null} outside of a footer visit. */
    private CTHdrFtrRef currentFooterRef;

    // NOTE(review): presumably requests a page break before the next paragraph; confirm against
    // the paragraph visiting code.
    private boolean pageBreakOnNextParagraph;

    /** Styles model created by {@code createStylesDocument}. */
    protected final XWPFStylesDocument stylesDocument;

    /** Tables being visited; the top entry is what {@code getParentTable()} returns. */
    private final Stack<CTTbl> tables;
/**
 * Creates a visitor for the main document part supplied by {@code provider}.
 *
 * @param provider gives access to the document parts; its main document is read immediately
 * @param options conversion options kept for the lifetime of the visitor
 * @throws Exception if the document or the styles model cannot be obtained
 */
public OpenXMlFormatsVisitor( IOpenXMLFormatsPartProvider provider, O options )
    throws Exception
{
    this.provider = provider;
    this.options = options;
    // Typed stack rather than a raw one: getParentTable() hands its entries back as CTTbl.
    this.tables = new Stack<CTTbl>();
    this.document = provider.getDocument();
    // NOTE: createStylesDocument is an overridable hook invoked during construction, so an
    // override must not rely on subclass fields being initialized yet.
    this.stylesDocument = createStylesDocument( provider );
    this.masterPageManager = new MasterPageManager( document, this );
}
/**
 * Factory hook for the styles model used by this visitor. The default implementation builds a
 * plain {@link XWPFStylesDocument} from the given provider.
 *
 * @param provider part provider handed to the styles model
 * @return a new styles model
 * @throws Exception if the styles model cannot be built
 */
protected XWPFStylesDocument createStylesDocument( IOpenXMLFormatsPartProvider provider )
    throws Exception
{
    final XWPFStylesDocument styles = new XWPFStylesDocument( provider );
    return styles;
}
/**
 * @return the styles model created when this visitor was constructed
 */
public XWPFStylesDocument getStylesDocument()
{
    return this.stylesDocument;
}
/**
 * @return the options instance passed to the constructor
 */
public O getOptions()
{
    return this.options;
}
/**
 * @return the master page manager created for the visited document
 */
public MasterPageManager getMasterPageManager()
{
    return this.masterPageManager;
}
// ------------------------------ Start/End document visitor -----------
/**
 * Main entry point of the visit: opens the output document, visits the body of the main
 * document part, then closes the output document.
 *
 * @throws Exception if any visit step fails
 */
public void start()
    throws Exception
{
    // Open the output; the returned container receives whatever the body visit creates.
    final T rootContainer = startVisitDocument();

    // Create the output elements for the elements found in the document body.
    visitBodyElements( this.document, rootContainer );

    // Close the output.
    endVisitDocument();
}
/**
 * Start of visit document. Called first by {@code start()}, before the body elements are
 * visited.
 *
 * @return the container that {@code start()} passes to the body visit
 * @throws Exception if the output document cannot be started
 */
protected abstract T startVisitDocument()
throws Exception;
/**
 * End of visit document. Called last by {@code start()}, after the body elements have been
 * visited.
 *
 * @throws Exception if the output document cannot be finished
 */
protected abstract void endVisitDocument()
throws Exception;
// ------------------------------ OpenXMLFormats Elements visitor -----------
/**
 * Visits the body of the given main document part by delegating to the token-source overload.
 *
 * @param document main document part whose body is visited
 * @param container container receiving the visited content
 * @throws Exception if visiting the body fails
 */
private void visitBodyElements( CTDocument1 document, T container )
    throws Exception
{
    final XmlTokenSource body = document.getBody();
    visitBodyElements( body, container );
}
// NOTE(review): this span is damaged. It begins as visitBodyElements( XmlTokenSource, T ) but,
// after the master page manager check, continues with code that iterates the children of a
// 'paragraph' and uses 'paragraphContainer', 'pageNumber', 'url', 'fldCharTypeParsing' and
// 'rListAfterSeparate', none of which is declared in the visible text. It looks like the rest
// of this method and the header of a paragraph-body visiting method were lost, and the braces
// no longer balance. Restore from the original source before relying on it.
protected void visitBodyElements( XmlTokenSource token, T container )
throws Exception
{
if ( !masterPageManager.isInitialized() )
{
// master page manager which hosts each w:sectPr declared in the word/document.xml
// must be initialized. The initialization loop for each
// NOTE(review): source text is missing here; the fragment "rListAfterSeparate = null;" below
// appears to be the tail of a lost declaration.
// rListAfterSeparate = null;
XmlCursor cursor = null;
try
{
// Iterate over every direct child of the paragraph.
cursor = paragraph.newCursor();
cursor.selectPath( "./*" );
while ( cursor.toNextSelection() )
{
XmlObject o = cursor.getObject();
if ( o instanceof CTR )
{
/*
* Test whether this run carries a field character type (as reported by
* XWPFRunHelper.getFldCharType).
*/
CTR r = (CTR) o;
STFldCharType.Enum fldCharType = XWPFRunHelper.getFldCharType( r );
if ( fldCharType != null )
{
if ( fldCharType.equals( STFldCharType.BEGIN ) )
{
// Field begins: flush anything buffered so far, then start a new buffer and reset the
// page-number/url state.
process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
fldCharTypeParsing = true;
rListAfterSeparate = new ArrayList();
pageNumber = false;
url = null;
}
else if ( fldCharType.equals( STFldCharType.END ) )
{
// Field ends: visit the buffered elements with the collected page-number/url state, then
// leave field parsing mode.
process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
fldCharTypeParsing = false;
rListAfterSeparate = null;
pageNumber = false;
url = null;
}
}
else
{
if ( fldCharTypeParsing )
{
// Inside a field: a run with instruction text configures the field, any other run is
// buffered until the field ends.
String instrText = XWPFRunHelper.getInstrText( r );
if ( instrText != null )
{
if ( StringUtils.isNotEmpty( instrText ) )
{
// test if it's PAGE
boolean instrTextPage = XWPFRunHelper.isInstrTextPage( instrText );
if ( !instrTextPage )
{
// test if it's HYPERLINK
// "http://code.google.com/p/xdocrepor"
String instrTextHyperlink = XWPFRunHelper.getInstrTextHyperlink( instrText );
if ( instrTextHyperlink != null )
{
url = instrTextHyperlink;
}
}
else
{
pageNumber = true;
}
}
}
else
{
rListAfterSeparate.add( r );
}
}
else
{
// Outside of any field: visit the run directly, without page number or url.
visitRun( r, paragraph, false, null, paragraphContainer );
}
}
}
else
{
// Non-run child (hyperlink, sdt, bookmark, ...): buffer it while inside a field, otherwise
// dispatch it right away.
if ( fldCharTypeParsing )
{
rListAfterSeparate.add( o );
}
else
{
visitRun( paragraph, o, paragraphContainer );
}
}
}
}
finally
{
// Always release the cursor, even when a visit step throws.
if ( cursor != null )
{
cursor.dispose();
}
}
}
/**
 * Visits the elements that were buffered while a field was being parsed.
 * <p>
 * The list parameter is typed as {@code List<XmlObject>}: iterating a raw {@code List} with an
 * {@code XmlObject} loop variable does not compile.
 *
 * @param paragraph paragraph that owns the buffered elements
 * @param paragraphContainer container receiving the visited runs
 * @param pageNumber whether buffered runs are to be visited as a page number
 * @param url link target applied to buffered runs, or {@code null} for none
 * @param rListAfterSeparate buffered elements; {@code null} means nothing was buffered
 * @throws Exception if visiting an element fails
 */
private void process( CTP paragraph, T paragraphContainer, boolean pageNumber, String url,
                      List<XmlObject> rListAfterSeparate )
    throws Exception
{
    if ( rListAfterSeparate == null )
    {
        return;
    }
    for ( XmlObject buffered : rListAfterSeparate )
    {
        if ( buffered instanceof CTR )
        {
            // Plain runs inherit the page-number/url state collected for the field.
            visitRun( (CTR) buffered, paragraph, pageNumber, url, paragraphContainer );
        }
        else
        {
            // Any other element goes through the generic dispatcher.
            visitRun( paragraph, buffered, paragraphContainer );
        }
    }
}
/**
 * Dispatches a non-run child of a paragraph (hyperlink, structured document tag, tracked
 * change, simple field, smart tag or bookmark) to the matching visit logic. Elements of any
 * other type are ignored.
 *
 * @param paragraph paragraph that owns {@code o}
 * @param o paragraph child to dispatch
 * @param paragraphContainer container receiving the visited content
 * @throws Exception if visiting a nested run or bookmark fails
 */
private void visitRun( CTP paragraph, XmlObject o, T paragraphContainer )
    throws Exception
{
    if ( o instanceof CTHyperlink )
    {
        CTHyperlink link = (CTHyperlink) o;
        // TODO: when link.getId() is set, resolve it through the part relationships to get the
        // external URL; until then only in-document anchors are supported.
        String anchor = link.getAnchor();
        // Build the target only when an anchor exists: concatenating a null anchor would
        // produce the bogus target "#null".
        String target = anchor != null ? "#" + anchor : null;
        for ( CTR r : link.getRList() )
        {
            visitRun( r, paragraph, false, target, paragraphContainer );
        }
    }
    else if ( o instanceof CTSdtRun )
    {
        CTSdtContentRun content = ( (CTSdtRun) o ).getSdtContent();
        // The content element is optional; a structured document tag without it has no runs.
        if ( content != null )
        {
            for ( CTR r : content.getRList() )
            {
                visitRun( r, paragraph, false, null, paragraphContainer );
            }
        }
    }
    else if ( o instanceof CTRunTrackChange )
    {
        for ( CTR r : ( (CTRunTrackChange) o ).getRList() )
        {
            visitRun( r, paragraph, false, null, paragraphContainer );
        }
    }
    else if ( o instanceof CTSimpleField )
    {
        CTSimpleField simpleField = (CTSimpleField) o;
        String instr = simpleField.getInstr();
        // 1) test if it's a page number
        boolean fieldPageNumber = XWPFRunHelper.isInstrTextPage( instr );
        String fieldHref = null;
        if ( !fieldPageNumber )
        {
            // 2) not a page number, test if it's a hyperlink:
            // HYPERLINK "http://code.google.com/p/xdocrepor"
            fieldHref = XWPFRunHelper.getInstrTextHyperlink( instr );
        }
        for ( CTR r : simpleField.getRList() )
        {
            visitRun( r, paragraph, fieldPageNumber, fieldHref, paragraphContainer );
        }
    }
    else if ( o instanceof CTSmartTagRun )
    {
        // Smart tags can be nested many times.
        // This implementation does not preserve the tagging information and
        // currently does not visit their content.
    }
    else if ( o instanceof CTBookmark )
    {
        CTBookmark bookmark = (CTBookmark) o;
        visitBookmark( bookmark, paragraph, paragraphContainer );
    }
}
/**
 * Start of visit paragraph.
 *
 * @param paragraph the paragraph being visited
 * @param itemContext list item context of the paragraph (presumably {@code null} when the
 *            paragraph is not a list item; confirm against the caller)
 * @param container container that receives the paragraph
 * @return the container for the content of the paragraph
 * @throws Exception if the paragraph cannot be started
 */
protected abstract T startVisitParagraph( CTP paragraph, ListItemContext itemContext, T container )
throws Exception;
/**
 * Emits a page break in the output.
 *
 * @throws Exception if the page break cannot be produced
 */
protected abstract void pageBreak()
throws Exception;
// NOTE(review): this span is damaged. The header belongs to visitRun( CTR, CTP, boolean,
// String, T ), but its body is lost: what follows is the tail of table-row handling (iterating
// the cells of an 'sdtCell' and visiting each with visitCell) plus a cursor clean-up, and it
// uses 'tcList', 'sdtCell', 'tableContainer' and 'cursor', none of which is declared in the
// visible text. The braces do not balance. Restore from the original source before relying on
// it.
protected void visitRun( CTR run, CTP paragraph, boolean pageNumber, String url, T paragraphContainer )
throws Exception
{
// NOTE(review): the remainder of this comment line swallowed code, presumably the declaration
// of 'tcList'.
// Loop for each element of tcList = sdtCell.getSdtContent().getTcList();
for ( CTTc cell : tcList )
{
visitCell( cell, tableContainer );
}
}
}
}
}
finally
{
// Always release the cursor, even when a visit step throws.
if ( cursor != null )
{
cursor.dispose();
}
}
}
/**
 * Start of visit table row.
 *
 * @param row the table row being visited
 * @param tableContainer container of the table that owns the row
 * @param headerRow whether the row is a header row
 * @throws Exception if the row cannot be started
 */
protected abstract void startVisitTableRow( CTRow row, T tableContainer, boolean headerRow )
throws Exception;
/**
 * Visits one table cell: starts the cell, visits its body, then ends the cell.
 *
 * @param cell the table cell to visit
 * @param tableContainer container of the table that owns the cell
 * @throws Exception if any of the three steps fails
 */
protected void visitCell( CTTc cell, T tableContainer )
    throws Exception
{
    // The container returned by the start hook receives the cell content and is handed back
    // to the end hook.
    final T cellContainer = startVisitTableCell( cell, tableContainer );
    visitTableCellBody( cell, cellContainer );
    endVisitTableCell( cell, tableContainer, cellContainer );
}
/**
 * Start of visit table cell. Called by {@code visitCell} before the cell body is visited.
 *
 * @param cell the table cell being visited
 * @param tableContainer container of the table that owns the cell
 * @return the container that {@code visitCell} passes to the cell body visit and to
 *         {@code endVisitTableCell}
 * @throws Exception if the cell cannot be started
 */
protected abstract T startVisitTableCell( CTTc cell, T tableContainer )
throws Exception;
/**
 * Visits the content of a table cell with the same logic used for body elements.
 *
 * @param cell the table cell whose content is visited
 * @param tableCellContainer container receiving the cell content
 * @throws Exception if visiting the content fails
 */
protected void visitTableCellBody( CTTc cell, T tableCellContainer )
    throws Exception
{
    final XmlTokenSource cellContent = cell;
    visitBodyElements( cellContent, tableCellContainer );
}
/**
 * End of visit table cell. Called by {@code visitCell} after the cell body has been visited.
 *
 * @param cell the table cell that was visited
 * @param tableContainer container of the table that owns the cell
 * @param tableCellContainer container that {@code startVisitTableCell} returned for this cell
 * @throws Exception if the cell cannot be finished
 */
protected abstract void endVisitTableCell( CTTc cell, T tableContainer, T tableCellContainer )
throws Exception;
// ------------------------ Image --------------
/**
 * Visits a drawing: first all of its inline graphics, then all of its anchored graphics.
 * <p>
 * The element lists are iterated directly with their element types; iterating a raw
 * {@code List} with a typed loop variable does not compile.
 *
 * @param drawing the drawing to visit
 * @param parentContainer container receiving the visited pictures
 * @throws Exception if visiting an inline or anchored graphic fails
 */
protected void visitDrawing( CTDrawing drawing, T parentContainer )
    throws Exception
{
    for ( CTInline inline : drawing.getInlineList() )
    {
        visitInline( inline, parentContainer );
    }
    for ( CTAnchor anchor : drawing.getAnchorList() )
    {
        visitAnchor( anchor, parentContainer );
    }
}
/**
 * Visits an anchored (floating) graphic: collects its positioning and wrapping information,
 * then visits the graphic itself.
 *
 * @param anchor the anchored graphic to visit
 * @param parentContainer container receiving the visited pictures
 * @throws Exception if visiting the graphic fails
 */
protected void visitAnchor( CTAnchor anchor, T parentContainer )
    throws Exception
{
    final CTGraphicalObject graphicalObject = anchor.getGraphic();

    // Horizontal position: only the reference frame is read. The horizontal offset is
    // deliberately left unset (its computation is disabled), so it is always null.
    final Float horizontalOffset = null;
    STRelFromH.Enum horizontalFrame = null;
    final CTPosH horizontalPosition = anchor.getPositionH();
    if ( horizontalPosition != null )
    {
        horizontalFrame = horizontalPosition.getRelativeFrom();
    }

    // Vertical position: reference frame plus the offset converted by DxaUtil.emu2points.
    Float verticalOffset = null;
    STRelFromV.Enum verticalFrame = null;
    final CTPosV verticalPosition = anchor.getPositionV();
    if ( verticalPosition != null )
    {
        verticalFrame = verticalPosition.getRelativeFrom();
        verticalOffset = DxaUtil.emu2points( verticalPosition.getPosOffset() );
    }

    // Text wrapping is only known for square wrapping.
    final CTWrapSquare squareWrap = anchor.getWrapSquare();
    final STWrapText.Enum wrapMode = squareWrap != null ? squareWrap.getWrapText() : null;

    visitGraphicalObject( parentContainer, graphicalObject, horizontalOffset, horizontalFrame, verticalOffset,
                          verticalFrame, wrapMode );
}
/**
 * Visits an inline graphic. Inline graphics carry no positioning or wrapping information, so
 * all of those arguments are passed as {@code null}.
 *
 * @param inline the inline graphic to visit
 * @param parentContainer container receiving the visited pictures
 * @throws Exception if visiting the graphic fails
 */
protected void visitInline( CTInline inline, T parentContainer )
    throws Exception
{
    visitGraphicalObject( parentContainer, inline.getGraphic(), null, null, null, null, null );
}
/**
 * Visits every picture found directly under the graphic data of the given graphical object.
 * Does nothing when the graphic or its graphic data is missing.
 * <p>
 * The cursor is disposed in a {@code finally} block so that it is released even when
 * {@code visitPicture} throws.
 *
 * @param parentContainer container receiving the visited pictures
 * @param graphic graphical object to inspect; may be {@code null}
 * @param offsetX horizontal offset, or {@code null}
 * @param relativeFromH horizontal reference frame, or {@code null}
 * @param offsetY vertical offset, or {@code null}
 * @param relativeFromV vertical reference frame, or {@code null}
 * @param wrapText text wrapping mode, or {@code null}
 * @throws Exception if visiting a picture fails
 */
private void visitGraphicalObject( T parentContainer, CTGraphicalObject graphic, Float offsetX,
                                   STRelFromH.Enum relativeFromH, Float offsetY, STRelFromV.Enum relativeFromV,
                                   STWrapText.Enum wrapText )
    throws Exception
{
    if ( graphic == null )
    {
        return;
    }
    CTGraphicalObjectData graphicData = graphic.getGraphicData();
    if ( graphicData == null )
    {
        return;
    }
    XmlCursor c = graphicData.newCursor();
    try
    {
        c.selectPath( "./*" );
        while ( c.toNextSelection() )
        {
            XmlObject o = c.getObject();
            if ( o instanceof CTPicture )
            {
                CTPicture picture = (CTPicture) o;
                // extract the picture if needed
                IImageExtractor extractor = getImageExtractor();
                if ( extractor != null )
                {
                    // TODO: image extraction is not implemented for this visitor yet.
                }
                // visit the picture.
                visitPicture( picture, offsetX, relativeFromH, offsetY, relativeFromV, wrapText,
                              parentContainer );
            }
        }
    }
    finally
    {
        c.dispose();
    }
}
/**
 * Visits one picture found in the graphic data of an inline or anchored graphic.
 *
 * @param picture the picture to visit
 * @param offsetX horizontal offset; {@code null} in every call visible in this class
 * @param relativeFromH horizontal reference frame; {@code null} for inline graphics
 * @param offsetY vertical offset as converted by {@code DxaUtil.emu2points}; {@code null} for
 *            inline graphics and when the anchor has no vertical position
 * @param relativeFromV vertical reference frame; {@code null} for inline graphics
 * @param wrapText text wrapping mode; {@code null} for inline graphics and when the anchor has
 *            no square wrapping
 * @param parentContainer container receiving the picture
 * @throws Exception if the picture cannot be visited
 */
protected abstract void visitPicture( CTPicture picture,
Float offsetX,
org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromH.Enum relativeFromH,
Float offsetY,
org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV.Enum relativeFromV,
STWrapText.Enum wrapText, T parentContainer )
throws Exception;
// ------------------------------ Header/Footer visitor -----------
/**
 * Resolves the header targeted by {@code headerRef} and visits it. While the header is being
 * visited it is recorded as the current header, which the relation lookup of
 * {@code getPictureBytes} and {@code isWordDocumentPartParsing()} depend on.
 * <p>
 * The current-header state is cleared in a {@code finally} block so that a failing visit
 * cannot leave the visitor believing it is still inside a header.
 *
 * @param headerRef reference to the header part
 * @param sectPr section properties that declare the reference
 * @param masterPage master page the header belongs to
 * @throws Exception if the header cannot be resolved or visited
 */
public void visitHeaderRef( CTHdrFtrRef headerRef, CTSectPr sectPr, E masterPage )
    throws Exception
{
    this.currentHeaderRef = headerRef;
    try
    {
        this.currentHeader = getHeader( headerRef );
        visitHeader( currentHeader, headerRef, sectPr, masterPage );
    }
    finally
    {
        this.currentHeader = null;
        this.currentHeaderRef = null;
    }
}
/**
 * Visits a header. Called by {@code visitHeaderRef} once the header part has been resolved.
 *
 * @param currentHeader the resolved header
 * @param headerRef reference that pointed to the header
 * @param sectPr section properties passed to {@code visitHeaderRef}
 * @param masterPage master page passed to {@code visitHeaderRef}
 * @throws Exception if the header cannot be visited
 */
protected abstract void visitHeader( CTHdrFtr currentHeader, CTHdrFtrRef headerRef, CTSectPr sectPr, E masterPage )
throws Exception;
/**
 * Looks up the header part by the id of the given reference and returns its header element.
 *
 * @param headerRef reference whose id identifies the header part
 * @return the header element of the referenced part
 * @throws Exception if the part cannot be obtained from the provider
 */
private CTHdrFtr getHeader( CTHdrFtrRef headerRef )
    throws Exception
{
    final HdrDocument headerPart = provider.getHdrDocumentByPartId( headerRef.getId() );
    return headerPart.getHdr();
}
/**
 * Resolves the footer targeted by {@code footerRef} and visits it. While the footer is being
 * visited it is recorded as the current footer, which the relation lookup of
 * {@code getPictureBytes} and {@code isWordDocumentPartParsing()} depend on.
 * <p>
 * The current-footer state is cleared in a {@code finally} block so that a failing visit
 * cannot leave the visitor believing it is still inside a footer.
 *
 * @param footerRef reference to the footer part
 * @param sectPr section properties that declare the reference
 * @param masterPage master page the footer belongs to
 * @throws Exception if the footer cannot be resolved or visited
 */
public void visitFooterRef( CTHdrFtrRef footerRef, CTSectPr sectPr, E masterPage )
    throws Exception
{
    this.currentFooterRef = footerRef;
    try
    {
        this.currentFooter = getFooter( footerRef );
        visitFooter( currentFooter, footerRef, sectPr, masterPage );
    }
    finally
    {
        this.currentFooter = null;
        this.currentFooterRef = null;
    }
}
/**
 * Visits a footer. Called by {@code visitFooterRef} once the footer part has been resolved.
 *
 * @param currentFooter the resolved footer
 * @param footerRef reference that pointed to the footer
 * @param sectPr section properties passed to {@code visitFooterRef}
 * @param masterPage master page passed to {@code visitFooterRef}
 * @throws Exception if the footer cannot be visited
 */
protected abstract void visitFooter( CTHdrFtr currentFooter, CTHdrFtrRef footerRef, CTSectPr sectPr, E masterPage )
throws Exception;
/**
 * Looks up the footer part by the id of the given reference and returns its footer element.
 *
 * @param footerRef reference whose id identifies the footer part
 * @return the footer element of the referenced part
 * @throws Exception if the part cannot be obtained from the provider
 */
private CTHdrFtr getFooter( CTHdrFtrRef footerRef )
    throws Exception
{
    final FtrDocument footerPart = provider.getFtrDocumentByPartId( footerRef.getId() );
    return footerPart.getFtr();
}
/**
 * Returns the image extractor configured in the options.
 *
 * @return whatever {@code options.getExtractor()} returns (presumably {@code null} when no
 *         extractor is configured)
 */
protected IImageExtractor getImageExtractor()
{
    final IImageExtractor configuredExtractor = this.options.getExtractor();
    return configuredExtractor;
}
/**
 * Tells whether the main document part (word/document.xml) is being parsed, that is, whether
 * neither a header nor a footer is currently being visited.
 *
 * @return {@code true} if no header and no footer is current, {@code false} otherwise
 */
protected boolean isWordDocumentPartParsing()
{
    final boolean insideHeaderOrFooter = currentHeader != null || currentFooter != null;
    return !insideHeaderOrFooter;
}
/**
 * Returns the table on top of the table stack without removing it.
 *
 * @return the top table, or {@code null} when the stack is empty
 */
public CTTbl getParentTable()
{
    return tables.isEmpty() ? null : tables.peek();
}
/**
 * Loads the binary content of an embedded picture.
 * <p>
 * The relation is resolved against the part currently being parsed (header, footer or main
 * document). The stream obtained from the provider is always closed after reading.
 *
 * @param picture the picture whose data is wanted
 * @return the picture bytes, or {@code null} when the picture has no blip, the blip has no
 *         embed id, or the provider has no stream for that id
 * @throws Exception if the stream cannot be obtained, read or closed
 */
public byte[] getPictureBytes( CTPicture picture )
    throws Exception
{
    CTBlipFillProperties blipProps = picture.getBlipFill();
    if ( blipProps == null || !blipProps.isSetBlip() )
    {
        // return null if Blip data is missing
        return null;
    }
    String blipId = blipProps.getBlip().getEmbed();
    if ( blipId == null || blipId.length() == 0 )
    {
        // No embed relation (for example a linked picture): there is nothing to load.
        return null;
    }
    InputStream in = provider.getInputStreamByRelId( getPartRelIdParsing(), blipId );
    if ( in == null )
    {
        return null;
    }
    try
    {
        return IOUtils.toByteArray( in );
    }
    finally
    {
        // Release the stream whether or not reading succeeded.
        in.close();
    }
}
/**
 * Returns the relation id of the part currently being parsed: the id of the current header
 * reference if there is one, otherwise the id of the current footer reference if there is one.
 *
 * @return the id of the active header/footer reference, or {@code null} when neither is set
 */
private String getPartRelIdParsing()
{
    final CTHdrFtrRef activeRef = currentHeaderRef != null ? currentHeaderRef : currentFooterRef;
    return activeRef != null ? activeRef.getId() : null;
}
}