// org.apache.poi.xwpf.converter.core.XWPFDocumentVisitor (Maven / Gradle / Ivy)
/**
* Copyright (C) 2011-2015 The XDocReport Team
*
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.apache.poi.xwpf.converter.core;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.xwpf.converter.core.styles.XWPFStylesDocument;
import org.apache.poi.xwpf.converter.core.utils.DxaUtil;
import org.apache.poi.xwpf.converter.core.utils.StringUtils;
import org.apache.poi.xwpf.converter.core.utils.XWPFRunHelper;
import org.apache.poi.xwpf.converter.core.utils.XWPFTableUtil;
import org.apache.poi.xwpf.usermodel.BodyElementType;
import org.apache.poi.xwpf.usermodel.BodyType;
import org.apache.poi.xwpf.usermodel.IBody;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFAbstractNum;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFFooter;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.apache.poi.xwpf.usermodel.XWPFHeaderFooter;
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
import org.apache.poi.xwpf.usermodel.XWPFNum;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFStyle;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlTokenSource;
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObject;
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTPosH;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTPosV;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTWrapSquare;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromH;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STWrapText;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtrRef;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtCell;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentBlock;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyle;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTabs;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.FtrDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.HdrDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBrType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STMerge;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
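/**
 * Visitor to visit elements from entry word/document.xml, word/header*.xml, word/footer*.xml
 *
 * @param <T> type of the containers exchanged between the startVisit/endVisit methods
 * @param <O> type of the conversion options given to the constructor
 * @param <E> type of the master page given to the header/footer visit methods
 */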
public abstract class XWPFDocumentVisitor
implements IMasterPageHandler
{
// Logger of this visitor; used to report image extraction failures.
private static final Logger LOGGER = Logger.getLogger( XWPFDocumentVisitor.class.getName() );
// Prefix prepended to the picture file name to build the name given to the image extractor.
protected static final String WORD_MEDIA = "word/media/";
// The document which is visited.
protected final XWPFDocument document;
// Created in the constructor from the CTDocument and this visitor.
private final MasterPageManager masterPageManager;
// Header currently visited (set only during visitHeaderRef), null otherwise.
private XWPFHeader currentHeader;
// Footer currently visited (set only during visitFooterRef), null otherwise.
private XWPFFooter currentFooter;
// Styles of the document, created by createStylesDocument in the constructor.
protected final XWPFStylesDocument stylesDocument;
// Options given to the constructor.
protected final O options;
// NOTE(review): presumably set when a page break must be emitted before the next paragraph; its usage is
// not visible in this part of the file - confirm.
private boolean pageBreakOnNextParagraph;
// Result of the NUMPAGES test of the last field instruction parsed by visitRuns; reset to false on field end.
protected boolean processingTotalPageCountField = false;
// Set to true by visitRuns as soon as a complex field whose instruction is not PAGE is parsed.
protected boolean totalPageFieldUsed = false;
/**
* Map of w:numId and ListContext. Lazily created by getListContext.
*/
private Map listContextMap;
/**
 * Creates a visitor for the given document.
 * <p>
 * Stores the document and the options, then builds the styles document with
 * {@link #createStylesDocument(XWPFDocument)} and finally the {@link MasterPageManager}, which receives this
 * visitor as handler.
 * <p>
 * NOTE(review): {@link #createStylesDocument(XWPFDocument)} is overridable and is called before the
 * subclass constructor has run; an override should only rely on its argument - confirm with subclasses.
 *
 * @param document the document to visit.
 * @param options the conversion options.
 * @throws Exception if the styles document or the master page manager cannot be created.
 */
public XWPFDocumentVisitor( XWPFDocument document, O options )
throws Exception
{
this.document = document;
this.options = options;
this.stylesDocument = createStylesDocument( document );
this.masterPageManager = new MasterPageManager( document.getDocument(), this );
}
/**
 * Creates the styles document of the given document. Called once, from the constructor; subclasses can
 * override it to provide another {@link XWPFStylesDocument}.
 *
 * @param document the document to visit.
 * @return a new {@link XWPFStylesDocument} built from the document.
 * @throws XmlException
 * @throws IOException
 */
protected XWPFStylesDocument createStylesDocument( XWPFDocument document )
throws XmlException, IOException
{
return new XWPFStylesDocument( document );
}
/**
 * Returns the styles document created by the constructor.
 *
 * @return the styles document.
 */
public XWPFStylesDocument getStylesDocument()
{
return stylesDocument;
}
/**
 * Returns the options given to the constructor.
 *
 * @return the conversion options.
 */
public O getOptions()
{
return options;
}
/**
 * Returns the master page manager created by the constructor.
 *
 * @return the master page manager.
 */
public MasterPageManager getMasterPageManager()
{
return masterPageManager;
}
// ------------------------------ Start/End document visitor -----------
/**
 * Main entry point: visits the whole XWPFDocument.
 * <p>
 * The document is opened with {@link #startVisitDocument()}, every body element of the document is visited
 * with the container returned by that call, and the document is closed with {@link #endVisitDocument()}.
 *
 * @throws Exception
 */
public void start()
    throws Exception
{
    // open the document and get the root container
    T rootContainer = startVisitDocument();
    // create the IText, XHTML... element for each XWPF element of the w:body
    visitBodyElements( document.getBodyElements(), rootContainer );
    // close the document
    endVisitDocument();
}
/**
* Start of visit document. Called by {@link #start()} before any body element is visited.
*
* @return the container which is given by {@link #start()} to the visit of the body elements of the document.
* @throws Exception
*/
protected abstract T startVisitDocument()
throws Exception;
/**
* End of visit document. Called by {@link #start()} once all body elements of the document have been visited.
*
* @throws Exception
*/
protected abstract void endVisitDocument()
throws Exception;
// ------------------------------ XWPF Elements visitor -----------
protected void visitBodyElements( List bodyElements, T container )
throws Exception
{
if ( !masterPageManager.isInitialized() )
{
// master page manager which hosts each <:w;sectPr declared in the word/document.xml
// must be initialized. The initialization loop for each
//
// -
//
// -
//
//
//
// get numbering.xml/w:num
/**
*
*/
XWPFNum num = getXWPFNum( numPr );
if ( num != null )
{
// get the abstractNum by usisng abstractNumId
/**
* -
* -
* -
*/
XWPFAbstractNum abstractNum = getXWPFAbstractNum( num );
// get the
*/
CTDecimalNumber ilvl = numPr.getIlvl();
int level = ilvl != null ? ilvl.getVal().intValue() : 0;
CTLvl lvl = abstractNum.getAbstractNum().getLvlArray( level );
if ( lvl != null )
{
ListContext listContext = getListContext( originalNumPr.getNumId().getVal().intValue() );
itemContext = listContext.addItem( lvl );
}
}
}
T paragraphContainer = startVisitParagraph( paragraph, itemContext, container );
visitParagraphBody( paragraph, index, paragraphContainer );
endVisitParagraph( paragraph, container, paragraphContainer );
}
/**
 * Resolves the numbering properties to use for the given w:numPr by following the w:numStyleLink of its
 * abstract numbering.
 * <p>
 * When the abstract numbering of the w:numPr declares a w:numStyleLink, the numbering properties of the
 * linked style are resolved recursively. Otherwise the given w:numPr is returned as is.
 *
 * @param numPr the numbering properties to resolve (can be null).
 * @return the given numPr when no w:numStyleLink applies, the numbering properties of the linked style
 *         otherwise, or null when the linked style cannot be found or has no paragraph properties.
 */
private CTNumPr getNumPr( CTNumPr numPr )
{
    if ( numPr == null )
    {
        return null;
    }
    XWPFNum num = getXWPFNum( numPr );
    if ( num == null )
    {
        return numPr;
    }
    // get the abstractNum by using abstractNumId
    XWPFAbstractNum abstractNum = getXWPFAbstractNum( num );
    if ( abstractNum == null )
    {
        // defensive: w:num which references an abstract numbering that cannot be resolved,
        // there is no link to follow.
        return numPr;
    }
    CTString numStyleLink = abstractNum.getAbstractNum().getNumStyleLink();
    String styleId = numStyleLink != null ? numStyleLink.getVal() : null;
    if ( styleId == null )
    {
        return numPr;
    }
    // has w:numStyleLink which references another style
    CTStyle style = stylesDocument.getStyle( styleId );
    // defensive: the linked style may be missing from the styles; handled like a style without w:pPr.
    CTPPr ppr = style != null ? style.getPPr() : null;
    if ( ppr == null )
    {
        return null;
    }
    return getNumPr( ppr.getNumPr() );
}
/**
 * Returns the list context registered for the given w:numId; the context (and the map which hosts the
 * contexts) is created on first request.
 *
 * @param numId the w:numId value.
 * @return the list context of the numbering, never null.
 */
private ListContext getListContext( int numId )
{
    if ( listContextMap == null )
    {
        listContextMap = new HashMap();
    }
    ListContext existing = listContextMap.get( numId );
    if ( existing != null )
    {
        return existing;
    }
    ListContext created = new ListContext();
    listContextMap.put( numId, created );
    return created;
}
/**
 * Start of the visit of a paragraph.
 *
 * @param paragraph the paragraph to visit.
 * @param itemContext the list item context of the paragraph; it is only computed when numbering properties
 *            are resolved for the paragraph (presumably null otherwise - confirm).
 * @param parentContainer the container which hosts the paragraph.
 * @return the paragraph container, given afterwards to the visit of the paragraph body and to
 *         {@link #endVisitParagraph(XWPFParagraph, Object, Object)}.
 * @throws Exception
 */
protected abstract T startVisitParagraph( XWPFParagraph paragraph, ListItemContext itemContext, T parentContainer )
throws Exception;
/**
 * End of the visit of a paragraph, called once the body of the paragraph has been visited.
 *
 * @param paragraph the visited paragraph.
 * @param parentContainer the container which hosts the paragraph.
 * @param paragraphContainer the container returned by the start of the visit of the paragraph.
 * @throws Exception
 */
protected abstract void endVisitParagraph( XWPFParagraph paragraph, T parentContainer, T paragraphContainer )
throws Exception;
protected void visitParagraphBody( XWPFParagraph paragraph, int index, T paragraphContainer )
throws Exception
{
List runs = paragraph.getRuns();
if ( runs.isEmpty() )
{
// a new line must be generated if :
// - there is next paragraph/table
// - if the body is a cell (with none vMerge) and contains just this paragraph
if ( isAddNewLine( paragraph, index ) )
{
visitEmptyRun( paragraphContainer );
}
// sometimes, POI tells that run is empty
// but it can be have w:r in the w:pPr
// => See the header1.xml of DocxBig.docx ,
// => test if it exist w:r
// CTP p = paragraph.getCTP();
// CTPPr pPr = p.getPPr();
// if (pPr != null) {
// XmlObject[] wRuns =
// pPr.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:r");
// if (wRuns != null) {
// for ( int i = 0; i < wRuns.length; i++ )
// {
// XmlObject o = wRuns[i];
// o.getDomNode().getParentNode()
// if (o instanceof CTR) {
// System.err.println(wRuns[i]);
// }
//
// }
// }
// }
// //XmlObject[] t =
// o.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
// //paragraph.getCTP().get
}
else
{
// Loop for each element of run) must generate new line and false otherwise.
*
* @param paragraph
* @param index
* @return
*/
// Tells whether a paragraph without run must generate a new line:
// - the paragraph is the single element of a table cell : a new line is generated only if the cell is not
//   the continuation of a vertical merge and if ALL the cells of the row hold a single paragraph without run.
// - otherwise : a new line is generated if an element (paragraph/table) follows the paragraph in its body.
private boolean isAddNewLine( XWPFParagraph paragraph, int index )
{
    IBody body = paragraph.getBody();
    List bodyElements = body.getBodyElements();
    if ( body.getPartType() == BodyType.TABLECELL && bodyElements.size() == 1 )
    {
        XWPFTableCell cell = (XWPFTableCell) body;
        STMerge.Enum vMerge = stylesDocument.getTableCellVMerge( cell );
        if ( vMerge != null && vMerge.equals( STMerge.CONTINUE ) )
        {
            // the cell continues a vertical merge and contains just this paragraph : no new line.
            return false;
        }
        // Loop for each cell of the row : a new line is generated only if all cells are empty.
        XWPFTableRow row = cell.getTableRow();
        List cells = row.getTableCells();
        for ( XWPFTableCell c : cells )
        {
            if ( c.getBodyElements().size() != 1 )
            {
                return false;
            }
            IBodyElement element = c.getBodyElements().get( 0 );
            if ( element.getElementType() != BodyElementType.PARAGRAPH )
            {
                return false;
            }
            // Only a non empty cell ends the loop. Returning here whatever the result (as done before)
            // tested the first cell of the row only.
            if ( !( (XWPFParagraph) element ).getRuns().isEmpty() )
            {
                return false;
            }
        }
        return true;
    }
    // here a new line must be generated if there is next paragraph/table
    return bodyElements.size() > index + 1;
}
/**
 * Visits the children of the w:p of the given paragraph, in document order.
 * <p>
 * Complex fields are tracked with their w:fldChar: on "begin" and on "end" the children buffered so far are
 * flushed with {@link #process}. Between "begin" and "end", runs which carry an instruction text are parsed
 * (PAGE, NUMPAGES, HYPERLINK) and every other child is buffered, so that it is visited later with the page
 * number flag and the url of the field. Outside a field, each child is visited immediately.
 *
 * @param paragraph the paragraph which owns the runs.
 * @param paragraphContainer the container of the paragraph.
 * @throws Exception
 */
private void visitRuns( XWPFParagraph paragraph, T paragraphContainer )
    throws Exception
{
    boolean fldCharTypeParsing = false;
    boolean pageNumber = false;
    String url = null;
    List rListAfterSeparate = null;
    CTP ctp = paragraph.getCTP();
    XmlCursor c = ctp.newCursor();
    try
    {
        c.selectPath( "child::*" );
        while ( c.toNextSelection() )
        {
            XmlObject o = c.getObject();
            if ( o instanceof CTR )
            {
                CTR r = (CTR) o;
                STFldCharType.Enum fldCharType = XWPFRunHelper.getFldCharType( r );
                if ( fldCharType != null )
                {
                    if ( fldCharType.equals( STFldCharType.BEGIN ) )
                    {
                        // start of a field : flush the pending elements and reset the field state
                        process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
                        fldCharTypeParsing = true;
                        rListAfterSeparate = new ArrayList();
                        pageNumber = false;
                        url = null;
                    }
                    else if ( fldCharType.equals( STFldCharType.END ) )
                    {
                        // end of the field : visit the buffered elements with the collected field data
                        process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
                        fldCharTypeParsing = false;
                        rListAfterSeparate = null;
                        pageNumber = false;
                        processingTotalPageCountField = false;
                        url = null;
                    }
                }
                else if ( fldCharTypeParsing )
                {
                    String instrText = XWPFRunHelper.getInstrText( r );
                    if ( instrText == null )
                    {
                        // run of the field without instruction : visited when the field is flushed
                        rListAfterSeparate.add( r );
                    }
                    else if ( StringUtils.isNotEmpty( instrText ) )
                    {
                        // test if it's PAGE
                        if ( XWPFRunHelper.isInstrTextPage( instrText ) )
                        {
                            pageNumber = true;
                        }
                        else
                        {
                            // test if it's NUMPAGES
                            processingTotalPageCountField = XWPFRunHelper.isInstrTextNumpages( instrText );
                            // NOTE(review): the flag is raised for every instruction which is not PAGE,
                            // even when it is not NUMPAGES - confirm whether this is intended.
                            totalPageFieldUsed = true;
                            // test if it's HYPERLINK
                            String instrTextHyperlink = XWPFRunHelper.getInstrTextHyperlink( instrText );
                            if ( instrTextHyperlink != null )
                            {
                                url = instrTextHyperlink;
                            }
                        }
                    }
                }
                else
                {
                    XWPFRun run = new XWPFRun( r, paragraph );
                    visitRun( run, false, null, paragraphContainer );
                }
            }
            else if ( fldCharTypeParsing )
            {
                rListAfterSeparate.add( o );
            }
            else
            {
                visitRun( paragraph, o, paragraphContainer );
            }
        }
    }
    finally
    {
        // release the cursor even if the visit of an element fails
        c.dispose();
    }
    // flush the elements of a field which has not been closed in this paragraph
    process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
}
/**
 * Visits the elements which were buffered while a field was parsed. A w:r is visited as run with the given
 * page number flag and url, any other element is dispatched according to its type. Nothing is done when no
 * buffer exists.
 *
 * @param paragraph the paragraph which owns the elements.
 * @param paragraphContainer the container of the paragraph.
 * @param pageNumber true if the field is a page number field.
 * @param url the url of the field (can be null).
 * @param rListAfterSeparate the buffered elements (can be null).
 * @throws Exception
 */
private void process( XWPFParagraph paragraph, T paragraphContainer, boolean pageNumber, String url,
                      List rListAfterSeparate )
    throws Exception
{
    if ( rListAfterSeparate == null )
    {
        return;
    }
    for ( XmlObject buffered : rListAfterSeparate )
    {
        if ( !( buffered instanceof CTR ) )
        {
            visitRun( paragraph, buffered, paragraphContainer );
            continue;
        }
        XWPFRun fieldRun = new XWPFRun( (CTR) buffered, paragraph );
        visitRun( fieldRun, pageNumber, url, paragraphContainer );
    }
}
/**
 * Visits a child of w:p which is not a plain w:r: hyperlink, structured document tag, tracked change, simple
 * field or bookmark. The runs hosted by the element are visited one by one. Smart tags and any other kind of
 * element are ignored.
 *
 * @param paragraph the paragraph which owns the element.
 * @param o the element to visit.
 * @param paragraphContainer the container of the paragraph.
 * @throws Exception
 */
private void visitRun( XWPFParagraph paragraph, XmlObject o, T paragraphContainer )
    throws Exception
{
    if ( o instanceof CTHyperlink )
    {
        CTHyperlink link = (CTHyperlink) o;
        String anchor = link.getAnchor();
        String href = null;
        // Test if there is an id for the hyperlink
        String hyperlinkId = link.getId();
        if ( StringUtils.isNotEmpty( hyperlinkId ) )
        {
            XWPFHyperlink hyperlink = document.getHyperlinkByID( hyperlinkId );
            href = hyperlink != null ? hyperlink.getURL() : null;
        }
        // external target first, internal anchor otherwise. When none is available no url is given,
        // instead of the meaningless "#null".
        String url = href;
        if ( url == null && anchor != null )
        {
            url = "#" + anchor;
        }
        for ( CTR r : link.getRList() )
        {
            XWPFRun run = new XWPFHyperlinkRun( link, r, paragraph );
            visitRun( run, false, url, paragraphContainer );
        }
    }
    else if ( o instanceof CTSdtRun )
    {
        CTSdtContentRun content = ( (CTSdtRun) o ).getSdtContent();
        // a w:sdt can be declared without w:sdtContent
        if ( content != null )
        {
            for ( CTR r : content.getRList() )
            {
                XWPFRun run = new XWPFRun( r, paragraph );
                visitRun( run, false, null, paragraphContainer );
            }
        }
    }
    else if ( o instanceof CTRunTrackChange )
    {
        for ( CTR r : ( (CTRunTrackChange) o ).getRList() )
        {
            XWPFRun run = new XWPFRun( r, paragraph );
            visitRun( run, false, null, paragraphContainer );
        }
    }
    else if ( o instanceof CTSimpleField )
    {
        CTSimpleField simpleField = (CTSimpleField) o;
        String instr = simpleField.getInstr();
        // 1) test if it's page number
        boolean fieldPageNumber = XWPFRunHelper.isInstrTextPage( instr );
        String fieldHref = null;
        if ( !fieldPageNumber )
        {
            // 2) not page number, test if it's hyperlink :
            // HYPERLINK "http://code.google.com/p/xdocrepor"
            fieldHref = XWPFRunHelper.getInstrTextHyperlink( instr );
        }
        for ( CTR r : simpleField.getRList() )
        {
            XWPFRun run = new XWPFRun( r, paragraph );
            visitRun( run, fieldPageNumber, fieldHref, paragraphContainer );
        }
    }
    else if ( o instanceof CTSmartTagRun )
    {
        // Smart Tags can be nested many times.
        // This implementation does not preserve the tagging information
        // buildRunsInOrderFromXml(o);
    }
    else if ( o instanceof CTBookmark )
    {
        CTBookmark bookmark = (CTBookmark) o;
        visitBookmark( bookmark, paragraph, paragraphContainer );
    }
}
/**
 * Visits an empty run. Called for a paragraph without run when a new line must be generated for it.
 *
 * @param paragraphContainer the container of the paragraph.
 * @throws Exception
 */
protected abstract void visitEmptyRun( T paragraphContainer )
throws Exception;
protected void visitRun( XWPFRun run, boolean pageNumber, String url, T paragraphContainer )
throws Exception
{
CTR ctr = run.getCTR();
CTRPr rPr = ctr.getRPr();
boolean hasTexStyles = rPr != null && (rPr.getHighlight() != null || rPr.getStrike() != null ||
rPr.getDstrike() != null || rPr.getVertAlign() != null ) ;
StringBuilder text = new StringBuilder();
// Loop for each element of rows = table.getRows();
int rowsSize = rows.size();
for ( int i = 0; i < rowsSize; i++ )
{
firstRow = ( i == 0 );
lastRow = isLastRow( i, rowsSize );
XWPFTableRow row = rows.get( i );
visitTableRow( row, colWidths, tableContainer, firstRow, lastRow, i, rowsSize );
}
}
/**
 * Returns true if the given 0-based row index is the index of the last row and false otherwise.
 *
 * @param rowIndex the 0-based index of the row.
 * @param rowsSize the number of rows.
 * @return true if the row is the last one.
 */
private boolean isLastRow( int rowIndex, int rowsSize )
{
    return rowIndex + 1 == rowsSize;
}
/**
 * Start of the visit of a table.
 *
 * @param table the table to visit.
 * @param colWidths the widths of the columns of the table; its length is used as the column count when rows
 *            are visited.
 * @param tableContainer NOTE(review): presumably the container which hosts the table - confirm with the
 *            caller, which is not visible here.
 * @return the container of the table.
 * @throws Exception
 */
protected abstract T startVisitTable( XWPFTable table, float[] colWidths, T tableContainer )
throws Exception;
/**
 * End of the visit of a table.
 *
 * @param table the visited table.
 * @param parentContainer the container which hosts the table.
 * @param tableContainer the container of the table (presumably the one returned by the start of the visit of
 *            the table - confirm).
 * @throws Exception
 */
protected abstract void endVisitTable( XWPFTable table, T parentContainer, T tableContainer )
throws Exception;
/**
 * Visits the given row: {@link #startVisitTableRow}, then each cell of the row, then
 * {@link #endVisitTableRow}.
 * <p>
 * A cell which continues a vertical merge is skipped, because it is visited with the cell which starts the
 * merge. When the row declares less cells than the table has columns, the cells are read from the XML of
 * the row, which also gives access to the cells wrapped in a w:sdt.
 *
 * @param row the row to visit.
 * @param colWidths the widths of the columns of the table.
 * @param tableContainer the container of the table.
 * @param firstRow true if the row is the first row of the table.
 * @param lastRowIfNoneVMerge true if the row is the last row of the table (used for the cells which are not
 *            vertically merged).
 * @param rowIndex the 0-based index of the row.
 * @param rowsSize the number of rows of the table.
 * @throws Exception
 */
protected void visitTableRow( XWPFTableRow row, float[] colWidths, T tableContainer, boolean firstRow,
                              boolean lastRowIfNoneVMerge, int rowIndex, int rowsSize )
    throws Exception
{
    boolean headerRow = stylesDocument.isTableRowHeader( row );
    startVisitTableRow( row, tableContainer, rowIndex, headerRow );
    int nbColumns = colWidths.length;
    // Process cell
    boolean firstCol = true;
    boolean lastCol = false;
    boolean lastRow = false;
    List vMergedCells = null;
    List cells = row.getTableCells();
    if ( nbColumns > cells.size() )
    {
        // Columns number is not equal to cells number (grid span, or w:tc wrapped in a w:sdt which POI
        // does not include in row.getTableCells()) : read the cells from the XML of the row.
        int cellIndex = -1;
        CTRow ctRow = row.getCtRow();
        XmlCursor c = ctRow.newCursor();
        try
        {
            c.selectPath( "./*" );
            while ( c.toNextSelection() )
            {
                XmlObject o = c.getObject();
                if ( o instanceof CTTc )
                {
                    CTTc tc = (CTTc) o;
                    XWPFTableCell cell = row.getTableCell( tc );
                    // cellIndex is the 0-based index of the last grid column covered by the cell
                    cellIndex = getCellIndex( cellIndex, cell );
                    lastCol = ( cellIndex == nbColumns - 1 );
                    vMergedCells = getVMergedCells( cell, rowIndex, cellIndex );
                    if ( vMergedCells == null || vMergedCells.size() > 0 )
                    {
                        lastRow = isLastRow( lastRowIfNoneVMerge, rowIndex, rowsSize, vMergedCells );
                        visitCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, rowIndex, cellIndex,
                                   vMergedCells );
                    }
                    firstCol = false;
                }
                else if ( o instanceof CTSdtCell )
                {
                    // Fix bug of POI
                    CTSdtCell sdtCell = (CTSdtCell) o;
                    List tcList = sdtCell.getSdtContent().getTcList();
                    for ( CTTc ctTc : tcList )
                    {
                        XWPFTableCell cell = new XWPFTableCell( ctTc, row, row.getTable().getBody() );
                        cellIndex = getCellIndex( cellIndex, cell );
                        lastCol = ( cellIndex == nbColumns - 1 );
                        List rowCells = row.getTableCells();
                        if ( !rowCells.contains( cell ) )
                        {
                            rowCells.add( cell );
                        }
                        vMergedCells = getVMergedCells( cell, rowIndex, cellIndex );
                        if ( vMergedCells == null || vMergedCells.size() > 0 )
                        {
                            lastRow = isLastRow( lastRowIfNoneVMerge, rowIndex, rowsSize, vMergedCells );
                            visitCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, rowIndex,
                                       cellIndex, vMergedCells );
                        }
                        firstCol = false;
                    }
                }
            }
        }
        finally
        {
            // release the cursor even if the visit of a cell fails
            c.dispose();
        }
    }
    else
    {
        // Column number is equal to cells number.
        for ( int i = 0; i < cells.size(); i++ )
        {
            lastCol = ( i == cells.size() - 1 );
            XWPFTableCell cell = cells.get( i );
            vMergedCells = getVMergedCells( cell, rowIndex, i );
            if ( vMergedCells == null || vMergedCells.size() > 0 )
            {
                lastRow = isLastRow( lastRowIfNoneVMerge, rowIndex, rowsSize, vMergedCells );
                visitCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, rowIndex, i, vMergedCells );
            }
            firstCol = false;
        }
    }
    endVisitTableRow( row, tableContainer, firstRow, lastRow, headerRow );
}
/**
 * Returns true if the cell ends on the last row of the table. For a cell which is not vertically merged
 * (no merged cells list) the given flag is returned; otherwise the test is done with the index of the last
 * row covered by the merge.
 *
 * @param lastRowIfNoneVMerge the result to return when the cell is not vertically merged.
 * @param rowIndex the 0-based index of the row of the cell.
 * @param rowsSize the number of rows of the table.
 * @param vMergedCells the vertically merged cells, or null.
 * @return true if the cell ends on the last row.
 */
private boolean isLastRow( boolean lastRowIfNoneVMerge, int rowIndex, int rowsSize, List vMergedCells )
{
    return vMergedCells == null ? lastRowIfNoneVMerge
                    : isLastRow( rowIndex - 1 + vMergedCells.size(), rowsSize );
}
/**
 * Computes the index of the given cell from the index of the previous one: the previous index is increased
 * by the grid span of the cell, or by one when the cell declares no grid span.
 *
 * @param cellIndex the index of the previous cell (-1 before the first cell).
 * @param cell the cell.
 * @return the index of the cell.
 */
private int getCellIndex( int cellIndex, XWPFTableCell cell )
{
    BigInteger gridSpan = stylesDocument.getTableCellGridSpan( cell.getCTTc().getTcPr() );
    int step = gridSpan != null ? gridSpan.intValue() : 1;
    return cellIndex + step;
}
/**
 * Start of the visit of a row, called before the cells of the row are visited. Does nothing by default;
 * subclasses can override it.
 *
 * @param row the row to visit.
 * @param tableContainer the container of the table.
 * @param rowIndex the 0-based index of the row.
 * @param headerRow true if the row is a header row according to the styles document.
 * @throws Exception
 */
protected void startVisitTableRow( XWPFTableRow row, T tableContainer, int rowIndex, boolean headerRow )
throws Exception
{
}
/**
 * End of the visit of a row, called once the cells of the row have been visited. Does nothing by default;
 * subclasses can override it.
 *
 * @param row the visited row.
 * @param tableContainer the container of the table.
 * @param firstRow true if the row is the first row of the table.
 * @param lastRow the last row flag computed for the last visited cell of the row.
 * @param headerRow true if the row is a header row according to the styles document.
 * @throws Exception
 */
protected void endVisitTableRow( XWPFTableRow row, T tableContainer, boolean firstRow, boolean lastRow,
boolean headerRow )
throws Exception
{
}
/**
 * Visits a table cell: start of the cell, body of the cell (and of the cells vertically merged with it),
 * end of the cell.
 *
 * @param cell the cell to visit.
 * @param tableContainer the container of the table.
 * @param firstRow true if the cell belongs to the first row.
 * @param lastRow true if the cell ends on the last row.
 * @param firstCol true if the cell is the first one of its row.
 * @param lastCol true if the cell is the last one of its row.
 * @param rowIndex the 0-based index of the row.
 * @param cellIndex the index of the cell.
 * @param vMergedCells the cells vertically merged with the cell (the cell included), or null.
 * @throws Exception
 */
protected void visitCell( XWPFTableCell cell, T tableContainer, boolean firstRow, boolean lastRow,
                          boolean firstCol, boolean lastCol, int rowIndex, int cellIndex,
                          List vMergedCells )
    throws Exception
{
    // open the cell
    T cellContainer =
        startVisitTableCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, vMergedCells );
    // fill it
    visitTableCellBody( cell, vMergedCells, cellContainer );
    // close it
    endVisitTableCell( cell, tableContainer, cellContainer );
}
/**
 * Returns the cells which are vertically merged with the given cell.
 *
 * @param cell the cell.
 * @param rowIndex the 0-based index of the row of the cell.
 * @param cellIndex the index used to retrieve the cell of the following rows.
 * @return null if the cell declares no vMerge, an empty list if the cell continues a merge (it was already
 *         processed with the cell which starts the merge), and otherwise the cell followed by the cells of
 *         the next rows as long as they declare vMerge="continue".
 */
private List getVMergedCells( XWPFTableCell cell, int rowIndex, int cellIndex )
{
    STMerge.Enum vMerge = stylesDocument.getTableCellVMerge( cell );
    if ( vMerge == null )
    {
        // no vertical merge
        return null;
    }
    if ( !vMerge.equals( STMerge.RESTART ) )
    {
        // vMerge="continue", ignore the cell because it was already processed
        return Collections.emptyList();
    }
    // vMerge="restart" : collect the cells of the following rows until a cell which doesn't continue the
    // merge (or a missing cell) is found.
    List merged = new ArrayList();
    merged.add( cell );
    XWPFTable table = cell.getTableRow().getTable();
    for ( int i = rowIndex + 1; i < table.getRows().size(); i++ )
    {
        XWPFTableCell below = table.getRow( i ).getCell( cellIndex );
        if ( below == null )
        {
            break;
        }
        STMerge.Enum belowMerge = stylesDocument.getTableCellVMerge( below );
        if ( belowMerge == null || !belowMerge.equals( STMerge.CONTINUE ) )
        {
            break;
        }
        merged.add( below );
    }
    return merged;
}
/**
 * Visits the body elements of the given cell. When the cell is vertically merged, the body elements of each
 * merged cell are visited, in order, in the same cell container.
 *
 * @param cell the cell to visit.
 * @param vMergeCells the cells vertically merged with the cell, or null if the cell is not merged.
 * @param tableCellContainer the container of the cell.
 * @throws Exception
 */
protected void visitTableCellBody( XWPFTableCell cell, List vMergeCells, T tableCellContainer )
    throws Exception
{
    if ( vMergeCells == null )
    {
        // simple cell
        visitBodyElements( cell.getBodyElements(), tableCellContainer );
        return;
    }
    for ( XWPFTableCell mergedCell : vMergeCells )
    {
        visitBodyElements( mergedCell.getBodyElements(), tableCellContainer );
    }
}
/**
 * Start of the visit of a table cell. Called before the body of the cell is visited.
 *
 * @param cell the cell to visit.
 * @param tableContainer the container of the table.
 * @param firstRow true if the cell belongs to the first row.
 * @param lastRow true if the cell ends on the last row.
 * @param firstCol true if the cell is the first one of its row.
 * @param lastCol true if the cell is the last one of its row.
 * @param vMergeCells the cells vertically merged with the cell (the cell included), or null if the cell is not
 *            vertically merged.
 * @return the container of the cell, given afterwards to the visit of the cell body and to
 *         {@link #endVisitTableCell}.
 * @throws Exception
 */
protected abstract T startVisitTableCell( XWPFTableCell cell, T tableContainer, boolean firstRow, boolean lastRow,
boolean firstCol, boolean lastCol, List vMergeCells )
throws Exception;
/**
 * End of the visit of a table cell, called once the body of the cell has been visited.
 *
 * @param cell the visited cell.
 * @param tableContainer the container of the table.
 * @param tableCellContainer the container returned by the start of the visit of the cell.
 * @throws Exception
 */
protected abstract void endVisitTableCell( XWPFTableCell cell, T tableContainer, T tableCellContainer )
throws Exception;
/**
 * Returns the style of the document for the given style id.
 *
 * @param styleID the style id (can be null).
 * @return null if the style id is null, otherwise the result of the lookup in the styles of the document.
 */
protected XWPFStyle getXWPFStyle( String styleID )
{
    return styleID != null ? document.getStyles().getStyle( styleID ) : null;
}
/**
 * Returns true if word/document.xml is parsing and false otherwise, in other words true when neither a
 * header nor a footer is currently visited.
 *
 * @return true if word/document.xml is parsing and false otherwise.
 */
protected boolean isWordDocumentPartParsing()
{
    boolean headerOrFooterParsing = currentHeader != null || currentFooter != null;
    return !headerOrFooterParsing;
}
// ------------------------------ Header/Footer visitor -----------
/**
 * Visits the header of the given header reference. The header is the current header of the visitor during
 * the whole visit.
 *
 * @param headerRef the header reference.
 * @param sectPr the section which declares the reference.
 * @param masterPage the master page of the section.
 * @throws Exception
 */
public void visitHeaderRef( CTHdrFtrRef headerRef, CTSectPr sectPr, E masterPage )
    throws Exception
{
    this.currentHeader = getXWPFHeader( headerRef );
    try
    {
        visitHeader( currentHeader, headerRef, sectPr, masterPage );
    }
    finally
    {
        // always leave the "header parsing" state, even if the visit fails
        this.currentHeader = null;
    }
}
/**
 * Visits the given header. Called while the header is the current header of the visitor.
 *
 * @param header the header resolved from the header reference.
 * @param headerRef the header reference.
 * @param sectPr the section which declares the reference.
 * @param masterPage the master page of the section.
 * @throws Exception
 */
protected abstract void visitHeader( XWPFHeader header, CTHdrFtrRef headerRef, CTSectPr sectPr, E masterPage )
throws Exception;
/**
 * Visits the footer of the given footer reference. The footer is the current footer of the visitor during
 * the whole visit.
 *
 * @param footerRef the footer reference.
 * @param sectPr the section which declares the reference.
 * @param masterPage the master page of the section.
 * @throws Exception
 */
public void visitFooterRef( CTHdrFtrRef footerRef, CTSectPr sectPr, E masterPage )
    throws Exception
{
    this.currentFooter = getXWPFFooter( footerRef );
    try
    {
        visitFooter( currentFooter, footerRef, sectPr, masterPage );
    }
    finally
    {
        // always leave the "footer parsing" state, even if the visit fails
        this.currentFooter = null;
    }
}
/**
 * Visits the given footer. Called while the footer is the current footer of the visitor.
 *
 * @param footer the footer resolved from the footer reference.
 * @param footerRef the footer reference.
 * @param sectPr the section which declares the reference.
 * @param masterPage the master page of the section.
 * @throws Exception
 */
protected abstract void visitFooter( XWPFFooter footer, CTHdrFtrRef footerRef, CTSectPr sectPr, E masterPage )
throws Exception;
/**
* Returns the list of {@link IBodyElement} of the given header/footer. We do that because
* {@link XWPFHeaderFooter#getBodyElements()} doesn't contains the //
* getBodyElements( XWPFHeaderFooter part )
{
List bodyElements = new ArrayList();
XmlTokenSource headerFooter = part._getHdrFtr();
addBodyElements( headerFooter, part, bodyElements );
return bodyElements;
}
/**
* Add body elements from the given token source.
*
* @param source
* @param part
* @param bodyElements
*/
private void addBodyElements( XmlTokenSource source, IBody part, List bodyElements )
{
// parse the document with cursor and add
// the XmlObject to its lists
XmlCursor cursor = source.newCursor();
cursor.selectPath( "./*" );
while ( cursor.toNextSelection() )
{
XmlObject o = cursor.getObject();
if ( o instanceof CTSdtBlock )
{
// headers = document.getHeaderList();
for ( XWPFHeader header : headers )
{
if ( header.getPackagePart().equals( hdrPart ) )
{
// header is aleady loaded, return it.
return header;
}
}
// should never come, but load the header if needed.
HdrDocument hdrDoc = HdrDocument.Factory.parse( hdrPart.getInputStream() );
CTHdrFtr hdrFtr = hdrDoc.getHdr();
XWPFHeader hdr = new XWPFHeader( document, hdrFtr );
return hdr;
}
/**
 * Returns the {@link XWPFFooter} of the given footer reference.
 * <p>
 * The footers already loaded by the document are searched first (match on the package part). If none
 * matches, the footer is parsed from the package part.
 *
 * @param footerRef the footer reference.
 * @return the footer of the reference.
 * @throws XmlException if the footer part cannot be parsed.
 * @throws IOException if the footer part cannot be read.
 */
protected XWPFFooter getXWPFFooter( CTHdrFtrRef footerRef )
    throws XmlException, IOException
{
    PackagePart ftrPart = document.getPartById( footerRef.getId() );
    List footers = document.getFooterList();
    for ( XWPFFooter footer : footers )
    {
        if ( footer.getPackagePart().equals( ftrPart ) )
        {
            // footer is already loaded, return it.
            return footer;
        }
    }
    // should never come, but load the footer if needed.
    java.io.InputStream in = ftrPart.getInputStream();
    try
    {
        FtrDocument ftrDoc = FtrDocument.Factory.parse( in );
        return new XWPFFooter( document, ftrDoc.getFtr() );
    }
    finally
    {
        // the stream of the part is opened here, so it is closed here too
        in.close();
    }
}
// ------------------------ Image --------------
/**
 * Visits the pictures of the given drawing: the inline pictures first, the anchored pictures afterwards.
 *
 * @param drawing the w:drawing to visit.
 * @param parentContainer the container which hosts the drawing.
 * @throws Exception
 */
protected void visitDrawing( CTDrawing drawing, T parentContainer )
    throws Exception
{
    // wp:inline
    for ( CTInline inline : drawing.getInlineList() )
    {
        visitInline( inline, parentContainer );
    }
    // wp:anchor
    for ( CTAnchor anchor : drawing.getAnchorList() )
    {
        visitAnchor( anchor, parentContainer );
    }
}
/**
 * Visits an anchored (floating) drawing. The horizontal and vertical positions (relative origin and offset
 * converted from EMU to points) and the text wrapping are read from the anchor when they are declared, and
 * given with the graphic to the visit of the graphical object.
 *
 * @param anchor the wp:anchor to visit.
 * @param parentContainer the container which hosts the drawing.
 * @throws Exception
 */
protected void visitAnchor( CTAnchor anchor, T parentContainer )
    throws Exception
{
    STRelFromH.Enum relativeFromH = null;
    Float offsetX = null;
    STRelFromV.Enum relativeFromV = null;
    Float offsetY = null;
    STWrapText.Enum wrapText = null;

    CTGraphicalObject graphic = anchor.getGraphic();

    // wp:positionH
    CTPosH horizontal = anchor.getPositionH();
    if ( horizontal != null )
    {
        relativeFromH = horizontal.getRelativeFrom();
        offsetX = DxaUtil.emu2points( horizontal.getPosOffset() );
    }

    // wp:positionV
    CTPosV vertical = anchor.getPositionV();
    if ( vertical != null )
    {
        relativeFromV = vertical.getRelativeFrom();
        offsetY = DxaUtil.emu2points( vertical.getPosOffset() );
    }

    // wp:wrapSquare
    CTWrapSquare square = anchor.getWrapSquare();
    if ( square != null )
    {
        wrapText = square.getWrapText();
    }

    visitGraphicalObject( parentContainer, graphic, offsetX, relativeFromH, offsetY, relativeFromV, wrapText );
}
/**
 * Visits an inline drawing: its graphic is visited without any position or wrapping information.
 *
 * @param inline the wp:inline to visit.
 * @param parentContainer the container which hosts the drawing.
 * @throws Exception
 */
protected void visitInline( CTInline inline, T parentContainer )
    throws Exception
{
    visitGraphicalObject( parentContainer, inline.getGraphic(), null, null, null, null, null );
}
/**
 * Visits the pictures declared in the graphic data of the given graphic. For each picture, the image is
 * given to the image extractor (if any, and if the picture data is found), then the picture is visited.
 * A failure of the extraction is logged and doesn't stop the visit.
 *
 * @param parentContainer the container which hosts the drawing.
 * @param graphic the graphic (can be null).
 * @param offsetX the horizontal offset in points, or null.
 * @param relativeFromH the origin of the horizontal offset, or null.
 * @param offsetY the vertical offset in points, or null.
 * @param relativeFromV the origin of the vertical offset, or null.
 * @param wrapText the text wrapping, or null.
 * @throws Exception
 */
private void visitGraphicalObject( T parentContainer, CTGraphicalObject graphic, Float offsetX,
                                   STRelFromH.Enum relativeFromH, Float offsetY, STRelFromV.Enum relativeFromV,
                                   STWrapText.Enum wrapText )
    throws Exception
{
    if ( graphic == null )
    {
        return;
    }
    CTGraphicalObjectData graphicData = graphic.getGraphicData();
    if ( graphicData == null )
    {
        return;
    }
    XmlCursor c = graphicData.newCursor();
    try
    {
        c.selectPath( "./*" );
        while ( c.toNextSelection() )
        {
            XmlObject o = c.getObject();
            if ( !( o instanceof CTPicture ) )
            {
                continue;
            }
            CTPicture picture = (CTPicture) o;
            // extract the picture if needed
            IImageExtractor extractor = getImageExtractor();
            if ( extractor != null )
            {
                XWPFPictureData pictureData = getPictureData( picture );
                if ( pictureData != null )
                {
                    try
                    {
                        extractor.extract( WORD_MEDIA + pictureData.getFileName(), pictureData.getData() );
                    }
                    catch ( Throwable e )
                    {
                        // best effort : the picture is visited even if its extraction fails
                        LOGGER.log( Level.SEVERE,
                                    "Error while extracting the image " + pictureData.getFileName(), e );
                    }
                }
            }
            // visit the picture.
            visitPicture( picture, offsetX, relativeFromH, offsetY, relativeFromV, wrapText, parentContainer );
        }
    }
    finally
    {
        // release the cursor even if the visit of a picture fails
        c.dispose();
    }
}
/**
 * Returns the picture data of the given image id. The id is resolved against the part which is currently
 * visited: the current header if any, else the current footer if any, else the document.
 *
 * @param blipId the image id.
 * @return the picture data returned by the part.
 */
protected XWPFPictureData getPictureDataByID( String blipId )
{
    XWPFPictureData pictureData;
    if ( currentHeader != null )
    {
        pictureData = currentHeader.getPictureDataByID( blipId );
    }
    else if ( currentFooter != null )
    {
        pictureData = currentFooter.getPictureDataByID( blipId );
    }
    else
    {
        pictureData = document.getPictureDataByID( blipId );
    }
    return pictureData;
}
/**
* Returns the image extractor declared by the options. Callers in this class treat a null result as
* "no extraction".
*
* @return the extractor of the options (can be null).
*/
protected IImageExtractor getImageExtractor()
{
return options.getExtractor();
}
/**
 * Returns the picture data of the given picture. The picture data is searched with the embed id of the blip
 * of the picture.
 *
 * @param picture the picture.
 * @return the picture data, or null if the picture (or its blip fill, or its blip) is missing; otherwise
 *         the result of {@link #getPictureDataByID(String)} for the embed id of the blip.
 */
public XWPFPictureData getPictureData( CTPicture picture )
{
    // a picture without pic:blipFill/a:blip references no image
    if ( picture == null || picture.getBlipFill() == null || picture.getBlipFill().getBlip() == null )
    {
        return null;
    }
    String blipId = picture.getBlipFill().getBlip().getEmbed();
    return getPictureDataByID( blipId );
}
/**
 * Visits a picture. Called for each picture found in the graphic data of an inline or anchored drawing.
 * For an inline drawing all the position and wrapping arguments are null; for an anchored drawing each of
 * them is null when the anchor does not declare the matching element.
 *
 * @param picture the picture to visit.
 * @param offsetX the horizontal offset in points, or null.
 * @param relativeFromH the origin of the horizontal offset, or null.
 * @param offsetY the vertical offset in points, or null.
 * @param relativeFromV the origin of the vertical offset, or null.
 * @param wrapText the text wrapping, or null.
 * @param parentContainer the container which hosts the drawing.
 * @throws Exception
 */
protected abstract void visitPicture( CTPicture picture, Float offsetX, STRelFromH.Enum relativeFromH,
Float offsetY, STRelFromV.Enum relativeFromV, STWrapText.Enum wrapText,
T parentContainer )
throws Exception;
}