// com.github.pjfanning.xlsx.impl.StreamingSheetReader Maven / Gradle / Ivy
// The newest version!
package com.github.pjfanning.xlsx.impl;
import com.github.pjfanning.xlsx.CloseableIterator;
import com.github.pjfanning.xlsx.SharedFormula;
import com.github.pjfanning.xlsx.StreamingReader;
import com.github.pjfanning.xlsx.exceptions.CloseException;
import com.github.pjfanning.xlsx.exceptions.OpenException;
import com.github.pjfanning.xlsx.exceptions.ReadException;
import com.github.pjfanning.xlsx.impl.ooxml.HyperlinkData;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.ss.util.PaneInformation;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xssf.model.Comments;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.StylesTable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFShape;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import java.io.IOException;
import java.util.*;
import static com.github.pjfanning.xlsx.impl.NumberUtil.parseInt;
/**
 * Package-internal reader for a single worksheet part of a streamed workbook.
 *
 * <p>Row data is exposed through {@link #iterator()}; every call creates a new
 * {@link StreamingRowIterator} over a fresh input stream of the sheet part. The reader also
 * holds sheet-level state (hidden columns, column widths, merged cells, hyperlinks, shared
 * formulas, pane and active cell) in collections and fields that are handed to, or set on
 * behalf of, those iterators.
 */
public class StreamingSheetReader implements Iterable<Row> {
  private static final Logger LOG = LoggerFactory.getLogger(StreamingSheetReader.class);
  // Created lazily by getXmlInputFactory() and then reused for every sheet reader.
  private static XMLInputFactory xmlInputFactory;

  private final StreamingWorkbookReader streamingWorkbookReader;
  private final PackagePart packagePart;
  private final SharedStrings sst;
  private final StylesTable stylesTable;
  private final Comments commentsTable;
  private final boolean use1904Dates;
  private final int rowCacheSize;

  // The collections below are passed to each StreamingRowIterator created by iterator().
  private final Set<Integer> hiddenColumns = new HashSet<>();
  private final Map<Integer, Float> columnWidths = new HashMap<>();
  // LinkedHashSet prevents duplicates while keeping insertion order
  private final Set<CellRangeAddress> mergedCells = new LinkedHashSet<>();
  // Iterators created by iterator() that have not been removed via removeIterator()
  private final List<StreamingRowIterator> iterators = new ArrayList<>();
  // LinkedHashSet prevents duplicates while keeping insertion order
  private final Set<HyperlinkData> hyperlinks = new LinkedHashSet<>();

  // Built on demand by initHyperlinks()
  private List<XlsxHyperlink> xlsxHyperlinks;
  // Created on the first addSharedFormula() call; stays null until then
  private Map<String, SharedFormula> sharedFormulaMap;
  private int firstRowNum;
  private int lastRowNum;
  private float defaultRowHeight;
  private int baseColWidth = 8; //POI XSSFSheet default
  private StreamingSheet sheet;
  private CellAddress activeCell;
  private PaneInformation pane;

  /**
   * Creates a reader for one sheet part. No stream is opened here; that happens in
   * {@link #iterator()}.
   *
   * @param streamingWorkbookReader the owning workbook reader (source of the builder settings,
   *                                the workbook and the shapes)
   * @param packagePart             the package part holding the sheet XML
   * @param sst                     the shared strings handed to each row iterator
   * @param stylesTable             the styles table used to resolve cell formats, may be null
   * @param commentsTable           the comments for this sheet, closed by {@link #close()} when
   *                                it is {@link AutoCloseable}
   * @param use1904Dates            the 1904 date system flag handed to each row iterator
   * @param rowCacheSize            the row cache size handed to each row iterator
   */
  StreamingSheetReader(final StreamingWorkbookReader streamingWorkbookReader,
                       final PackagePart packagePart,
                       final SharedStrings sst, final StylesTable stylesTable, final Comments commentsTable,
                       final boolean use1904Dates, final int rowCacheSize) {
    this.streamingWorkbookReader = streamingWorkbookReader;
    this.packagePart = packagePart;
    this.sst = sst;
    this.stylesTable = stylesTable;
    this.commentsTable = commentsTable;
    this.use1904Dates = use1904Dates;
    this.rowCacheSize = rowCacheSize;
  }

  /**
   * Sets the sheet that is passed to row iterators created afterwards and whose name is used
   * to look up shapes.
   */
  void setSheet(StreamingSheet sheet) {
    this.sheet = sheet;
  }

  /**
   * Stops tracking the given iterator, so {@link #close()} will no longer close it.
   */
  void removeIterator(StreamingRowIterator iterator) {
    iterators.remove(iterator);
  }

  /**
   * @return an unmodifiable view of the shared formulas recorded so far, keyed by the value
   *         passed to {@link #addSharedFormula(String, SharedFormula)}; empty if none recorded
   * @throws IllegalStateException if reading shared formulas is disabled on the builder
   */
  Map<String, SharedFormula> getSharedFormulaMap() {
    if (!getBuilder().readSharedFormulas()) {
      throw new IllegalStateException("The reading of shared formulas has been disabled. Enable using StreamingReader.Builder.");
    }
    if (sharedFormulaMap == null) {
      return Collections.emptyMap();
    }
    return Collections.unmodifiableMap(sharedFormulaMap);
  }

  /**
   * Records a shared formula under the given key. Does nothing when reading shared formulas
   * is disabled on the builder.
   */
  void addSharedFormula(String siValue, SharedFormula sharedFormula) {
    if (getBuilder().readSharedFormulas()) {
      if (sharedFormulaMap == null) {
        sharedFormulaMap = new HashMap<>();
      }
      sharedFormulaMap.put(siValue, sharedFormula);
    }
  }

  /**
   * Removes the shared formula stored under the given key.
   *
   * @return the removed formula, or null if there was none (or no formula was ever recorded)
   */
  SharedFormula removeSharedFormula(String siValue) {
    if (sharedFormulaMap != null) {
      return sharedFormulaMap.remove(siValue);
    }
    return null;
  }

  boolean isUse1904Dates() {
    return use1904Dates;
  }

  float getDefaultRowHeight() {
    return defaultRowHeight;
  }

  void setDefaultRowHeight(float defaultRowHeight) {
    this.defaultRowHeight = defaultRowHeight;
  }

  int getBaseColWidth() {
    return baseColWidth;
  }

  void setBaseColWidth(int baseColWidth) {
    this.baseColWidth = baseColWidth;
  }

  /**
   * Get the hidden state for a given column. Creates a streaming iterator first if none is
   * currently tracked.
   *
   * @param columnIndex the column to check (0-based)
   * @return {@code true} if the column is recorded as hidden, {@code false} if it is visible
   */
  boolean isColumnHidden(int columnIndex) {
    ensureSheetParsingStarted();
    return hiddenColumns.contains(columnIndex);
  }

  /**
   * Get the width of a given column. Creates a streaming iterator first if none is currently
   * tracked.
   *
   * @param columnIndex the column to look up
   * @return the recorded width, or {@link #getBaseColWidth()} when no width is recorded
   */
  float getColumnWidth(int columnIndex) {
    ensureSheetParsingStarted();
    Float width = columnWidths.get(columnIndex);
    return width == null ? getBaseColWidth() : width;
  }

  /**
   * Gets the first row on the sheet. Creates a streaming iterator first if none is currently
   * tracked.
   */
  int getFirstRowNum() {
    ensureSheetParsingStarted();
    return firstRowNum;
  }

  void setFirstRowNum(int firstRowNum) {
    this.firstRowNum = firstRowNum;
  }

  /**
   * Gets the last row on the sheet. Creates a streaming iterator first if none is currently
   * tracked.
   */
  int getLastRowNum() {
    ensureSheetParsingStarted();
    return lastRowNum;
  }

  void setLastRowNum(int lastRowNum) {
    this.lastRowNum = lastRowNum;
  }

  /**
   * Read the numeric format string out of the styles table for this cell. Stores
   * the result in the Cell.
   *
   * <p>The style is taken from the {@code s} attribute of the element; when that attribute is
   * missing, style 0 is used if the styles table contains any style. If no style can be
   * resolved, the cell's format index and format string are both set to null.
   *
   * @param startElement the start element whose {@code s} attribute holds the style index
   * @param cell         the cell that receives the format index and format string
   */
  void setFormatString(StartElement startElement, StreamingCell cell) {
    Attribute cellStyle = startElement.getAttributeByName(new QName("s"));
    String cellStyleString = (cellStyle != null) ? cellStyle.getValue() : null;
    XSSFCellStyle style = null;

    if (stylesTable != null) {
      if (cellStyleString != null) {
        style = stylesTable.getStyleAt(parseInt(cellStyleString));
      } else if (stylesTable.getNumCellStyles() > 0) {
        style = stylesTable.getStyleAt(0);
      }
    }

    if (style != null) {
      cell.setNumericFormatIndex(style.getDataFormat());
      String formatString = style.getDataFormatString();
      if (formatString != null) {
        cell.setNumericFormat(formatString);
      } else {
        // no explicit format string on the style: fall back to the built-in format for the index
        cell.setNumericFormat(BuiltinFormats.getBuiltinFormat(cell.getNumericFormatIndex()));
      }
    } else {
      cell.setNumericFormatIndex(null);
      cell.setNumericFormat(null);
    }
  }

  /**
   * @return the active cell as last set via {@link #setActiveCell(CellAddress)}; unlike
   *         {@link #getPane()} this does not create an iterator
   */
  CellAddress getActiveCell() {
    return activeCell;
  }

  void setActiveCell(CellAddress activeCell) {
    this.activeCell = activeCell;
  }

  /**
   * @return the pane information as last set via {@link #setPane(PaneInformation)}. Creates a
   *         streaming iterator first if none is currently tracked.
   */
  PaneInformation getPane() {
    ensureSheetParsingStarted();
    return pane;
  }

  void setPane(PaneInformation pane) {
    this.pane = pane;
  }

  /**
   * Returns a new streaming iterator to loop through rows. This iterator is not
   * guaranteed to have all rows in memory, and any particular iteration may
   * trigger a load from disk to read in new data.
   *
   * This is an iterator of the PHYSICAL rows.
   * Meaning the 3rd element may not be the third row if say for instance the second row is undefined.
   *
   * This behaviour changed in v4.0.0. Earlier versions only created one iterator and repeated
   * calls to this method just returned the same iterator. Creating multiple iterators will slow down
   * your application and should be avoided unless necessary.
   *
   * @return the streaming iterator, an instance of {@link CloseableIterator} -
   * it is recommended that you close the iterator when finished with it if you intend to keep the sheet open.
   * @throws OpenException if there is an IO issue
   * @throws ReadException if there is a parse issue
   */
  @Override
  public CloseableIterator<Row> iterator() throws OpenException, ReadException {
    try {
      //StreamingRowIterator requires a new XMLEventReader with a new InputStream to be provided to start from the
      //beginning of the Sheet
      XMLEventReader parser = getXmlInputFactory().createXMLEventReader(packagePart.getInputStream());
      StreamingRowIterator iterator = new StreamingRowIterator(this,
          sst, stylesTable, parser, use1904Dates, rowCacheSize, hiddenColumns, columnWidths, mergedCells, hyperlinks,
          sharedFormulaMap, defaultRowHeight, sheet);
      // track the iterator so that close() can close it later
      iterators.add(iterator);
      return iterator;
    } catch (IOException e) {
      throw new OpenException("Failed to open stream", e);
    } catch (XMLStreamException e) {
      throw new ReadException("Unable to read row data", e);
    }
  }

  /**
   * @return the comments associated with this sheet (only if feature is enabled on the Builder)
   * @throws IllegalStateException if {@link com.github.pjfanning.xlsx.StreamingReader.Builder#setReadComments(boolean)} is not set to true
   */
  Comments getCellComments() {
    if (!getBuilder().readComments()) {
      throw new IllegalStateException("getCellComments() only works if StreamingReader.Builder setReadComments is set to true");
    }
    return this.commentsTable;
  }

  /**
   * @return a new list holding a copy of the merged regions recorded so far
   */
  List<CellRangeAddress> getMergedCells() {
    return new ArrayList<>(this.mergedCells);
  }

  /**
   * @return the drawing of the first shape registered for this sheet, or null if no sheet has
   *         been set or the workbook reader has no shapes for it
   * @throws IllegalStateException if {@link com.github.pjfanning.xlsx.StreamingReader.Builder#setReadShapes(boolean)} is not set to true
   */
  XSSFDrawing getDrawingPatriarch() {
    if (!getBuilder().readShapes()) {
      throw new IllegalStateException("getDrawingPatriarch() only works if StreamingReader.Builder setReadShapes is set to true");
    }
    if (sheet != null) {
      List<XSSFShape> shapes = streamingWorkbookReader.getShapes(sheet.getSheetName());
      if (shapes != null && !shapes.isEmpty()) {
        // only the first shape is consulted
        return shapes.get(0).getDrawing();
      }
    }
    return null;
  }

  /**
   * Closes every tracked row iterator and then, if it is {@link AutoCloseable}, the comments
   * table. The comments table is closed even when closing an iterator fails.
   *
   * @throws CloseException if the comments table cannot be closed
   */
  public void close() throws CloseException {
    try {
      // false: the iterator is asked not to remove itself from this reader while we iterate
      // NOTE(review): meaning of the flag is inferred from removeIterator(); confirm in StreamingRowIterator
      iterators.forEach(iter -> iter.close(false));
    } finally {
      // The sst instance is closed at the workbook level
      if (commentsTable instanceof AutoCloseable) {
        try {
          ((AutoCloseable) commentsTable).close();
        } catch (Exception e) {
          throw new CloseException("Failed to close CommentsTable", e);
        }
      }
    }
  }

  StreamingReader.Builder getBuilder() {
    return streamingWorkbookReader.getBuilder();
  }

  Workbook getWorkbook() {
    return streamingWorkbookReader.getWorkbook();
  }

  /**
   * @return the hyperlinks associated with this sheet (only if feature is enabled on the Builder)
   * @throws IllegalStateException if {@link com.github.pjfanning.xlsx.StreamingReader.Builder#setReadHyperlinks(boolean)} is not set to true
   */
  List<XlsxHyperlink> getHyperlinks() {
    if (!getBuilder().readHyperlinks()) {
      throw new IllegalStateException("getHyperlinks() only works if StreamingReader.Builder setReadHyperlinks is set to true");
    }
    initHyperlinks();
    return xlsxHyperlinks;
  }

  /**
   * Builds {@link #xlsxHyperlinks} from the hyperlink data recorded so far. The list is rebuilt
   * on every call for as long as it is null or empty; once it is non-empty it is kept as is.
   */
  private void initHyperlinks() {
    if (xlsxHyperlinks == null || xlsxHyperlinks.isEmpty()) {
      List<XlsxHyperlink> links = new ArrayList<>();
      try {
        PackageRelationshipCollection hyperRels =
            packagePart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation());

        // Turn each one into a XlsxHyperlink
        for (HyperlinkData hyperlink : hyperlinks) {
          PackageRelationship hyperRel = null;
          if (hyperlink.getId() != null) {
            hyperRel = hyperRels.getRelationshipByID(hyperlink.getId());
          }
          links.add(new XlsxHyperlink(hyperlink, hyperRel));
        }
      } catch (InvalidFormatException e) {
        throw new POIXMLException(e);
      }
      xlsxHyperlinks = links;
    }
  }

  /**
   * Creates a streaming iterator to parse the sheet when no iterator is currently tracked.
   * The iterator stays registered in {@link #iterators} and is closed by {@link #close()}.
   */
  private void ensureSheetParsingStarted() {
    if (iterators.isEmpty()) {
      // create a new streaming iterator to parse sheet
      iterator();
    }
  }

  /**
   * @return the shared {@link XMLInputFactory}, created through {@link XMLHelper} on first use;
   *         a failure to create it is logged and rethrown
   */
  private static XMLInputFactory getXmlInputFactory() {
    if (xmlInputFactory == null) {
      try {
        xmlInputFactory = XMLHelper.newXMLInputFactory();
      } catch (Exception e) {
        LOG.error("Issue creating XMLInputFactory", e);
        throw e;
      }
    }
    return xmlInputFactory;
  }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy