Please wait. This can take a few minutes...
Downloading a project requires considerable server resources. Please understand that we have to cover our server costs. Thank you in advance.
The project price is only $1.
You can buy this project and download or modify it as often as you want.
com.github.pjfanning.xlsx.impl.StreamingWorkbookReader Maven / Gradle / Ivy
package com.github.pjfanning.xlsx.impl;
import com.github.pjfanning.xlsx.SharedStringsImplementationType;
import com.github.pjfanning.xlsx.StreamingReader.Builder;
import com.github.pjfanning.xlsx.exceptions.ExcelRuntimeException;
import com.github.pjfanning.xlsx.exceptions.MissingSheetException;
import com.github.pjfanning.xlsx.exceptions.NotSupportedException;
import com.github.pjfanning.xlsx.exceptions.OpenException;
import com.github.pjfanning.xlsx.exceptions.ParseException;
import com.github.pjfanning.xlsx.exceptions.ReadException;
import com.github.pjfanning.xlsx.impl.ooxml.OoxmlStrictHelper;
import com.github.pjfanning.xlsx.impl.ooxml.OoxmlReader;
import org.apache.commons.io.IOUtils;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Date1904Support;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.model.*;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.stream.XMLStreamException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.util.*;
import static com.github.pjfanning.xlsx.XmlUtils.readDocument;
import static com.github.pjfanning.xlsx.XmlUtils.searchForNodeList;
public class StreamingWorkbookReader implements Iterable, Date1904Support, AutoCloseable {
private static final Logger log = LoggerFactory.getLogger(StreamingWorkbookReader.class);
private List sheets;
private final Map sheetMap = new HashMap<>();
private final List> sheetProperties = new ArrayList<>();
private final Map> shapeMap = new HashMap<>();
private final Builder builder;
private File tmp;
private OPCPackage pkg;
private SharedStrings sst;
private StylesTable styles;
private boolean use1904Dates = false;
private boolean strictFormat = false;
private StreamingWorkbook workbook = null;
private POIXMLProperties.CoreProperties coreProperties = null;
private OoxmlReader ooxmlReader;
/**
 * Creates a reader that uses the given builder as its configuration. The constructor only
 * stores the builder; a workbook is opened by one of the {@code init} methods.
 *
 * @param builder the reader configuration
 */
public StreamingWorkbookReader(final Builder builder) {
  this.builder = builder;
}
/**
 * Initializes the reader with the given input stream.
 * <p>
 * If the builder is configured to avoid temp files the stream is opened directly. Otherwise the
 * stream is first copied to a temp file, which is deleted again if initialization fails and
 * otherwise by {@link #close()}.
 *
 * @param is the input stream to read from
 * @throws OpenException if an error occurs while opening the file
 * @throws ReadException if an error occurs while reading the file
 * @throws ParseException if an error occurs while parsing the file
 */
public void init(InputStream is) throws OpenException, ReadException, ParseException {
  if (builder.avoidTempFiles()) {
    initFromStream(is);
  } else {
    initFromTempFile(is);
  }
}

/** Opens the package straight from the stream, mapping failures the same way as {@link #init(File)}. */
private void initFromStream(InputStream is) {
  boolean loaded = false;
  try {
    if (builder.getPassword() != null) {
      // The decrypted package is fully read by OPCPackage.open(InputStream), so the
      // POIFS container can be released as soon as decryptWorkbook returns.
      try (POIFSFileSystem poifs = new POIFSFileSystem(is)) {
        pkg = decryptWorkbook(poifs);
      }
    } else {
      pkg = OPCPackage.open(is);
    }
    loadPackage(pkg);
    loaded = true;
  } catch (SAXException e) {
    throw new ParseException("Failed to parse stream", e);
  } catch (IOException e) {
    throw new OpenException("Failed to open stream", e);
  } catch (UnsupportedFileFormatException e) {
    throw new ReadException("Unsupported File Format (only xlsx files are supported)", e);
  } catch (GeneralSecurityException e) {
    throw new ReadException("Unable to read workbook - Decryption failed", e);
  } catch (OpenXML4JException | XMLStreamException e) {
    throw new ReadException("Unable to read workbook", e);
  } catch (ExcelRuntimeException e) {
    // Already one of the reader's own exceptions: do not wrap it a second time.
    throw e;
  } catch (RuntimeException e) {
    throw new ReadException("Unable to read workbook", e);
  } finally {
    if (!loaded) {
      IOUtils.closeQuietly(pkg);
    }
  }
}

/** Copies the stream to a temp file and initializes from that file. */
private void initFromTempFile(InputStream is) {
  File f = null;
  boolean initialized = false;
  try {
    f = TempFileUtil.writeInputStreamToFile(is, builder.getBufferSize());
    if (log.isDebugEnabled()) {
      log.debug("Created temp file [{}]", f.getAbsolutePath());
    }
    init(f);
    tmp = f;
    initialized = true;
  } catch (OpenException | ReadException e) {
    throw e;
  } catch (ExcelRuntimeException e) {
    // init(File) reports ParseException and similar directly; keep them intact so the
    // documented @throws contract of this method holds for the temp-file path too.
    throw e;
  } catch (UnsupportedFileFormatException e) {
    throw new ReadException("Unsupported File Format (only xlsx files are supported)", e);
  } catch (IOException | RuntimeException e) {
    throw new ReadException("Unable to read input stream", e);
  } finally {
    if (!initialized) {
      deleteTempFile(f);
    }
  }
}

/** Best-effort removal of a temp file that will not be used; a failure is only logged. */
private void deleteTempFile(File f) {
  if (f != null && !f.delete()) {
    log.debug("failed to delete temp file");
  }
}
/**
 * Initializes the reader with the given file.
 * <p>
 * If loading fails for any reason the package opened so far is closed quietly before the
 * exception is propagated.
 *
 * @param f the file to read from
 * @throws OpenException if an error occurs while opening the file
 * @throws ReadException if an error occurs while reading the file
 * @throws ParseException if an error occurs while parsing the file
 */
public void init(File f) throws OpenException, ReadException, ParseException {
  boolean loaded = false;
  try {
    if (builder.getPassword() != null) {
      // Close the POIFS container once the decrypted package has been opened; otherwise the
      // handle on the file stays open for the lifetime of the reader.
      try (POIFSFileSystem poifs = new POIFSFileSystem(f)) {
        pkg = decryptWorkbook(poifs);
      }
    } else {
      pkg = OPCPackage.open(f);
    }
    loadPackage(pkg);
    loaded = true;
  } catch (SAXException e) {
    throw new ParseException("Failed to parse file", e);
  } catch (IOException e) {
    throw new OpenException("Failed to open file", e);
  } catch (UnsupportedFileFormatException e) {
    throw new ReadException("Unsupported File Format (only xlsx files are supported)", e);
  } catch (OpenXML4JException | XMLStreamException e) {
    throw new ReadException("Unable to read workbook", e);
  } catch (GeneralSecurityException e) {
    throw new ReadException("Unable to read workbook - Decryption failed", e);
  } catch (ExcelRuntimeException e) {
    // Already one of the reader's own exceptions: rethrow unchanged.
    throw e;
  } catch (RuntimeException e) {
    throw new ReadException("Unable to read workbook", e);
  } finally {
    if (!loaded) {
      IOUtils.closeQuietly(pkg);
    }
  }
}
/**
 * Decrypts an encrypted workbook using the password from the builder and opens the result.
 *
 * @param poifs the OLE2 container holding the encrypted workbook
 * @return the opened package
 * @throws IOException if the container or the decrypted stream cannot be read
 * @throws GeneralSecurityException if the password is rejected or decryption fails
 * @throws InvalidFormatException if the decrypted content is not a valid package
 */
private OPCPackage decryptWorkbook(POIFSFileSystem poifs) throws IOException, GeneralSecurityException, InvalidFormatException {
  // Based on: https://poi.apache.org/encryption.html
  final EncryptionInfo info = new EncryptionInfo(poifs);
  final Decryptor decryptor = Decryptor.getInstance(info);
  // verifyPassword reports a wrong password through its return value, not an exception.
  if (!decryptor.verifyPassword(builder.getPassword())) {
    throw new GeneralSecurityException("Password verification failed");
  }
  // OPCPackage.open(InputStream) reads the whole stream, so it can be closed right after.
  try (InputStream decrypted = decryptor.getDataStream(poifs)) {
    return OPCPackage.open(decrypted);
  }
}
/**
 * Reads the workbook-level data from an opened package: strict-format flag, 1904 date flag,
 * sheet names, shared strings and, if the builder asks for them, core properties and styles.
 *
 * @param pkg the opened package
 */
private void loadPackage(OPCPackage pkg) throws IOException, OpenXML4JException, SAXException, XMLStreamException {
  strictFormat = pkg.isStrictOoxmlFormat();
  ooxmlReader = new OoxmlReader(builder, pkg, strictFormat);
  if (strictFormat) {
    log.info("file is in strict OOXML format");
  }

  final Document workbookXml = readDocument(ooxmlReader.getWorkbookData());
  use1904Dates = WorkbookUtil.use1904Dates(workbookXml);
  lookupSheetNames(workbookXml);

  initSharedStrings(pkg);

  if (builder.readCoreProperties()) {
    try {
      coreProperties = new POIXMLProperties(pkg).getCoreProperties();
    } catch (Exception e) {
      // Core properties are optional: a failure here must not abort loading.
      log.warn("Failed to read coreProperties", e);
    }
  }

  if (builder.readStyles()) {
    initStyles(pkg);
  }
}

/** Assigns {@code sst} using the implementation type selected on the builder. */
private void initSharedStrings(OPCPackage pkg) throws IOException, OpenXML4JException, SAXException, XMLStreamException {
  final SharedStringsImplementationType sstType = builder.getSharedStringsImplementationType();
  if (sstType == SharedStringsImplementationType.TEMP_FILE_BACKED) {
    log.info("Created sst cache file");
    sst = PoiSharedStringsSupport.createTempFileSharedStringsTable(pkg, builder);
    return;
  }
  if (sstType == SharedStringsImplementationType.CUSTOM_MAP_BACKED) {
    sst = PoiSharedStringsSupport.createMapBackedSharedStringsTable(pkg, builder);
    return;
  }
  if (strictFormat) {
    sst = OoxmlStrictHelper.getSharedStringsTable(builder, pkg);
    return;
  }
  sst = ooxmlReader.getSharedStrings(builder);
}

/** Assigns {@code styles}; for strict-format files the theme is attached when styles exist. */
private void initStyles(OPCPackage pkg) throws IOException, OpenXML4JException, SAXException, XMLStreamException {
  if (!strictFormat) {
    styles = ooxmlReader.getStylesTable();
    return;
  }
  final ThemesTable theme = OoxmlStrictHelper.getThemesTable(builder, pkg);
  styles = OoxmlStrictHelper.getStylesTable(builder, pkg);
  if (styles != null) {
    styles.setTheme(theme);
  }
}
/**
 * Stores the workbook that owns this reader and hands it the core properties held by this
 * reader (which may be {@code null}).
 *
 * @param workbook the owning workbook
 */
void setWorkbook(StreamingWorkbook workbook) {
  this.workbook = workbook;
  workbook.setCoreProperties(this.coreProperties);
}
/**
 * @return the workbook set through {@code setWorkbook}, or {@code null} if none was set
 */
Workbook getWorkbook() {
  return this.workbook;
}
/**
 * Builds the complete sheet list in workbook order. Sheets already created on demand (and
 * held in {@code sheetMap}) are reused; all others are created now. The on-demand cache is
 * emptied afterwards.
 *
 * @return every sheet of the workbook, indexed by position
 */
private List<StreamingSheet> loadSheets() {
  final int numSheets = ooxmlReader.getNumberOfSheets();
  final List<StreamingSheet> sheetList = new ArrayList<>(numSheets);
  for (int i = 0; i < numSheets; i++) {
    final StreamingSheet maybeSheet = sheetMap.get(i);
    sheetList.add(maybeSheet == null ? createSheet(i) : maybeSheet);
  }
  sheetMap.clear();
  return sheetList;
}
/**
 * Returns the sheet at the given index (0 based). If the full sheet list has been loaded and
 * covers the index, the sheet comes from that list; otherwise it is created on first request
 * and cached for later calls.
 *
 * @param idx index (0 based)
 * @return the sheet at the given index
 * @throws MissingSheetException if a sheet at the given index does not exist
 * @throws IOException should never be thrown
 * @throws XMLStreamException should never be thrown
 */
public StreamingSheet getSheetAt(final int idx) throws MissingSheetException, IOException, XMLStreamException {
  final boolean inLoadedList = sheets != null && sheets.size() > idx;
  if (inLoadedList) {
    return sheets.get(idx);
  }

  final StreamingSheet cached = sheetMap.get(idx);
  if (cached != null) {
    return cached;
  }

  final StreamingSheet created = createSheet(idx);
  sheetMap.put(idx, created);
  return created;
}
/**
 * Returns the sheet with the given name by resolving the name to an index and delegating to
 * {@link #getSheetAt(int)}.
 *
 * @param name the name of the sheet to return
 * @return the sheet with the given name
 * @throws MissingSheetException if a sheet with the given name does not exist
 * @throws IOException should never be thrown
 * @throws XMLStreamException should never be thrown
 */
public StreamingSheet getSheet(final String name) throws MissingSheetException, IOException, XMLStreamException {
  return getSheetAt(ooxmlReader.getSheetIndex(name));
}
/**
 * Creates the streaming sheet for the given index. When the builder asks for shapes, the
 * sheet's shapes are recorded under the sheet name before the sheet part is checked.
 *
 * @param idx index (0 based)
 * @return a new sheet backed by its own reader
 * @throws MissingSheetException if the sheet data has no package part
 */
private StreamingSheet createSheet(final int idx) throws MissingSheetException {
  final OoxmlReader.SheetData data = ooxmlReader.getSheetDataAt(idx);
  if (builder.readShapes()) {
    shapeMap.put(data.getSheetName(), data.getShapes());
  }

  final PackagePart sheetPart = data.getSheetPart();
  if (sheetPart == null) {
    throw new MissingSheetException("Sheet at index " + idx + " is missing");
  }

  final String sheetName = sheetProperties.get(idx).get("name");
  final StreamingSheetReader reader = new StreamingSheetReader(this, sheetPart, sst, styles,
      data.getComments(), use1904Dates, builder.getRowCacheSize());
  return new StreamingSheet(sheetName, reader);
}
/**
 * Rebuilds {@code sheetProperties} from the workbook XML: one map per {@code sheet} element
 * holding its "name" and its "state" ("visible" when the attribute is absent).
 *
 * @param workbookDoc the parsed workbook XML
 */
private void lookupSheetNames(Document workbookDoc) {
  sheetProperties.clear();
  final NodeList sheetNodes = searchForNodeList(workbookDoc, "/ss:workbook/ss:sheets/ss:sheet");
  for (int pos = 0; pos < sheetNodes.getLength(); pos++) {
    final Node sheetNode = sheetNodes.item(pos);
    final Map<String, String> entry = new HashMap<>();
    entry.put("name", sheetNode.getAttributes().getNamedItem("name").getTextContent());

    final Node stateAttr = sheetNode.getAttributes().getNamedItem("state");
    if (stateAttr == null) {
      entry.put("state", "visible");
    } else {
      entry.put("state", stateAttr.getTextContent());
    }
    sheetProperties.add(entry);
  }
}
/**
 * Returns all sheets of the workbook, loading them on the first call and reusing the same
 * list afterwards.
 *
 * @return the sheets in workbook order
 * @throws XMLStreamException not thrown by the current implementation
 * @throws IOException not thrown by the current implementation
 */
List<StreamingSheet> getSheets() throws XMLStreamException, IOException {
  if (sheets == null) {
    sheets = loadSheets();
  }
  return sheets;
}
public List> getSheetProperties() {
return sheetProperties;
}
/**
 * Returns an iterator over all sheets in workbook order. The iterator does not support
 * {@code remove()}.
 *
 * @return an iterator over the sheets
 * @throws ReadException if the sheets cannot be loaded
 */
@Override
public Iterator<Sheet> iterator() throws ReadException {
  try {
    return new StreamingSheetIterator(getSheets().iterator());
  } catch (XMLStreamException | IOException e) {
    throw new ReadException(e);
  }
}
/**
 * Returns an ordered spliterator over all sheets.
 *
 * @return a spliterator over the sheets
 * @throws ReadException if the sheets cannot be loaded
 */
@Override
public Spliterator<Sheet> spliterator() throws ReadException {
  try {
    return Spliterators.spliterator(getSheets(), Spliterator.ORDERED);
  } catch (XMLStreamException | IOException e) {
    throw new ReadException(e);
  }
}
/**
 * Reports the 1904 date flag that was read from the workbook XML while the package was
 * loaded; {@code false} until then.
 *
 * @return {@code true} if the workbook uses 1904 dates
 */
@Override
public boolean isDate1904() {
  return this.use1904Dates;
}
/**
 * Releases everything held by this reader: the readers of all sheets created so far, the
 * package, the temp file (if one was created) and the shared strings table (if closeable).
 * <p>
 * Later steps still run when an earlier one throws. Calling this on a reader whose package
 * was never opened is safe.
 *
 * @throws IOException if closing a sheet reader or the package fails
 */
@Override
public void close() throws IOException {
  try {
    if (sheets != null) {
      for (StreamingSheet sheet : sheets) {
        sheet.getReader().close();
      }
    }
    // Sheets handed out by getSheetAt() before the full list was loaded exist only in
    // sheetMap, so they have to be closed from there.
    for (StreamingSheet sheet : sheetMap.values()) {
      sheet.getReader().close();
    }
  } finally {
    try {
      // pkg is still null when init was never called or failed before opening the package.
      if (pkg != null) {
        pkg.revert();
        pkg.close();
      }
    } finally {
      if (tmp != null) {
        if (log.isDebugEnabled()) {
          log.debug("Deleting tmp file [{}]", tmp.getAbsolutePath());
        }
        if (!tmp.delete()) {
          log.debug("Failed to delete temp file");
        }
      }
      if (sst instanceof AutoCloseable) {
        try {
          ((AutoCloseable) sst).close();
        } catch (Exception e) {
          // Best effort: a failure to close the shared strings must not mask earlier errors.
          log.warn("Failed to close sst", e);
        }
      }
    }
  }
}
/**
 * @return the builder this reader was constructed with
 */
Builder getBuilder() {
  return this.builder;
}
/**
 * @return the package assigned by the {@code init} methods, or {@code null} before that
 */
OPCPackage getOPCPackage() {
  return this.pkg;
}
/**
 * Returns the shapes recorded for the given sheet name.
 *
 * @param sheetName the name of the sheet
 * @return the recorded shapes, or {@code null} if nothing was recorded for that name
 */
List<XSSFShape> getShapes(String sheetName) {
  return shapeMap.get(sheetName);
}
/**
 * Read-only view over an iterator of streaming sheets that exposes them as {@link Sheet}.
 */
static class StreamingSheetIterator implements Iterator<Sheet> {
  private final Iterator<StreamingSheet> iterator;

  /**
   * @param iterator the sheet iterator to wrap
   */
  public StreamingSheetIterator(Iterator<StreamingSheet> iterator) {
    this.iterator = iterator;
  }

  @Override
  public boolean hasNext() {
    return iterator.hasNext();
  }

  @Override
  public Sheet next() {
    return iterator.next();
  }

  /**
   * Always fails: sheets cannot be removed through this iterator.
   */
  @Override
  public void remove() {
    throw new NotSupportedException();
  }
}
}