All downloads are free. Search and download functionality uses the official Maven repository.

com.github.pjfanning.xlsx.impl.StreamingWorkbookReader Maven / Gradle / Ivy

The newest version!
package com.github.pjfanning.xlsx.impl;

import com.github.pjfanning.xlsx.SharedStringsImplementationType;
import com.github.pjfanning.xlsx.StreamingReader.Builder;
import com.github.pjfanning.xlsx.exceptions.ExcelRuntimeException;
import com.github.pjfanning.xlsx.exceptions.MissingSheetException;
import com.github.pjfanning.xlsx.exceptions.NotSupportedException;
import com.github.pjfanning.xlsx.exceptions.OpenException;
import com.github.pjfanning.xlsx.exceptions.ParseException;
import com.github.pjfanning.xlsx.exceptions.ReadException;
import com.github.pjfanning.xlsx.impl.ooxml.OoxmlStrictHelper;
import com.github.pjfanning.xlsx.impl.ooxml.OoxmlReader;
import org.apache.commons.io.IOUtils;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Date1904Support;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.model.*;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.stream.XMLStreamException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.util.*;

import static com.github.pjfanning.xlsx.XmlUtils.readDocument;
import static com.github.pjfanning.xlsx.XmlUtils.searchForNodeList;

public class StreamingWorkbookReader implements Iterable, Date1904Support, AutoCloseable {
  private static final Logger log = LoggerFactory.getLogger(StreamingWorkbookReader.class);

  private List sheets;
  private final Map sheetMap = new HashMap<>();
  private final List> sheetProperties = new ArrayList<>();
  private final Map> shapeMap = new HashMap<>();
  private final Builder builder;
  private File tmp;
  private OPCPackage pkg;
  private SharedStrings sst;
  private StylesTable styles;
  private boolean use1904Dates = false;
  private boolean strictFormat = false;
  private StreamingWorkbook workbook = null;
  private POIXMLProperties.CoreProperties coreProperties = null;
  private OoxmlReader ooxmlReader;

  public StreamingWorkbookReader(Builder builder) {
    this.builder = builder;
  }

  /**
   * Initializes the reader with the given input stream.
   * @param is the input stream to read from
   * @throws OpenException if an error occurs while opening the file
   * @throws ReadException if an error occurs while reading the file
   * @throws ParseException if an error occurs while parsing the file
   */
  public void init(InputStream is) throws OpenException, ReadException, ParseException {
    if (builder.avoidTempFiles()) {
      try {
        if(builder.getPassword() != null) {
          POIFSFileSystem poifs = new POIFSFileSystem(is);
          pkg = decryptWorkbook(poifs);
        } else {
          pkg = OPCPackage.open(is);
        }
        loadPackage(pkg);
      } catch(SAXException e) {
        IOUtils.closeQuietly(pkg);
        throw new ParseException("Failed to parse stream", e);
      } catch(IOException e) {
        IOUtils.closeQuietly(pkg);
        throw new OpenException("Failed to open stream", e);
      } catch(GeneralSecurityException e) {
        IOUtils.closeQuietly(pkg);
        throw new ReadException("Unable to read workbook - Decryption failed", e);
      } catch(OpenXML4JException | XMLStreamException | RuntimeException e) {
        IOUtils.closeQuietly(pkg);
        throw new ReadException("Unable to read workbook", e);
      }
    } else {
      File f = null;
      try {
        f = TempFileUtil.writeInputStreamToFile(is, builder.getBufferSize());
        if (log.isDebugEnabled()) {
          log.debug("Created temp file [{}]", f.getAbsolutePath());
        }
        init(f);
        tmp = f;
      } catch(OpenException | ReadException e) {
        if (f != null && !f.delete()) {
          log.debug("failed to delete temp file");
        }
        throw e;
      } catch(UnsupportedFileFormatException e) {
        if (f != null && !f.delete()) {
          log.debug("failed to delete temp file");
        }
        throw new ReadException("Unsupported File Format (only xlsx files are supported)", e);
      } catch(IOException | RuntimeException e) {
        if (f != null && !f.delete()) {
          log.debug("failed to delete temp file");
        }
        throw new ReadException("Unable to read input stream", e);
      }
    }
  }

  /**
   * Initializes the reader with the given input stream.
   * @param f the file to read from
   * @throws OpenException if an error occurs while opening the file
   * @throws ReadException if an error occurs while reading the file
   * @throws ParseException if an error occurs while parsing the file
   */
  public void init(File f) throws OpenException, ReadException, ParseException {
    try {
      if(builder.getPassword() != null) {
        POIFSFileSystem poifs = new POIFSFileSystem(f);
        pkg = decryptWorkbook(poifs);
      } else {
        pkg = OPCPackage.open(f);
      }
      loadPackage(pkg);
    } catch(SAXException e) {
      IOUtils.closeQuietly(pkg);
      throw new ParseException("Failed to parse file", e);
    } catch(IOException e) {
      IOUtils.closeQuietly(pkg);
      throw new OpenException("Failed to open file", e);
    } catch(UnsupportedFileFormatException e) {
      IOUtils.closeQuietly(pkg);
      throw new ReadException("Unsupported File Format (only xlsx files are supported)", e);
    } catch(OpenXML4JException | XMLStreamException e) {
      IOUtils.closeQuietly(pkg);
      throw new ReadException("Unable to read workbook", e);
    } catch(GeneralSecurityException e) {
      IOUtils.closeQuietly(pkg);
      throw new ReadException("Unable to read workbook - Decryption failed", e);
    } catch(ExcelRuntimeException e) {
      IOUtils.closeQuietly(pkg);
      throw e;
    } catch(RuntimeException e) {
      IOUtils.closeQuietly(pkg);
      throw new ReadException("Unable to read workbook", e);
    }
  }

  private OPCPackage decryptWorkbook(POIFSFileSystem poifs) throws IOException, GeneralSecurityException, InvalidFormatException {
    // Based on: https://poi.apache.org/encryption.html
    EncryptionInfo info = new EncryptionInfo(poifs);
    Decryptor d = Decryptor.getInstance(info);
    d.verifyPassword(builder.getPassword());
    return OPCPackage.open(d.getDataStream(poifs));
  }

  private void loadPackage(OPCPackage pkg) throws IOException, OpenXML4JException, SAXException, XMLStreamException {
    strictFormat = pkg.isStrictOoxmlFormat();
    ooxmlReader = new OoxmlReader(builder, pkg, strictFormat);
    if (strictFormat) {
      log.info("file is in strict OOXML format");
    }

    final Document workbookDoc = readDocument(ooxmlReader.getWorkbookData());
    use1904Dates = WorkbookUtil.use1904Dates(workbookDoc);
    lookupSheetNames(workbookDoc);

    if (builder.getSharedStringsImplementationType() == SharedStringsImplementationType.TEMP_FILE_BACKED) {
      log.info("Created sst cache file");
      sst = PoiSharedStringsSupport.createTempFileSharedStringsTable(pkg, builder);
    } else if (builder.getSharedStringsImplementationType() == SharedStringsImplementationType.CUSTOM_MAP_BACKED) {
      sst = PoiSharedStringsSupport.createMapBackedSharedStringsTable(pkg, builder);
    } else if (strictFormat) {
      sst = OoxmlStrictHelper.getSharedStringsTable(builder, pkg);
    } else {
      sst = ooxmlReader.getSharedStrings(builder);
    }

    if (builder.readCoreProperties()) {
      try {
        final POIXMLProperties xmlProperties = new POIXMLProperties(pkg);
        coreProperties = xmlProperties.getCoreProperties();
      } catch (Exception e) {
        log.warn("Failed to read coreProperties", e);
      }
    }

    if (builder.readStyles()) {
      if (strictFormat) {
        ThemesTable themesTable = OoxmlStrictHelper.getThemesTable(builder, pkg);
        styles = OoxmlStrictHelper.getStylesTable(builder, pkg);
        if (styles != null) styles.setTheme(themesTable);
      } else {
        styles = ooxmlReader.getStylesTable();
      }
    }
  }

  void setWorkbook(StreamingWorkbook workbook) {
    this.workbook = workbook;
    workbook.setCoreProperties(coreProperties);
  }

  Workbook getWorkbook() {
    return workbook;
  }

  private List loadSheets() {
    final ArrayList sheetList = new ArrayList<>();
    final int numSheets = ooxmlReader.getNumberOfSheets();
    for(int i = 0; i < numSheets; i++) {
      final StreamingSheet maybeSheet = sheetMap.get(i);
      sheetList.add(maybeSheet == null ? createSheet(i) : maybeSheet);
    }
    sheetMap.clear();
    return sheetList;
  }

  /**
   * Returns the sheet at the given index (0 based).
   * @param idx index (0 based)
   * @return the sheet at the given index
   * @throws MissingSheetException if a sheet at the given index does not exist
   * @throws IOException should never be thrown
   * @throws XMLStreamException should never be thrown
   */
  public StreamingSheet getSheetAt(final int idx) throws MissingSheetException, IOException, XMLStreamException {
    if (sheets != null && sheets.size() > idx) {
      return sheets.get(idx);
    } else {
      StreamingSheet sheet = sheetMap.get(idx);
      if (sheet == null) {
        sheet = createSheet(idx);
        sheetMap.put(idx, sheet);
      }
      return sheet;
    }
  }

  /**
   * Returns the sheet with the given name.
   * @param name the name of the sheet to return
   * @return the sheet with the given name
   * @throws MissingSheetException if a sheet with the given name does not exist
   * @throws IOException should never be thrown
   * @throws XMLStreamException should never be thrown
   */
  public StreamingSheet getSheet(final String name) throws MissingSheetException, IOException, XMLStreamException {
    final int idx = ooxmlReader.getSheetIndex(name);
    return getSheetAt(idx);
  }

  private StreamingSheet createSheet(final int idx) throws MissingSheetException {
    final OoxmlReader.SheetData sheetData = ooxmlReader.getSheetDataAt(idx);
    if (builder.readShapes()) {
      shapeMap.put(sheetData.getSheetName(), sheetData.getShapes());
    }
    final PackagePart part = sheetData.getSheetPart();
    if (part == null) {
      throw new MissingSheetException("Sheet at index " + idx + " is missing");
    }
    return new StreamingSheet(
              sheetProperties.get(idx).get("name"),
              new StreamingSheetReader(this, part, sst, styles,
                      sheetData.getComments(), use1904Dates, builder.getRowCacheSize()));
  }

  private void lookupSheetNames(Document workbookDoc) {
    sheetProperties.clear();
    NodeList nl = searchForNodeList(workbookDoc, "/ss:workbook/ss:sheets/ss:sheet");
    for(int i = 0; i < nl.getLength(); i++) {
      Map props = new HashMap<>();
      props.put("name", nl.item(i).getAttributes().getNamedItem("name").getTextContent());

      Node state = nl.item(i).getAttributes().getNamedItem("state");
      props.put("state", state == null ? "visible" : state.getTextContent());
      sheetProperties.add(props);
    }
  }

  List getSheets() throws XMLStreamException, IOException {
    if (sheets == null) {
      sheets = loadSheets();
    }
    return sheets;
  }

  public List> getSheetProperties() {
    return sheetProperties;
  }

  @Override
  public Iterator iterator() throws ReadException {
    try {
      return new StreamingSheetIterator(getSheets().iterator());
    } catch (XMLStreamException|IOException e) {
      throw new ReadException(e);
    }
  }

  @Override
  public Spliterator spliterator() throws ReadException {
    try {
      return Spliterators.spliterator(getSheets(), Spliterator.ORDERED);
    } catch (XMLStreamException|IOException e) {
      throw new ReadException(e);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean isDate1904() {
    return use1904Dates;
  }

  @Override
  public void close() throws IOException {
    try {
      if (sheets != null) {
        for(StreamingSheet sheet : sheets) {
          sheet.getReader().close();
        }
      }
    } finally {
      try {
        pkg.revert();
        pkg.close();
      } finally {
        if(tmp != null) {
          if (log.isDebugEnabled()) {
            log.debug("Deleting tmp file [{}]", tmp.getAbsolutePath());
          }
          if (!tmp.delete()) {
            log.debug("Failed tp delete temp file");
          }
        }
        if(sst instanceof AutoCloseable) {
          try {
            ((AutoCloseable) sst).close();
          } catch (Exception e) {
            log.warn("Failed to close sst", e);
          }
        }
      }
    }
  }

  Builder getBuilder() {
    return builder;
  }

  OPCPackage getOPCPackage() {
    return pkg;
  }

  List getShapes(String sheetName) {
    return shapeMap.get(sheetName);
  }

  static class StreamingSheetIterator implements Iterator {
    private final Iterator iterator;

    public StreamingSheetIterator(Iterator iterator) {
      this.iterator = iterator;
    }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public Sheet next() {
      return iterator.next();
    }

    @Override
    public void remove() {
      throw new NotSupportedException();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy