All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.romualdrousseau.any2json.loader.excel.xlsx.XlsxSheet Maven / Gradle / Ivy

package com.github.romualdrousseau.any2json.loader.excel.xlsx;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;

import com.github.romualdrousseau.any2json.base.PatcheableSheetStore;
import com.github.romualdrousseau.shuju.bigdata.DataFrame;
import com.github.romualdrousseau.shuju.bigdata.DataFrameWriter;
import com.github.romualdrousseau.shuju.strings.StringUtils;

import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class XlsxSheet extends PatcheableSheetStore implements Closeable {

    private static final int BATCH_SIZE = 50000;

    private final String name;
    private final InputStream sheetData;
    private final StylesTable styles;
    private final SharedStrings sharedStrings;

    private List mergedRegions;
    private DataFrame rows;
    private boolean dataLoaded;

    public XlsxSheet(final String name, final InputStream sheetData, final SharedStrings sharedStrings,
            final StylesTable styles) {
        this.name = name;
        this.sheetData = sheetData;
        this.sharedStrings = sharedStrings;
        this.styles = styles;
    }

    @Override
    public void close() {
        try {
            if (this.rows != null) {
                this.rows.close();
                this.rows = null;
            }
        } catch (final IOException x) {
            // throw new UncheckedIOException(x);
        }
    }

    public XlsxSheet ensureDataLoaded() {
        if (this.dataLoaded) {
            return this;
        }
        try (final var writer = new DataFrameWriter(BATCH_SIZE)) {
            final var parserFactory = SAXParserFactory.newInstance();
            final var parser = parserFactory.newSAXParser().getXMLReader();
            final var contentHandler = new ContentHandler(writer, this.sharedStrings, this.styles);
            parser.setContentHandler(contentHandler);
            parser.parse(new InputSource(this.sheetData));
            this.mergedRegions = contentHandler.getMergesRegions();
            this.rows = contentHandler.getRows();
        } catch (SAXException | IOException | ParserConfigurationException ignore) {
        } finally {
            try {
                this.dataLoaded = true;
                this.sheetData.close();
            } catch (final IOException x) {
                // throw new UncheckedIOException(x);
            }
        }
        return this;
    }

    public String getName() {
        return this.name;
    }

    @Override
    public int getLastColumnNum(final int rowIndex) {
        return this.rows.getColumnCount(rowIndex) - 1;
    }

    @Override
    public int getLastRowNum() {
        return this.rows.getRowCount() - 1;
    }

    @Override
    public boolean hasCellDataAt(final int colIndex, final int rowIndex) {
        final var n = this.getInternalMergeDown(colIndex, rowIndex);
        if (n >= this.rows.getRowCount()) {
            return false;
        }
        final var patchCell = this.getPatchCell(colIndex, n);
        if (patchCell != null) {
            return true;
        } else {
            final var cells = this.rows.getRow(n);
            return cells != null && colIndex < cells.size() && cells.get(colIndex) != null;
        }
    }

    @Override
    public String getCellDataAt(final int colIndex, final int rowIndex) {
        final var n = this.getInternalMergeDown(colIndex, rowIndex);
        if (n >= this.rows.getRowCount()) {
            return null;
        }
        final var patchCell = this.getPatchCell(colIndex, n);
        if (patchCell != null) {
            return patchCell;
        } else {
            final var cells = this.rows.getRow(n);
            return cells != null && colIndex < cells.size() ? StringUtils.cleanToken(cells.get(colIndex)) : null;
        }
    }

    @Override
    public int getNumberOfMergedCellsAt(final int colIndex, final int rowIndex) {
        if (this.mergedRegions.size() == 0) {
            return 1;
        }
        int numberOfCells = 0;
        for (final CellRangeAddress region : this.mergedRegions) {
            if (region.isInRange(rowIndex, colIndex)) {
                numberOfCells = region.getLastColumn() - region.getFirstColumn();
                break;
            }
        }
        return numberOfCells + 1;
    }

    @Override
    public void patchCell(final int colIndex1, final int rowIndex1, final int colIndex2, final int rowIndex2,
            final String value, final boolean unmergeAll) {
        final String newCell;
        if (value == null) {
            newCell = this.getCellDataAt(colIndex1, rowIndex1);
        } else {
            newCell = value;
        }

        if (!unmergeAll) {
            this.unmergeCell(colIndex2, rowIndex2);
        }

        final var n2 = this.getInternalMergeDown(colIndex2, rowIndex2);
        this.addPatchCell(colIndex2, n2, newCell);
    }

    private void unmergeCell(final int colIndex, final int rowIndex) {
        final List regionsToRemove = new ArrayList();
        for (final CellRangeAddress region : this.mergedRegions) {
            if (region.isInRange(rowIndex, colIndex)) {
                regionsToRemove.add(region);
            }
        }
        for (final CellRangeAddress region : regionsToRemove) {
            this.mergedRegions.remove(region);
        }
    }

    private int getInternalMergeDown(final int colIndex, final int rowIndex) {
        if (this.mergedRegions.size() == 0) {
            return rowIndex;
        }
        var rowToReturn = rowIndex;
        for (final var region : mergedRegions) {
            if (region.getLastRow() > region.getFirstRow() && rowIndex > region.getFirstRow()
                    && region.isInRange(rowIndex, colIndex)) {
                rowToReturn = region.getFirstRow();
                break;
            }
        }
        return rowToReturn;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy