All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.liaochong.myexcel.core.XSSFSheetXMLHandler Maven / Gradle / Ivy

/*
 * Copyright 2019 liaochong
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.liaochong.myexcel.core;

import com.github.liaochong.myexcel.core.constant.Constants;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.Map;
import java.util.stream.Collectors;

import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;

/**
 * @author liaochong
 * @version 1.0
 */
class XSSFSheetXMLHandler extends DefaultHandler {
    private static final Logger logger = LoggerFactory.getLogger(XSSFSheetXMLHandler.class);

    /**
     * These are the different kinds of cells we support.
     * We keep track of the current one between
     * the start and end.
     */
    enum xssfDataType {
        BOOLEAN,
        ERROR,
        FORMULA,
        INLINE_STRING,
        SST_STRING,
        NUMBER,
    }

    /**
     * Read only access to the shared strings table, for looking
     * up (most) string cell's contents
     */
    private final SharedStrings sharedStringsTable;

    /**
     * Where our text is going
     */
    private final XSSFSheetXMLHandler.SheetContentsHandler output;

    // Set when V start element is seen
    private boolean vIsOpen;
    // Set when an Inline String "is" is seen
    private boolean isIsOpen;

    // Set when cell start element is seen;
    // used when cell close element is seen.
    private xssfDataType nextDataType;

    private int rowNum;
    private int preRowNum = -1;
    private int nextRowNum;      // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
    private String cellRef;

    // Gathers characters as they are seen.
    private final StringBuilder value = new StringBuilder(64);

    private final Map mergeCellMapping;

    private final Map mergeFirstCellMapping;

    /**
     * Accepts objects needed while parsing.
     *
     * @param strings              Table of shared strings
     * @param sheetContentsHandler sheetContentsHandler
     */
    public XSSFSheetXMLHandler(
            Map mergeCellMapping,
            SharedStrings strings,
            XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler) {
        this.mergeCellMapping = mergeCellMapping;
        this.mergeFirstCellMapping = mergeCellMapping.values().stream().distinct().collect(Collectors.toMap(cellAddress -> cellAddress, c -> ""));
        this.sharedStringsTable = strings;
        this.output = sheetContentsHandler;
        this.nextDataType = xssfDataType.NUMBER;
    }

    private boolean isTextTag(String name) {
        if ("v".equals(name)) {
            // Easy, normal v text tag
            return true;
        }
        if ("inlineStr".equals(name)) {
            // Easy inline string
            return true;
        }
        // Inline string ... pair
        return "t".equals(name) && isIsOpen;
        // It isn't a text tag
    }

    @Override
    @SuppressWarnings("unused")
    public void startElement(String uri, String localName, String qName,
                             Attributes attributes) throws SAXException {

        if (uri != null && !uri.equals(NS_SPREADSHEETML)) {
            return;
        }

        if (isTextTag(localName)) {
            vIsOpen = true;
            // Clear contents cache
            value.setLength(0);
        }
        // c => cell
        else if ("c".equals(localName)) {
            // Set up defaults.
            this.nextDataType = xssfDataType.NUMBER;
            cellRef = attributes.getValue("r");
            String cellType = attributes.getValue("t");
            String cellStyleStr = attributes.getValue("s");
            if ("b".equals(cellType))
                nextDataType = xssfDataType.BOOLEAN;
            else if ("e".equals(cellType))
                nextDataType = xssfDataType.ERROR;
            else if ("inlineStr".equals(cellType))
                nextDataType = xssfDataType.INLINE_STRING;
            else if ("s".equals(cellType))
                nextDataType = xssfDataType.SST_STRING;
            else if ("str".equals(cellType))
                nextDataType = xssfDataType.FORMULA;
        } else if ("row".equals(localName)) {
            String rowNumStr = attributes.getValue("r");
            if (rowNumStr != null) {
                rowNum = Integer.parseInt(rowNumStr) - 1;
            } else {
                rowNum = nextRowNum;
            }
            if (rowNum - 1 != preRowNum) {
                for (int blankRowNum = preRowNum + 1; blankRowNum < rowNum; blankRowNum++) {
                    output.startRow(blankRowNum);
                    output.endRow(blankRowNum);
                }
            }
            output.startRow(rowNum);
            this.preRowNum = rowNum;
        } else if ("is".equals(localName)) {
            // Inline string outer tag
            isIsOpen = true;
        } else if ("f".equals(localName)) {
            // Mark us as being a formula if not already
            if (nextDataType == xssfDataType.NUMBER) {
                nextDataType = xssfDataType.FORMULA;
            }
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {

        if (uri != null && !uri.equals(NS_SPREADSHEETML)) {
            return;
        }

        String thisStr = null;

        // v => contents of a cell
        if (isTextTag(localName)) {
            vIsOpen = false;

            // Process the value contents as required, now we have it all
            switch (nextDataType) {
                case BOOLEAN:
                    char first = value.charAt(0);
                    thisStr = first == '0' ? "FALSE" : "TRUE";
                    break;

                case ERROR:
                    thisStr = "ERROR:" + value;
                    break;

                case FORMULA:
                    // No formatting applied, just do raw value in all cases
                    thisStr = value.toString();
                    break;
                case INLINE_STRING:
                    // TODO: Can these ever have formatting on them?
                    XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                    thisStr = rtsi.toString();
                    break;

                case SST_STRING:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        RichTextString rtss = sharedStringsTable.getItemAt(idx);
                        thisStr = rtss.toString();
                    } catch (NumberFormatException ex) {
                        logger.error("Failed to parse SST index '" + sstIndex, ex);
                    }
                    break;

                case NUMBER:
                    String n = value.toString();
                    if (n.contains(Constants.SPOT)) {
                        n = String.valueOf(Double.parseDouble(n));
                    }
                    thisStr = n;
                    break;

                default:
                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                    break;
            }

            CellAddress cellAddress = new CellAddress(cellRef);
            String finalThisStr = thisStr;
            mergeFirstCellMapping.computeIfPresent(cellAddress, (k, v) -> finalThisStr);
            // Output
            output.cell(cellAddress, thisStr);
        } else if ("c".equals(localName)) {
            CellAddress cellAddress = new CellAddress(cellRef);
            CellAddress firstCellAddress = mergeCellMapping.get(cellAddress);
            if (firstCellAddress != null) {
                output.cell(cellAddress, mergeFirstCellMapping.get(firstCellAddress));
                mergeCellMapping.remove(cellAddress);
            }
        } else if ("row".equals(localName)) {
            // Finish up the row
            output.endRow(rowNum);
            // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well
            nextRowNum = rowNum + 1;
        } else if ("sheetData".equals(localName)) {
            // indicate that this sheet is now done
            output.endSheet();
        } else if ("is".equals(localName)) {
            isIsOpen = false;
        }
    }

    /**
     * Captures characters only if a suitable element is open.
     * Originally was just "v"; extended for inlineStr also.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (vIsOpen) {
            value.append(ch, start, length);
        }
    }

    /**
     * You need to implement this to handle the results
     * of the sheet parsing.
     */
    public interface SheetContentsHandler {
        /**
         * A row with the (zero based) row number has started
         *
         * @param rowNum rowNum
         */
        void startRow(int rowNum);

        /**
         * A row with the (zero based) row number has ended
         *
         * @param rowNum rowNum
         */
        void endRow(int rowNum);

        /**
         * A cell, with the given formatted value (may be null),
         * and possibly a comment (may be null), was encountered.
         * 

* Sheets that have missing or empty cells may result in * sparse calls to cell. See the code in * src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java * for an example of how to handle this scenario. * * @param cellAddress cellAddress * @param formattedValue formattedValue */ void cell(CellAddress cellAddress, String formattedValue); /** * Signal that the end of a sheet was been reached */ default void endSheet() { } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy