All downloads are free. The search and download features use the official Maven repository.

spreadsheet.xlsx.internal.SaxEntryParser Maven / Gradle / Ivy

/*
 * Copyright 2016 National Bank of Belgium
 * 
 * Licensed under the EUPL, Version 1.1 or - as soon they will be approved 
 * by the European Commission - subsequent versions of the EUPL (the "Licence");
 * You may not use this work except in compliance with the Licence.
 * You may obtain a copy of the Licence at:
 * 
 * http://ec.europa.eu/idabc/eupl
 * 
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the Licence is distributed on an "AS IS" basis,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the Licence for the specific language governing permissions and 
 * limitations under the Licence.
 */
package spreadsheet.xlsx.internal;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;

import nbbrd.io.function.IOSupplier;
import nbbrd.io.xml.Sax;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import spreadsheet.xlsx.XlsxEntryParser;
import spreadsheet.xlsx.XlsxDataType;

/**
 *
 * @author Philippe Charles
 */
public final class SaxEntryParser implements XlsxEntryParser {

    @lombok.NonNull
    private final XMLReader reader;

    /**
     * Creates a parser backed by the given SAX reader.
     * <p>
     * The reader is first handed to {@link #disableNamespaces(XMLReader)} and
     * the instance returned by that call is the one kept for later parsing.
     *
     * @param reader the SAX reader used by every {@code visit*} method
     */
    public SaxEntryParser(XMLReader reader) {
        XMLReader configured = disableNamespaces(reader);
        this.reader = configured;
    }

    /**
     * Parses {@code stream} with a {@code WorkbookSaxEventHandler} that
     * forwards what it finds to {@code visitor}.
     */
    @Override
    public void visitWorkbook(InputStream stream, WorkbookVisitor visitor) throws IOException {
        ContentHandler workbookHandler = new WorkbookSaxEventHandler(visitor);
        visit(workbookHandler, stream);
    }

    /**
     * Parses {@code stream} with a {@code SharedStringsSaxEventHandler} that
     * forwards what it finds to {@code visitor}.
     */
    @Override
    public void visitSharedStrings(InputStream stream, SharedStringsVisitor visitor) throws IOException {
        ContentHandler sharedStringsHandler = new SharedStringsSaxEventHandler(visitor);
        visit(sharedStringsHandler, stream);
    }

    /**
     * Parses {@code stream} with a {@code StylesSaxEventHandler} that
     * forwards what it finds to {@code visitor}.
     */
    @Override
    public void visitStyles(InputStream stream, StylesVisitor visitor) throws IOException {
        ContentHandler stylesHandler = new StylesSaxEventHandler(visitor);
        visit(stylesHandler, stream);
    }

    /**
     * Parses {@code stream} with a {@code SheetSaxEventHandler} that
     * forwards what it finds to {@code visitor}.
     */
    @Override
    public void visitSheet(InputStream stream, SheetVisitor visitor) throws IOException {
        ContentHandler sheetHandler = new SheetSaxEventHandler(visitor);
        visit(sheetHandler, stream);
    }

    /**
     * Does nothing: this implementation performs no action on close.
     */
    @Override
    public void close() throws IOException {
        // intentionally empty
    }

    /**
     * Runs one SAX parse of {@code stream}, sending the events to
     * {@code handler}.
     * <p>
     * A parser is assembled on every call, but its factory always hands back
     * the single {@link #reader} held by this instance.
     *
     * @param handler receiver of the SAX events
     * @param stream the XML content to read
     * @throws IOException declared for failures raised while parsing
     */
    private void visit(ContentHandler handler, InputStream stream) throws IOException {
        // NOTE(review): after(VOID) presumably provides the (absent) parse result — confirm against Sax.Parser
        Sax.Parser.builder().factory(() -> reader).contentHandler(handler).after(VOID).build().parseStream(stream);
    }

    // Supplier created from a null value; handed to the parser builder's after(...) step in visit().
    private static final IOSupplier VOID = IOSupplier.of(null);

    /**
     * SAX handler that walks a worksheet part and reports the sheet bounds
     * and every cell value to a {@link SheetVisitor}.
     * <p>
     * Elements are recognised by their nesting depth rather than by a full
     * path: depth 1 holds {@code dimension} and {@code sheetData}, depth 3
     * holds the cells ({@code c}) and depth 4 holds the cell content
     * ({@code v} or {@code is}). Text is captured between the start and the
     * end of a depth-4 content element, including text of nested elements.
     */
    @lombok.RequiredArgsConstructor
    private static final class SheetSaxEventHandler extends DefaultHandler {

        private final SheetVisitor visitor;
        private final SaxStringBuilder stringBuilder = new SaxStringBuilder();
        // Depth of the element currently being opened; 0 for the root element.
        private int level = 0;
        // Value of the "ref" attribute of the "dimension" element, if seen.
        private String sheetBounds = null;
        // Attributes of the cell currently being read; reset for each cell.
        private String ref = null;
        private String rawDataType = null;
        private String rawStyleIndex = null;

        /**
         * Dispatches on the current depth: records the sheet bounds, signals
         * the start of the sheet data, reads cell attributes and starts text
         * capture for cell content.
         */
        @Override
        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            switch (level) {
                case 1:
                    switch (name) {
                        case SHEET_DIMENSIONS_TAG:
                            sheetBounds = attributes.getValue(SHEET_BOUNDS_ATTRIBUTE);
                            break;
                        case SHEET_DATA_TAG:
                            visitor.onSheetData(sheetBounds);
                            break;
                    }
                    break;
                case 3:
                    if (isEqualTo(name, CELL_TAG)) {
                        parseCellAttributes(attributes);
                    }
                    break;
                case 4:
                    if (isChar(name)) {
                        if (name.charAt(0) == CELL_VALUE_TAG) {
                            stringBuilder.enable().clear();
                        }
                    } else {
                        if (name.equals(INLINE_STRING_TAG)) {
                            stringBuilder.enable().clear();
                        }
                    }
                    break;
            }
            level++;
        }

        /**
         * Emits the captured cell value when a depth-4 content element
         * ({@code v} or {@code is}) is closed.
         */
        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
            level--;
            switch (level) {
                case 4:
                    if (isChar(name)) {
                        if (name.charAt(0) == CELL_VALUE_TAG) {
                            pushCellValue();
                        }
                    } else {
                        if (name.equals(INLINE_STRING_TAG)) {
                            pushCellValue();
                        }
                    }
                    break;
            }
        }

        /**
         * Appends the characters to the pending cell value; the builder
         * ignores them while text capture is disabled.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            stringBuilder.appendIfNeeded(ch, start, length);
        }

        /**
         * Stops text capture and, if the current cell has a reference,
         * reports the captured value to the visitor.
         */
        private void pushCellValue() {
            // Always stop capturing, even when the cell is skipped, so that
            // unrelated text is not accumulated until the next value starts.
            stringBuilder.disable();
            if (ref != null) {
                XlsxDataType dataType = parseDataType(rawDataType);
                int styleIndex = XlsxValueFactory.isStyleRequired(dataType)
                        ? XlsxValueFactory.parseStyleIndex(rawStyleIndex)
                        : XlsxValueFactory.NULL_STYLE_INDEX;
                visitor.onCell(ref, stringBuilder.build(), dataType, styleIndex);
            }
        }

        /**
         * Resets the per-cell state and reads the single-letter attributes
         * {@code r}, {@code t} and {@code s} of a cell element.
         */
        private void parseCellAttributes(Attributes attributes) {
            ref = null;
            rawDataType = null;
            rawStyleIndex = null;
            for (int i = 0; i < attributes.getLength(); i++) {
                String attribute = attributeName(attributes, i);
                if (isChar(attribute)) {
                    switch (attribute.charAt(0)) {
                        case REFERENCE_ATTRIBUTE:
                            ref = attributes.getValue(i);
                            break;
                        case CELL_DATA_TYPE_ATTRIBUTE:
                            rawDataType = attributes.getValue(i);
                            break;
                        case STYLE_INDEX_ATTRIBUTE:
                            rawStyleIndex = attributes.getValue(i);
                            break;
                    }
                }
            }
        }

        /**
         * Returns the name of the attribute at {@code index}.
         * <p>
         * The qualified name is preferred because the SAX contract allows the
         * local name to be empty when namespace processing is turned off,
         * which is how this parser configures its reader; elements are
         * matched on their qualified name as well. The local name is used
         * only when no qualified name is available.
         */
        private static String attributeName(Attributes attributes, int index) {
            String qualified = attributes.getQName(index);
            if (qualified != null && !qualified.isEmpty()) {
                return qualified;
            }
            String local = attributes.getLocalName(index);
            return local != null ? local : "";
        }

        private static final char CELL_TAG = 'c';
        private static final char REFERENCE_ATTRIBUTE = 'r';
        private static final char STYLE_INDEX_ATTRIBUTE = 's';
        private static final char CELL_DATA_TYPE_ATTRIBUTE = 't';
        private static final char CELL_VALUE_TAG = 'v';
        private static final String SHEET_DIMENSIONS_TAG = "dimension";
        private static final String SHEET_BOUNDS_ATTRIBUTE = "ref";
        private static final String SHEET_DATA_TAG = "sheetData";
        private static final String INLINE_STRING_TAG = "is";

        /**
         * Maps the raw {@code t} attribute of a cell to a data type.
         *
         * @param rawDataType the attribute value, or {@code null} if absent
         * @return {@code UNDEFINED} when the attribute is absent,
         * {@code UNKNOWN} when its value is not recognised, otherwise the
         * matching type
         */
        @NonNull
        private static XlsxDataType parseDataType(@Nullable String rawDataType) {
            if (rawDataType == null) {
                return XlsxDataType.UNDEFINED;
            }
            // Single-letter codes are compared as chars; longer codes as strings.
            if (isChar(rawDataType)) {
                switch (rawDataType.charAt(0)) {
                    case NUMBER_TYPE:
                        return XlsxDataType.NUMBER;
                    case SHARED_STRING_TYPE:
                        return XlsxDataType.SHARED_STRING;
                    case DATE_TYPE:
                        return XlsxDataType.DATE;
                    case BOOLEAN_TYPE:
                        return XlsxDataType.BOOLEAN;
                    case ERROR_TYPE:
                        return XlsxDataType.ERROR;
                    default:
                        return XlsxDataType.UNKNOWN;
                }
            }
            switch (rawDataType) {
                case STRING_TYPE:
                    return XlsxDataType.STRING;
                case INLINE_STRING_TYPE:
                    return XlsxDataType.INLINE_STRING;
                default:
                    return XlsxDataType.UNKNOWN;
            }
        }

        // http://msdn.microsoft.com/en-us/library/documentformat.openxml.spreadsheet.cellvalues.aspx
        private static final char BOOLEAN_TYPE = 'b';
        private static final char NUMBER_TYPE = 'n';
        private static final char ERROR_TYPE = 'e';
        private static final char SHARED_STRING_TYPE = 's';
        private static final String STRING_TYPE = "str";
        private static final String INLINE_STRING_TYPE = "inlineStr";
        private static final char DATE_TYPE = 'd';
    }

    /**
     * SAX handler that reports the sheets and the date system of a workbook
     * part to a {@link WorkbookVisitor}.
     * <p>
     * http://msdn.microsoft.com/en-us/library/office/documentformat.openxml.spreadsheet.aspx
     */
    @lombok.RequiredArgsConstructor
    private static final class WorkbookSaxEventHandler extends DefaultHandler {

        private final WorkbookVisitor visitor;

        /**
         * Reports each {@code sheet} element (relationship id and name) and
         * the {@code date1904} flag of the {@code workbookPr} element.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            switch (qName) {
                case SHEET_TAG:
                    visitor.onSheet(attributes.getValue(SHEET_TAB_ID_ATTRIBUTE), attributes.getValue(SHEET_NAME_ATTRIBUTE));
                    break;
                case WORKBOOK_PROPERTIES_TAG:
                    visitor.onDate1904(parseXsdBoolean(attributes.getValue(DATE1904_ATTRIBUTE)));
                    break;
            }
        }

        /**
         * Parses an XML Schema boolean attribute value.
         * <p>
         * Besides {@code true}/{@code false}, the schema type also allows
         * {@code 1}/{@code 0}; {@link Boolean#parseBoolean(String)} alone
         * would read {@code "1"} as false.
         *
         * @param value the raw attribute value, or {@code null} if absent
         * @return {@code true} for {@code "1"} or {@code "true"} (any case,
         * surrounding whitespace ignored), {@code false} otherwise
         */
        private static boolean parseXsdBoolean(String value) {
            if (value == null) {
                return false;
            }
            String trimmed = value.trim();
            return "1".equals(trimmed) || "true".equalsIgnoreCase(trimmed);
        }

        private static final String SHEET_TAG = "sheet";
        private static final String WORKBOOK_PROPERTIES_TAG = "workbookPr";
        private static final String DATE1904_ATTRIBUTE = "date1904";
        private static final String SHEET_TAB_ID_ATTRIBUTE = "r:id";
        private static final String SHEET_NAME_ATTRIBUTE = "name";
    }

    /**
     * SAX handler that rebuilds each shared string item and reports it to a
     * {@link SharedStringsVisitor}.
     * <p>
     * The text of every {@code t} element found inside one {@code si}
     * element is concatenated; the result is reported when the {@code si}
     * element ends.
     * <p>
     * http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
     */
    @lombok.RequiredArgsConstructor
    private static final class SharedStringsSaxEventHandler extends DefaultHandler {

        private static final String SHARED_STRING_ITEM_TAG = "si";
        private static final String TEXT_TAG = "t";

        private final SharedStringsVisitor visitor;
        // Accumulates the text of the item currently being read.
        private final SaxStringBuilder text = new SaxStringBuilder();

        /**
         * Starts a fresh item on {@code si} and turns text capture on for
         * {@code t}.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (qName.equals(SHARED_STRING_ITEM_TAG)) {
                text.clear();
            } else if (qName.equals(TEXT_TAG)) {
                text.enable();
            }
        }

        /**
         * Turns text capture off at the end of {@code t} and reports the
         * accumulated string at the end of {@code si}.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (qName.equals(TEXT_TAG)) {
                text.disable();
            } else if (qName.equals(SHARED_STRING_ITEM_TAG)) {
                String item = text.build();
                visitor.onSharedString(item);
            }
        }

        /**
         * Hands the characters to the builder, which keeps them only while
         * capture is enabled.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            text.appendIfNeeded(ch, start, length);
        }
    }

    /**
     * SAX handler that reports the cell formats and number formats of a
     * styles part to a {@link StylesVisitor}.
     * <p>
     * {@code xf} and {@code numFmt} elements are reported only while a
     * {@code cellXfs} or {@code numFmts} element is open.
     */
    @lombok.RequiredArgsConstructor
    private static final class StylesSaxEventHandler extends DefaultHandler {

        private static final String CELL_FORMAT_TAG = "xf";
        private static final String CELL_FORMATS_TAG = "cellXfs";
        private static final String NUMBER_FORMAT_TAG = "numFmt";
        private static final String NUMBER_FORMATS_TAG = "numFmts";
        private static final String NUMBER_FORMAT_ID_ATTRIBUTE = "numFmtId";
        private static final String NUMBER_FORMAT_CODE_ATTRIBUTE = "formatCode";

        private final StylesVisitor visitor;
        // True between the start and the end of a cellXfs or numFmts element.
        private boolean withinGroup = false;

        /**
         * Opens a group on {@code cellXfs}/{@code numFmts}; inside a group,
         * reports {@code xf} and {@code numFmt} elements to the visitor.
         *
         * @throws SAXException wrapping the {@link NumberFormatException}
         * raised while reporting a format
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (qName.equals(CELL_FORMATS_TAG) || qName.equals(NUMBER_FORMATS_TAG)) {
                withinGroup = true;
            } else if (withinGroup) {
                if (qName.equals(CELL_FORMAT_TAG)) {
                    reportCellFormat(attributes);
                } else if (qName.equals(NUMBER_FORMAT_TAG)) {
                    reportNumberFormat(attributes);
                }
            }
        }

        /**
         * Closes the current group when {@code cellXfs} or {@code numFmts}
         * ends.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            boolean groupEnd = qName.equals(CELL_FORMATS_TAG) || qName.equals(NUMBER_FORMATS_TAG);
            if (groupEnd) {
                withinGroup = false;
            }
        }

        // Reports one xf element using its numFmtId attribute.
        private void reportCellFormat(Attributes attributes) throws SAXException {
            try {
                int formatId = Integer.parseInt(attributes.getValue(NUMBER_FORMAT_ID_ATTRIBUTE));
                visitor.onCellFormat(formatId);
            } catch (NumberFormatException ex) {
                throw new SAXException(ex);
            }
        }

        // Reports one numFmt element using its numFmtId and formatCode attributes.
        private void reportNumberFormat(Attributes attributes) throws SAXException {
            try {
                int formatId = Integer.parseInt(attributes.getValue(NUMBER_FORMAT_ID_ATTRIBUTE));
                String formatCode = attributes.getValue(NUMBER_FORMAT_CODE_ATTRIBUTE);
                visitor.onNumberFormat(formatId, formatCode);
            } catch (NumberFormatException ex) {
                throw new SAXException(ex);
            }
        }
    }

    /**
     * Reusable character buffer for SAX text events.
     * <p>
     * Characters are stored only while the builder is enabled, so a handler
     * can forward every {@code characters} event and switch capture on and
     * off around the elements it cares about. The backing array is kept
     * across {@link #clear()} calls.
     */
    private static final class SaxStringBuilder {

        private boolean enabled = false;
        private char[] buffer = new char[64];
        private int bufferLength = 0;

        /**
         * Discards the stored characters; the enabled state is unchanged.
         *
         * @return this builder
         */
        public SaxStringBuilder clear() {
            bufferLength = 0;
            return this;
        }

        /**
         * @return {@code true} if appended characters are currently stored
         */
        public boolean isEnabled() {
            return enabled;
        }

        /**
         * Turns capture on.
         *
         * @return this builder
         */
        public SaxStringBuilder enable() {
            this.enabled = true;
            return this;
        }

        /**
         * Turns capture off; the stored characters are kept.
         *
         * @return this builder
         */
        public SaxStringBuilder disable() {
            this.enabled = false;
            return this;
        }

        /**
         * @return a new string holding the characters stored so far
         */
        public String build() {
            return new String(buffer, 0, bufferLength);
        }

        /**
         * Stores the given characters if capture is enabled, otherwise does
         * nothing.
         *
         * @param ch source array
         * @param start index of the first character to copy
         * @param length number of characters to copy
         * @return this builder
         */
        public SaxStringBuilder appendIfNeeded(char[] ch, int start, int length) {
            if (isEnabled()) {
                int expectedLength = bufferLength + length;
                if (expectedLength > buffer.length) {
                    // Grow at least two-fold so that a long text delivered in
                    // many small chunks is not re-copied on every chunk. If
                    // doubling overflows, the result is negative and the
                    // exact required length is used instead.
                    int grown = buffer.length << 1;
                    if (grown < expectedLength) {
                        grown = expectedLength;
                    }
                    buffer = Arrays.copyOf(buffer, grown);
                }
                System.arraycopy(ch, start, buffer, bufferLength, length);
                bufferLength = expectedLength;
            }
            return this;
        }
    }

    /**
     * Tells whether the input is exactly one character long.
     *
     * @param input the text to check, not null
     * @return {@code true} if the length of {@code input} is 1
     */
    static boolean isChar(String input) {
        final int size = input.length();
        return size == 1;
    }

    /**
     * Tells whether the input consists of the single character {@code c}.
     *
     * @param input the text to check, not null
     * @param c the expected character
     * @return {@code true} if {@code input} has length 1 and its only
     * character is {@code c}
     */
    static boolean isEqualTo(String input, char c) {
        if (input.length() != 1) {
            return false;
        }
        return input.charAt(0) == c;
    }

    /**
     * Tries to switch off the SAX namespaces feature on the given reader.
     * <p>
     * If the reader does not recognise or support the feature, the failure
     * is logged at {@code FINE} level and the reader is returned as is.
     *
     * @param reader the reader to configure
     * @return the same {@code reader} instance
     */
    static XMLReader disableNamespaces(XMLReader reader) {
        final String namespacesFeature = "http://xml.org/sax/features/namespaces";
        try {
            reader.setFeature(namespacesFeature, false);
        } catch (SAXNotRecognizedException | SAXNotSupportedException ex) {
            Logger logger = Logger.getLogger(SaxEntryParser.class.getName());
            logger.log(Level.FINE, null, ex);
        }
        return reader;
    }

    /**
     * Factory that creates a new {@link SaxEntryParser} around a reader
     * obtained from {@code Sax.createReader()} on every invocation.
     */
    public static final XlsxEntryParser.Factory FACTORY = () -> new SaxEntryParser(Sax.createReader());
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy