All downloads are free. The search and download features use the official Maven repository.

org.ttzero.excel.reader.BIFF8Reader Maven / Gradle / Ivy

/*
 * Copyright (c) 2019-2020, [email protected] All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ttzero.excel.reader;


import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ttzero.excel.entity.e3.AdjacentShortBlock;
import org.ttzero.excel.entity.e3.Block;
import org.ttzero.excel.entity.e3.ContextHandler;
import org.ttzero.excel.entity.e3.EndOfChainException;
import org.ttzero.excel.entity.e3.SectorAllocationTable;
import org.ttzero.excel.entity.e3.SectorAllocationTableFactory;
import org.ttzero.excel.entity.e3.Context;
import org.ttzero.excel.entity.e3.enums.ByteOrder;
import org.ttzero.excel.entity.e3.enums.DirTypeEnum;
import org.ttzero.excel.entity.e3.Directory;
import org.ttzero.excel.entity.e3.GlobalsSetting;
import org.ttzero.excel.entity.e3.enums.RedBlackEnum;
import org.ttzero.excel.entity.e3.SheetInfo;
import org.ttzero.excel.entity.e3.ShortBlock;
import org.ttzero.excel.entity.e3.GlobalsSubStream;
import org.ttzero.excel.entity.e3.enums.ShortStreamEnum;
import org.ttzero.excel.entity.e3.oleps.PIDSI;
import org.ttzero.excel.entity.e3.oleps.PropertySetStreamParser;
import org.ttzero.excel.entity.e3.OlepShortBlock;
import org.ttzero.excel.entity.e3.oleps.OlepParser;
import org.ttzero.excel.manager.E3Const;
import org.ttzero.excel.manager.ExcelType;
import org.ttzero.excel.manager.docProps.App;
import org.ttzero.excel.manager.docProps.Core;
import org.ttzero.excel.util.FileUtil;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.List;


/**
 * A BIFF8(xls) format file reader
 *
 * @see ExcelReader
 * @author guanquan.wang at 2019-01-25 15:16
 */
public class BIFF8Reader extends ExcelReader {
    private static final Logger LOGGER = LoggerFactory.getLogger(BIFF8Reader.class);

    // Parsing state for the opened file; created in the constructors, which also
    // set its cacheSize, hotSize and channel before init() runs
    protected Context context;
    // Directory entries obtained in init() from acceptDir(context.dir_sid)
    protected Directory[] rootEntities;
    // Application properties; created on demand while the summary information is
    // parsed, where the company name is stored into it
    protected App app;
    // Core document properties (NOTE(review): no assignment is visible in this part of the file)
    protected Core core;
    // Category string taken from property id 0x000002 of the document summary information
    protected String category;
    // Globals setting, as returned by parseGlobalSetting()
    protected GlobalsSetting gs;
    // A temporary buffer holding the file header; init() sets it to null when it finishes
    protected ByteBuffer buffer;

    /**
     * Creates a reader for the file at {@code path} with the {@code VALUE_ONLY}
     * option; 0 is passed for both the buffer size and the cache size.
     *
     * @param path location of the xls file
     * @throws IOException propagated from the four-argument constructor
     */
    public BIFF8Reader(Path path) throws IOException {
        this(path, 0, 0, VALUE_ONLY);
    }

    /**
     * Creates a reader for the file at {@code path} with the given reader option;
     * 0 is passed for both the buffer size and the cache size.
     *
     * @param path   location of the xls file
     * @param option the reader option, forwarded unchanged
     * @throws IOException propagated from the four-argument constructor
     */
    public BIFF8Reader(Path path, int option) throws IOException {
        this(path, 0, 0, option);
    }

    /**
     * Opens the file at {@code path} for reading and parses its header structures.
     *
     * <p>Files smaller than 1024 bytes are rejected before anything is opened.
     * If parsing fails after the channel has been opened, the channel is closed
     * before the failure is rethrown, because the caller never receives a reader
     * instance through which it could close it.
     *
     * @param path       location of the xls file
     * @param bufferSize stored into {@code context.cacheSize}
     * @param cacheSize  stored into {@code context.hotSize}
     * @param option     the reader option
     * @throws IOException        if the file cannot be read or {@code init()} reports an I/O problem
     * @throws ExcelReadException if the file is smaller than 1024 bytes
     */
    public BIFF8Reader(Path path, int bufferSize, int cacheSize, int option) throws IOException {
        super();
        // Check file size
        if (Files.size(path) < 1024) {
            throw new ExcelReadException("File has been corrupted");
        }
        this.option = option;

        this.context = new Context();
        // NOTE(review): bufferSize feeds cacheSize and cacheSize feeds hotSize - kept as in the original
        context.cacheSize = bufferSize;
        context.hotSize = cacheSize;
        context.channel = Files.newByteChannel(path, StandardOpenOption.READ);

        try {
            init();
        } catch (IOException | RuntimeException e) {
            // Construction failed: release the channel, nobody else can
            try {
                context.channel.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Creates a reader over the content of {@code is} with the {@code VALUE_ONLY}
     * option; 0 is passed for both the buffer size and the cache size.
     *
     * @param is the stream supplying the xls content
     * @throws IOException propagated from the four-argument constructor
     */
    public BIFF8Reader(InputStream is) throws IOException {
        this(is, 0, 0, VALUE_ONLY);
    }

    /**
     * Creates a reader over the content of {@code is} with the given reader option;
     * 0 is passed for both the buffer size and the cache size.
     *
     * @param is     the stream supplying the xls content
     * @param option the reader option, forwarded unchanged
     * @throws IOException propagated from the four-argument constructor
     */
    public BIFF8Reader(InputStream is, int option) throws IOException {
        this(is, 0, 0, option);
    }

    /**
     * Copies {@code stream} into a temporary file, opens that file for reading
     * and parses its header structures.
     *
     * <p>If anything fails after the temporary file has been created (the copy,
     * the size check or the parsing), the channel is closed if it was opened and
     * the temporary file is deleted before the failure is rethrown, so a failed
     * construction leaves nothing behind.
     *
     * @param stream     the stream supplying the xls content
     * @param bufferSize stored into {@code context.cacheSize}
     * @param cacheSize  stored into {@code context.hotSize}
     * @param option     the reader option
     * @throws IOException        if the temporary file cannot be created or read
     * @throws ExcelReadException if the copied content is smaller than 1024 bytes
     */
    public BIFF8Reader(InputStream stream, int bufferSize, int cacheSize, int option) throws IOException {
        Path temp = FileUtil.mktmp(E3Const.BIFF_PREFIX);
        if (temp == null) {
            throw new IOException("Create temp directory error. Please check your permission");
        }

        try {
            FileUtil.cp(stream, temp);

            // Check file size
            if (Files.size(temp) < 1024) {
                throw new ExcelReadException("File has been corrupted");
            }
            this.option = option;

            this.context = new Context();
            // NOTE(review): bufferSize feeds cacheSize and cacheSize feeds hotSize - kept as in the original
            context.cacheSize = bufferSize;
            context.hotSize = cacheSize;
            context.channel = Files.newByteChannel(temp, StandardOpenOption.READ);

            init();
        } catch (IOException | RuntimeException e) {
            // Close the channel first so the temp file can be removed on every platform
            if (context != null && context.channel != null) {
                try {
                    context.channel.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            try {
                Files.deleteIfExists(temp);
            } catch (IOException deleteFailure) {
                e.addSuppressed(deleteFailure);
            }
            throw e;
        }
    }

    /**
     * Reads the 512-byte file header and builds the structures needed for reading:
     * header fields, master sector allocation table, short-sector allocation table,
     * root directory entries and the document summary information.
     *
     * <p>The header is read in a loop because a single channel read is not
     * guaranteed to fill the buffer; reaching end-of-file before 512 bytes are
     * available is reported as a corrupted file.
     *
     * @throws IOException        if reading fails or the file declares big-endian byte order
     * @throws ExcelReadException if the file ends before the header is complete
     */
    private void init() throws IOException {
        // The header is always located at the beginning of the file,
        // and its size is exactly 512 bytes
        buffer = ByteBuffer.allocate(1 << 9);
        buffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
        // Keep reading until the whole header is in the buffer
        while (buffer.hasRemaining()) {
            if (context.channel.read(buffer) < 0) {
                throw new ExcelReadException("File has been corrupted");
            }
        }
        buffer.flip();

        // 0-8: Compound document file identifier
        // 8-16: Unique identifier (UID) of this file
        buffer.position(24);

        // Parse header data
        parseHeader(buffer);

        // Big endian not support
        if (context.byteOrder == ByteOrder.BigEndian) {
            throw new IOException("Not support byte order: Big-Endian.");
        }

        // Publish Context
        ContextHandler.hook(context);

        // Parse Master Sector Allocation Table
        if (context.count_sat << 7 > SectorAllocationTable.SmallSectorTable.LIMIT) {
            // Create A Large Sector Allocation Table
            context.sectorTable = SectorAllocationTableFactory.create(context.count_sat << 7);
        }

        // Parse master sat
        parseMasterSAT(context.msat, context.msat.length);

        // parse attach master sat
        if (context.count_msat > 0) {
            attachMasterSAT();
        }

        // Short Sector Allocation Table
        context.ssat = shortSector();

        // Pass the object itself so toString() only runs when debug logging is enabled
        LOGGER.debug("Context: {}", context);
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Short-Sector Allocation Table: {}", Arrays.toString(context.ssat));
            LOGGER.trace("Sector Allocation Table: {}", context.sectorTable);
        }

        // Free
        context.msat = null;

        // Root entity
        rootEntities = acceptDir(context.dir_sid);

        // Set default split value
        context.split = 5;

        parseDocSummaryInformation();

        // Release tmp buffer
        buffer = null;
    }

    /**
     * Copies the header fields from {@code buffer} into the context, reading them
     * strictly in the order they are laid out. The caller positions the buffer
     * at the revision number before calling.
     *
     * @param buffer the header bytes, positioned at the first field to read
     */
    private void parseHeader(ByteBuffer buffer) {
        final Context ctx = context;
        ctx.excelType = ExcelType.XLS;

        // Revision number of the file format (most used is 003EH)
        ctx.mv = buffer.getShort();
        // Version number of the file format (most used is 0003H)
        ctx.v = buffer.getShort();
        // Byte order identifier
        ctx.byteOrder = ByteOrder.of(buffer.getShort());

        // Sector size, stored as a power of two (ssz)
        ctx.ssz = buffer.getShort();
        ctx.sectorSize = 1 << ctx.ssz;
        // Short-sector size inside the short-stream container, as a power of two (sssz)
        ctx.sssz = buffer.getShort();

        // Jump over the 10 unused bytes that follow
        int afterReserved = buffer.position() + 10;
        buffer.position(afterReserved);

        // Number of sectors that make up the sector allocation table
        ctx.count_sat = buffer.getInt();
        // SecID of the first sector of the directory stream
        ctx.dir_sid = buffer.getInt();

        // Jump over 4 more unused bytes
        int afterUnused = buffer.position() + 4;
        buffer.position(afterUnused);

        // Minimum size of a standard stream in bytes
        // (minimum allowed and most used size is 4096 bytes)
        ctx.standard_size = buffer.getInt();
        // SecID of the first sector of the short-sector allocation table,
        // or -2 (End Of Chain SecID) if not extant
        ctx.first_ssat_sid = buffer.getInt();
        // Number of sectors that make up the short-sector allocation table
        ctx.count_ssat = buffer.getInt();
        // SecID of the first sector of the master sector allocation table,
        // or -2 (End Of Chain SecID) if no additional sectors used
        ctx.first_msat_sid = buffer.getInt();
        // Number of sectors that make up the master sector allocation table
        ctx.count_msat = buffer.getInt();

        // First part of the master sector allocation table (109 SecIDs);
        // never read past what is left in the header buffer
        ctx.msat = SectorAllocationTable.parse(buffer, Math.min(ctx.count_sat << 2, buffer.remaining()));
    }

    /**
     * The short-sector allocation table (SSAT) is an array of SecIDs
     * and contains the SecID chains of all shortstreams, similar
     * to the sector allocation table ( sheetName = null;
            if (app == null) {
                app = new App();
            }
            for (OlepParser.PropertyIdentifierAndOffset iao : propertySets[0].idAndOffset) {
                switch (iao.id) {
                    case 0x000002:
                        category = iao.vString;
                        break;
//                    case 0x00000B:
//                        break;
//                    case 0x000010:
//                        break;
//                    case 0x000013:
//                        break;
//                    case 0x000016:
//                        break;
                    case 0x00000D:
                        sheetName = iao.vVector;
                        break;
//                    case 0x00000C:
//                        break;
                    case 0x00000F:
                        app.setCompany(iao.vString);
                        break;
                }
            }

            if (sheetName != null) {
                BIFF8Sheet[] sheets = new BIFF8Sheet[sheetName.size()];
                int i = 0;
                for (Object o : sheetName) {
                    sheets[i] = (BIFF8Sheet) sheetFactory(option);
                    sheets[i].setName(o.toString());
                    sheets[i].setReader(this);
                    sheets[i].setIndex(i);
                    i++;
                }
                this.sheets = sheets;
            }
            break;
        }
        if (this.sheets == null) {
            gs = parseGlobalSetting();
            if (gs == null) {
                throw new ExcelReadException("Miss the GlobalSetting, maybe the file has be corrupted.");
            }
        }
    }

    /**
     * Parses the globals setting from the root directory entry named after
     * {@code ShortStreamEnum.Workbook} and synchronises the reader's sheets with it.
     *
     * <p>When sheets already exist, each one receives the hidden flag and the
     * {@link SheetInfo} of the entry that matches its name and type. Otherwise
     * the sheet array is built from the entries of the matching type. In both
     * cases every sheet is told whether the workbook stream lives in standard
     * sectors.
     *
     * @return the setting, or {@code null} if no root entry has the workbook name
     */
    GlobalsSetting parseGlobalSetting() {
        for (Directory re : rootEntities) {
            if (!ShortStreamEnum.Workbook.name().equals(re.name)) {
                continue;
            }
            boolean standard_sector = re.sid < context.sectorTable.size() && context.recordBoundCheck(re.sid, re.size, context.ssz);
            Block block = standard_sector ? new Block(context, re.sid).init() : new ShortBlock(context, re.sid).init();
            GlobalsSetting setting = GlobalsSubStream.get(block);

            SheetInfo[] si = setting.getSheets();
            if (this.sheets != null) {
                BIFF8Sheet[] sheets = (BIFF8Sheet[]) this.sheets;
                for (BIFF8Sheet sheet : sheets) {
                    // Independent of the sheet info entries, so set it once per sheet;
                    // this also covers the case of an empty info array
                    sheet.standard_sector = standard_sector;

                    String name = sheet.getName();
                    for (SheetInfo s : si) {
                        // 0x0 Worksheet
                        if (s.getName().equals(name) && s.getType() == ShortStreamEnum.Workbook.getHex()) {
                            sheet.hidden = s.getStatus() == 0x1;
                            sheet.sheetInfo = s;
                        }
                    }
                }
            } else {
                BIFF8Sheet[] sheets = new BIFF8Sheet[si.length];
                int i = 0;
                for (SheetInfo s : si) {
                    // 0x0 Worksheet
                    if (s.getType() == ShortStreamEnum.Workbook.getHex()) {
                        sheets[i] = (BIFF8Sheet) sheetFactory(option);
                        sheets[i].setName(s.getName());
                        sheets[i].setReader(this);
                        sheets[i].setIndex(i);
                        sheets[i].hidden = s.getStatus() == 0x1;
                        sheets[i].sheetInfo = s;
                        sheets[i].standard_sector = standard_sector;
                        i++;
                    }
                }
                // Drop the unused tail when some entries were skipped
                if (i != sheets.length) {
                    sheets = Arrays.copyOf(sheets, i);
                }
                this.sheets = sheets;
            }
            return setting;
        }
        return null;
    }

    /**
     * Make the reader parse formula.
     *
     * <p>Not implemented yet for this reader: the method currently does nothing
     * and simply returns the reader itself.
     *
     * @return this reader
     */
    @Override
    public ExcelReader parseFormula() {
        // TODO
        return this;
    }

    /**
     * List all pictures in excel.
     *
     * <p>The globals setting is parsed on first use. The drawings are created
     * only when the setting reports a drawing-group block index. If the globals
     * setting cannot be found, no drawings are created and the method falls
     * through to the "no pictures" result instead of failing with a
     * {@link NullPointerException}.
     *
     * @return picture list or null if not exists.
     */
    @Override
    public List listPictures() {
        if (gs == null) {
            // May still be null when the workbook entry is missing
            gs = parseGlobalSetting();
        }
        if (drawings == null && gs != null && gs.getMsoDrawingGroupBlockIndex() != null) {
            drawings = new BIFF8Drawings(Block.loadWithIndex(gs.getMsoDrawingGroupBlockIndex()), sheets);
        }
        return drawings != null ? drawings.listPictures() : null;
    }

    /**
     * Builds the sheet implementation that corresponds to a reader option.
     *
     * @param option the reader option.
     * @return a calc sheet for {@code VALUE_AND_CALC}, a merge sheet for
     * {@code COPY_ON_MERGED}, otherwise a plain {@link BIFF8Sheet}
     */
    protected Sheet sheetFactory(int option) {
        if (option == VALUE_AND_CALC) {
            return new BIFF8Sheet().asCalcSheet();
        }
        if (option == COPY_ON_MERGED) {
            return new BIFF8Sheet().asMergeSheet();
        }
        // TODO full reader: the VALUE_AND_CALC|COPY_ON_MERGED combination
        // has no dedicated sheet yet and gets the plain one
        return new BIFF8Sheet();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy