// org.ttzero.excel.reader.BIFF8Reader Maven / Gradle / Ivy
/*
* Copyright (c) 2019-2020, [email protected] All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ttzero.excel.reader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ttzero.excel.entity.e3.AdjacentShortBlock;
import org.ttzero.excel.entity.e3.Block;
import org.ttzero.excel.entity.e3.ContextHandler;
import org.ttzero.excel.entity.e3.EndOfChainException;
import org.ttzero.excel.entity.e3.SectorAllocationTable;
import org.ttzero.excel.entity.e3.SectorAllocationTableFactory;
import org.ttzero.excel.entity.e3.Context;
import org.ttzero.excel.entity.e3.enums.ByteOrder;
import org.ttzero.excel.entity.e3.enums.DirTypeEnum;
import org.ttzero.excel.entity.e3.Directory;
import org.ttzero.excel.entity.e3.GlobalsSetting;
import org.ttzero.excel.entity.e3.enums.RedBlackEnum;
import org.ttzero.excel.entity.e3.SheetInfo;
import org.ttzero.excel.entity.e3.ShortBlock;
import org.ttzero.excel.entity.e3.GlobalsSubStream;
import org.ttzero.excel.entity.e3.enums.ShortStreamEnum;
import org.ttzero.excel.entity.e3.oleps.PIDSI;
import org.ttzero.excel.entity.e3.oleps.PropertySetStreamParser;
import org.ttzero.excel.entity.e3.OlepShortBlock;
import org.ttzero.excel.entity.e3.oleps.OlepParser;
import org.ttzero.excel.manager.ExcelType;
import org.ttzero.excel.manager.docProps.App;
import org.ttzero.excel.manager.docProps.Core;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.List;
/**
* A BIFF8(xls) format file reader
*
* @see ExcelReader
* @author guanquan.wang at 2019-01-25 15:16
*/
public class BIFF8Reader extends ExcelReader {
// Class-wide SLF4J logger
private static final Logger LOGGER = LoggerFactory.getLogger(BIFF8Reader.class);
// Parsing state of the opened file; created in the four-argument constructor
// and filled in by parseHeader() and init()
Context context;
// Result of acceptDir(context.dir_sid), assigned in init() and scanned by
// parseGlobalSetting() for the workbook entry
private Directory[] rootEntities;
// Created on demand while property sets are read; receives the company name
// from property id 0x0F
private App app;
// NOTE(review): no assignment to this field is visible in this part of the file
private Core core;
// Assigned from the string value of property id 0x02 while property sets are read
private String category;
// globals setting; assigned from parseGlobalSetting() when no sheets have been set yet
GlobalsSetting gs;
// A tmp buffer holding the file header; allocated in init() and set to null
// once init() has finished
private ByteBuffer buffer;
/**
 * Creates a reader for the given file by delegating to
 * {@link #BIFF8Reader(Path, int, int, int)} with a cache size of 0,
 * a hot size of 0 and the {@code VALUE_ONLY} option.
 *
 * @param path the file to read
 * @throws IOException if the delegated constructor fails to read the file
 */
public BIFF8Reader(Path path) throws IOException {
this(path, 0, 0, VALUE_ONLY);
}
/**
 * Creates a reader for the given file and parses its header, allocation
 * tables and directory entries immediately.
 *
 * <p>The file is opened read-only. If initialization fails the channel is
 * closed again before the exception is propagated, because the caller never
 * receives a reader instance it could close itself.
 *
 * @param path      the file to read
 * @param cacheSize value stored in {@code context.cacheSize}
 * @param hotSize   value stored in {@code context.hotSize}
 * @param option    value stored in the reader's {@code option} field
 * @throws IOException        if the file cannot be opened or read
 * @throws ExcelReadException if the file is smaller than 1024 bytes or
 *                            initialization detects a corrupted file
 */
public BIFF8Reader(Path path, int cacheSize, int hotSize, int option) throws IOException {
    super();
    // Check file size
    if (Files.size(path) < 1024) {
        throw new ExcelReadException("File has been corrupted");
    }
    this.option = option;
    this.context = new Context();
    context.cacheSize = cacheSize;
    context.hotSize = hotSize;
    context.channel = Files.newByteChannel(path, StandardOpenOption.READ);
    try {
        init();
    } catch (IOException | RuntimeException e) {
        // Do not leak the file handle when construction fails
        try {
            context.channel.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/**
 * Reads the 512-byte file header and prepares everything that is needed
 * before sheets can be read: the sector allocation tables, the short-sector
 * allocation table, the root directory entries and the document summary
 * information.
 *
 * <p>The temporary {@code buffer} field is allocated here and set to
 * {@code null} once initialization has completed.
 *
 * @throws IOException        if reading from the channel fails or the file
 *                            declares big-endian byte order
 * @throws ExcelReadException if the channel ends before the full header
 *                            could be read
 */
private void init() throws IOException {
    // The header is always located at the beginning of the file,
    // and its size is exactly 512 bytes
    buffer = ByteBuffer.allocate(1 << 9);
    buffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
    // A single read() may return fewer bytes than requested, so keep
    // reading until the header is complete or no more data arrives
    while (buffer.hasRemaining()) {
        if (context.channel.read(buffer) <= 0) {
            break;
        }
    }
    if (buffer.hasRemaining()) {
        throw new ExcelReadException("File has been corrupted");
    }
    buffer.flip();
    // 0-8: Compound document file identifier
    // 8-16: Unique identifier (UID) of this file
    buffer.position(24);
    // Parse header data
    parseHeader(buffer);

    // Big endian not support
    if (context.byteOrder == ByteOrder.BigEndian) {
        throw new IOException("Not support byte order: Big-Endian.");
    }

    // Publish Context
    ContextHandler.hook(context);

    // Parse Master Sector Allocation Table
    if (context.count_sat << 7 > SectorAllocationTable.SmallSectorTable.LIMIT) {
        // Create A Large Sector Allocation Table
        context.sectorTable = SectorAllocationTableFactory.create(context.count_sat << 7);
    }
    // Parse master sat
    parseMasterSAT(context.msat, context.msat.length);
    // parse attach master sat
    if (context.count_msat > 0) {
        attachMasterSAT();
    }

    // Short Sector Allocation Table
    context.ssat = shortSector();

    // Let the logger call toString() only when debug output is enabled
    LOGGER.debug("Context: {}", context);
    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace("Short-Sector Allocation Table: {}", Arrays.toString(context.ssat));
        LOGGER.trace("Sector Allocation Table: {}", context.sectorTable);
    }

    // Free
    context.msat = null;

    // Root entity
    rootEntities = acceptDir(context.dir_sid);

    // Set default split value
    context.split = 5;

    parseDocSummaryInformation();

    // Release tmp buffer
    buffer = null;
}
/**
 * Reads the header fields from the given buffer into {@code context}.
 *
 * <p>All values are consumed with relative reads starting at the buffer's
 * current position (the caller positions it at offset 24), so the order of
 * the statements below must match the on-disk field order.
 *
 * @param buffer the header buffer, positioned at the first field to read
 */
private void parseHeader(ByteBuffer buffer) {
context.excelType = ExcelType.XLS;
// Revision number of the file format (most used is 003EH)
context.mv = buffer.getShort();
// Version number of the file format (most used is 0003H)
context.v = buffer.getShort();
// Byte order identifier
context.byteOrder = ByteOrder.of(buffer.getShort());
// Size of a sector in the compound document file, as a power of two (ssz)
context.ssz = buffer.getShort();
// Sector size in bytes derived from the exponent above
context.sectorSize = 1 << context.ssz;
// Size of a short-sector in the short-stream container
// stream, as a power of two (sssz)
context.sssz = buffer.getShort();
// Offset 34, 10 bytes: not used
buffer.position(buffer.position() + 10);
// Total number of sectors used for the sector allocation table
context.count_sat = buffer.getInt();
// SecID of first sector of the directory stream
context.dir_sid = buffer.getInt();
// Offset 52, 4 bytes: not used
buffer.position(buffer.position() + 4);
// Minimum size of a standard stream (in bytes, minimum
// allowed and most used size is 4096 bytes)
context.standard_size = buffer.getInt();
// SecID of first sector of the short-sector allocation table,
// or -2 (End Of Chain SecID) if not extant
context.first_ssat_sid = buffer.getInt();
// Total number of sectors used for the short-sector allocation table
context.count_ssat = buffer.getInt();
// SecID of first sector of the master sector allocation table,
// or -2 (End Of Chain SecID) if no additional sectors used
context.first_msat_sid = buffer.getInt();
// Total number of sectors used for the master sector allocation table
context.count_msat = buffer.getInt();
// First part of the master sector allocation table containing 109 SecIDs
// NOTE(review): the second argument looks like a byte length (4 bytes per
// SecID) capped at what is left in the header buffer - confirm against
// SectorAllocationTable.parse
context.msat = SectorAllocationTable.parse(buffer, Math.min(context.count_sat << 2, buffer.remaining()));
}
/**
* The short-sector allocation table (SSAT) is an array of SecIDs
* and contains the SecID chains of all shortstreams, similar
* to the sector allocation table ( sheetName = null;
if (app == null) {
app = new App();
}
for (OlepParser.PropertyIdentifierAndOffset iao : propertySets[0].idAndOffset) {
switch (iao.id) {
case 0x000002:
category = iao.vString;
break;
// case 0x00000B:
// break;
// case 0x000010:
// break;
// case 0x000013:
// break;
// case 0x000016:
// break;
case 0x00000D:
sheetName = iao.vVector;
break;
// case 0x00000C:
// break;
case 0x00000F:
app.setCompany(iao.vString);
break;
}
}
if (sheetName != null) {
BIFF8Sheet[] sheets = new BIFF8Sheet[sheetName.size()];
int i = 0;
for (Object o : sheetName) {
sheets[i] = new BIFF8Sheet();
sheets[i].setName(o.toString());
sheets[i].setReader(this);
sheets[i].setIndex(i);
i++;
}
this.sheets = sheets;
}
break;
}
if (this.sheets == null) {
gs = parseGlobalSetting();
if (gs == null) {
throw new ExcelReadException("Miss the GlobalSetting, maybe the file has be corrupted.");
}
}
}
/**
 * Parse global setting
 *
 * <p>Searches the root directory entries for the one whose name equals
 * {@code ShortStreamEnum.Workbook.name()}, reads the globals sub-stream from
 * it and uses the contained sheet records to complete the reader's sheets:
 * <ul>
 * <li>if sheets already exist, each one is matched by name and receives its
 * hidden flag and {@link SheetInfo};</li>
 * <li>otherwise a new sheet is created for every record whose type equals
 * {@code ShortStreamEnum.Workbook.getHex()}.</li>
 * </ul>
 * In both cases every sheet records whether the workbook stream is stored in
 * standard sectors.
 *
 * @return the setting, or {@code null} if no workbook directory entry exists
 */
GlobalsSetting parseGlobalSetting() {
    for (Directory re : rootEntities) {
        if (!ShortStreamEnum.Workbook.name().equals(re.name)) {
            continue;
        }
        boolean standard_sector = re.sid < context.sectorTable.size()
            && context.recordBoundCheck(re.sid, re.size, context.ssz);
        Block block = standard_sector ? new Block(context, re.sid).init() : new ShortBlock(context, re.sid, -1).init();
        GlobalsSetting setting = GlobalsSubStream.get(block);
        SheetInfo[] si = setting.getSheets();

        if (this.sheets != null) {
            BIFF8Sheet[] sheets = (BIFF8Sheet[]) this.sheets;
            for (BIFF8Sheet sheet : sheets) {
                String name = sheet.getName();
                for (SheetInfo s : si) {
                    // 0x0 Worksheet
                    if (s.getName().equals(name) && s.getType() == ShortStreamEnum.Workbook.getHex()) {
                        sheet.hidden = s.getStatus() == 0x1;
                        sheet.sheetInfo = s;
                    }
                }
                // Independent of the sheet records: set exactly once per sheet,
                // also when there is no record at all
                sheet.standard_sector = standard_sector;
            }
        } else {
            BIFF8Sheet[] sheets = new BIFF8Sheet[si.length];
            int i = 0;
            for (SheetInfo s : si) {
                // 0x0 Worksheet
                if (s.getType() == ShortStreamEnum.Workbook.getHex()) {
                    BIFF8Sheet sheet = new BIFF8Sheet();
                    sheet.setName(s.getName());
                    sheet.setReader(this);
                    sheet.setIndex(i);
                    sheet.hidden = s.getStatus() == 0x1;
                    sheet.sheetInfo = s;
                    sheet.standard_sector = standard_sector;
                    sheets[i++] = sheet;
                }
            }
            // Drop the unused tail when some records were not worksheets
            if (i != sheets.length) {
                sheets = Arrays.copyOf(sheets, i);
            }
            this.sheets = sheets;
        }
        return setting;
    }
    return null;
}
/**
 * Make the reader parse formula
 *
 * <p>Not implemented for this reader yet: the method performs no work and
 * returns the reader unchanged.
 *
 * @return this reader
 */
@Override
public ExcelReader parseFormula() {
// TODO
return this;
}
}