// org.ttzero.excel.reader.ShareStringParser Maven / Gradle / Ivy
/*
* Copyright (c) 2019-2020, [email protected] All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ttzero.excel.reader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ttzero.excel.entity.e3.Context;
import org.ttzero.excel.entity.e3.IgnoreParser;
import org.ttzero.excel.entity.e3.ParserIdentifier;
import org.ttzero.excel.entity.e3.StringParser;
import org.ttzero.excel.entity.e3.Block;
import java.io.IOException;
/**
* 5.100 SST – Shared String Table
*
* A BIFF8 workbook collects the strings of all text cells
* in a global list, the Shared String Table. This table is
* located in the record SST in the Workbook Globals Sub-stream
* (➜4.2.5).
*
* @author guanquan.wang on 2019-02-05
*/
public class ShareStringParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(ShareStringParser.class);

    /**
     * Largest number of unique strings that is still held in a plain
     * {@code String[]}. Above this count the strings are pushed into an
     * {@link IndexSharedStringTable} instead.
     */
    private static final int IN_MEMORY_LIMIT = 1 << 10;

    /**
     * Reads an SST record from the given block and builds the shared string table.
     *
     * <p>The record starts with two ints: the total number of strings in the
     * workbook and the number of unique strings that follow. Each unique string
     * is then decoded with {@link StringParser#get16Bit(Block)}.
     *
     * <p>When the unique count exceeds {@link #IN_MEMORY_LIMIT} the strings are
     * stored in an {@link IndexSharedStringTable}; otherwise they are kept in an
     * array. After the strings have been read, a directly following EXTSST
     * record is skipped if present.
     *
     * @param block the block positioned at an SST record
     * @return the shared strings read from the record
     * @throws ExcelReadException if creating or filling the
     *         {@link IndexSharedStringTable} fails with an {@link IOException}
     */
    public static SharedStrings get(Block block) {
        block.ready();
        // Total number of strings in the workbook
        int ts = block.nextInt();
        // Number of following (unique) strings
        int nm = block.nextInt();
        LOGGER.debug("SharedString Count: {} Unique: {}", ts, nm);

        // List of nm Unicode strings, 16-bit string length (➜2.5.3)
        SharedStrings sst;
        if (nm > IN_MEMORY_LIMIT) {
            // Store value as `IndexSharedStringTable` file format
            Context context = block.getContext();
            IndexSharedStringTable isst = null;
            try {
                isst = new IndexSharedStringTable();
                for (int i = 0; i < nm; i++) {
                    isst.push(StringParser.get16Bit(block));
                }
                sst = new SharedStrings(isst, context.cacheSize, context.hotSize);
            } catch (IOException e) {
                // The table was never handed to a SharedStrings instance,
                // so nobody else will release it.
                closeAfterFailure(isst, e);
                throw new ExcelReadException("Error. Create SharedStringTable failed.", e);
            } catch (RuntimeException e) {
                // Same clean-up for unchecked failures (e.g. a malformed string);
                // the original exception is rethrown unchanged.
                closeAfterFailure(isst, e);
                throw e;
            }
        } else {
            String[] data = new String[nm];
            for (int i = 0; i < nm; i++) {
                data[i] = StringParser.get16Bit(block);
            }
            sst = new SharedStrings(data);
        }

        if (block.recordRemainingSize() == 2) {
            /*
            5.42 EXTSST – Extended SST
            This record occurs in conjunction with the SST record (➜5.100).
            It is used by Excel to create a hash table with stream offsets
            to the SST record to optimise string search operations. Excel
            may not shorten this record if strings are deleted from the
            shared string table, so the last part might contain invalid data.
            The stream indexes in this record divide the SST into portions
            containing a constant number of strings. See ➜4.11 for more
            information about shared string tables. It is not required to
            write this record when exporting an Excel document.
            */
            short id = block.nextIdentifier();
            if (id == ParserIdentifier.EXTSST) {
                block.commit();
                // The EXTSST content is not needed for reading; skip it.
                IgnoreParser.get(block);
            } else {
                // Not an EXTSST record: keep the identifier that was just read
                // (presumably so the next parser can pick it up — confirm in Block).
                block.cacheIdentifier();
            }
        } else {
            block.commit();
        }
        return sst;
    }

    /**
     * Returns the record identifier handled by this parser.
     *
     * @return {@link ParserIdentifier#SST}
     */
    public static short getId() {
        return ParserIdentifier.SST;
    }

    /**
     * Closes a partially built table after a failure. A failure while closing
     * is attached to {@code cause} as a suppressed exception so that it never
     * hides the original error.
     *
     * @param table the table to close, may be {@code null} if it was never created
     * @param cause the failure that is about to be propagated
     */
    private static void closeAfterFailure(AutoCloseable table, Throwable cause) {
        if (table == null) {
            return;
        }
        try {
            table.close();
        } catch (Exception closeFailure) {
            cause.addSuppressed(closeFailure);
        }
    }
}