All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.ttzero.excel.reader.ShareStringParser Maven / Gradle / Ivy

/*
 * Copyright (c) 2019-2020, [email protected] All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ttzero.excel.reader;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ttzero.excel.entity.e3.Context;
import org.ttzero.excel.entity.e3.IgnoreParser;
import org.ttzero.excel.entity.e3.ParserIdentifier;
import org.ttzero.excel.entity.e3.StringParser;
import org.ttzero.excel.entity.e3.Block;

import java.io.IOException;

/**
 * 5.100 SST – Shared String Table
 * 

* A BIFF8 workbook collects the strings of all text cells * in a global list, the Shared String Table. This table is * located in the record SST in the Workbook Globals Sub-stream * (➜4.2.5). * * @author guanquan.wang on 2019-02-05 */ public class ShareStringParser { private static final Logger LOGGER = LoggerFactory.getLogger(ShareStringParser.class); public static SharedStrings get(Block block) { block.ready(); // Total number of strings in the workbook (see below) int ts = block.nextInt(); // Number of following strings (nm) int nm = block.nextInt(); LOGGER.debug("SharedString Count: {} Unique: {}", ts, nm); // List of nm Unicode strings, 16-bit string length (➜2.5.3) int page = 1 << 9; SharedStrings sst; if (nm > page << 1) { // Store value as `IndexSharedStringTable` file format Context context = block.getContext(); try { IndexSharedStringTable isst = new IndexSharedStringTable(); for (int i = 0; i < nm; i++) { isst.push(StringParser.get16Bit(block)); } sst = new SharedStrings(isst, context.cacheSize, context.hotSize); } catch (IOException e) { throw new ExcelReadException("Error. Create SharedStringTable failed.", e); } } else { String[] data = new String[nm]; for (int i = 0; i < nm; i++) { data[i] = StringParser.get16Bit(block); } sst = new SharedStrings(data); } if (block.recordRemainingSize() == 2) { /* 5.42 EXTSST – Extended SST This record occurs in conjunction with the SST record (➜5.100). It is used by Excel to create a hash table with stream offsets to the SST record to optimise string search operations. Excel may not shorten this record if strings are deleted from the shared string table, so the last part might contain invalid data. The stream indexes in this record divide the SST into portions containing a constant number of strings. See ➜4.11 for more information about shared string tables. It is not required to write this record when exporting an Excel document. */ short id = block.nextIdentifier(); if (id == ParserIdentifier.EXTSST) { block.commit(); IgnoreParser.get(block); } else block.cacheIdentifier(); } else block.commit(); return sst; } public static short getId() { return ParserIdentifier.SST; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy