All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.ttzero.excel.entity.e3.StringParser Maven / Gradle / Ivy

/*
 * Copyright (c) 2019-2020, [email protected] All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ttzero.excel.entity.e3;

/**
 * 5.102 STRING
 * 

* From BIFF8 on, strings are always stored using * UTF-16LE3 text encoding. The character array is * a sequence of 16-bit values4. Additionally it is * possible to use a compressed format, which omits * the high bytes of all characters, if they are all zero. * The following table describes the standard format * of the entire string, but in many records the strings * differ from this format. This will be mentioned separately. *

* It is possible (but not required) to store Rich-Text * formatting information and Asian phonetic information * inside a Unicode string. This results in four different * ways to store a string. The character array is not * zero-terminated. * * @author guanquan.wang at 2019-01-28 17:36 */ public class StringParser { /** * Unicode string, 8-bit string length, * Contain length information * * @param block the byte {@link Block} * @return string value */ public static String get8BitWithHead(Block block) { block.ready(); // Length of the string String v = get(block, Option.ASCII); block.commit(); return v; } /** * Unicode string, 16-bit string length, * Contain length information * * @param block the byte {@link Block} * @return string value */ public static String get16BitWithHead(Block block) { block.ready(); // Length of the string String v = get(block, Option.UTF16); block.commit(); return v; } /** * Unicode string, 8-bit string length * * @param block the byte {@link Block} * @return string value */ public static String get8Bit(Block block) { // Length of the string return get(block, Option.ASCII); } /** * Unicode string, 16-bit string length * * @param block the byte {@link Block} * @return string value */ public static String get16Bit(Block block) { // Length of the string return get(block, Option.UTF16); } private static String get(Block block, Option bitOption) { // Length of the string (character count, ln) short ln = bitOption.isOn(0) ? block.nextShort() : block.nextByte(); // Option flags: Option option = Option.of(block.nextByte()); boolean richText = option.isOn(3); // ConstantNumber of Rich-Text formatting runs (rt) short rt = richText ? block.nextShort() : 0; boolean phonetic = option.isOn(2); // Size of Asian phonetic settings block (in bytes, sz) int sz = phonetic ? block.nextInt() : 0; String value = block.utf(ln, option); // TODO Rich-Text settings (richtext): if (richText) { // List of rt formatting runs // int[] listOfRt = new int[rt]; // TODO Read int array. block.skip(rt << 2); } // TODO Asian phonetic settings (phonetic): if (phonetic) { // Asian Phonetic Settings Block // TODO block.skip(sz); } return value; } /** * Asian Phonetic Settings Block * * @param block the bit block */ static void asianPhoneticSetting(Block block) { short identifier = block.nextIdentifier(); if (identifier != 0x0001) { // FIXME Unknown identifier 0001H } // Size of the following data short size = block.nextShort(); // Index to FONT record (➜5.45) used for the Asian phonetic text short fontIndex = block.nextShort(); // Additional settings for the Asian phonetic text Option option = Option.of(block.nextShort()); // Type of Japanese phonetic text int type = option.range(0, 2); // TODO switch (type) { // 00 = Katakana (narrow) case 0: break; // 01 = Katakana (wide) case 1: break; // 10 = Hiragana case 2: break; } // Alignment of all portions of the Asian phonetic text int aligin = option.range(2, 2); switch (type) { // 00 = Not specified (Japanese only) case 0: break; // 01 = Left (Top for vertical text) case 1: break; // 10 = Centered case 2: break; // 11 = Distributed case 3: break; } int a = option.range(4, 2); // 11 (always set) if (a != 3) { // FIXME unknown value } // ConstantNumber of portions the Asian phonetic text is broken into short np = block.nextShort(); // TODO If np = 0, the Asian phonetic text refers to the entire cell text if (np == 0) { } // Total length of the following Asian phonetic text (number of characters, ln) short ln = block.nextShort(); // Repeated total length of the text short rn = block.nextShort(); if (np != 0) { // Character array of Asian phonetic text, no Unicode string header, always 16-bit characters. String value = block.utf(ln << 1, Option.UTF16); // Note: If ln = 0, this field is not empty but contains 0000 // List of np structures that describe the position of each portion in the main text. Each // structure contains the following fields: for (int i = 0; i < np; i++) { // First character in the Asian phonetic text of this portion (cpa) short cpa = block.nextShort(); // First character of the main text belonging to this portion (cpm) short cpm = block.nextShort(); // ConstantNumber of characters in main text belonging to this portion (ccm) short ccm = block.nextShort(); // TODO storage } } else { block.nextShort(); } } public static short getId() { return ParserIdentifier.STRING; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy