// org.ttzero.excel.entity.e3.StringParser
/*
* Copyright (c) 2019-2020, [email protected] All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ttzero.excel.entity.e3;
/**
* 5.102 STRING
*
* From BIFF8 on, strings are always stored using
* UTF-16LE text encoding. The character array is
* a sequence of 16-bit values. Additionally it is
* possible to use a compressed format, which omits
* the high bytes of all characters, if they are all zero.
* The following table describes the standard format
* of the entire string, but in many records the strings
* differ from this format. This will be mentioned separately.
*
* It is possible (but not required) to store Rich-Text
* formatting information and Asian phonetic information
* inside a Unicode string. This results in four different
* ways to store a string. The character array is not
* zero-terminated.
*
* @author guanquan.wang at 2019-01-28 17:36
*/
public class StringParser {
    /**
     * Reads a Unicode string whose length field is 8 bits wide.
     * The read is bracketed by {@code block.ready()} and {@code block.commit()}.
     *
     * @param block the byte {@link Block}
     * @return string value
     */
    public static String get8BitWithHead(Block block) {
        block.ready();
        String v = get(block, Option.ASCII);
        block.commit();
        return v;
    }

    /**
     * Reads a Unicode string whose length field is 16 bits wide.
     * The read is bracketed by {@code block.ready()} and {@code block.commit()}.
     *
     * @param block the byte {@link Block}
     * @return string value
     */
    public static String get16BitWithHead(Block block) {
        block.ready();
        String v = get(block, Option.UTF16);
        block.commit();
        return v;
    }

    /**
     * Reads a Unicode string whose length field is 8 bits wide,
     * without calling {@code ready()} / {@code commit()} on the block.
     *
     * @param block the byte {@link Block}
     * @return string value
     */
    public static String get8Bit(Block block) {
        return get(block, Option.ASCII);
    }

    /**
     * Reads a Unicode string whose length field is 16 bits wide,
     * without calling {@code ready()} / {@code commit()} on the block.
     *
     * @param block the byte {@link Block}
     * @return string value
     */
    public static String get16Bit(Block block) {
        return get(block, Option.UTF16);
    }

    /**
     * Reads one string: length field, option flags, optional rich-text run count,
     * optional phonetic block size, then the character data. Any rich-text runs and
     * phonetic settings that follow the characters are skipped, not decoded.
     *
     * @param block     the byte {@link Block} to read from
     * @param bitOption width of the length field: bit 0 on selects a 16-bit length,
     *                  otherwise an 8-bit length is read
     * @return string value
     */
    private static String get(Block block, Option bitOption) {
        // Length of the string (character count, ln). The field is unsigned, so the
        // value is masked: without the mask an 8-bit length >= 128 or a 16-bit
        // length > 32767 would be sign-extended into a negative count.
        int ln = bitOption.isOn(0) ? (block.nextShort() & 0xFFFF) : (block.nextByte() & 0xFF);
        // Option flags:
        Option option = Option.of(block.nextByte());

        boolean richText = option.isOn(3);
        // Number of Rich-Text formatting runs (rt), unsigned 16-bit
        int rt = richText ? (block.nextShort() & 0xFFFF) : 0;

        boolean phonetic = option.isOn(2);
        // Size of Asian phonetic settings block (in bytes, sz)
        int sz = phonetic ? block.nextInt() : 0;

        String value = block.utf(ln, option);

        // TODO Rich-Text settings (richtext):
        if (richText) {
            // List of rt formatting runs, 4 bytes each; skipped for now
            block.skip(rt << 2);
        }

        // TODO Asian phonetic settings (phonetic):
        if (phonetic) {
            // Asian Phonetic Settings Block; skipped for now
            block.skip(sz);
        }
        return value;
    }

    /**
     * Walks over an Asian Phonetic Settings Block, consuming every field so the
     * block cursor ends up behind it. Nothing is stored or returned yet (TODO).
     *
     * @param block the bit block
     */
    static void asianPhoneticSetting(Block block) {
        short identifier = block.nextIdentifier();
        if (identifier != 0x0001) {
            // FIXME Unknown identifier 0001H
        }
        // Size of the following data
        block.nextShort();
        // Index to FONT record (➜5.45) used for the Asian phonetic text
        block.nextShort();
        // Additional settings for the Asian phonetic text
        Option option = Option.of(block.nextShort());

        // Type of Japanese phonetic text
        int type = option.range(0, 2);
        // TODO
        switch (type) {
            // 00 = Katakana (narrow)
            case 0:
                break;
            // 01 = Katakana (wide)
            case 1:
                break;
            // 10 = Hiragana
            case 2:
                break;
        }

        // Alignment of all portions of the Asian phonetic text
        int align = option.range(2, 2);
        // TODO
        switch (align) {
            // 00 = Not specified (Japanese only)
            case 0:
                break;
            // 01 = Left (Top for vertical text)
            case 1:
                break;
            // 10 = Centered
            case 2:
                break;
            // 11 = Distributed
            case 3:
                break;
        }

        int alwaysSet = option.range(4, 2);
        // 11 (always set)
        if (alwaysSet != 3) {
            // FIXME unknown value
        }

        // Number of portions the Asian phonetic text is broken into (unsigned 16-bit)
        // TODO If np = 0, the Asian phonetic text refers to the entire cell text
        int np = block.nextShort() & 0xFFFF;
        // Total length of the following Asian phonetic text (number of characters, ln)
        int ln = block.nextShort() & 0xFFFF;
        // Repeated total length of the text
        block.nextShort();

        // Character array of Asian phonetic text, no Unicode string header, always
        // 16-bit characters. Whether the text is present depends on ln, not on np.
        if (ln != 0) {
            // NOTE(review): get() hands a character count to block.utf while this call
            // passes ln << 1 — confirm which unit block.utf expects.
            // TODO storage
            block.utf(ln << 1, Option.UTF16);
        } else {
            // Note: If ln = 0, this field is not empty but contains 0000
            block.nextShort();
        }

        // List of np structures that describe the position of each portion in the
        // main text. Each structure contains the following fields:
        for (int i = 0; i < np; i++) {
            // First character in the Asian phonetic text of this portion (cpa)
            block.nextShort();
            // First character of the main text belonging to this portion (cpm)
            block.nextShort();
            // Number of characters in main text belonging to this portion (ccm)
            block.nextShort();
            // TODO storage
        }
    }

    /**
     * Returns the record identifier handled by this parser.
     *
     * @return {@code ParserIdentifier.STRING}
     */
    public static short getId() {
        return ParserIdentifier.STRING;
    }
}