org.ttzero.excel.entity.e3.StringParser Maven / Gradle / Ivy

Go to download
/*
 * Copyright (c) 2019-2020, [email protected] All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ttzero.excel.entity.e3;

/**
 * 5.102 STRING
 * 
 * From BIFF8 on, strings are always stored using
 * UTF-16LE text encoding. The character array is
 * a sequence of 16-bit values. Additionally it is
 * possible to use a compressed format, which omits
 * the high bytes of all characters, if they are all zero.
 * The following table describes the standard format
 * of the entire string, but in many records the strings
 * differ from this format. This will be mentioned separately.
 * 
 * It is possible (but not required) to store Rich-Text
 * formatting information and Asian phonetic information
 * inside a Unicode string. This results in four different
 * ways to store a string. The character array is not
 * zero-terminated.
 *
 * @author guanquan.wang at 2019-01-28 17:36
 */
public class StringParser {

    /**
     * Reads a Unicode string whose character count is stored in 8 bits,
     * calling {@code block.ready()} before the read and
     * {@code block.commit()} after it.
     *
     * @param block the byte {@link Block} positioned at the string length field
     * @return string value
     */
    public static String get8BitWithHead(Block block) {
        block.ready();
        String v = get(block, Option.ASCII);
        block.commit();
        return v;
    }

    /**
     * Reads a Unicode string whose character count is stored in 16 bits,
     * calling {@code block.ready()} before the read and
     * {@code block.commit()} after it.
     *
     * @param block the byte {@link Block} positioned at the string length field
     * @return string value
     */
    public static String get16BitWithHead(Block block) {
        block.ready();
        String v = get(block, Option.UTF16);
        block.commit();
        return v;
    }

    /**
     * Reads a Unicode string whose character count is stored in 8 bits.
     * Unlike {@link #get8BitWithHead(Block)} this does not call
     * {@code block.ready()} / {@code block.commit()}.
     *
     * @param block the byte {@link Block} positioned at the string length field
     * @return string value
     */
    public static String get8Bit(Block block) {
        return get(block, Option.ASCII);
    }

    /**
     * Reads a Unicode string whose character count is stored in 16 bits.
     * Unlike {@link #get16BitWithHead(Block)} this does not call
     * {@code block.ready()} / {@code block.commit()}.
     *
     * @param block the byte {@link Block} positioned at the string length field
     * @return string value
     */
    public static String get16Bit(Block block) {
        return get(block, Option.UTF16);
    }

    /**
     * Reads one string: length field, option flags byte, optional Rich-Text
     * run count, optional Asian phonetic block size, the character array,
     * and finally skips the Rich-Text runs and the phonetic block.
     *
     * @param block     the byte {@link Block} positioned at the string length field
     * @param bitOption selects the width of the length field: 16 bits when
     *                  bit 0 is on, 8 bits otherwise
     * @return the decoded character array; formatting runs and phonetic
     *         settings are skipped, not returned
     */
    private static String get(Block block, Option bitOption) {
        // Length of the string (character count, ln). The field is unsigned,
        // so mask it into an int; keeping it in a signed short would turn
        // lengths above 32767 (or above 127 for the 8-bit form) negative.
        int ln = bitOption.isOn(0) ? block.nextShort() & 0xFFFF : block.nextByte() & 0xFF;
        // Option flags
        Option option = Option.of(block.nextByte());

        boolean richText = option.isOn(3);
        // Number of Rich-Text formatting runs (rt), unsigned 16-bit
        int rt = richText ? block.nextShort() & 0xFFFF : 0;

        boolean phonetic = option.isOn(2);
        // Size of Asian phonetic settings block (in bytes, sz)
        int sz = phonetic ? block.nextInt() : 0;

        String value = block.utf(ln, option);

        // TODO Rich-Text settings (richtext): read the rt formatting runs
        if (richText) {
            // Each formatting run is skipped as 4 bytes
            block.skip(rt << 2);
        }

        // TODO Asian phonetic settings (phonetic): parse instead of skipping
        if (phonetic) {
            block.skip(sz);
        }

        return value;
    }

    /**
     * Walks over an Asian Phonetic Settings Block, consuming its fields
     * from {@code block}. Nothing is stored or returned yet; the parsed
     * values are placeholders for a future implementation.
     *
     * @param block the bit block positioned at the start of the settings block
     */
    static void asianPhoneticSetting(Block block) {
        short identifier = block.nextIdentifier();
        if (identifier != 0x0001) {
            // FIXME Unknown identifier 0001H
        }
        // Size of the following data
        short size = block.nextShort();
        // Index to FONT record (5.45) used for the Asian phonetic text
        short fontIndex = block.nextShort();
        // Additional settings for the Asian phonetic text
        Option option = Option.of(block.nextShort());
        // Type of Japanese phonetic text
        int type = option.range(0, 2);
        // TODO
        switch (type) {
            // 00 = Katakana (narrow)
            case 0:
                break;
            // 01 = Katakana (wide)
            case 1:
                break;
            // 10 = Hiragana
            case 2:
                break;
        }
        // Alignment of all portions of the Asian phonetic text
        int alignment = option.range(2, 2);
        // TODO
        switch (alignment) {
            // 00 = Not specified (Japanese only)
            case 0:
                break;
            // 01 = Left (Top for vertical text)
            case 1:
                break;
            // 10 = Centered
            case 2:
                break;
            // 11 = Distributed
            case 3:
                break;
        }

        int a = option.range(4, 2);
        // 11 (always set)
        if (a != 3) {
            // FIXME unknown value
        }

        // Number of portions the Asian phonetic text is broken into (np), unsigned 16-bit.
        // TODO If np = 0, the Asian phonetic text refers to the entire cell text
        int np = block.nextShort() & 0xFFFF;

        // Total length of the following Asian phonetic text (number of characters, ln), unsigned 16-bit
        int ln = block.nextShort() & 0xFFFF;

        // Repeated total length of the text
        short rn = block.nextShort();

        // Character array of Asian phonetic text, no Unicode string header, always 16-bit characters.
        // Whether the text is present depends on ln, not on np:
        // if ln = 0, this field is not empty but contains 0000H.
        if (ln != 0) {
            // NOTE(review): the length passed here is ln * 2 while get() passes a
            // character count to block.utf - confirm which unit block.utf expects.
            String value = block.utf(ln << 1, Option.UTF16);
        } else {
            block.nextShort();
        }

        // List of np structures that describe the position of each portion in the main text.
        for (int i = 0; i < np; i++) {
            // First character in the Asian phonetic text of this portion (cpa)
            short cpa = block.nextShort();
            // First character of the main text belonging to this portion (cpm)
            short cpm = block.nextShort();
            // Number of characters in main text belonging to this portion (ccm)
            short ccm = block.nextShort();
            // TODO storage
        }
    }

    /**
     * Returns the record identifier handled by this parser.
     *
     * @return {@code ParserIdentifier.STRING}
     */
    public static short getId() {
        return ParserIdentifier.STRING;
    }
}