All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.ttzero.excel.entity.e3.HyperlinkParser Maven / Gradle / Ivy

/*
 * Copyright (c) 2019-2020, [email protected] All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ttzero.excel.entity.e3;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ttzero.excel.reader.Dimension;

import java.nio.charset.StandardCharsets;
import java.util.UUID;

/**
 * 5.58 HYPERLINK
 * 

* In Excel, every cell may contain a hyperlink. * The HYPERLINK record refers to one cell address * or a cell range where all cells contain the same * hyperlink. It is part of the Hyperlink Table in * the Sheet Sub-stream (➜4.13). *

* Every hyperlink can contain a text mark and a * description that is shown in the sheet instead * of the real link. Text marks are appended behind * a link, separated by the hash sign (“#”). *

* Examples for text marks: www.example.org#table1 * or {@code C:\example.xls#Sheet1!A1}. *

* Inside of this record strings are stored in several * formats. Sometimes occurs the character count, * otherwise the character array size (in 16-bit character * arrays the character count is half of the array size). * Furthermore some strings are zero-terminated, others not. * They are stored either as 16-bit character arrays or as * 8-bit character arrays, independent of the characters. * * @author guanquan.wang at 2019-02-01 10:17 */ public class HyperlinkParser { private static final Logger LOGGER = LoggerFactory.getLogger(HyperlinkParser.class); public static final UUID STD_LINK_GUID = UUID.fromString("79EAC9D0-BAF9-11CE-8C82-00AA004BA90B"); public static final UUID STD_FILE_MONIKER = UUID.fromString("00000303-0000-0000-C000-000000000046"); public static final UUID STD_URL_MONIKER = UUID.fromString("79EAC9E0-BAF9-11CE-8C82-00AA004BA90B"); public static Hyperlink get(Block block) { // Size of next data short size = block.nextShort(); // Cell range address of all cells containing this hyperlink (➜2.5.14) Dimension range = DimensionParser.get(block); // GUID of StdLink: // D0H C9H EAH 79H F9H BAH CEH 11H 8CH 82H 00H AAH 00H 4BH A9H 0BH // (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B) UUID linkGUID = block.nextGUID(); if (!STD_LINK_GUID.equals(linkGUID)) { LOGGER.warn("The Hyperlink GUID: {} is different from STD_LINK_GUID: {}", linkGUID, STD_LINK_GUID); } // Unknown value: 00000002H int ignore = block.nextInt(); // Option flags Option option = Option.of(block.nextInt()); // The option flags specify the following content of the record. Hyperlink hyperlink = new Hyperlink(); // Bit 2 and 4 Contents 0 = No description 1 (both bits) = Description if (option.isOn(2) && option.isOn(4)) { hyperlink.description = description(block); } // Bit 7 Contents 0 = No target frame 1 = Target frame if (option.isOn(7)) { hyperlink.targetFrame = targetFrame(block); } // Relative file path hyperlink.relative = option.isOff(1); // Bit 0 Contents 0 = No link extant 1 = File link or URL if (option.isOn(0)) { // hyperlink.link0 = fileLinkRecord(block); // Bit 1 Contents 0 = Relative file path 1 = Absolute path or URL // if (option.isOff(1)) { // String[] paths = localFile(block); // // TODO // hyperlink.link1 = paths[0]; // } // Bit 8 Contents 0 = File link or URL 1 = UNC path (incl. server name) if (option.isOn(8)) { hyperlink.link = unc(block); } else { if (option.isOn(1)) { hyperlink.link = urlLink(block); } else { hyperlink.link = localFile(block)[0]; } } } // Bit 3 Contents 0 = No text mark 1 = Text mark if (option.isOn(3)) { hyperlink.textMark = textMark(block); } return hyperlink; } public static short getId() { return ParserIdentifier.HYPERLINK; } /** * 5.58.2 Hyperlink containing a URL (Uniform Resource Locator) * * @param block the SAT block * @return the url */ private static String urlLink(Block block) { // These data fields occur for links which are not local files or files // in the local network (for instance HTTP and FTP links and e-mail addresses). // The lower 9 bits of the option flags field must be 0.x00x.xx112 (x means optional, depending // on hyperlink content). The GUID could be used to distinguish a URL from a file link. // GUID of URL Moniker: // E0H C9H EAH 79H F9H BAH CEH 11H 8CH 82H 00H AAH 00H 4BH A9H 0BH // (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B) // Check GUID UUID linkGUID = block.nextGUID(); if (!STD_URL_MONIKER.equals(linkGUID)) { LOGGER.warn("The URL Moniker: {} is different from STD_URL_MONIKER: {}", linkGUID, STD_URL_MONIKER); } // Size of character array of the URL, including trailing zero word (us). // There are us/2-1 characters in the following string. int us = block.nextInt(); // Character array of the URL, no Unicode string header, always 16-bit characters, zero-terminated String link = block.nextString(us - 2, StandardCharsets.UTF_16LE); // Skip zero-terminated block.skip(StandardTypeByteSize.CHAR); return link; } /** * 5.58.3 Hyperlink to a Local File * * @param block the SAT block * @return string array. index zero is relative to the location path * index one is absolute path */ private static String[] localFile(Block block) { // These data fields are for links to files on local drives. The path of the file can be complete // with drive letter (absolute) or relative to the location of the workbook. // The lower 9 bits of the option flags field must be 0.x00x.xxx12. // The GUID could be used to distinguish a URL from a file link. // GUID of File Moniker: // 03H 03H 00H 00H 00H 00H 00H 00H C0H 00H 00H 00H 00H 00H 00H 46H // (00000303-0000-0000-C000-000000000046) // Check GUID UUID linkGUID = block.nextGUID(); if (!STD_FILE_MONIKER.equals(linkGUID)) { LOGGER.warn("The local file GUID: {} is different from STD_FILE_MONIKER: {}", linkGUID, STD_FILE_MONIKER); } // Directory up-level count. Each leading “..\” in the file link is deleted and increases this counter. short countUpLevel = block.nextShort(); // Character count of the shortened file path and name, including trailing zero byte (sl) int sl = block.nextInt(); // Character array of the shortened file path and name in 8.3-DOS-format. // This field can be filled with a long file name too. No Unicode string header, // always 8-bit characters, zero-terminated String path = block.utf(sl - 1, Option.ASCII); // Skip zero-terminated block.skip(StandardTypeByteSize.BYTE); String[] result = new String[2]; result[0] = append(path, "..\\", countUpLevel, -1); // Unknown byte sequence: // FFH FFH ADH DEH 00H 00H 00H 00H if (block.nextLong() != 0x00000000DEADFFFFL) { LOGGER.warn("Sequence check failed."); } // 00H 00H 00H 00H 00H 00H 00H 00H if (block.nextLong() != 0x0L) { LOGGER.warn("Sequence check failed."); } // 00H 00H 00H 00H 00H 00H 00H 00H if (block.nextLong() != 0x0L) { LOGGER.warn("Sequence check failed."); } // Size of the following file link field including string length field and additional data field (sz). int sz = block.nextInt(); // If sz is zero, nothing will follow (except a text mark). if (sz > 0) { // (optional) Size of character array of the extended file path and name (xl). // There are xl/2 characters in the following string. int xl = block.nextInt(); // (optional) Unknown byte sequence: 03H 00H // Check sequence short sequence = block.nextShort(); if (sequence != 0x0003) { LOGGER.warn("Sequence check failed."); } // (optional) Character array of the extended file path and name (xl), // no Unicode string header, always 16-bit characters, not zero-terminated String filePath = block.nextString(xl, StandardCharsets.UTF_16LE); result[1] = filePath; } return result; } public static String append(String src, String a, int n, int origin) { StringBuilder buf = new StringBuilder(); // Insert header if (origin == -1) { for (; n-- > 0; ) { buf.append(a); } buf.append(src); } else { buf.append(src); for (; n-- > 0; ) { buf.append(a); } } return buf.toString(); } /** * 5.58.4 Hyperlink to a File with UNC (Universal Naming Convention) Path * * @param block the SAT block * @return unc string */ private static String unc(Block block) { // These data fields are for UNC paths containing a server name (for instance “\\server\path\file.xls”). // The lower 9 bits of the option flags field must be 1.x00x.xx112. // Character count of the UNC, including trailing zero word (fl) int fl = block.nextInt(); // Character array of the UNC, no Unicode string header, always 16-bit characters, zero-terminated. String utf = block.utf((fl >> 1) - 1, Option.UTF16); // Skip zero-terminated block.skip(StandardTypeByteSize.CHAR); return utf; } /** * 5.58.5 Hyperlink to the Current Workbook * * @param block the SAT block * @return the description */ private static String description(Block block) { // The lower 9 bits of the option flags field must be 0.x00x.1x002. // (optional, see option flags) Character count of description text, including trailing zero word (dl) int dl = block.nextInt(); // (optional, see option flags) Character array of description text, no Unicode string header, // always 16-bit characters, zero-terminated String desc = block.utf(dl - 1, Option.UTF16); // Skip zero-terminated block.skip(StandardTypeByteSize.CHAR); return desc; } private static String targetFrame(Block block) { // (optional, see option flags) Character count of target frame, including trailing zero word (fl) int fl = block.nextInt(); // (optional, see option flags) Character array of target frame, no Unicode string header, // always 16-bit characters, zero-terminated String targetFrame = block.utf(fl - 1, Option.UTF16); // Skip zero-terminated block.skip(StandardTypeByteSize.CHAR); return targetFrame; } private static String textMark(Block block) { // (optional, see option flags) Character count of the text mark, including trailing zero word (tl) int tl = block.nextInt(); // (optional, see option flags) Character array of the text mark without “#” sign, no Unicode // string header, always 16-bit characters, zero-terminated String textMark = block.utf(tl - 1, Option.UTF16); // Skip zero-terminated block.skip(StandardTypeByteSize.CHAR); return textMark; } public static class Hyperlink { String description; String targetFrame; String link; String textMark; boolean relative; public String getDescription() { return description; } public String getTargetFrame() { return targetFrame; } public String getLink() { return link; } public String getTextMark() { return textMark; } @Override public String toString() { return "description: " + description + ", targetFrame: " + targetFrame + ", link: " + link + ", textMark: " + textMark + ", relative: " + relative; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy