// org.ttzero.excel.entity.e3.HyperlinkParser
/*
* Copyright (c) 2019-2020, [email protected] All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ttzero.excel.entity.e3;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.ttzero.excel.reader.Dimension;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
/**
 * 5.58 HYPERLINK
 *
 * <p>In Excel, every cell may contain a hyperlink.
 * The HYPERLINK record refers to one cell address
 * or a cell range where all cells contain the same
 * hyperlink. It is part of the Hyperlink Table in
 * the Sheet Sub-stream (➜4.13).
 *
 * <p>Every hyperlink can contain a text mark and a
 * description that is shown in the sheet instead
 * of the real link. Text marks are appended behind
 * a link, separated by the hash sign (“#”).
 *
 * <p>Examples for text marks: www.example.org#table1
 * or {@code C:\example.xls#Sheet1!A1}.
 *
 * <p>Inside of this record strings are stored in several
 * formats. Sometimes the character count is stored,
 * otherwise the character array size (in 16-bit character
 * arrays the character count is half of the array size).
 * Furthermore some strings are zero-terminated, others not.
 * They are stored either as 16-bit character arrays or as
 * 8-bit character arrays, independent of the characters.
 *
 * @author guanquan.wang at 2019-02-01 10:17
 */
public class HyperlinkParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(HyperlinkParser.class);

    /** GUID expected at the start of every hyperlink payload (StdLink). */
    public static final UUID STD_LINK_GUID = UUID.fromString("79EAC9D0-BAF9-11CE-8C82-00AA004BA90B");
    /** GUID that introduces a file moniker (link to a local file). */
    public static final UUID STD_FILE_MONIKER = UUID.fromString("00000303-0000-0000-C000-000000000046");
    /** GUID that introduces a URL moniker. */
    public static final UUID STD_URL_MONIKER = UUID.fromString("79EAC9E0-BAF9-11CE-8C82-00AA004BA90B");

    /**
     * Parses one HYPERLINK record from the current position of the block.
     *
     * <p>The optional parts are read in the order description, target frame,
     * link, text mark; which of them are present is decided by the option flags.
     *
     * @param block the SAT block positioned at the size field of the record
     * @return the parsed hyperlink; parts that are absent in the record stay {@code null}
     */
    public static Hyperlink get(Block block) {
        // Size of next data. Only read to advance the block, the value itself is not used.
        block.nextShort();
        // Cell range address of all cells containing this hyperlink (➜2.5.14)
        Dimension range = DimensionParser.get(block);
        // GUID of StdLink:
        // D0H C9H EAH 79H F9H BAH CEH 11H 8CH 82H 00H AAH 00H 4BH A9H 0BH
        // (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
        UUID linkGUID = block.nextGUID();
        if (!STD_LINK_GUID.equals(linkGUID)) {
            LOGGER.warn("The Hyperlink GUID: {} is different from STD_LINK_GUID: {}", linkGUID, STD_LINK_GUID);
        }
        // Unknown value: 00000002H. Only read to advance the block.
        block.nextInt();
        // The option flags specify the following content of the record.
        Option option = Option.of(block.nextInt());

        Hyperlink hyperlink = new Hyperlink();
        hyperlink.range = range;
        // Bit 2 and 4 Contents 0 = No description 1 (both bits) = Description
        if (option.isOn(2) && option.isOn(4)) {
            hyperlink.description = description(block);
        }
        // Bit 7 Contents 0 = No target frame 1 = Target frame
        if (option.isOn(7)) {
            hyperlink.targetFrame = targetFrame(block);
        }
        // Bit 1 Contents 0 = Relative file path 1 = Absolute path or URL
        hyperlink.relative = option.isOff(1);
        // Bit 0 Contents 0 = No link extant 1 = File link or URL
        if (option.isOn(0)) {
            // Bit 8 Contents 0 = File link or URL 1 = UNC path (incl. server name)
            hyperlink.link = option.isOn(8) ? unc(block) : moniker(block, option.isOn(1));
        }
        // Bit 3 Contents 0 = No text mark 1 = Text mark
        if (option.isOn(3)) {
            hyperlink.textMark = textMark(block);
        }
        return hyperlink;
    }

    /**
     * Returns the record identifier handled by this parser.
     *
     * @return {@code ParserIdentifier.HYPERLINK}
     */
    public static short getId() {
        return ParserIdentifier.HYPERLINK;
    }

    /**
     * Reads the moniker GUID and parses the link body that belongs to it.
     *
     * <p>Bit 1 of the option flags is optional for file links (a local file may be
     * absolute or relative), so it cannot tell a URL from a file link; the GUID can.
     * Bit 1 is only used as a fallback when the GUID is neither of the two known monikers.
     *
     * @param block         the SAT block positioned at the moniker GUID
     * @param absoluteOrUrl state of option bit 1, used only for an unknown GUID
     * @return the URL, or the relative-form path of a local file
     */
    private static String moniker(Block block, boolean absoluteOrUrl) {
        UUID guid = block.nextGUID();
        if (STD_URL_MONIKER.equals(guid)) {
            return urlLink(block);
        }
        if (STD_FILE_MONIKER.equals(guid)) {
            return localFile(block)[0];
        }
        // Unknown moniker: warn and guess from the option bit.
        if (absoluteOrUrl) {
            LOGGER.warn("The URL Moniker: {} is different from STD_URL_MONIKER: {}", guid, STD_URL_MONIKER);
            return urlLink(block);
        }
        LOGGER.warn("The local file GUID: {} is different from STD_FILE_MONIKER: {}", guid, STD_FILE_MONIKER);
        return localFile(block)[0];
    }

    /**
     * 5.58.2 Hyperlink containing a URL (Uniform Resource Locator)
     *
     * <p>These data fields occur for links which are not local files or files
     * in the local network (for instance HTTP and FTP links and e-mail addresses).
     * The moniker GUID has already been consumed by the caller.
     *
     * @param block the SAT block positioned right after the URL moniker GUID
     * @return the url
     */
    private static String urlLink(Block block) {
        // Size of character array of the URL, including trailing zero word (us).
        int us = block.nextInt();
        // Character array of the URL, no Unicode string header, always 16-bit characters, zero-terminated
        String link = block.nextString(us - 2, StandardCharsets.UTF_16LE);
        // Skip the last two bytes covered by the size field
        block.skip(StandardTypeByteSize.CHAR);
        // NOTE(review): per MS-OSHARED the size may also cover 24 bytes of extra data that follow
        // the zero terminator. Cut at the first NUL so that such data never leaks into the link.
        int terminator = link == null ? -1 : link.indexOf('\0');
        return terminator >= 0 ? link.substring(0, terminator) : link;
    }

    /**
     * 5.58.3 Hyperlink to a Local File
     *
     * <p>These data fields are for links to files on local drives. The path of the file can be
     * complete with drive letter (absolute) or relative to the location of the workbook.
     * The moniker GUID has already been consumed by the caller.
     *
     * @param block the SAT block positioned right after the file moniker GUID
     * @return string array. index zero is the short path with the leading "..\" levels restored,
     * index one is the extended path or {@code null} when the record carries none
     */
    private static String[] localFile(Block block) {
        // Directory up-level count. Each leading “..\” in the file link is deleted and increases this counter.
        short countUpLevel = block.nextShort();
        // Character count of the shortened file path and name, including trailing zero byte (sl)
        int sl = block.nextInt();
        // Character array of the shortened file path and name in 8.3-DOS-format.
        // This field can be filled with a long file name too. No Unicode string header,
        // always 8-bit characters, zero-terminated
        String path = block.utf(sl - 1, Option.ASCII);
        // Skip zero-terminated
        block.skip(StandardTypeByteSize.BYTE);

        String[] result = new String[2];
        result[0] = append(path, "..\\", countUpLevel, -1);

        // Unknown byte sequence:
        // FFH FFH ADH DEH 00H 00H 00H 00H
        if (block.nextLong() != 0x00000000DEADFFFFL) {
            LOGGER.warn("Sequence check failed.");
        }
        // Followed by two runs of 00H 00H 00H 00H 00H 00H 00H 00H
        for (int i = 0; i < 2; i++) {
            if (block.nextLong() != 0x0L) {
                LOGGER.warn("Sequence check failed.");
            }
        }

        // Size of the following file link field including string length field and additional data field (sz).
        int sz = block.nextInt();
        // If sz is zero, nothing will follow (except a text mark).
        if (sz > 0) {
            // (optional) Size of character array of the extended file path and name (xl).
            // There are xl/2 characters in the following string.
            int xl = block.nextInt();
            // (optional) Unknown byte sequence: 03H 00H
            if (block.nextShort() != 0x0003) {
                LOGGER.warn("Sequence check failed.");
            }
            // (optional) Character array of the extended file path and name (xl),
            // no Unicode string header, always 16-bit characters, not zero-terminated
            result[1] = block.nextString(xl, StandardCharsets.UTF_16LE);
        }
        return result;
    }

    /**
     * Repeats {@code a} {@code n} times and joins the repetition with {@code src}.
     *
     * @param src    the base string
     * @param a      the string to repeat
     * @param n      the number of repetitions; zero or a negative value adds nothing
     * @param origin {@code -1} puts the repetitions in front of {@code src},
     *               any other value puts them behind it
     * @return the combined string
     */
    public static String append(String src, String a, int n, int origin) {
        StringBuilder buf = new StringBuilder();
        boolean prepend = origin == -1;
        if (!prepend) {
            buf.append(src);
        }
        for (int i = 0; i < n; i++) {
            buf.append(a);
        }
        if (prepend) {
            buf.append(src);
        }
        return buf.toString();
    }

    /**
     * Reads a string that is stored as a 4-byte character count (including the trailing
     * zero word) followed by 16-bit characters and the zero terminator.
     *
     * @param block the SAT block positioned at the character count
     * @return the string without its terminator
     */
    private static String terminatedUtf16(Block block) {
        // Character count, including trailing zero word
        int count = block.nextInt();
        // Assumes Block.utf takes a character count, as the original description /
        // target frame / text mark readers did - TODO confirm against Block
        String value = block.utf(count - 1, Option.UTF16);
        // Skip zero-terminated
        block.skip(StandardTypeByteSize.CHAR);
        return value;
    }

    /**
     * 5.58.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
     *
     * <p>These data fields are for UNC paths containing a server name
     * (for instance “\\server\path\file.xls”). The length field is a character
     * count including the trailing zero word, so it must not be halved.
     *
     * @param block the SAT block
     * @return unc string
     */
    private static String unc(Block block) {
        return terminatedUtf16(block);
    }

    /**
     * Reads the optional description text (shown in the sheet instead of the real link).
     *
     * @param block the SAT block
     * @return the description
     */
    private static String description(Block block) {
        return terminatedUtf16(block);
    }

    /**
     * Reads the optional target frame.
     *
     * @param block the SAT block
     * @return the target frame
     */
    private static String targetFrame(Block block) {
        return terminatedUtf16(block);
    }

    /**
     * Reads the optional text mark, stored without the “#” sign.
     *
     * @param block the SAT block
     * @return the text mark
     */
    private static String textMark(Block block) {
        return terminatedUtf16(block);
    }

    /**
     * The parsed content of one HYPERLINK record.
     */
    public static class Hyperlink {
        Dimension range;
        String description;
        String targetFrame;
        String link;
        String textMark;
        boolean relative;

        /**
         * @return the cell range whose cells contain this hyperlink
         */
        public Dimension getRange() {
            return range;
        }

        /**
         * @return the description or {@code null} if the record has none
         */
        public String getDescription() {
            return description;
        }

        /**
         * @return the target frame or {@code null} if the record has none
         */
        public String getTargetFrame() {
            return targetFrame;
        }

        /**
         * @return the link or {@code null} if the record has none
         */
        public String getLink() {
            return link;
        }

        /**
         * @return the text mark or {@code null} if the record has none
         */
        public String getTextMark() {
            return textMark;
        }

        /**
         * @return {@code true} if option bit 1 is off (relative file path)
         */
        public boolean isRelative() {
            return relative;
        }

        @Override
        public String toString() {
            return "description: " + description +
                ", targetFrame: " + targetFrame +
                ", link: " + link +
                ", textMark: " + textMark +
                ", relative: " + relative;
        }
    }
}