org.sejda.impl.sambox.component.OutlineUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sejda-sambox Show documentation
Show all versions of sejda-sambox Show documentation
Package containing tasks implemented using sambox.
/*
* This file is part of the Sejda source code
* Copyright 2015 by Andrea Vacondio ([email protected]).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox.component;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
import static java.util.Optional.ofNullable;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.sejda.commons.util.RequireUtils.requireNotNullArg;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.sejda.model.rotation.Rotation;
import org.sejda.sambox.cos.COSArray;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.PDDocumentCatalog;
import org.sejda.sambox.pdmodel.PDPage;
import org.sejda.sambox.pdmodel.PageNotFoundException;
import org.sejda.sambox.pdmodel.common.PDRectangle;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDDestination;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDNamedDestination;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDPageXYZDestination;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.sejda.sambox.pdmodel.interactive.documentnavigation.outline.PDOutlineTreeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility methods related to outline handling in SAMBox
*
* @author Andrea Vacondio
*
*/
public final class OutlineUtils {
// Logger shared by the static helpers of this class to report problems while resolving or cloning destinations
private static final Logger LOG = LoggerFactory.getLogger(OutlineUtils.class);
/**
 * Private constructor: this class only exposes static helpers.
 */
private OutlineUtils() {
// utility
}
/**
 * Collects the outline levels that contain at least one item pointing to a page.
 *
 * @param document
 *            the document whose outline is inspected
 * @return a set containing the outline levels (the top level is 1) having at least one page destination. The set is
 *         empty when the document has no outline.
 */
public static Set<Integer> getOutlineLevelsWithPageDestination(PDDocument document) {
    // the flat outline only contains items resolved to a valid (positive) page number
    return getFlatOutline(document).stream().map(item -> item.level).collect(Collectors.toSet());
}
/**
 * Resolves the destination of an outline item to a page destination.
 *
 * @param current
 *            the outline item
 * @param catalog
 *            the catalog to look for in case of {@link PDNamedDestination}
 * @return the {@link PDPageDestination} for the given {@link PDOutlineItem} or an empty {@link Optional} if the
 *         destination is not a page or it cannot be read. In case the outline item has a named destination, it is
 *         resolved against the given catalog.
 */
public static Optional<PDPageDestination> toPageDestination(PDOutlineItem current, PDDocumentCatalog catalog) {
    try {
        return current.resolveToPageDestination(catalog);
    } catch (IOException e) {
        // best effort: a destination that cannot be read is logged and treated as "no page destination"
        LOG.warn("Unable to get outline item destination ", e);
        return Optional.empty();
    }
}
/**
 * Builds an XYZ destination for the given page, choosing the crop box corner based on the page rotation.
 *
 * @param page
 *            the page to point to, must not be null
 * @return a page destination pointing to the top left corner and keeping rotation into account
 */
public static PDPageXYZDestination pageDestinationFor(PDPage page) {
    requireNotNullArg(page, "Cannot create a destination to a null page");
    PDPageXYZDestination destination = new PDPageXYZDestination();
    destination.setPage(page);
    PDRectangle box = page.getCropBox();
    int left;
    int top;
    // pick the crop box corner matching the page rotation
    switch (Rotation.getRotation(page.getRotation())) {
        case DEGREES_180 -> {
            left = (int) box.getUpperRightX();
            top = (int) box.getLowerLeftY();
        }
        case DEGREES_270 -> {
            left = (int) box.getUpperRightX();
            top = (int) box.getUpperRightY();
        }
        case DEGREES_90 -> {
            left = (int) box.getLowerLeftX();
            top = (int) box.getLowerLeftY();
        }
        default -> {
            left = (int) box.getLowerLeftX();
            top = (int) box.getUpperRightY();
        }
    }
    destination.setLeft(left);
    destination.setTop(top);
    return destination;
}
/**
 * Creates a clone of the given page destination pointing to the given new page. If an error occurs, or the duplicated
 * destination turns out not to be a page destination, it falls back to a new {@link PDPageXYZDestination} pointing to
 * the new page.
 *
 * @param dest
 *            the destination to clone, must not be null
 * @param destPage
 *            the new pointed page
 * @return the cloned page destination
 */
public static PDPageDestination clonePageDestination(PDPageDestination dest, PDPage destPage) {
    requireNotNullArg(dest, "Cannot clone a null destination");
    try {
        if (PDDestination.create(dest.getCOSObject().duplicate()) instanceof PDPageDestination cloned) {
            cloned.setPage(destPage);
            return cloned;
        }
    } catch (IOException e) {
        LOG.warn("Unable to clone page destination", e);
    }
    // this should never happen
    PDPageXYZDestination fallback = new PDPageXYZDestination();
    fallback.setPage(destPage);
    return fallback;
}
/**
 * Tries to resolve the page pointed by a page destination. It's usually just a {@link PDPageDestination#getPage()}
 * call but some tools out there wrongly put the page number instead of the page reference. This method handles that
 * case by looking the page up in the document by number.
 *
 * @see "wkhtmltopdf issue #3275"
 *
 * @param destination
 *            the destination to resolve
 * @param document
 *            the document used to look the page up when only a page number is available
 * @return The page pointed by the destination or null
 */
public static PDPage resolvePageDestination(PDPageDestination destination, PDDocument document) {
    PDPage referenced = destination.getPage();
    if (nonNull(referenced)) {
        return referenced;
    }
    int pageNumber = destination.getPageNumber();
    if (pageNumber < 0) {
        // neither a page reference nor a usable page number
        return null;
    }
    try {
        LOG.debug("Found page number in page destination, expected a page reference");
        return document.getPage(pageNumber);
    } catch (PageNotFoundException e) {
        LOG.warn(
                "Unable to resolve page destination pointing to page {} (a page reference was expected, a number was found)",
                pageNumber);
        return null;
    }
}
/**
 * Copies title, text color (only when the source defines one), bold/italic flags and open/closed state from the given
 * {@link PDOutlineItem} to the destination one.
 *
 * @param from
 *            the outline item to read from
 * @param to
 *            the outline item to write to
 */
public static void copyOutlineDictionary(PDOutlineItem from, PDOutlineItem to) {
    // a missing title is copied as an empty string
    to.setTitle(defaultString(from.getTitle()));
    COSArray colorComponents = from.getCOSObject().getDictionaryObject(COSName.C, COSArray.class);
    if (colorComponents != null) {
        to.setTextColor(from.getTextColor());
    }
    to.setBold(from.isBold());
    to.setItalic(from.isItalic());
    if (!from.isNodeOpen()) {
        to.closeNode();
    } else {
        to.openNode();
    }
}
/**
* @param document
* @return a multi map representing a page grouped view of all the outline page destinations.
*/
public static Map> pageGroupedOutlinePageDestinations(PDDocument document) {
Map> destinations = new HashMap<>();
PDDocumentCatalog catalog = document.getDocumentCatalog();
ofNullable(catalog.getDocumentOutline()).ifPresent(outline -> {
StreamSupport.stream(Spliterators.spliteratorUnknownSize(new PDOutlineTreeIterator(outline),
Spliterator.ORDERED | Spliterator.NONNULL), false).forEach(i -> {
toPageDestination(i, document.getDocumentCatalog()).ifPresent(d -> {
PDPage page = resolvePageDestination(d, document);
if (nonNull(page)) {
destinations.computeIfAbsent(page, p -> new HashSet<>()).add(d);
}
});
});
});
return destinations;
}
/**
 * Flattens the document outline into a list sorted by page number.
 *
 * @param document
 *            the document whose outline is flattened
 * @return A sorted flat representation of the document outline. Only items resolved to a positive page number are
 *         included; the list is empty when the document has no outline.
 */
public static List<OutlineItem> getFlatOutline(PDDocument document) {
    List<OutlineItem> items = ofNullable(document.getDocumentCatalog().getDocumentOutline())
            .map(PDDocumentOutline::children).map(children -> recurseFlatOutline(document, children, 1))
            .orElseGet(ArrayList::new);
    // drop unresolved pages first, then sort: the sort is stable so items on the same page keep their outline order
    return items.stream().filter(item -> item.page > 0).sorted(Comparator.comparingInt(item -> item.page))
            .collect(Collectors.toList());
}
/**
 * Depth-first visit of the given outline items, collecting one {@link OutlineItem} for every item that resolves to a
 * page destination. Children are visited even when their parent has no page destination.
 *
 * @param document
 *            the document owning the outline, used to translate pages to page numbers
 * @param items
 *            the outline items of the current level
 * @param level
 *            the level assigned to the given items, children get {@code level + 1}
 * @return the collected items, in visit order. The page is 1-based and is 0 when it could not be determined.
 */
private static List<OutlineItem> recurseFlatOutline(PDDocument document, Iterable<PDOutlineItem> items, int level) {
    List<OutlineItem> result = new ArrayList<>();
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    for (PDOutlineItem item : items) {
        toPageDestination(item, catalog).ifPresent(d -> {
            PDPage page = d.getPage();
            // prefer the page reference; fall back to the page number some producers store instead (both 0-based)
            int pageNumber = nonNull(page) ? document.getPages().indexOf(page) + 1 : d.getPageNumber() + 1;
            // it's a specific page destination but not the top of the page
            // NOTE(review): compares against the crop box height while pageDestinationFor uses getUpperRightY()
            // and the rotation; these differ for crop boxes not starting at y=0 - confirm this is intended
            boolean specificLocationInPage = d instanceof PDPageXYZDestination xyzPageDest && nonNull(page)
                    && xyzPageDest.getTop() != (int) page.getCropBox().getHeight();
            result.add(new OutlineItem(item.getTitle(), pageNumber, level, specificLocationInPage));
        });
        result.addAll(recurseFlatOutline(document, item.children(), level + 1));
    }
    return result;
}
/**
 * Prints the outline tree of the given document to the standard output, one line per outline item. Does nothing if
 * the document has no outline.
 *
 * @param document
 *            the document whose outline is printed
 */
public static void printOutline(PDDocument document) {
    PDDocumentOutline outline = document.getDocumentCatalog().getDocumentOutline();
    if (isNull(outline)) {
        return;
    }
    int position = 0;
    for (PDOutlineItem topLevelItem : outline.children()) {
        // top level items are printed at level 0 with their 1-based position among siblings
        printNode(topLevelItem, 0, ++position);
    }
}
/**
 * Prints a single outline item, prefixed by one dash per level and followed by its position among its siblings, then
 * recursively prints its children.
 *
 * @param node
 *            the outline item to print
 * @param level
 *            the depth of the item, negative values are treated as 0
 * @param childCounter
 *            the 1-based position of the item among its siblings
 */
private static void printNode(PDOutlineItem node, int level, int childCounter) {
    String prefix = "-".repeat(Math.max(0, level));
    System.out.println(prefix + " " + node.getTitle() + " (" + childCounter + ")");
    if (!node.hasChildren()) {
        return;
    }
    int position = 0;
    for (PDOutlineItem child : node.children()) {
        printNode(child, level + 1, ++position);
    }
}
}