org.sejda.impl.sambox.component.SamboxOutlineLevelsHandler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sejda-sambox Show documentation
Show all versions of sejda-sambox Show documentation
Package containing tasks implemented using sambox.
/*
* This file is part of the Sejda source code
* Created on 09/mar/2015
* Copyright 2013-2014 by Andrea Vacondio ([email protected]).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox.component;
import static java.util.Objects.requireNonNull;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import java.util.List;
import java.util.regex.Pattern;
import org.sejda.model.outline.OutlineExtractPageDestinations;
import org.sejda.model.outline.OutlinePageDestinations;
import org.sejda.sambox.pdmodel.PDDocument;
/**
* SAMBox implementation of an {@link org.sejda.model.outline.OutlineLevelsHandler}
*
* @author Andrea Vacondio
*
*/
public class SamboxOutlineLevelsHandler implements org.sejda.model.outline.OutlineLevelsHandler {

    /** Default title pattern, used when no regular expression is supplied: any non empty title. */
    private static final Pattern ANY_TITLE = Pattern.compile(".+", Pattern.DOTALL);

    private final Pattern titleMatchingPattern;
    private final PDDocument document;

    /**
     * Creates a handler working on the outline of the given document.
     *
     * @param document
     *            the document whose outline is inspected, must not be null
     * @param matchingTitleRegEx
     *            regular expression an outline item title has to fully match in order to be selected. When null or
     *            blank, every non blank title is selected.
     * @throws NullPointerException
     *             if the document is null
     * @throws java.util.regex.PatternSyntaxException
     *             if the given regular expression is not blank and cannot be compiled
     */
    public SamboxOutlineLevelsHandler(PDDocument document, String matchingTitleRegEx) {
        requireNonNull(document, "Unable to retrieve bookmarks from a null document.");
        this.document = document;
        this.titleMatchingPattern = isNotBlank(matchingTitleRegEx)
                ? Pattern.compile(matchingTitleRegEx, Pattern.DOTALL)
                : ANY_TITLE;
    }

    /**
     * Collects the page and title of every outline item found at the given level whose title is not blank and matches
     * the title pattern of this handler.
     *
     * @param level
     *            the outline level to select
     * @return the collected destinations, empty if no item is selected
     */
    @Override
    public OutlinePageDestinations getPageDestinationsForLevel(int level) {
        OutlinePageDestinations destinations = new OutlinePageDestinations();
        for (OutlineItem item : OutlineUtils.getFlatOutline(document)) {
            if (isSelected(item, level)) {
                destinations.addPage(item.page, item.title);
            }
        }
        return destinations;
    }

    /**
     * Collects, for every outline item found at the given level whose title is not blank and matches the title pattern
     * of this handler, the range of pages that item spans. The range starts at the item page and ends where the next
     * item of the same or a shallower level begins, or at the last page of the document if there is no such item.
     *
     * @param level
     *            the outline level to select
     * @param includePageAfter
     *            if true the range ends on the page where the next section starts, otherwise on the page before it
     * @return the collected page ranges, empty if no item is selected
     */
    @Override
    public OutlineExtractPageDestinations getExtractPageDestinations(int level, boolean includePageAfter) {
        OutlineExtractPageDestinations destinations = new OutlineExtractPageDestinations();
        List<OutlineItem> flatOutline = OutlineUtils.getFlatOutline(document);
        for (int i = 0; i < flatOutline.size(); i++) {
            OutlineItem item = flatOutline.get(i);
            if (isSelected(item, level)) {
                destinations.add(item.page, item.title, findEndPage(flatOutline, i, includePageAfter));
            }
        }
        return destinations;
    }

    /** Tells if the item sits at the given level and has a non blank title fully matching the title pattern. */
    private boolean isSelected(OutlineItem item, int level) {
        return item.level == level && isNotBlank(item.title) && titleMatchingPattern.matcher(item.title).matches();
    }

    /**
     * Finds the last page of the section opened by the outline item at the given index.
     *
     * @return the end page, never lower than the section start page
     */
    private int findEndPage(List<OutlineItem> flatOutline, int index, boolean includePageAfter) {
        OutlineItem current = flatOutline.get(index);
        for (int j = index + 1; j < flatOutline.size(); j++) {
            OutlineItem after = flatOutline.get(j);
            if (after.level <= current.level) {
                // Looking at the bookmark xyz destination flag would technically be more accurate, but in practice
                // outlines contain non xyz destinations for sections starting half-page, and the last half page
                // would then be missing from the extract. Always including one extra page would instead hurt
                // documents needing a precise split (e.g. batched payslips where each page has its own bookmark),
                // so the choice is left to the caller through includePageAfter.
                int endPage = includePageAfter ? after.page : after.page - 1;
                // no empty documents: a section always contains at least its own start page
                return Math.max(endPage, current.page);
            }
        }
        // no following section at the same or a shallower level: the section runs until the end of the document
        return document.getNumberOfPages();
    }
}