// org.sejda.impl.sambox.component.optimization.OptimizationRuler Maven / Gradle / Ivy
/*
* Created on 03 feb 2016
* Copyright 2015 by Andrea Vacondio ([email protected]).
* This file is part of Sejda.
*
* Sejda is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Sejda is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Sejda. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox.component.optimization;
import static org.sejda.commons.util.RequireUtils.requireNotNullArg;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.sejda.model.optimization.OptimizationPolicy;
import org.sejda.sambox.cos.COSBase;
import org.sejda.sambox.cos.COSDictionary;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.PDPage;
import org.sejda.sambox.pdmodel.PDPageTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Component in charge of deciding whether a document will likely generate split/extract results that need
 * optimization.
 * <p>
 * The decision is driven by the {@link OptimizationPolicy} given at construction time: {@code YES} always
 * answers {@code true}, {@code AUTO} inspects the document page tree looking for resources that are shared
 * among pages or inherited from non-leaf nodes, any other policy answers {@code false}.
 *
 * @author Andrea Vacondio
 *
 */
public class OptimizationRuler implements Function<PDDocument, Boolean> {

    private static final Logger LOG = LoggerFactory.getLogger(OptimizationRuler.class);

    // assigned once in the constructor and never modified afterwards
    private final OptimizationPolicy policy;

    /**
     * @param policy
     *            the policy driving the decision, validated through {@code requireNotNullArg}
     */
    public OptimizationRuler(OptimizationPolicy policy) {
        requireNotNullArg(policy, "Optimization policy cannot be null");
        this.policy = policy;
    }

    /**
     * @param document
     *            the document to inspect. It is accessed only when the policy is {@code AUTO}.
     * @return true if the policy is {@code YES}, the outcome of the document inspection if the policy is
     *         {@code AUTO}, false otherwise.
     */
    @Override
    public Boolean apply(PDDocument document) {
        if (policy == OptimizationPolicy.YES) {
            return true;
        }
        if (policy == OptimizationPolicy.AUTO) {
            return willNeedOptimization(document);
        }
        return false;
    }

    /**
     * @param document
     * @return true if any of the heuristics (shared XObject dictionaries, shared font dictionaries, inherited
     *         resources) matches. They are evaluated in this order and evaluation stops at the first match.
     */
    private boolean willNeedOptimization(PDDocument document) {
        return hasSharedXObjectDictionaries(document) || hasSharedFontDictionaries(document)
                || hasInheritedResources(document);
    }

    /**
     * @param document
     * @return true if the document page tree has non leaf nodes with fonts or images resources, inherited by page
     *         leaves.
     */
    private boolean hasInheritedResources(PDDocument document) {
        // we take all the distinct resource dictionaries found in non-leaf nodes (i.e. inherited by pages) and we
        // count the fonts and the xobjects of subtype Image they hold. Basically we try to determine if pages are
        // going to inherit images or fonts, potentially unused, in which case we want to optimize
        List<COSDictionary> resources = document.getPages().streamNodes().filter(PDPageTree::isPageTreeNode)
                .map(d -> d.getDictionaryObject(COSName.RESOURCES, COSDictionary.class)).filter(Objects::nonNull)
                .distinct().collect(Collectors.toList());

        // XObject entries that resolve to a dictionary whose Subtype is Image
        long inheritedImage = resources.stream()
                .map(d -> d.getDictionaryObject(COSName.XOBJECT, COSDictionary.class)).filter(Objects::nonNull)
                .flatMap(d -> d.getValues().stream()).map(COSBase::getCOSObject)
                .filter(d -> d instanceof COSDictionary).map(d -> (COSDictionary) d)
                .map(d -> d.getNameAsString(COSName.SUBTYPE)).filter(Objects::nonNull)
                .filter(COSName.IMAGE.getName()::equals).count();

        // Font entries that resolve to a dictionary
        long inheritedFonts = resources.stream().map(d -> d.getDictionaryObject(COSName.FONT, COSDictionary.class))
                .filter(Objects::nonNull).flatMap(d -> d.getValues().stream()).map(COSBase::getCOSObject)
                .filter(d -> d instanceof COSDictionary).count();

        LOG.debug("Found {} inherited images and {} inherited fonts potentially unused", inheritedImage,
                inheritedFonts);
        return (inheritedImage + inheritedFonts) > 0;
    }

    /**
     * @param document
     * @return true if the same non-empty XObject name dictionary is found in the resources of more than one page.
     */
    private boolean hasSharedXObjectDictionaries(PDDocument document) {
        // from the resource dictionary of every page we collect the non-empty xobject name dictionaries
        List<COSDictionary> xobjectsDictionaries = document.getPages().stream().map(PDPage::getCOSObject)
                .filter(Objects::nonNull).map(d -> d.getDictionaryObject(COSName.RESOURCES, COSDictionary.class))
                .filter(Objects::nonNull).map(d -> d.getDictionaryObject(COSName.XOBJECT, COSDictionary.class))
                .filter(Objects::nonNull).filter(x -> x.size() > 0).collect(Collectors.toList());
        long distinctXobjectsDictionaries = xobjectsDictionaries.stream().distinct().count();
        if (xobjectsDictionaries.size() > distinctXobjectsDictionaries) {
            // if the distinct count is different it means one or more xobject name dictionary is shared among some
            // pages so it likely contains images used by multiple pages so we optimize
            LOG.debug("Found shared XObject dictionary containing image resouces");
            return true;
        }
        return false;
    }

    /**
     * @param document
     * @return true if the same font name dictionary, holding at least one entry that resolves to a dictionary, is
     *         found in the resources of more than one page.
     */
    private boolean hasSharedFontDictionaries(PDDocument document) {
        // from the resource dictionary of every page we collect the font name dictionaries having at least one
        // value that resolves to a dictionary
        List<COSDictionary> fontDictionaries = document.getPages().stream().map(PDPage::getCOSObject)
                .filter(Objects::nonNull).map(d -> d.getDictionaryObject(COSName.RESOURCES, COSDictionary.class))
                .filter(Objects::nonNull).map(d -> d.getDictionaryObject(COSName.FONT, COSDictionary.class))
                .filter(Objects::nonNull)
                // anyMatch stops at the first hit instead of counting every value
                .filter(x -> x.getValues().stream().map(COSBase::getCOSObject)
                        .anyMatch(v -> v instanceof COSDictionary))
                .collect(Collectors.toList());
        long distinctFontDictionaries = fontDictionaries.stream().distinct().count();
        if (fontDictionaries.size() > distinctFontDictionaries) {
            // if the distinct count is different it means one or more font name dictionaries is shared among pages
            // so it likely contains fonts used by multiple pages so we optimize
            LOG.debug("Found shared font dictionaries");
            return true;
        }
        return false;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy