All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sejda.impl.sambox.SetMetadataTask Maven / Gradle / Ivy

There is a newer version: 5.1.7
Show newest version
/*
 * Copyright 2015 by Edi Weissmann ([email protected])
 *
 * This file is part of the Sejda source code
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */
package org.sejda.impl.sambox;

import org.sejda.core.support.io.MultipleOutputWriter;
import org.sejda.core.support.io.OutputWriters;
import org.sejda.impl.sambox.component.DefaultPdfSourceOpener;
import org.sejda.impl.sambox.component.PDDocumentHandler;
import org.sejda.model.exception.TaskException;
import org.sejda.model.input.PdfSource;
import org.sejda.model.input.PdfSourceOpener;
import org.sejda.model.parameter.SetMetadataParameters;
import org.sejda.model.task.BaseTask;
import org.sejda.model.task.TaskExecutionContext;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.PDDocumentCatalog;
import org.sejda.sambox.pdmodel.PDDocumentInformation;
import org.sejda.sambox.pdmodel.common.PDMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXParseException;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.URIResolver;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Map.Entry;
import java.util.TimeZone;

import static java.util.Optional.ofNullable;
import static org.sejda.commons.util.IOUtils.closeQuietly;
import static org.sejda.core.notification.dsl.ApplicationEventsNotifier.notifyEvent;
import static org.sejda.core.support.io.model.FileOutput.file;
import static org.sejda.core.support.prefix.NameGenerator.nameGenerator;
import static org.sejda.core.support.prefix.model.NameGenerationRequest.nameRequest;
import static org.sejda.model.util.IOUtils.createTemporaryBuffer;

/**
 * SAMBox implementation of a task setting metadata on an input {@link PdfSource}.
 * 
 * @author Eduard Weissmann
 * 
 */
public class SetMetadataTask extends BaseTask {

    private static final Logger LOG = LoggerFactory.getLogger(SetMetadataTask.class);

    private PDDocumentHandler documentHandler = null;
    private MultipleOutputWriter outputWriter;
    private PdfSourceOpener documentLoader;

    @Override
    public void before(SetMetadataParameters parameters, TaskExecutionContext executionContext) throws TaskException {
        super.before(parameters, executionContext);
        documentLoader = new DefaultPdfSourceOpener(executionContext);
        outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
    }

    @Override
    public void execute(SetMetadataParameters parameters) throws TaskException {
        int totalSteps = parameters.getSourceList().size();

        for (int sourceIndex = 0; sourceIndex < parameters.getSourceList().size(); sourceIndex++) {
            PdfSource source = parameters.getSourceList().get(sourceIndex);
            int fileNumber = executionContext().incrementAndGetOutputDocumentsCounter();
            
            try {
                LOG.debug("Opening {}", source);
                executionContext().notifiableTaskMetadata().setCurrentSource(source);

                documentHandler = source.open(documentLoader);
                documentHandler.setUpdateProducerModifiedDate(parameters.isUpdateProducerModifiedDate());
                if (parameters.isUpdateProducerModifiedDate()) {
                    documentHandler.setCreatorOnPDDocument();
                }

                File tmpFile = createTemporaryBuffer(parameters.getOutput());

                PDDocument doc = documentHandler.getUnderlyingPDDocument();
                doc.setOnBeforeWriteAction(() -> {
                    LOG.debug("Setting metadata on temporary document.");

                    PDDocument doc1 = documentHandler.getUnderlyingPDDocument();
                    PDDocumentCatalog catalog = doc1.getDocumentCatalog();

                    if (parameters.isRemoveAllMetadata()) {
                        doc1.setDocumentInformation(new PDDocumentInformation());
                        catalog.setMetadata(null);

                        return;
                    }

                    PDDocumentInformation actualMeta = doc1.getDocumentInformation();
                    for (Entry meta : parameters.getMetadata().entrySet()) {
                        LOG.trace("'{}' -> '{}'", meta.getKey(), meta.getValue());
                        actualMeta.setCustomMetadataValue(meta.getKey(), meta.getValue());
                    }

                    for (String keyToRemove : parameters.getToRemove()) {
                        LOG.trace("Removing '{}'", keyToRemove);
                        actualMeta.removeMetadataField(keyToRemove);
                    }

                    if (catalog.getMetadata() != null) {
                        LOG.debug("Document has XMP metadata stream");
                        
                        try {
                            updateXmpMetadata(catalog, actualMeta);
                        } catch (RuntimeException ex) {
                            if(exceptionStackContains("Namespace for prefix 'xmp' has not been declared", ex)) {
                                // try to fix the metadata and retry
                                fixMissingXmpNamespace(catalog);
                                updateXmpMetadata(catalog, actualMeta);
                            } else {
                                throw ex;
                            }
                        }
                    }
                });

                documentHandler.setVersionOnPDDocument(parameters.getVersion());
                documentHandler.setCompress(parameters.isCompress());
                documentHandler.savePDDocument(tmpFile, parameters.getOutput().getEncryptionAtRestPolicy());

                String outName = ofNullable(parameters.getSpecificResultFilename(fileNumber)).orElseGet(
                        () -> nameGenerator(parameters.getOutputPrefix()).generate(
                                nameRequest().originalName(source.getName()).fileNumber(fileNumber)));

                outputWriter.addOutput(file(tmpFile).name(outName));

            } finally {
                closeQuietly(documentHandler);
            }

            notifyEvent(executionContext().notifiableTaskMetadata()).stepsCompleted(fileNumber).outOf(totalSteps);
        }

        executionContext().notifiableTaskMetadata().clearCurrentSource();
        parameters.getOutput().accept(outputWriter);
        LOG.debug("Metadata set and written to {}", parameters.getOutput());

    }
    
    private XPathFactory newXPathFactory() {
        try {
            XPathFactory f = XPathFactory.newInstance();
            f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            return f;
        } catch (XPathFactoryConfigurationException e) {
            throw new RuntimeException(e);
        }
    }
    
    private void updateDateNode(String tagName, Document document, Calendar calendar) throws XPathExpressionException {
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

        String value = "";
        if(calendar != null) {
            value = dateFormat.format(calendar.getTime());
        }
        updateTextNode(tagName, document, value);
    }
    
    private void updateTextNode(String tagName, Document document, String value) throws XPathExpressionException {
        XPath xPath = newXPathFactory().newXPath();
        Node node = (Node) xPath.compile("//*[name()='" + tagName + "']").evaluate(document, XPathConstants.NODE);
        if(value == null) {
            value = "";
        }

        if(node != null) {
            node.setTextContent(value);
        }
    }

    private boolean exceptionStackContains(String msg, Throwable ex) {
        if (ex != null && ex.getMessage() != null && ex.getMessage().contains(msg)){
            return true;
        }

        if (ex != null && ex.getCause() != null) {
            return exceptionStackContains(msg, ex.getCause());
        }

        return false;
    }

    private void deleteAttr(String path, String attrName, Document document) throws XPathExpressionException {
        XPath xPath = newXPathFactory().newXPath();
        Node node = (Node) xPath.compile(path).evaluate(document, XPathConstants.NODE);
        if(node != null && node.getAttributes().getNamedItem(attrName) != null) {
            node.getAttributes().removeNamedItem(attrName);
        }
    }
    
    private void fixMissingXmpNamespace(PDDocumentCatalog catalog) throws IOException {
        try(InputStream is = catalog.getMetadata().createInputStream()) {
            String metadataAsString = new String(is.readAllBytes(), StandardCharsets.UTF_8);
            if (!metadataAsString.contains("xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"")) {
                LOG.warn("Metadata seems to be missing xmlns:xmp namespace definition, adding it");
                metadataAsString = metadataAsString.replaceAll("




© 2015 - 2025 Weber Informatics LLC | Privacy Policy