org.verapdf.metadata.fixer.MetadataFixerImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdfbox-metadata-fixer-jakarta Show documentation
Show all versions of pdfbox-metadata-fixer-jakarta Show documentation
Performs basic metadata repair for a PDF document.
The newest version!
/**
* This file is part of veraPDF PDF Box Metadata Fixer, a module of the veraPDF project.
* Copyright (c) 2015, veraPDF Consortium
* All rights reserved.
*
* veraPDF PDF Box Metadata Fixer is free software: you can redistribute it and/or modify
* it under the terms of either:
*
* The GNU General public license GPLv3+.
* You should have received a copy of the GNU General Public License
* along with veraPDF PDF Box Metadata Fixer as the LICENSE.GPL file in the root of the source
* tree. If not, see http://www.gnu.org/licenses/ or
* https://www.gnu.org/licenses/gpl-3.0.en.html.
*
* The Mozilla Public License MPLv2+.
* You should have received a copy of the Mozilla Public License along with
* veraPDF PDF Box Metadata Fixer as the LICENSE.MPL file in the root of the source tree.
* If a copy of the MPL was not distributed with this file, you can obtain one at
* http://mozilla.org/MPL/2.0/.
*/
package org.verapdf.metadata.fixer;
import java.util.logging.Logger;
import org.verapdf.component.ComponentDetails;
import org.verapdf.component.Components;
import org.verapdf.metadata.fixer.entity.InfoDictionary;
import org.verapdf.metadata.fixer.entity.Metadata;
import org.verapdf.metadata.fixer.entity.PDFDocument;
import org.verapdf.metadata.fixer.schemas.AdobePDF;
import org.verapdf.metadata.fixer.schemas.BasicSchema;
import org.verapdf.metadata.fixer.schemas.DublinCore;
import org.verapdf.metadata.fixer.schemas.XMPBasic;
import org.verapdf.metadata.fixer.utils.DateConverter;
import org.verapdf.metadata.fixer.utils.ProcessedObjectsInspector;
import org.verapdf.metadata.fixer.utils.ValidationStatus;
import org.verapdf.metadata.fixer.utils.parser.ProcessedObjectsParser;
import org.verapdf.pdfa.MetadataFixer;
import org.verapdf.pdfa.flavours.PDFAFlavour;
import org.verapdf.pdfa.results.MetadataFixerResult;
import org.verapdf.pdfa.results.MetadataFixerResultImpl;
import org.verapdf.pdfa.results.ValidationResult;
import org.verapdf.pdfa.validation.profiles.ProfileDirectory;
import org.verapdf.pdfa.validation.profiles.Profiles;
import org.verapdf.pdfa.validation.profiles.ValidationProfile;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;
import static org.verapdf.metadata.fixer.utils.MetadataFixerConstants.*;
/**
 * Base {@link MetadataFixer} implementation that performs basic metadata
 * repair for a PDF document. Depending on the validation status derived from
 * a {@link ValidationResult}, it synchronises the Dublin Core, Adobe PDF and
 * XMP Basic schema properties with the document info dictionary, optionally
 * adds or removes the PDF identification schema, and saves the document
 * incrementally.
 *
 * @author Evgeniy Muravitskiy
 */
abstract class MetadataFixerImpl implements MetadataFixer {
    private static final URI componentId = URI.create("http://pdfa.verapdf.org/metadata/fixer#default");
    private static final String componentName = "veraPDF PDF Box Metadata Fixer";
    private static final ComponentDetails componentDetails = Components.libraryDetails(componentId, componentName);
    private static final ProfileDirectory PROFILES = Profiles.getVeraProfileDirectory();
    private static final Logger LOGGER = Logger.getLogger(MetadataFixerImpl.class.getCanonicalName());
    /**
     * Read-only mapping from a metadata attribute name to the corresponding
     * info dictionary key; the key is used when reporting applied fixes.
     */
    private static final Map<String, String> attributes = Collections.unmodifiableMap(mkAttsMap());

    /**
     * The class is abstract and the constructor is protected, so instances
     * can only be created through subclasses.
     */
    protected MetadataFixerImpl() {
        // enabled only for nested classes
    }

    /**
     * Fixes the metadata and info dictionary of the given
     * {@link org.verapdf.metadata.fixer.entity.PDFDocument} and saves the
     * document incrementally to {@code output}. When {@code result} is
     * non-null and compliant nothing is fixed or saved and an empty result is
     * returned.
     *
     * @param output
     *            stream the fixed document is written to
     * @param document
     *            document whose metadata should be fixed
     * @param result
     *            validation result that drives the choice of fixes
     * @param fixIdentification
     *            whether the PDF identification schema should be added or
     *            removed as part of the fix
     * @param parser
     *            parser handed to {@link ProcessedObjectsInspector} to derive
     *            the validation status
     * @return report of the corrections made; a result with status
     *         {@code FIX_ERROR} if the fix could not be carried out
     */
    public static MetadataFixerResult fixMetadata(OutputStream output, PDFDocument document, ValidationResult result,
            boolean fixIdentification, ProcessedObjectsParser parser) {
        if (result != null && result.isCompliant()) {
            return new MetadataFixerResultImpl.Builder().build();
        }
        return fixAndSaveDocument(output, document, result, fixIdentification, parser);
    }

    /**
     * @return the static component details of this metadata fixer
     */
    @Override
    public ComponentDetails getDetails() {
        return componentDetails;
    }

    /**
     * Applies the fixes selected by the validation status, refreshes the
     * modification date and saves the document incrementally.
     *
     * @param output
     *            stream the fixed document is written to
     * @param document
     *            document to fix
     * @param result
     *            validation result; a {@code null} value yields an error
     *            result
     * @param fixIdentification
     *            whether the PDF identification schema should be added or
     *            removed
     * @param parser
     *            parser used to derive the validation status
     * @return the merged result of the fixes applied here and those reported
     *         by the incremental save, or a {@code FIX_ERROR} result when
     *         metadata or the validation result is missing or any error is
     *         thrown
     */
    private static MetadataFixerResult fixAndSaveDocument(OutputStream output, PDFDocument document,
            ValidationResult result, boolean fixIdentification, ProcessedObjectsParser parser) {
        try {
            Metadata metadata = document.getMetadata();
            if (metadata == null) {
                return getErrorResult("Problems with metadata obtain. No possibility to fix metadata.");
            }
            if (result == null) {
                // Without a validation result neither the flavour nor the
                // validation status can be determined; report it explicitly
                // instead of relying on a NullPointerException further down.
                return getErrorResult("Validation result is not available. No possibility to fix metadata.");
            }
            MetadataFixerResultImpl.Builder resultBuilder = new MetadataFixerResultImpl.Builder();
            ValidationStatus status = getValidationStatus(result, parser);
            switch (status) {
            case INVALID_METADATA:
                executeInvalidMetadataCase(document, metadata, resultBuilder, result.getPDFAFlavour(),
                        fixIdentification);
                break;
            case INVALID_DOCUMENT:
            case INVALID_STRUCTURE: {
                // The document cannot be made valid by metadata fixes alone,
                // so only the identification schema is (optionally) removed.
                resultBuilder.status(MetadataFixerResult.RepairStatus.WONT_FIX);
                if (fixIdentification) {
                    metadata.removePDFIdentificationSchema(resultBuilder, result.getPDFAFlavour());
                }
                break;
            }
            default:
                break;
            }
            updateModificationDate(document, resultBuilder);
            MetadataFixerResult partialResult = document.saveDocumentIncremental(resultBuilder.getStatus(), output,
                    result.getPDFAFlavour());
            // The save step decides the final status and may report fixes of its own.
            resultBuilder.status(partialResult.getRepairStatus());
            for (String fix : partialResult.getAppliedFixes()) {
                resultBuilder.addFix(fix);
            }
            return resultBuilder.build();
        } catch (Throwable e) {
            // Deliberately broad: any failure is reported as a fix error
            // rather than propagated. The throwable is logged so the cause
            // and stack trace are not lost.
            LOGGER.log(java.util.logging.Level.INFO, "Error while fixing metadata: " + e.getMessage(), e);
            return getErrorResult("Error while fixing metadata: " + e.getMessage());
        }
    }

    /**
     * Builds a result with status {@code FIX_ERROR} carrying the given
     * message as its only fix entry.
     *
     * @param message
     *            description of the problem
     * @return the error result
     */
    private static MetadataFixerResult getErrorResult(String message) {
        MetadataFixerResultImpl.Builder resultBuilder = new MetadataFixerResultImpl.Builder();
        resultBuilder.status(MetadataFixerResultImpl.RepairStatus.FIX_ERROR).addFix(message);
        return resultBuilder.build();
    }

    /**
     * Derives the validation status for the given result using the
     * validation profile registered for the result's flavour.
     *
     * @param result
     *            validation result to inspect; must not be {@code null}
     * @param parser
     *            parser handed to {@link ProcessedObjectsInspector}
     * @return the inspected status; {@code INVALID_DOCUMENT} if inspection
     *         throws one of the handled exceptions; {@code INVALID_METADATA}
     *         if no profile is found for the flavour
     */
    private static ValidationStatus getValidationStatus(ValidationResult result, ProcessedObjectsParser parser) {
        ValidationProfile profile = PROFILES.getValidationProfileByFlavour(result.getPDFAFlavour());
        if (profile != null) {
            try {
                return ProcessedObjectsInspector.validationStatus(result.getTestAssertions(), profile, parser);
            } catch (IOException | URISyntaxException | ParserConfigurationException | SAXException e) {
                LOGGER.log(java.util.logging.Level.INFO,
                        "Problem with validation status obtain. Validation status set as Invalid Document. "
                                + e.getMessage(),
                        e);
                return ValidationStatus.INVALID_DOCUMENT;
            }
        }
        LOGGER.log(java.util.logging.Level.INFO,
                "Problem with validation status obtain. Validation status set as Invalid Metadata.");
        return ValidationStatus.INVALID_METADATA;
    }

    /**
     * Handles the {@code INVALID_METADATA} status: for ISO 19005-1 flavours
     * removes filters from all metadata objects, then synchronises the
     * schemas with the info dictionary, optionally adds the PDF
     * identification schema and, if the metadata is flagged for update,
     * calls {@code checkMetadataStream} on it.
     *
     * @param document
     *            document being fixed
     * @param metadata
     *            metadata of the document
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param flavour
     *            PDF/A flavour of the validation result
     * @param fixIdentification
     *            whether the PDF identification schema should be added
     * @throws IllegalStateException
     *             if filter removal reports a negative count
     */
    private static void executeInvalidMetadataCase(PDFDocument document, Metadata metadata,
            MetadataFixerResultImpl.Builder resultBuilder, PDFAFlavour flavour, boolean fixIdentification) {
        if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_1) {
            int removedFilters = document.removeFiltersForAllMetadataObjects();
            if (removedFilters > 0) {
                resultBuilder.addFix("Metadata streams unfiltered");
            } else if (removedFilters < 0) {
                throw new IllegalStateException("Problem while removing filters from metadata streams");
            }
        }
        fixMetadata(resultBuilder, document, flavour);
        if (fixIdentification) {
            metadata.addPDFIdentificationSchema(resultBuilder, flavour);
        }
        if (metadata.isNeedToBeUpdated()) {
            metadata.checkMetadataStream(resultBuilder, flavour);
        }
    }

    /**
     * Synchronises the Dublin Core, Adobe PDF and XMP Basic schemas with the
     * info dictionary. Only done for ISO 19005-1 flavours; other flavours
     * are left untouched.
     *
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param document
     *            document being fixed
     * @param flavour
     *            PDF/A flavour of the validation result
     */
    private static void fixMetadata(MetadataFixerResultImpl.Builder resultBuilder, PDFDocument document,
            PDFAFlavour flavour) {
        if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_1) {
            fixDublinCoreSchema(resultBuilder, document);
            fixAdobePDFSchema(resultBuilder, document);
            fixBasicXMLSchema(resultBuilder, document);
        }
    }

    /**
     * Synchronises title, subject and author between the Dublin Core schema
     * and the info dictionary. Does nothing if either is unavailable.
     *
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param document
     *            document being fixed
     */
    private static void fixDublinCoreSchema(MetadataFixerResultImpl.Builder resultBuilder, PDFDocument document) {
        Metadata metadata = document.getMetadata();
        InfoDictionary info = document.getInfoDictionary();
        DublinCore schema = metadata.getDublinCoreSchema(info);
        if (schema != null && info != null) {
            fixProperty(resultBuilder, schema, info, schema.getTitle(), info.getTitle(), METADATA_TITLE);
            fixProperty(resultBuilder, schema, info, schema.getSubject(), info.getSubject(), METADATA_SUBJECT);
            fixProperty(resultBuilder, schema, info, schema.getAuthor(), info.getAuthor(), METADATA_AUTHOR);
        }
    }

    /**
     * Synchronises producer and keywords between the Adobe PDF schema and
     * the info dictionary. Does nothing if either is unavailable.
     *
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param document
     *            document being fixed
     */
    private static void fixAdobePDFSchema(MetadataFixerResultImpl.Builder resultBuilder, PDFDocument document) {
        Metadata metadata = document.getMetadata();
        InfoDictionary info = document.getInfoDictionary();
        AdobePDF schema = metadata.getAdobePDFSchema(info);
        if (schema != null && info != null) {
            fixProperty(resultBuilder, schema, info, schema.getProducer(), info.getProducer(), PRODUCER);
            fixProperty(resultBuilder, schema, info, schema.getKeywords(), info.getKeywords(), KEYWORDS);
        }
    }

    /**
     * Synchronises creator, creation date and modification date between the
     * XMP Basic schema and the info dictionary. Does nothing if either is
     * unavailable.
     *
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param document
     *            document being fixed
     */
    private static void fixBasicXMLSchema(MetadataFixerResultImpl.Builder resultBuilder, PDFDocument document) {
        Metadata metadata = document.getMetadata();
        InfoDictionary info = document.getInfoDictionary();
        XMPBasic schema = metadata.getXMPBasicSchema(info);
        if (schema != null && info != null) {
            fixProperty(resultBuilder, schema, info, schema.getCreator(), info.getCreator(), METADATA_CREATOR);
            fixCalendarProperty(resultBuilder, schema, info, schema.getCreationDate(), info.getCreationDate(),
                    METADATA_CREATION_DATE);
            fixCalendarProperty(resultBuilder, schema, info, schema.getModificationDate(),
                    info.getModificationDate(), METADATA_MODIFICATION_DATE);
        }
    }

    /**
     * Reconciles one text property. Nothing happens when the info dictionary
     * has no value. If the metadata value is missing, the info value is
     * copied into the schema; otherwise, if the two values differ, the
     * metadata value is written to the info dictionary.
     *
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param schema
     *            metadata schema holding the property
     * @param info
     *            info dictionary holding the property
     * @param metaValue
     *            current metadata value, may be {@code null}
     * @param infoValue
     *            current info dictionary value, may be {@code null}
     * @param attribute
     *            metadata attribute name identifying the property
     */
    private static void fixProperty(MetadataFixerResultImpl.Builder resultBuilder, BasicSchema schema,
            InfoDictionary info, String metaValue, String infoValue, String attribute) {
        if (infoValue != null) {
            String key = attributes.get(attribute);
            if (metaValue == null) {
                doSaveAction(schema, attribute, infoValue);
                resultBuilder.addFix("Added '" + key + "' to metadata from info dictionary");
            } else if (!metaValue.equals(infoValue)) {
                doSaveAction(info, attribute, metaValue);
                resultBuilder.addFix("Added '" + attribute + "' to info dictionary from metadata");
            }
        }
    }

    /**
     * Reconciles one date property. Nothing happens when the info dictionary
     * has no value. If the metadata value is missing, the info value is
     * copied into the schema; otherwise the metadata value is written to the
     * info dictionary when it differs from the UTC form of the info value or
     * when the info value does not match {@code PDF_DATE_FORMAT_REGEX}.
     *
     * @param resultBuilder
     *            builder collecting the applied fixes
     * @param schema
     *            metadata schema holding the property
     * @param info
     *            info dictionary holding the property
     * @param metaValue
     *            current metadata value, may be {@code null}
     * @param infoValue
     *            current info dictionary value, may be {@code null}
     * @param attribute
     *            metadata attribute name identifying the property
     */
    private static void fixCalendarProperty(MetadataFixerResultImpl.Builder resultBuilder, BasicSchema schema,
            InfoDictionary info, String metaValue, String infoValue, String attribute) {
        if (infoValue != null) {
            String key = attributes.get(attribute);
            String utcInfoValue = DateConverter.toUTCString(infoValue);
            if (metaValue == null) {
                doSaveAction(schema, attribute, infoValue);
                resultBuilder.addFix("Added '" + key + "' to metadata from info dictionary");
            } else if (!metaValue.equals(utcInfoValue) || !infoValue.matches(PDF_DATE_FORMAT_REGEX)) {
                doSaveAction(info, attribute, metaValue);
                resultBuilder.addFix("Added '" + attribute + "' to info dictionary from metadata");
            }
        }
    }

    /**
     * Stores {@code value} through the setter that corresponds to
     * {@code attribute} and marks the schema as needing an update. The
     * schema is cast to the interface that declares the setter. Unknown
     * attributes are ignored and leave the schema unmarked.
     *
     * @param schema
     *            target schema (a metadata schema or the info dictionary)
     * @param attribute
     *            metadata attribute name selecting the setter
     * @param value
     *            value to store
     */
    private static void doSaveAction(BasicSchema schema, String attribute, String value) {
        switch (attribute) {
        case METADATA_TITLE:
            ((DublinCore) schema).setTitle(value);
            break;
        case METADATA_SUBJECT:
            ((DublinCore) schema).setSubject(value);
            break;
        case METADATA_AUTHOR:
            ((DublinCore) schema).setAuthor(value);
            break;
        case PRODUCER:
            ((AdobePDF) schema).setProducer(value);
            break;
        case KEYWORDS:
            ((AdobePDF) schema).setKeywords(value);
            break;
        case METADATA_CREATOR:
            ((XMPBasic) schema).setCreator(value);
            break;
        case METADATA_CREATION_DATE:
            ((XMPBasic) schema).setCreationDate(value);
            break;
        case METADATA_MODIFICATION_DATE:
            ((XMPBasic) schema).setModificationDate(value);
            break;
        default:
            // Unknown attribute: nothing was stored, so do not flag the schema.
            return;
        }
        schema.setNeedToBeUpdated(true);
    }

    /**
     * When the document is flagged as needing an update, stamps the current
     * UTC time as the modification date in the XMP Basic schema and in the
     * info dictionary, in each case only where a modification date is
     * already present.
     *
     * @param document
     *            document being fixed
     * @param resultBuilder
     *            builder collecting the applied fixes
     */
    private static void updateModificationDate(PDFDocument document, MetadataFixerResultImpl.Builder resultBuilder) {
        InfoDictionary info = document.getInfoDictionary();
        XMPBasic schema = document.getMetadata().getXMPBasicSchema(info);
        if (document.isNeedToBeUpdated() && schema != null) {
            Calendar time = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            if (schema.getModificationDate() != null) {
                doSaveAction(schema, METADATA_MODIFICATION_DATE, DateConverter.toUTCString(time));
                resultBuilder.addFix("Set new modification date to metadata");
            }
            if (info != null && info.getModificationDate() != null) {
                doSaveAction(info, METADATA_MODIFICATION_DATE, DateConverter.toPDFFormat(time));
                resultBuilder.addFix("Set new modification date to info dictionary");
            }
        }
    }

    /**
     * Builds the mutable mapping from metadata attribute names to info
     * dictionary keys.
     *
     * @return a new map with one entry per supported property
     */
    private static Map<String, String> mkAttsMap() {
        Map<String, String> atts = new HashMap<>();
        atts.put(METADATA_TITLE, INFO_TITLE);
        atts.put(METADATA_SUBJECT, INFO_SUBJECT);
        atts.put(METADATA_AUTHOR, INFO_AUTHOR);
        atts.put(PRODUCER, PRODUCER);
        atts.put(KEYWORDS, KEYWORDS);
        atts.put(METADATA_CREATOR, INFO_CREATOR);
        atts.put(METADATA_CREATION_DATE, INFO_CREATION_DATE);
        atts.put(METADATA_MODIFICATION_DATE, INFO_MODIFICATION_DATE);
        return atts;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy