All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.verapdf.model.ModelParser Maven / Gradle / Ivy

Go to download

Java PDF Box implementation for the veraPDF Validation Java API, generated from an Xtext model.

There is a newer version: 1.26.2
Show newest version
/**
 * This file is part of veraPDF PDF Box PDF/A Validation Model Implementation, a module of the veraPDF project.
 * Copyright (c) 2015, veraPDF Consortium 
 * All rights reserved.
 *
 * veraPDF PDF Box PDF/A Validation Model Implementation is free software: you can redistribute it and/or modify
 * it under the terms of either:
 *
 * The GNU General public license GPLv3+.
 * You should have received a copy of the GNU General Public License
 * along with veraPDF PDF Box PDF/A Validation Model Implementation as the LICENSE.GPL file in the root of the source
 * tree.  If not, see http://www.gnu.org/licenses/ or
 * https://www.gnu.org/licenses/gpl-3.0.en.html.
 *
 * The Mozilla Public License MPLv2+.
 * You should have received a copy of the Mozilla Public License along with
 * veraPDF PDF Box PDF/A Validation Model Implementation as the LICENSE.MPL file in the root of the source tree.
 * If a copy of the MPL was not distributed with this file, you can obtain one at
 * http://mozilla.org/MPL/2.0/.
 */
package org.verapdf.model;

import org.verapdf.xmp.XMPException;
import org.verapdf.xmp.impl.VeraPDFMeta;
import java.util.logging.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.verapdf.ReleaseDetails;
import org.verapdf.component.ComponentDetails;
import org.verapdf.component.Components;
import org.verapdf.core.EncryptedPdfException;
import org.verapdf.core.ModelParsingException;
import org.verapdf.features.AbstractFeaturesExtractor;
import org.verapdf.features.FeatureExtractionResult;
import org.verapdf.features.FeatureExtractorConfig;
import org.verapdf.features.pb.PBFeatureParser;
import org.verapdf.metadata.fixer.entity.PDFDocument;
import org.verapdf.metadata.fixer.impl.pb.model.PDFDocumentImpl;
import org.verapdf.model.impl.pb.containers.StaticContainers;
import org.verapdf.model.impl.pb.cos.PBCosDocument;
import org.verapdf.pdfa.Foundries;
import org.verapdf.pdfa.PDFAParser;
import org.verapdf.pdfa.flavours.PDFAFlavour;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.List;

/**
 * Current class is entry point to model implementation.
 *
 * @author Evgeniy Muravitskiy
 */
public final class ModelParser implements PDFAParser {
	private static final ReleaseDetails pdfBoxReleaseDetails = ReleaseDetails.addDetailsFromResource(
			ReleaseDetails.APPLICATION_PROPERTIES_ROOT + "pdfbox-parser." + ReleaseDetails.PROPERTIES_EXT);
	private static final URI id = URI.create("http://pdfa.verapdf.org/parser#pdfbox");
	private static final ComponentDetails details = Components.veraDetails(id, "PDFBox Parser",
			pdfBoxReleaseDetails.getVersion(), "veraPDF PDFBox based model parser.");

	private static final String PDFUA_PREFIX = "ua";

	private static final Logger LOGGER = Logger.getLogger(ModelParser.class.getCanonicalName());

	private PDDocument document;

	private final PDFAFlavour flavour;

	private ModelParser(final InputStream docStream, PDFAFlavour flavour, PDFAFlavour defaultFlavour) throws IOException {
		this.document = PDDocument.load(docStream, false, true);
		this.flavour = detectFlavour(this.document, flavour, defaultFlavour);
	}

	private ModelParser(final File pdfFile, PDFAFlavour flavour, PDFAFlavour defaultFlavour) throws IOException {
		this.document = PDDocument.load(pdfFile, false, true);
		this.flavour = detectFlavour(this.document, flavour, defaultFlavour);
	}

	public static ModelParser createModelWithFlavour(InputStream toLoad, PDFAFlavour flavour)
			throws ModelParsingException, EncryptedPdfException {
		return createModelWithFlavour(toLoad, flavour, PDFAFlavour.NO_FLAVOUR);
	}

	public static ModelParser createModelWithFlavour(InputStream toLoad, PDFAFlavour flavour, PDFAFlavour defaultFlavour)
			throws ModelParsingException, EncryptedPdfException {
		try {
			cleanUp();
			return new ModelParser(toLoad, flavour, defaultFlavour);
		} catch (InvalidPasswordException excep) {
			throw new EncryptedPdfException("The PDF stream appears to be encrypted.", excep);
		} catch (IOException excep) {
			throw new ModelParsingException("Couldn't parse stream", excep);
		}
	}

	public static ModelParser createModelWithFlavour(File pdfFile, PDFAFlavour flavour)
			throws ModelParsingException, EncryptedPdfException {
		return createModelWithFlavour(pdfFile, flavour, PDFAFlavour.NO_FLAVOUR);
	}

	public static ModelParser createModelWithFlavour(File pdfFile, PDFAFlavour flavour, PDFAFlavour defaultFlavour)
			throws ModelParsingException, EncryptedPdfException {
		try {
			cleanUp();
			return new ModelParser(pdfFile, flavour, defaultFlavour);
		} catch (InvalidPasswordException excep) {
			throw new EncryptedPdfException("The PDF stream appears to be encrypted.", excep);
		} catch (IOException excep) {
			throw new ModelParsingException("Couldn't parse stream", excep);
		}
	}

	private static PDFAFlavour detectFlavour(PDDocument document, PDFAFlavour flavour, PDFAFlavour defaultFlavour) {
		if (flavour == PDFAFlavour.NO_FLAVOUR) {
			return obtainFlavour(document, defaultFlavour == PDFAFlavour.NO_FLAVOUR ?
					Foundries.defaultInstance().defaultFlavour() : defaultFlavour);
		}
		return flavour;
	}

	private static PDFAFlavour obtainFlavour(PDDocument document, PDFAFlavour defaultFlavour) {
		if (document == null || document.getDocumentCatalog() == null) {
			return defaultFlavour;
		}
		PDMetadata metadata = document.getDocumentCatalog().getMetadata();
		if (metadata == null) {
			return defaultFlavour;
		}
		try (InputStream is = metadata.exportXMPMetadata()) {
			VeraPDFMeta veraPDFMeta = VeraPDFMeta.parse(is);
			Integer identificationPart = veraPDFMeta.getIdentificationPart();
			String identificationConformance = veraPDFMeta.getIdentificationConformance();
			String prefix = "";
			if (identificationPart == null && identificationConformance == null) {
				identificationPart = veraPDFMeta.getUAIdentificationPart();
				if (identificationPart != null) {
					prefix = PDFUA_PREFIX;
				}
			}
			if (identificationConformance == null) {
				identificationConformance = "";
			}
			PDFAFlavour pdfaFlavour = PDFAFlavour.byFlavourId(prefix + identificationPart + identificationConformance);
			// TODO: remove that logic after updating NO_FLAVOUR into base pdf validation flavour
			if (pdfaFlavour == PDFAFlavour.NO_FLAVOUR) {
				return defaultFlavour;
			}
			return pdfaFlavour;
		} catch (IOException | XMPException e) {
			LOGGER.log(java.util.logging.Level.SEVERE, e.getMessage());
			return defaultFlavour;
		}
	}

	private static void cleanUp() {
		StaticContainers.clearAllContainers();
	}

	@Override
	public PDFDocument getPDFDocument() {
		return new PDFDocumentImpl(this.document);
	}

	@Override
	public ComponentDetails getDetails() {
		return details;
	}

	/**
	 * Method return root object of model implementation from pdf box model
	 * together with the hierarchy.
	 *
	 * @return root object representing by
	 *         {@link org.verapdf.model.coslayer.CosDocument}
	 * @throws IOException
	 *             when target file is not pdf or pdf file is not contain root
	 *             object
	 */
	@Override
	public org.verapdf.model.baselayer.Object getRoot() {
		return new PBCosDocument(this.document, this.flavour);
	}

	@Override
	public PDFAFlavour getFlavour() {
		return this.flavour;
	}

	@Override
	public FeatureExtractionResult getFeatures(FeatureExtractorConfig config) {
		return PBFeatureParser.getFeaturesCollection(this.document, config);
	}

	@Override
	public FeatureExtractionResult getFeatures(FeatureExtractorConfig config,
			List extractors) {
		return PBFeatureParser.getFeaturesCollection(this.document, extractors, config);
	}

	@Override
	public void close() {
		try {
			if (this.document != null) {
				this.document.close();
			}
		} catch (IOException e) {
			LOGGER.log(java.util.logging.Level.SEVERE, "Problems with document close. " + e.getMessage());
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy