// com.itextpdf.pdfua.checkers.PdfUA1Checker Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfua.checkers;
import com.itextpdf.commons.datastructures.Tuple2;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.pdf.EncryptionConstants;
import com.itextpdf.kernel.pdf.IsoKey;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfBoolean;
import com.itextpdf.kernel.pdf.PdfCatalog;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfNumber;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfResources;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfVersion;
import com.itextpdf.kernel.pdf.tagging.PdfMcr;
import com.itextpdf.kernel.pdf.tagging.PdfNamespace;
import com.itextpdf.kernel.pdf.tagging.PdfStructTreeRoot;
import com.itextpdf.kernel.pdf.tagging.StandardRoles;
import com.itextpdf.kernel.pdf.tagutils.IRoleMappingResolver;
import com.itextpdf.kernel.pdf.tagutils.TagStructureContext;
import com.itextpdf.kernel.pdf.tagutils.TagTreeIterator;
import com.itextpdf.kernel.utils.IValidationChecker;
import com.itextpdf.kernel.utils.ValidationContext;
import com.itextpdf.kernel.utils.checkers.FontCheckUtil;
import com.itextpdf.kernel.xmp.XMPConst;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.pdfua.checkers.utils.AnnotationCheckUtil;
import com.itextpdf.pdfua.checkers.utils.BCP47Validator;
import com.itextpdf.pdfua.checkers.utils.FormCheckUtil;
import com.itextpdf.pdfua.checkers.utils.FormulaCheckUtil;
import com.itextpdf.pdfua.checkers.utils.GraphicsCheckUtil;
import com.itextpdf.pdfua.checkers.utils.LayoutCheckUtil;
import com.itextpdf.pdfua.checkers.utils.NoteCheckUtil;
import com.itextpdf.pdfua.checkers.utils.PdfUAValidationContext;
import com.itextpdf.pdfua.checkers.utils.XfaCheckUtil;
import com.itextpdf.pdfua.checkers.utils.headings.HeadingsChecker;
import com.itextpdf.pdfua.checkers.utils.tables.TableCheckUtil;
import com.itextpdf.pdfua.exceptions.PdfUAConformanceException;
import com.itextpdf.pdfua.exceptions.PdfUAExceptionMessageConstants;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
/**
* The class defines the requirements of the PDF/UA-1 standard.
*
* The specification implemented by this class is ISO 14289-1
*/
public class PdfUA1Checker implements IValidationChecker {
private final PdfDocument pdfDocument;
private final TagStructureContext tagStructureContext;
private final HeadingsChecker headingsChecker;
private final PdfUAValidationContext context;
/**
* Creates PdfUA1Checker instance with PDF document which will be validated against PDF/UA-1 standard.
*
* @param pdfDocument the document to validate
*/
public PdfUA1Checker(PdfDocument pdfDocument) {
this.pdfDocument = pdfDocument;
this.tagStructureContext = new TagStructureContext(pdfDocument);
this.context = new PdfUAValidationContext(pdfDocument);
this.headingsChecker = new HeadingsChecker(context);
}
/**
* {@inheritDoc}
*/
@Override
public void validateDocument(ValidationContext validationContext) {
checkCatalog(validationContext.getPdfDocument().getCatalog());
checkStructureTreeRoot(validationContext.getPdfDocument().getStructTreeRoot());
checkFonts(validationContext.getFonts());
XfaCheckUtil.check(validationContext.getPdfDocument());
}
/**
* {@inheritDoc}
*/
@Override
public void validateObject(Object obj, IsoKey key, PdfResources resources, PdfStream contentStream, Object extra) {
switch (key) {
case LAYOUT:
new LayoutCheckUtil(context).checkRenderer(obj);
headingsChecker.checkLayoutElement(obj);
break;
case CANVAS_WRITING_CONTENT:
checkOnWritingCanvasToContent(obj);
break;
case CANVAS_BEGIN_MARKED_CONTENT:
checkOnOpeningBeginMarkedContent(obj, extra);
break;
case FONT:
checkText((String) obj, (PdfFont) extra);
break;
case DUPLICATE_ID_ENTRY:
throw new PdfUAConformanceException(MessageFormatUtil.format(
PdfUAExceptionMessageConstants.NON_UNIQUE_ID_ENTRY_IN_STRUCT_TREE_ROOT, obj));
case PDF_OBJECT:
checkPdfObject((PdfObject) obj);
break;
case CRYPTO:
checkCrypto((PdfDictionary) obj);
break;
}
}
/**
* Verify the conformity of the file specification dictionary.
*
* @param fileSpec the {@link PdfDictionary} containing file specification to be checked
*/
protected void checkFileSpec(PdfDictionary fileSpec) {
if (fileSpec.containsKey(PdfName.EF)) {
if (!fileSpec.containsKey(PdfName.F) || !fileSpec.containsKey(PdfName.UF)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.FILE_SPECIFICATION_DICTIONARY_SHALL_CONTAIN_F_KEY_AND_UF_KEY);
}
}
}
private void checkText(String str, PdfFont font) {
int index = FontCheckUtil.checkGlyphsOfText(str, font, new UaCharacterChecker());
if (index != -1) {
throw new PdfUAConformanceException(MessageFormatUtil.format(
PdfUAExceptionMessageConstants.GLYPH_IS_NOT_DEFINED_OR_WITHOUT_UNICODE, str.charAt(index)));
}
}
protected void checkMetadata(PdfCatalog catalog) {
if (catalog.getDocument().getPdfVersion().compareTo(PdfVersion.PDF_1_7) > 0) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.INVALID_PDF_VERSION);
}
PdfObject pdfMetadata = catalog.getPdfObject().get(PdfName.Metadata);
if (pdfMetadata == null || !pdfMetadata.isStream()) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.DOCUMENT_SHALL_CONTAIN_XMP_METADATA_STREAM);
}
byte[] metaBytes = ((PdfStream) pdfMetadata).getBytes();
try {
XMPMeta metadata = XMPMetaFactory.parseFromBuffer(metaBytes);
Integer part = metadata.getPropertyInteger(XMPConst.NS_PDFUA_ID, XMPConst.PART);
if (!Integer.valueOf(1).equals(part)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.METADATA_SHALL_CONTAIN_UA_VERSION_IDENTIFIER);
}
if (metadata.getProperty(XMPConst.NS_DC, XMPConst.TITLE) == null) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.METADATA_SHALL_CONTAIN_DC_TITLE_ENTRY);
}
} catch (XMPException e) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.DOCUMENT_SHALL_CONTAIN_XMP_METADATA_STREAM, e);
}
}
private void checkViewerPreferences(PdfCatalog catalog) {
PdfDictionary viewerPreferences = catalog.getPdfObject().getAsDictionary(PdfName.ViewerPreferences);
if (viewerPreferences == null) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.MISSING_VIEWER_PREFERENCES);
}
PdfObject displayDocTitle = viewerPreferences.get(PdfName.DisplayDocTitle);
if (!(displayDocTitle instanceof PdfBoolean)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.MISSING_VIEWER_PREFERENCES);
}
if (PdfBoolean.FALSE.equals(displayDocTitle)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.VIEWER_PREFERENCES_IS_FALSE);
}
}
private void checkOnWritingCanvasToContent(Object data) {
Stack> tagStack = getTagStack(data);
if (tagStack.isEmpty()) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.TAG_HASNT_BEEN_ADDED_BEFORE_CONTENT_ADDING);
}
final boolean insideRealContent = isInsideRealContent(tagStack);
final boolean insideArtifact = isInsideArtifact(tagStack);
if (insideRealContent && insideArtifact) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.REAL_CONTENT_INSIDE_ARTIFACT_OR_VICE_VERSA);
} else if (!insideRealContent && !insideArtifact) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.CONTENT_IS_NOT_REAL_CONTENT_AND_NOT_ARTIFACT);
}
}
private Stack> getTagStack(Object data) {
return (Stack>) data;
}
private void checkOnOpeningBeginMarkedContent(Object obj, Object extra) {
Tuple2 currentBmc = (Tuple2) extra;
checkStandardRoleMapping(currentBmc);
Stack> stack = getTagStack(obj);
if (stack.isEmpty()) {
return;
}
boolean isRealContent = isRealContent(currentBmc);
boolean isArtifact = PdfName.Artifact.equals(currentBmc.getFirst());
if (isArtifact && isInsideRealContent(stack)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.ARTIFACT_CANT_BE_INSIDE_REAL_CONTENT);
}
if (isRealContent && isInsideArtifact(stack)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.REAL_CONTENT_CANT_BE_INSIDE_ARTIFACT);
}
}
private void checkStandardRoleMapping(Tuple2 tag) {
final PdfNamespace namespace = tagStructureContext.getDocumentDefaultNamespace();
final String role = tag.getFirst().getValue();
if (!StandardRoles.ARTIFACT.equals(role) && !tagStructureContext.checkIfRoleShallBeMappedToStandardRole(role,
namespace)) {
throw new PdfUAConformanceException(
MessageFormatUtil.format(
PdfUAExceptionMessageConstants.TAG_MAPPING_DOESNT_TERMINATE_WITH_STANDARD_TYPE, role));
}
}
private boolean isInsideArtifact(Stack> tagStack) {
for (Tuple2 tag : tagStack) {
if (PdfName.Artifact.equals(tag.getFirst())) {
return true;
}
}
return false;
}
private boolean isInsideRealContent(Stack> tagStack) {
for (Tuple2 tag : tagStack) {
if (isRealContent(tag)) {
return true;
}
}
return false;
}
private boolean isRealContent(Tuple2 tag) {
if (PdfName.Artifact.equals(tag.getFirst())) {
return false;
}
PdfDictionary properties = tag.getSecond();
if (properties == null || !properties.containsKey(PdfName.MCID)) {
return false;
}
PdfMcr mcr = this.pdfDocument.getStructTreeRoot()
.findMcrByMcid(pdfDocument, (int) properties.getAsInt(PdfName.MCID));
if (mcr == null) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.CONTENT_WITH_MCID_BUT_MCID_NOT_FOUND_IN_STRUCT_TREE_ROOT);
}
return true;
}
private void checkCatalog(PdfCatalog catalog) {
PdfDictionary catalogDict = catalog.getPdfObject();
if (!catalogDict.containsKey(PdfName.Metadata)) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.METADATA_SHALL_BE_PRESENT_IN_THE_CATALOG_DICTIONARY);
}
if (!(catalogDict.get(PdfName.Lang) instanceof PdfString) || !BCP47Validator.validate(catalogDict.get(PdfName.Lang).toString())) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.DOCUMENT_SHALL_CONTAIN_VALID_LANG_ENTRY);
}
PdfDictionary markInfo = catalogDict.getAsDictionary(PdfName.MarkInfo);
if (markInfo != null && markInfo.containsKey(PdfName.Suspects)) {
PdfBoolean markInfoSuspects = markInfo.getAsBoolean(PdfName.Suspects);
if (markInfoSuspects != null && markInfoSuspects.getValue()) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.
SUSPECTS_ENTRY_IN_MARK_INFO_DICTIONARY_SHALL_NOT_HAVE_A_VALUE_OF_TRUE);
}
}
checkViewerPreferences(catalog);
checkMetadata(catalog);
checkOCProperties(catalogDict.getAsDictionary(PdfName.OCProperties));
}
private void checkStructureTreeRoot(PdfStructTreeRoot structTreeRoot) {
PdfDictionary roleMap = structTreeRoot.getRoleMap();
for (Map.Entry entry : roleMap.entrySet()) {
final String role = entry.getKey().getValue();
final IRoleMappingResolver roleMappingResolver = pdfDocument.getTagStructureContext()
.getRoleMappingResolver(role);
if (roleMappingResolver.currentRoleIsStandard()) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.ONE_OR_MORE_STANDARD_ROLE_REMAPPED);
}
}
TagTreeIterator tagTreeIterator = new TagTreeIterator(structTreeRoot);
tagTreeIterator.addHandler(new GraphicsCheckUtil.GraphicsHandler(context));
tagTreeIterator.addHandler(new FormulaCheckUtil.FormulaTagHandler(context));
tagTreeIterator.addHandler(new NoteCheckUtil.NoteTagHandler(context));
tagTreeIterator.addHandler(new HeadingsChecker.HeadingHandler(context));
tagTreeIterator.addHandler(new TableCheckUtil.TableHandler(context));
tagTreeIterator.addHandler(new AnnotationCheckUtil.AnnotationHandler(context));
tagTreeIterator.addHandler(new FormCheckUtil.FormTagHandler(context));
tagTreeIterator.traverse();
}
private void checkOCProperties(PdfDictionary ocProperties) {
if (ocProperties == null) {
return;
}
if (!(ocProperties.get(PdfName.Configs) instanceof PdfArray)) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.OCG_PROPERTIES_CONFIG_SHALL_BE_AN_ARRAY);
}
PdfArray configs = ocProperties.getAsArray(PdfName.Configs);
if (configs != null && !configs.isEmpty()) {
PdfDictionary d = ocProperties.getAsDictionary(PdfName.D);
checkOCGNameAndASKey(d);
for (PdfObject config : configs) {
checkOCGNameAndASKey((PdfDictionary) config);
}
PdfArray ocgsArray = ocProperties.getAsArray(PdfName.OCGs);
if (ocgsArray != null) {
for (PdfObject ocg : ocgsArray) {
checkOCGNameAndASKey((PdfDictionary) ocg);
}
}
}
}
private void checkOCGNameAndASKey(PdfDictionary dict) {
if (dict == null) {
return;
}
if (dict.get(PdfName.AS) != null) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.OCG_SHALL_NOT_CONTAIN_AS_ENTRY);
}
if (!(dict.get(PdfName.Name) instanceof PdfString) || (((PdfString)dict.get(PdfName.Name)).toString().isEmpty())) {
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.NAME_ENTRY_IS_MISSING_OR_EMPTY_IN_OCG);
}
}
private void checkFonts(Collection fontsInDocument) {
Set fontNamesThatAreNotEmbedded = new HashSet<>();
for (PdfFont font : fontsInDocument) {
if (!font.isEmbedded()) {
fontNamesThatAreNotEmbedded.add(font.getFontProgram().getFontNames().getFontName());
}
}
if (!fontNamesThatAreNotEmbedded.isEmpty()) {
throw new PdfUAConformanceException(
MessageFormatUtil.format(
PdfUAExceptionMessageConstants.FONT_SHOULD_BE_EMBEDDED,
String.join(", ", fontNamesThatAreNotEmbedded)
));
}
}
private void checkCrypto(PdfDictionary encryptionDictionary) {
if (encryptionDictionary != null) {
if (!(encryptionDictionary.get(PdfName.P) instanceof PdfNumber)) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.P_VALUE_IS_ABSENT_IN_ENCRYPTION_DICTIONARY);
}
int permissions = ((PdfNumber) encryptionDictionary.get(PdfName.P)).intValue();
if ((EncryptionConstants.ALLOW_SCREENREADERS & permissions) == 0) {
throw new PdfUAConformanceException(
PdfUAExceptionMessageConstants.TENTH_BIT_OF_P_VALUE_IN_ENCRYPTION_SHOULD_BE_NON_ZERO);
}
}
}
/**
* This method checks the requirements that must be fulfilled by a COS
* object in a PDF/UA document.
*
* @param obj the COS object that must be checked
*/
private void checkPdfObject(PdfObject obj) {
if (obj.getType() == PdfObject.DICTIONARY) {
PdfDictionary dict = (PdfDictionary) obj;
PdfName type = dict.getAsName(PdfName.Type);
if (PdfName.Filespec.equals(type)) {
checkFileSpec(dict);
}
}
}
private static final class UaCharacterChecker implements FontCheckUtil.CharacterChecker {
@Override
public boolean check(int ch, PdfFont font) {
if (font.containsGlyph(ch)) {
return !font.getGlyph(ch).hasValidUnicode();
} else {
return true;
}
}
}
}