com.itextpdf.kernel.utils.CompareTool Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
package com.itextpdf.kernel.utils;
import com.itextpdf.commons.actions.contexts.IMetaInfo;
import com.itextpdf.commons.utils.FileUtil;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.util.GhostscriptHelper;
import com.itextpdf.io.util.ImageMagickHelper;
import com.itextpdf.io.util.UrlUtil;
import com.itextpdf.io.util.XmlUtil;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.DocumentProperties;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfBoolean;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.kernel.pdf.PdfIndirectReference;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfNameTree;
import com.itextpdf.kernel.pdf.PdfNumber;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.ReaderProperties;
import com.itextpdf.kernel.pdf.StampingProperties;
import com.itextpdf.kernel.pdf.WriterProperties;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.utils.objectpathitems.ObjectPath;
import com.itextpdf.kernel.utils.objectpathitems.TrailerPath;
import com.itextpdf.kernel.xmp.PdfConst;
import com.itextpdf.kernel.xmp.XMPConst;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.XMPUtils;
import com.itextpdf.kernel.xmp.options.ParseOptions;
import com.itextpdf.kernel.xmp.options.SerializeOptions;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
/**
* This class provides means to compare two PDF files both by content and visually
* and gives the report on their differences.
*
* For visual comparison it uses external tools: Ghostscript and ImageMagick, which
* should be installed on your machine. To allow CompareTool to use them, you need
* to pass either java properties or environment variables with names "ITEXT_GS_EXEC" and
* "ITEXT_MAGICK_COMPARE_EXEC", which would contain the commands to execute the
* Ghostscript and ImageMagick tools.
*
* CompareTool class was mainly designed for the testing purposes of iText in order to
* ensure that the same code produces the same PDF document. For this reason you will
* often encounter such parameter names as "outDoc" and "cmpDoc" which stand for output
* document and document-for-comparison. The first one is viewed as the current result,
* and the second one is referred as normal or ideal result. OutDoc is compared to the
* ideal cmpDoc. Therefore all reports of the comparison are in the form: "Expected ...,
* but was ...". This should be interpreted in the following way: "expected" part stands
* for the content of the cmpDoc and "but was" part stands for the content of the outDoc.
*/
public class CompareTool {
private static final String FILE_PROTOCOL = "file://";
private static final String UNEXPECTED_NUMBER_OF_PAGES = "Unexpected number of pages for .";
private static final String DIFFERENT_PAGES = "File " + FILE_PROTOCOL + " differs on page .";
private static final String IGNORED_AREAS_PREFIX = "ignored_areas_";
private static final String VERSION_REGEXP = "(\\d+\\.)+\\d+(-SNAPSHOT)?";
private static final String VERSION_REPLACEMENT = "";
private static final String COPYRIGHT_REGEXP = "\u00a9\\d+-\\d+ (?:iText Group NV|Apryse Group NV)";
private static final String COPYRIGHT_REPLACEMENT = "\u00a9 Apryse Group NV";
private static final String NEW_LINES = "\\r|\\n";
private String cmpPdfName;
private String outPdfName;
private String cmpPdf;
private String cmpImage;
private String outPdf;
private String outImage;
private ReaderProperties outProps;
private ReaderProperties cmpProps;
private List outPagesRef;
private List cmpPagesRef;
private int compareByContentErrorsLimit = 1000;
private boolean generateCompareByContentXmlReport = false;
private boolean encryptionCompareEnabled = false;
private boolean useCachedPagesForComparison = true;
private IMetaInfo metaInfo;
private String gsExec;
private String compareExec;
public CompareTool() {
}
CompareTool(String gsExec, String compareExec) {
this.gsExec = gsExec;
this.compareExec = compareExec;
}
/**
* Create {@link PdfWriter} optimized for tests.
*
* @param filename File to write to when necessary.
* @return {@link PdfWriter} to be used in tests.
* @throws FileNotFoundException if the file exists but is a directory
* rather than a regular file, does not exist but cannot
* be created, or cannot be opened for any other reason.
*/
public static PdfWriter createTestPdfWriter(String filename) throws FileNotFoundException {
return createTestPdfWriter(filename, new WriterProperties());
}
/**
* Create {@link PdfWriter} optimized for tests.
*
* @param filename File to write to when necessary.
* @param properties {@link WriterProperties} to use.
* @return {@link PdfWriter} to be used in tests.
* @throws FileNotFoundException if the file exists but is a directory
* rather than a regular file, does not exist but cannot
* be created, or cannot be opened for any other reason.
*/
public static PdfWriter createTestPdfWriter(String filename, WriterProperties properties) throws FileNotFoundException {
return new PdfWriter(filename, properties);
}
/**
* Create {@link PdfReader} out of the data created recently or read from disk.
*
* @param filename File to read the data from when necessary.
* @return {@link PdfReader} to be used in tests.
* @throws IOException on error
*/
public static PdfReader createOutputReader(String filename) throws IOException {
return CompareTool.createOutputReader(filename, new ReaderProperties());
}
/**
* Create {@link PdfReader} out of the data created recently or read from disk.
*
* @param filename File to read the data from when necessary.
* @param properties {@link ReaderProperties} to use.
* @return {@link PdfReader} to be used in tests.
* @throws IOException on error
*/
public static PdfReader createOutputReader(String filename, ReaderProperties properties) throws IOException {
MemoryFirstPdfWriter outWriter = MemoryFirstPdfWriter.get(filename);
if (outWriter != null) {
return new PdfReader(new ByteArrayInputStream(outWriter.getBAOutputStream().toByteArray()), properties);
} else {
return new PdfReader(filename, properties);
}
}
/**
* Clean up memory occupied for the tests.
*
* @param path Path to clean up memory for.
*/
public static void cleanup(String path) {
MemoryFirstPdfWriter.cleanup(path);
}
/**
* Compares two PDF documents by content starting from Catalog dictionary and then recursively comparing
* corresponding objects which are referenced from it. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* The main difference between this method and the {@link #compareByContent(String, String, String, String)}
* methods is the return value. This method returns a {@link CompareResult} class instance, which could be used
* in code, whilst compareByContent methods in case of the differences simply return String value, which could
* only be printed. Also, keep in mind that this method doesn't perform visual comparison of the documents.
*
* For more explanations about what outDoc and cmpDoc are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outDocument a {@link PdfDocument} corresponding to the output file, which is to be compared with cmp-file.
* @param cmpDocument a {@link PdfDocument} corresponding to the cmp-file, which is to be compared with output file.
* @return the report on comparison of two files in the form of the custom class {@link CompareResult} instance.
* @see CompareResult
*/
public CompareResult compareByCatalog(PdfDocument outDocument, PdfDocument cmpDocument) {
CompareResult compareResult = null;
compareResult = new CompareResult(compareByContentErrorsLimit);
ObjectPath catalogPath = new ObjectPath(cmpDocument.getCatalog().getPdfObject().getIndirectReference(),
outDocument.getCatalog().getPdfObject().getIndirectReference());
Set ignoredCatalogEntries = new LinkedHashSet<>(Arrays.asList(PdfName.Metadata));
compareDictionariesExtended(outDocument.getCatalog().getPdfObject(), cmpDocument.getCatalog().getPdfObject(),
catalogPath, compareResult, ignoredCatalogEntries);
// Method compareDictionariesExtended eventually calls compareObjects method which doesn't compare page objects.
// At least for now compare page dictionaries explicitly here like this.
if (cmpPagesRef == null || outPagesRef == null) {
return compareResult;
}
if (outPagesRef.size() != cmpPagesRef.size() && !compareResult.isMessageLimitReached()) {
compareResult.addError(catalogPath, "Documents have different numbers of pages.");
}
for (int i = 0; i < Math.min(cmpPagesRef.size(), outPagesRef.size()); i++) {
if (compareResult.isMessageLimitReached()) {
break;
}
ObjectPath currentPath = new ObjectPath(cmpPagesRef.get(i), outPagesRef.get(i));
PdfDictionary outPageDict = (PdfDictionary) outPagesRef.get(i).getRefersTo();
PdfDictionary cmpPageDict = (PdfDictionary) cmpPagesRef.get(i).getRefersTo();
compareDictionariesExtended(outPageDict, cmpPageDict, currentPath, compareResult);
}
return compareResult;
}
/**
* Disables the default logic of pages comparison.
* This option makes sense only for {@link CompareTool#compareByCatalog(PdfDocument, PdfDocument)} method.
*
* By default, pages are treated as special objects and if they are met in the process of comparison, then they are
* not checked as objects, but rather simply checked that they have same page numbers in both documents.
* This behaviour is intended for the {@link CompareTool#compareByContent}
* set of methods, because in them documents are compared in page by page basis. Thus, we don't need to check if pages
* are of the same content when they are met in comparison process, we are sure that we will compare their content or
* we have already compared them.
*
* However, if you would use {@link CompareTool#compareByCatalog} with default behaviour
* of pages comparison, pages won't be checked at all, every time when reference to the page dictionary is met,
* only page numbers will be compared for both documents. You can say that in this case, comparison will be performed
* for all document's catalog entries except /Pages (However in fact, document's page tree structures will be compared,
* but pages themselves - won't).
*
* @return this {@link CompareTool} instance.
*/
public CompareTool disableCachedPagesComparison() {
this.useCachedPagesForComparison = false;
return this;
}
/**
* Sets the maximum errors count which will be returned as the result of the comparison.
*
* @param compareByContentMaxErrorCount the errors count.
* @return this CompareTool instance.
*/
public CompareTool setCompareByContentErrorsLimit(int compareByContentMaxErrorCount) {
this.compareByContentErrorsLimit = compareByContentMaxErrorCount;
return this;
}
/**
* Enables or disables the generation of the comparison report in the form of an xml document.
*
* IMPORTANT NOTE: this flag affects only the comparison performed by compareByContent methods!
*
* @param generateCompareByContentXmlReport true to enable xml report generation, false - to disable.
* @return this CompareTool instance.
*/
public CompareTool setGenerateCompareByContentXmlReport(boolean generateCompareByContentXmlReport) {
this.generateCompareByContentXmlReport = generateCompareByContentXmlReport;
return this;
}
/**
* Sets {@link IMetaInfo} info that will be used for both read and written documents creation.
*
* @param metaInfo meta info to set
*/
public void setEventCountingMetaInfo(IMetaInfo metaInfo) {
this.metaInfo = metaInfo;
}
/**
* Enables the comparison of the encryption properties of the documents. Encryption properties comparison
* results are returned along with all other comparison results.
*
* IMPORTANT NOTE: this flag affects only the comparison performed by compareByContent methods!
* {@link #compareByCatalog(PdfDocument, PdfDocument)} doesn't compare encryption properties
* because encryption properties aren't part of the document's Catalog.
*
* @return this CompareTool instance.
*/
public CompareTool enableEncryptionCompare() {
this.encryptionCompareEnabled = true;
return this;
}
/**
* Gets {@link ReaderProperties} to be passed later to the {@link PdfReader} of the output document.
*
* Documents for comparison are opened in reader mode. This method is intended to alter {@link ReaderProperties}
* which are used to open the output document. This is particularly useful for comparison of encrypted documents.
*
* For more explanations about what outDoc and cmpDoc are see last paragraph of the {@link CompareTool}
* class description.
*
* @return {@link ReaderProperties} instance to be passed later to the {@link PdfReader} of the output document.
*/
public ReaderProperties getOutReaderProperties() {
if (outProps == null) {
outProps = new ReaderProperties();
}
return outProps;
}
/**
* Gets {@link ReaderProperties} to be passed later to the {@link PdfReader} of the cmp document.
*
* Documents for comparison are opened in reader mode. This method is intended to alter {@link ReaderProperties}
* which are used to open the cmp document. This is particularly useful for comparison of encrypted documents.
*
* For more explanations about what outDoc and cmpDoc are see last paragraph of the {@link CompareTool}
* class description.
*
* @return {@link ReaderProperties} instance to be passed later to the {@link PdfReader} of the cmp document.
*/
public ReaderProperties getCmpReaderProperties() {
if (cmpProps == null) {
cmpProps = new ReaderProperties();
}
return cmpProps;
}
/**
* Compares two documents visually. For the comparison two external tools are used: Ghostscript and ImageMagick.
* For more info about needed configuration for visual comparison process see {@link CompareTool} class description.
*
* Note, that this method uses {@link ImageMagickHelper} and {@link GhostscriptHelper} classes and therefore may
* create temporary files and directories.
*
* During comparison for every page of the two documents an image file will be created in the folder specified by
* outPath parameter. Then those page images will be compared and if there are any differences for some pages,
* another image file will be created with marked differences on it.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked differences if there is any.
* @return string containing list of the pages that are visually different, or null if there are no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and
* an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
*/
public String compareVisually(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix) throws InterruptedException, IOException {
return compareVisually(outPdf, cmpPdf, outPath, differenceImagePrefix, null);
}
/**
* Compares two documents visually. For the comparison two external tools are used: Ghostscript and ImageMagick.
* For more info about needed configuration for visual comparison process see {@link CompareTool} class description.
*
* Note, that this method uses {@link ImageMagickHelper} and {@link GhostscriptHelper} classes and therefore may
* create temporary files and directories.
*
* During comparison for every page of two documents an image file will be created in the folder specified by
* outPath parameter. Then those page images will be compared and if there are any differences for some pages,
* another image file will be created with marked differences on it.
*
* It is possible to ignore certain areas of the document pages during visual comparison. This is useful for example
* in case if documents should be the same except certain page area with date on it. In this case, in the folder
* specified by the outPath, new pdf documents will be created with the black rectangles at the specified ignored
* areas, and visual comparison will be performed on these new documents.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked differences if there is any.
* @param ignoredAreas a map with one-based page numbers as keys and lists of ignored rectangles as values.
* @return string containing list of the pages that are visually different, or null if there are no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and
* an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
*/
public String compareVisually(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
init(outPdf, cmpPdf);
System.out.println("Out pdf: " + UrlUtil.getNormalizedFileUriString(outPdf));
System.out.println("Cmp pdf: " + UrlUtil.getNormalizedFileUriString(cmpPdf)+ "\n");
return compareVisually(outPath, differenceImagePrefix, ignoredAreas);
}
/**
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
* For this overload, differenceImagePrefix value is generated using diff_%outPdfFileName%_ format.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath) throws InterruptedException, IOException {
return compareByContent(outPdf, cmpPdf, outPath, null, null, null, null);
}
/**
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix) throws InterruptedException, IOException {
return compareByContent(outPdf, cmpPdf, outPath, differenceImagePrefix, null, null, null);
}
/**
* This method overload is used to compare two encrypted PDF documents. Document passwords are passed with
* outPass and cmpPass parameters.
*
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
* For more info see {@link #compareVisually(String, String, String, String)}.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there is any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @param outPass password for the encrypted document specified by the outPdf absolute path.
* @param cmpPass password for the encrypted document specified by the cmpPdf absolute path.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, byte[] outPass, byte[] cmpPass) throws InterruptedException, IOException {
return compareByContent(outPdf, cmpPdf, outPath, differenceImagePrefix, null, outPass, cmpPass);
}
/**
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @param ignoredAreas a map with one-based page numbers as keys and lists of ignored rectangles as values.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
return compareByContent(outPdf, cmpPdf, outPath, differenceImagePrefix, ignoredAreas, null, null);
}
/**
* This method overload is used to compare two encrypted PDF documents. Document passwords are passed with
* outPass and cmpPass parameters.
*
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @param ignoredAreas a map with one-based page numbers as keys and lists of ignored rectangles as values.
* @param outPass password for the encrypted document specified by the outPdf absolute path.
* @param cmpPass password for the encrypted document specified by the cmpPdf absolute path.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for ghostscript or imagemagic processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, Map> ignoredAreas, byte[] outPass, byte[] cmpPass) throws InterruptedException, IOException {
init(outPdf, cmpPdf);
System.out.println("Out pdf: " + UrlUtil.getNormalizedFileUriString(outPdf));
System.out.println("Cmp pdf: " + UrlUtil.getNormalizedFileUriString(cmpPdf)+ "\n");
setPassword(outPass, cmpPass);
return compareByContent(outPath, differenceImagePrefix, ignoredAreas);
}
/**
* Simple method that compares two given PdfDictionaries by content. This is "deep" comparing, which means that all
* nested objects are also compared by content.
*
* @param outDict dictionary to compare.
* @param cmpDict dictionary to compare.
* @return true if dictionaries are equal by content, otherwise false.
*/
public boolean compareDictionaries(PdfDictionary outDict, PdfDictionary cmpDict) {
return compareDictionariesExtended(outDict, cmpDict, null, null);
}
/**
* Recursively compares structures of two corresponding dictionaries from out and cmp PDF documents. You can roughly
* imagine it as depth-first traversal of the two trees that represent pdf objects structure of the documents.
*
* Both out and cmp {@link PdfDictionary} shall have indirect references.
*
* By default page dictionaries are excluded from the comparison when met and are instead compared in a special manner,
* simply comparing their page numbers. This behavior can be disabled by calling {@link #disableCachedPagesComparison()}.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outDict an indirect {@link PdfDictionary} from the output file, which is to be compared to cmp-file dictionary.
* @param cmpDict an indirect {@link PdfDictionary} from the cmp-file file, which is to be compared to output file dictionary.
* @return {@link CompareResult} instance containing differences between the two dictionaries,
* or {@code null} if dictionaries are equal.
*/
public CompareResult compareDictionariesStructure(PdfDictionary outDict, PdfDictionary cmpDict) {
return compareDictionariesStructure(outDict, cmpDict, null);
}
/**
* Recursively compares structures of two corresponding dictionaries from out and cmp PDF documents. You can roughly
* imagine it as depth-first traversal of the two trees that represent pdf objects structure of the documents.
*
* Both out and cmp {@link PdfDictionary} shall have indirect references.
*
* By default page dictionaries are excluded from the comparison when met and are instead compared in a special manner,
* simply comparing their page numbers. This behavior can be disabled by calling {@link #disableCachedPagesComparison()}.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outDict an indirect {@link PdfDictionary} from the output file, which is to be compared to cmp-file dictionary.
* @param cmpDict an indirect {@link PdfDictionary} from the cmp-file file, which is to be compared to output file dictionary.
* @param excludedKeys a {@link Set} of names that designate entries from {@code outDict} and {@code cmpDict} dictionaries
* which are to be skipped during comparison.
* @return {@link CompareResult} instance containing differences between the two dictionaries,
* or {@code null} if dictionaries are equal.
*/
public CompareResult compareDictionariesStructure(PdfDictionary outDict, PdfDictionary cmpDict, Set excludedKeys) {
if (outDict.getIndirectReference() == null || cmpDict.getIndirectReference() == null) {
throw new IllegalArgumentException("The 'outDict' and 'cmpDict' objects shall have indirect references.");
}
CompareResult compareResult = new CompareResult(compareByContentErrorsLimit);
final ObjectPath currentPath = new ObjectPath(cmpDict.getIndirectReference(), outDict.getIndirectReference());
if (!compareDictionariesExtended(outDict, cmpDict, currentPath, compareResult, excludedKeys)) {
assert !compareResult.isOk();
System.out.println(compareResult.getReport());
return compareResult;
}
assert compareResult.isOk();
return null;
}
/**
* Compares structures of two corresponding streams from out and cmp PDF documents. You can roughly
* imagine it as depth-first traversal of the two trees that represent pdf objects structure of the documents.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outStream a {@link PdfStream} from the output file, which is to be compared to cmp-file stream.
* @param cmpStream a {@link PdfStream} from the cmp-file file, which is to be compared to output file stream.
* @return {@link CompareResult} instance containing differences between the two streams,
* or {@code null} if streams are equal.
*/
public CompareResult compareStreamsStructure(PdfStream outStream, PdfStream cmpStream) {
CompareResult compareResult = new CompareResult(compareByContentErrorsLimit);
final ObjectPath currentPath = new ObjectPath(cmpStream.getIndirectReference(),
outStream.getIndirectReference());
if (!compareStreamsExtended(outStream, cmpStream, currentPath, compareResult)) {
assert !compareResult.isOk();
System.out.println(compareResult.getReport());
return compareResult;
}
assert compareResult.isOk();
return null;
}
/**
* Simple method that compares two given PdfStreams by content. This is "deep" comparing, which means that all
* nested objects are also compared by content.
*
* @param outStream stream to compare.
* @param cmpStream stream to compare.
* @return true if stream are equal by content, otherwise false.
*/
public boolean compareStreams(PdfStream outStream, PdfStream cmpStream) {
return compareStreamsExtended(outStream, cmpStream, null, null);
}
/**
* Simple method that compares two given PdfArrays by content. This is "deep" comparing, which means that all
* nested objects are also compared by content.
*
* @param outArray array to compare.
* @param cmpArray array to compare.
* @return true if arrays are equal by content, otherwise false.
*/
public boolean compareArrays(PdfArray outArray, PdfArray cmpArray) {
return compareArraysExtended(outArray, cmpArray, null, null);
}
/**
* Simple method that compares two given PdfNames.
*
* @param outName name to compare.
* @param cmpName name to compare.
* @return true if names are equal, otherwise false.
*/
public boolean compareNames(PdfName outName, PdfName cmpName) {
return cmpName.equals(outName);
}
/**
* Simple method that compares two given PdfNumbers.
*
* @param outNumber number to compare.
* @param cmpNumber number to compare.
* @return true if numbers are equal, otherwise false.
*/
public boolean compareNumbers(PdfNumber outNumber, PdfNumber cmpNumber) {
return cmpNumber.getValue() == outNumber.getValue();
}
/**
* Simple method that compares two given PdfStrings.
*
* @param outString string to compare.
* @param cmpString string to compare.
* @return true if strings are equal, otherwise false.
*/
public boolean compareStrings(PdfString outString, PdfString cmpString) {
return cmpString.getValue().equals(outString.getValue());
}
/**
* Simple method that compares two given PdfBooleans.
*
* @param outBoolean boolean to compare.
* @param cmpBoolean boolean to compare.
* @return true if booleans are equal, otherwise false.
*/
public boolean compareBooleans(PdfBoolean outBoolean, PdfBoolean cmpBoolean) {
return cmpBoolean.getValue() == outBoolean.getValue();
}
/**
* Compares xmp metadata of the two given PDF documents.
*
* @param outPdf the absolute path to the output file, which xmp is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which xmp is to be compared to output file.
* @return text report on the xmp differences, or null if there are no differences.
*/
public String compareXmp(String outPdf, String cmpPdf) {
return compareXmp(outPdf, cmpPdf, false);
}
/**
* Compares xmp metadata of the two given PDF documents.
*
* @param outPdf the absolute path to the output file, which xmp is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which xmp is to be compared to output file.
* @param ignoreDateAndProducerProperties true, if to ignore differences in date or producer xmp metadata
* properties.
* @return text report on the xmp differences, or null if there are no differences.
*/
public String compareXmp(String outPdf, String cmpPdf, boolean ignoreDateAndProducerProperties) {
init(outPdf, cmpPdf);
try (PdfReader readerCmp = CompareTool.createOutputReader(this.cmpPdf);
PdfDocument cmpDocument = new PdfDocument(readerCmp,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
PdfReader readerOut = CompareTool.createOutputReader(this.outPdf);
PdfDocument outDocument = new PdfDocument(readerOut,
new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
byte[] cmpBytes = cmpDocument.getXmpMetadata(), outBytes = outDocument.getXmpMetadata();
if (ignoreDateAndProducerProperties) {
XMPMeta xmpMeta = XMPMetaFactory.parseFromBuffer(cmpBytes, new ParseOptions().setOmitNormalization(true));
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_XMP, PdfConst.CreateDate, true, true);
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_XMP, PdfConst.ModifyDate, true, true);
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_XMP, PdfConst.MetadataDate, true, true);
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_PDF, PdfConst.Producer, true, true);
cmpBytes = XMPMetaFactory.serializeToBuffer(xmpMeta, new SerializeOptions(SerializeOptions.SORT));
xmpMeta = XMPMetaFactory.parseFromBuffer(outBytes, new ParseOptions().setOmitNormalization(true));
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_XMP, PdfConst.CreateDate, true, true);
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_XMP, PdfConst.ModifyDate, true, true);
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_XMP, PdfConst.MetadataDate, true, true);
XMPUtils.removeProperties(xmpMeta, XMPConst.NS_PDF, PdfConst.Producer, true, true);
outBytes = XMPMetaFactory.serializeToBuffer(xmpMeta, new SerializeOptions(SerializeOptions.SORT));
}
if (!compareXmls(cmpBytes, outBytes)) {
return "The XMP packages different!";
}
} catch (Exception ex) {
return "XMP parsing failure!";
}
return null;
}
/**
* Utility method that provides simple comparison of the two xml files stored in byte arrays.
*
* @param xml1 first xml file data to compare.
* @param xml2 second xml file data to compare.
* @return true if xml structures are identical, false otherwise.
* @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
* which satisfies the configuration requested.
* @throws SAXException if any XML parse errors occur.
* @throws IOException If any IO errors occur during reading XML files.
*/
public boolean compareXmls(byte[] xml1, byte[] xml2) throws ParserConfigurationException, SAXException, IOException {
return XmlUtils.compareXmls(new ByteArrayInputStream(xml1), new ByteArrayInputStream(xml2));
}
/**
* Utility method that provides simple comparison of the two xml files.
*
* @param outXmlFile absolute path to the out xml file to compare.
* @param cmpXmlFile absolute path to the cmp xml file to compare.
* @return true if xml structures are identical, false otherwise.
* @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
* which satisfies the configuration requested.
* @throws SAXException if any XML parse errors occur.
* @throws IOException If any IO errors occur during reading XML files.
*/
public boolean compareXmls(String outXmlFile, String cmpXmlFile) throws ParserConfigurationException, SAXException, IOException {
System.out.println("Out xml: " + UrlUtil.getNormalizedFileUriString(outXmlFile));
System.out.println("Cmp xml: " + UrlUtil.getNormalizedFileUriString(cmpXmlFile) + "\n");
try (InputStream outXmlStream = FileUtil.getInputStreamForFile(outXmlFile);
InputStream cmpXmlStream = FileUtil.getInputStreamForFile(cmpXmlFile)) {
return XmlUtils.compareXmls(outXmlStream, cmpXmlStream);
}
}
/**
* Compares document info dictionaries of two pdf documents.
*
* This method overload is used to compare two encrypted PDF documents. Document passwords are passed with
* outPass and cmpPass parameters.
*
* @param outPdf the absolute path to the output file, which info is to be compared to cmp-file info.
* @param cmpPdf the absolute path to the cmp-file, which info is to be compared to output file info.
* @param outPass password for the encrypted document specified by the outPdf absolute path.
* @param cmpPass password for the encrypted document specified by the cmpPdf absolute path.
* @return text report on the differences in documents infos.
* @throws IOException if PDF reader cannot be created due to IO issues
*/
public String compareDocumentInfo(String outPdf, String cmpPdf, byte[] outPass, byte[] cmpPass) throws IOException {
System.out.print("[itext] INFO Comparing document info.......");
String message = null;
setPassword(outPass, cmpPass);
try (PdfReader readerOut = CompareTool.createOutputReader(outPdf, getOutReaderProperties());
PdfDocument outDocument = new PdfDocument(readerOut,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
PdfReader readerCmp = CompareTool.createOutputReader(cmpPdf, getCmpReaderProperties());
PdfDocument cmpDocument = new PdfDocument(readerCmp,
new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
String[] cmpInfo = convertDocInfoToStrings(cmpDocument.getDocumentInfo());
String[] outInfo = convertDocInfoToStrings(outDocument.getDocumentInfo());
for (int i = 0; i < cmpInfo.length; ++i) {
if (!cmpInfo[i].equals(outInfo[i])) {
message = MessageFormatUtil.format("Document info fail. Expected: \"{0}\", actual: \"{1}\"", cmpInfo[i], outInfo[i]);
break;
}
}
}
if (message == null) {
System.out.println("OK");
} else {
CompareTool.writeOnDisk(outPdf);
CompareTool.writeOnDiskIfNotExists(cmpPdf);
System.out.println("Fail");
}
System.out.flush();
return message;
}
/**
* Compares document info dictionaries of two pdf documents.
*
* @param outPdf the absolute path to the output file, which info is to be compared to cmp-file info.
* @param cmpPdf the absolute path to the cmp-file, which info is to be compared to output file info.
* @return text report on the differences in documents infos.
* @throws IOException if PDF reader cannot be created due to IO issues
*/
public String compareDocumentInfo(String outPdf, String cmpPdf) throws IOException {
return compareDocumentInfo(outPdf, cmpPdf, null, null);
}
/**
* Checks if two documents have identical link annotations on corresponding pages.
*
* @param outPdf the absolute path to the output file, which links are to be compared to cmp-file links.
* @param cmpPdf the absolute path to the cmp-file, which links are to be compared to output file links.
* @return text report on the differences in documents links.
* @throws IOException if PDF reader cannot be created due to IO issues
*/
public String compareLinkAnnotations(String outPdf, String cmpPdf) throws IOException {
System.out.print("[itext] INFO Comparing link annotations....");
String message = null;
try (PdfReader readerOut = CompareTool.createOutputReader(outPdf);
PdfDocument outDocument = new PdfDocument(readerOut,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
PdfReader readerCmp = CompareTool.createOutputReader(cmpPdf);
PdfDocument cmpDocument = new PdfDocument(readerCmp,
new DocumentProperties().setEventCountingMetaInfo(metaInfo))){
for (int i = 0; i < outDocument.getNumberOfPages() && i < cmpDocument.getNumberOfPages(); i++) {
List outLinks = getLinkAnnotations(i + 1, outDocument);
List cmpLinks = getLinkAnnotations(i + 1, cmpDocument);
if (cmpLinks.size() != outLinks.size()) {
message = MessageFormatUtil.format("Different number of links on page {0}.", i + 1);
break;
}
for (int j = 0; j < cmpLinks.size(); j++) {
if (!compareLinkAnnotations(cmpLinks.get(j), outLinks.get(j), cmpDocument, outDocument)) {
message = MessageFormatUtil.format("Different links on page {0}.\n{1}\n{2}", i + 1, cmpLinks.get(j).toString(), outLinks.get(j).toString());
break;
}
}
}
}
if (message == null) {
System.out.println("OK");
} else {
CompareTool.writeOnDisk(outPdf);
CompareTool.writeOnDiskIfNotExists(cmpPdf);
System.out.println("Fail");
}
System.out.flush();
return message;
}
/**
* Compares tag structures of the two PDF documents.
*
* This method creates xml files in the same folder with outPdf file. These xml files contain documents tag structures
* converted into the xml structure. These xml files are compared if they are equal.
*
* @param outPdf the absolute path to the output file, which tags are to be compared to cmp-file tags.
* @param cmpPdf the absolute path to the cmp-file, which tags are to be compared to output file tags.
* @return text report of the differences in documents tags.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
* which satisfies the configuration requested.
* @throws SAXException if any XML parse errors occur.
*/
public String compareTagStructures(String outPdf, String cmpPdf) throws IOException, ParserConfigurationException, SAXException {
System.out.print("[itext] INFO Comparing tag structures......");
String outXmlPath = outPdf.replace(".pdf", ".xml");
String cmpXmlPath = outPdf.replace(".pdf", ".cmp.xml");
String message = null;
try (PdfReader readerOut = CompareTool.createOutputReader(outPdf);
PdfDocument docOut = new PdfDocument(readerOut,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
FileOutputStream xmlOut = new FileOutputStream(outXmlPath)) {
new TaggedPdfReaderTool(docOut).setRootTag("root").convertToXml(xmlOut);
}
try (PdfReader readerCmp = CompareTool.createOutputReader(cmpPdf);
PdfDocument docCmp = new PdfDocument(readerCmp,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
FileOutputStream xmlCmp = new FileOutputStream(cmpXmlPath)) {
new TaggedPdfReaderTool(docCmp).setRootTag("root").convertToXml(xmlCmp);
}
if (!compareXmls(outXmlPath, cmpXmlPath)) {
message = "The tag structures are different.";
}
if (message == null) {
System.out.println("OK");
} else {
CompareTool.writeOnDisk(outPdf);
CompareTool.writeOnDiskIfNotExists(cmpPdf);
System.out.println("Fail");
}
System.out.flush();
return message;
}
/**
* Converts document info into a string array.
*
* Converts document info into a string array. It can be used to compare PdfDocumentInfo later on.
* Default implementation retrieves title, author, subject, keywords and producer.
*
* @param info an instance of PdfDocumentInfo to be converted.
* @return String array with all the document info tester is interested in.
*/
protected String[] convertDocInfoToStrings(PdfDocumentInfo info) {
String[] convertedInfo = new String[]{"", "", "", "", ""};
String infoValue = info.getTitle();
if (infoValue != null)
convertedInfo[0] = infoValue;
infoValue = info.getAuthor();
if (infoValue != null)
convertedInfo[1] = infoValue;
infoValue = info.getSubject();
if (infoValue != null)
convertedInfo[2] = infoValue;
infoValue = info.getKeywords();
if (infoValue != null)
convertedInfo[3] = infoValue;
infoValue = info.getProducer();
if (infoValue != null) {
convertedInfo[4] = convertProducerLine(infoValue);
}
return convertedInfo;
}
String convertProducerLine(String producer) {
return producer.replaceAll(VERSION_REGEXP, VERSION_REPLACEMENT).replaceAll(COPYRIGHT_REGEXP,
COPYRIGHT_REPLACEMENT);
}
private void init(String outPdf, String cmpPdf) {
this.outPdf = outPdf;
this.cmpPdf = cmpPdf;
outPdfName = new File(outPdf).getName();
cmpPdfName = new File(cmpPdf).getName();
outImage = outPdfName;
if (cmpPdfName.startsWith("cmp_")) {
cmpImage = cmpPdfName;
} else {
cmpImage = "cmp_" + cmpPdfName;
}
}
private void setPassword(byte[] outPass, byte[] cmpPass) {
if (outPass != null) {
getOutReaderProperties().setPassword(outPass);
}
if (cmpPass != null) {
getCmpReaderProperties().setPassword(outPass);
}
}
private String compareVisually(String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
return compareVisually(outPath, differenceImagePrefix, ignoredAreas, null);
}
private String compareVisually(String outPath, String differenceImagePrefix, Map> ignoredAreas, List equalPages) throws IOException, InterruptedException {
if (!outPath.endsWith("/")) {
outPath = outPath + "/";
}
if (differenceImagePrefix == null) {
String fileBasedPrefix = "";
if (outPdfName != null) {
// should always be initialized by this moment
fileBasedPrefix = outPdfName + "_";
}
differenceImagePrefix = "diff_" + fileBasedPrefix;
}
prepareOutputDirs(outPath, differenceImagePrefix);
System.out.println("Comparing visually..........");
if (ignoredAreas != null && !ignoredAreas.isEmpty()) {
createIgnoredAreasPdfs(outPath, ignoredAreas);
}
GhostscriptHelper ghostscriptHelper = null;
try {
ghostscriptHelper = new GhostscriptHelper(gsExec);
} catch (IllegalArgumentException e) {
throw new CompareToolExecutionException(e.getMessage());
}
ghostscriptHelper.runGhostScriptImageGeneration(outPdf, outPath, outImage);
ghostscriptHelper.runGhostScriptImageGeneration(cmpPdf, outPath, cmpImage);
return compareImagesOfPdfs(outPath, differenceImagePrefix, equalPages);
}
private String compareImagesOfPdfs(String outPath, String differenceImagePrefix, List equalPages) throws IOException, InterruptedException {
File[] imageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new PngFileFilter(outPdfName));
File[] cmpImageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new CmpPngFileFilter(cmpPdfName));
boolean bUnexpectedNumberOfPages = false;
if (imageFiles.length != cmpImageFiles.length) {
bUnexpectedNumberOfPages = true;
}
int cnt = Math.min(imageFiles.length, cmpImageFiles.length);
if (cnt < 1) {
throw new CompareToolExecutionException(
"No files for comparing. The result or sample pdf file is not processed by GhostScript.");
}
Arrays.sort(imageFiles, new ImageNameComparator());
Arrays.sort(cmpImageFiles, new ImageNameComparator());
boolean compareExecIsOk;
String imageMagickInitError = null;
ImageMagickHelper imageMagickHelper = null;
try {
imageMagickHelper = new ImageMagickHelper(compareExec);
compareExecIsOk = true;
} catch (IllegalArgumentException e) {
compareExecIsOk = false;
imageMagickInitError = e.getMessage();
LoggerFactory.getLogger(CompareTool.class).warn(e.getMessage());
}
List diffPages = new ArrayList<>();
String differentPagesFail = null;
for (int i = 0; i < cnt; i++) {
if (equalPages != null && equalPages.contains(i))
continue;
System.out.println("Comparing page " + Integer.toString(i + 1) + ": " + UrlUtil.getNormalizedFileUriString(imageFiles[i].getName()) + " ...");
System.out.println("Comparing page " + Integer.toString(i + 1) + ": " + UrlUtil.getNormalizedFileUriString(imageFiles[i].getName()) + " ...");
FileInputStream is1 = new FileInputStream(imageFiles[i].getAbsolutePath());
FileInputStream is2 = new FileInputStream(cmpImageFiles[i].getAbsolutePath());
boolean cmpResult = compareStreams(is1, is2);
is1.close();
is2.close();
if (!cmpResult) {
differentPagesFail = "Page is different!";
diffPages.add(i + 1);
if (compareExecIsOk) {
String diffName = outPath + differenceImagePrefix + Integer.toString(i + 1) + ".png";
if (!imageMagickHelper.runImageMagickImageCompare(imageFiles[i].getAbsolutePath(),
cmpImageFiles[i].getAbsolutePath(), diffName)) {
File diffFile = new File(diffName);
differentPagesFail += "\nPlease, examine " + FILE_PROTOCOL
+ UrlUtil.toNormalizedURI(diffFile).getPath() + " for more details.";
}
}
System.out.println(differentPagesFail);
} else {
System.out.println(" done.");
}
}
if (differentPagesFail != null) {
String errorMessage = DIFFERENT_PAGES.replace("", UrlUtil.toNormalizedURI(outPdf).getPath()).replace("", listDiffPagesAsString(diffPages));
if (!compareExecIsOk) {
errorMessage += "\n" + imageMagickInitError;
}
return errorMessage;
} else {
if (bUnexpectedNumberOfPages)
return UNEXPECTED_NUMBER_OF_PAGES.replace("", outPdf);
}
return null;
}
private String listDiffPagesAsString(List diffPages) {
StringBuilder sb = new StringBuilder("[");
for (int i = 0; i < diffPages.size(); i++) {
sb.append(diffPages.get(i));
if (i < diffPages.size() - 1) {
sb.append(", ");
}
}
sb.append("]");
return sb.toString();
}
private void createIgnoredAreasPdfs(String outPath, Map> ignoredAreas) throws IOException {
StampingProperties properties = new StampingProperties();
properties.setEventCountingMetaInfo(metaInfo);
try (PdfWriter outWriter = new PdfWriter(outPath + IGNORED_AREAS_PREFIX + outPdfName);
PdfReader readerOut = CompareTool.createOutputReader(outPdf);
PdfDocument pdfOutDoc = new PdfDocument(readerOut, outWriter, properties);
PdfWriter cmpWriter = new PdfWriter(outPath + IGNORED_AREAS_PREFIX + cmpPdfName);
PdfReader readerCmp = CompareTool.createOutputReader(cmpPdf);
PdfDocument pdfCmpDoc = new PdfDocument(readerCmp, cmpWriter, properties)) {
for (Map.Entry> entry : ignoredAreas.entrySet()) {
int pageNumber = entry.getKey();
List rectangles = entry.getValue();
if (rectangles != null && !rectangles.isEmpty()) {
PdfCanvas outCanvas = new PdfCanvas(pdfOutDoc.getPage(pageNumber));
PdfCanvas cmpCanvas = new PdfCanvas(pdfCmpDoc.getPage(pageNumber));
outCanvas.saveState();
cmpCanvas.saveState();
for (Rectangle rect : rectangles) {
outCanvas.rectangle(rect).fill();
cmpCanvas.rectangle(rect).fill();
}
outCanvas.restoreState();
cmpCanvas.restoreState();
}
}
}
init(outPath + IGNORED_AREAS_PREFIX + outPdfName, outPath + IGNORED_AREAS_PREFIX + cmpPdfName);
}
private void prepareOutputDirs(String outPath, String differenceImagePrefix) {
File[] imageFiles;
File[] cmpImageFiles;
File[] diffFiles;
if (!FileUtil.directoryExists(outPath)) {
FileUtil.createDirectories(outPath);
} else {
imageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new PngFileFilter(cmpPdfName));
for (File file : imageFiles) {
file.delete();
}
cmpImageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new CmpPngFileFilter(cmpPdfName));
for (File file : cmpImageFiles) {
file.delete();
}
diffFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new DiffPngFileFilter(differenceImagePrefix));
for (File file : diffFiles) {
file.delete();
}
}
}
private void printOutCmpDirectories() {
System.out.println("Out file folder: " + FILE_PROTOCOL
+ UrlUtil.toNormalizedURI(new File(outPdf).getParentFile()).getPath());
System.out.println("Cmp file folder: " + FILE_PROTOCOL
+ UrlUtil.toNormalizedURI(new File(cmpPdf).getParentFile()).getPath());
}
private String compareByContent(String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
printOutCmpDirectories();
System.out.print("Comparing by content..........");
try (PdfReader readerOut = CompareTool.createOutputReader(outPdf, getOutReaderProperties());
PdfDocument outDocument = new PdfDocument(readerOut,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
PdfReader readerCmp = CompareTool.createOutputReader(cmpPdf, getCmpReaderProperties());
PdfDocument cmpDocument = new PdfDocument(readerCmp,
new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
List outPages = new ArrayList<>();
outPagesRef = new ArrayList<>();
loadPagesFromReader(outDocument, outPages, outPagesRef);
List cmpPages = new ArrayList<>();
cmpPagesRef = new ArrayList<>();
loadPagesFromReader(cmpDocument, cmpPages, cmpPagesRef);
if (outPages.size() != cmpPages.size()) {
CompareTool.writeOnDisk(outPdf);
CompareTool.writeOnDiskIfNotExists(cmpPdf);
return compareVisuallyAndCombineReports("Documents have different numbers of pages.", outPath, differenceImagePrefix, ignoredAreas, null);
}
CompareResult compareResult = new CompareResult(compareByContentErrorsLimit);
List equalPages = new ArrayList<>(cmpPages.size());
for (int i = 0; i < cmpPages.size(); i++) {
ObjectPath currentPath = new ObjectPath(cmpPagesRef.get(i), outPagesRef.get(i));
if (compareDictionariesExtended(outPages.get(i), cmpPages.get(i), currentPath, compareResult))
equalPages.add(i);
}
ObjectPath catalogPath = new ObjectPath(cmpDocument.getCatalog().getPdfObject().getIndirectReference(),
outDocument.getCatalog().getPdfObject().getIndirectReference());
Set ignoredCatalogEntries = new LinkedHashSet<>(Arrays.asList(PdfName.Pages, PdfName.Metadata));
compareDictionariesExtended(outDocument.getCatalog().getPdfObject(), cmpDocument.getCatalog().getPdfObject(),
catalogPath, compareResult, ignoredCatalogEntries);
if (encryptionCompareEnabled) {
compareDocumentsEncryption(outDocument, cmpDocument, compareResult);
}
if (generateCompareByContentXmlReport) {
String outPdfName = new File(outPdf).getName();
FileOutputStream xml = new FileOutputStream(outPath + "/" + outPdfName.substring(0, outPdfName.length() - 3) + "report.xml");
try {
compareResult.writeReportToXml(xml);
} catch (Exception e) {
throw new RuntimeException(e.getMessage(), e);
} finally {
xml.close();
}
}
if (equalPages.size() == cmpPages.size() && compareResult.isOk()) {
System.out.println("OK");
System.out.flush();
return null;
} else {
CompareTool.writeOnDisk(outPdf);
CompareTool.writeOnDiskIfNotExists(cmpPdf);
return compareVisuallyAndCombineReports(compareResult.getReport(), outPath, differenceImagePrefix, ignoredAreas, equalPages);
}
}
}
private static void writeOnDisk(String filename) throws IOException {
MemoryFirstPdfWriter outWriter = MemoryFirstPdfWriter.get(filename);
if (outWriter != null) {
outWriter.dump();
}
}
private static void writeOnDiskIfNotExists(String filename) throws IOException {
if (!new File(filename).exists()) {
CompareTool.writeOnDisk(filename);
}
}
private String compareVisuallyAndCombineReports(String compareByFailContentReason, String outPath, String differenceImagePrefix,
Map> ignoredAreas,
List equalPages) throws IOException, InterruptedException {
System.out.println("Fail");
System.out.flush();
String compareByContentReport = "Compare by content report:\n" + compareByFailContentReason;
System.out.println(compareByContentReport);
System.out.flush();
String message = compareVisually(outPath, differenceImagePrefix, ignoredAreas, equalPages);
if (message == null || message.length() == 0)
return "Compare by content fails. No visual differences";
return message;
}
private void loadPagesFromReader(PdfDocument doc, List pages, List pagesRef) {
int numOfPages = doc.getNumberOfPages();
for (int i = 0; i < numOfPages; ++i) {
pages.add(doc.getPage(i + 1).getPdfObject());
pagesRef.add(pages.get(i).getIndirectReference());
}
}
private void compareDocumentsEncryption(PdfDocument outDocument, PdfDocument cmpDocument, CompareResult compareResult) {
PdfDictionary outEncrypt = outDocument.getTrailer().getAsDictionary(PdfName.Encrypt);
PdfDictionary cmpEncrypt = cmpDocument.getTrailer().getAsDictionary(PdfName.Encrypt);
if (outEncrypt == null && cmpEncrypt == null) {
return;
}
TrailerPath trailerPath = new TrailerPath(cmpDocument, outDocument);
if (outEncrypt == null) {
compareResult.addError(trailerPath, "Expected encrypted document.");
return;
}
if (cmpEncrypt == null) {
compareResult.addError(trailerPath, "Expected not encrypted document.");
return;
}
Set ignoredEncryptEntries = new LinkedHashSet<>(Arrays.asList(PdfName.O, PdfName.U, PdfName.OE, PdfName.UE, PdfName.Perms, PdfName.CF, PdfName.Recipients));
ObjectPath objectPath = new ObjectPath(outEncrypt.getIndirectReference(), cmpEncrypt.getIndirectReference());
compareDictionariesExtended(outEncrypt, cmpEncrypt, objectPath, compareResult, ignoredEncryptEntries);
PdfDictionary outCfDict = outEncrypt.getAsDictionary(PdfName.CF);
PdfDictionary cmpCfDict = cmpEncrypt.getAsDictionary(PdfName.CF);
if (cmpCfDict != null || outCfDict != null) {
if (cmpCfDict != null && outCfDict == null || cmpCfDict == null) {
compareResult.addError(objectPath, "One of the dictionaries is null, the other is not.");
} else {
Set mergedKeys = new TreeSet<>(outCfDict.keySet());
mergedKeys.addAll(cmpCfDict.keySet());
for (PdfName key : mergedKeys) {
objectPath.pushDictItemToPath(key);
LinkedHashSet excludedKeys = new LinkedHashSet<>(Arrays.asList(PdfName.Recipients));
compareDictionariesExtended(outCfDict.getAsDictionary(key), cmpCfDict.getAsDictionary(key), objectPath, compareResult, excludedKeys);
objectPath.pop();
}
}
}
}
private boolean compareStreams(InputStream is1, InputStream is2) throws IOException {
byte[] buffer1 = new byte[64 * 1024];
byte[] buffer2 = new byte[64 * 1024];
int len1;
int len2;
for (; ; ) {
len1 = is1.read(buffer1);
len2 = is2.read(buffer2);
if (len1 != len2)
return false;
if (!Arrays.equals(buffer1, buffer2))
return false;
if (len1 == -1)
break;
}
return true;
}
private boolean compareDictionariesExtended(PdfDictionary outDict, PdfDictionary cmpDict, ObjectPath currentPath, CompareResult compareResult) {
return compareDictionariesExtended(outDict, cmpDict, currentPath, compareResult, null);
}
private boolean compareDictionariesExtended(PdfDictionary outDict, PdfDictionary cmpDict, ObjectPath currentPath, CompareResult compareResult, Set excludedKeys) {
if (cmpDict != null && outDict == null || outDict != null && cmpDict == null) {
compareResult.addError(currentPath, "One of the dictionaries is null, the other is not.");
return false;
}
boolean dictsAreSame = true;
// Iterate through the union of the keys of the cmp and out dictionaries
Set mergedKeys = new TreeSet<>(cmpDict.keySet());
mergedKeys.addAll(outDict.keySet());
for (PdfName key : mergedKeys) {
if (!dictsAreSame && (currentPath == null || compareResult == null || compareResult.isMessageLimitReached())) {
return false;
}
if (excludedKeys != null && excludedKeys.contains(key)) {
continue;
}
if (key.equals(PdfName.Parent) || key.equals(PdfName.P) || key.equals(PdfName.ModDate)) continue;
if (outDict.isStream() && cmpDict.isStream() && (key.equals(PdfName.Filter) || key.equals(PdfName.Length)))
continue;
if (key.equals(PdfName.BaseFont) || key.equals(PdfName.FontName)) {
PdfObject cmpObj = cmpDict.get(key);
if (cmpObj != null && cmpObj.isName() && cmpObj.toString().indexOf('+') > 0) {
PdfObject outObj = outDict.get(key);
if (!outObj.isName() || outObj.toString().indexOf('+') == -1) {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("PdfDictionary {0} entry: Expected: {1}. Found: {2}", key.toString(), cmpObj.toString(), outObj.toString()));
dictsAreSame = false;
} else {
String cmpName = cmpObj.toString().substring(cmpObj.toString().indexOf('+'));
String outName = outObj.toString().substring(outObj.toString().indexOf('+'));
if (!cmpName.equals(outName)) {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("PdfDictionary {0} entry: Expected: {1}. Found: {2}", key.toString(), cmpObj.toString(), outObj.toString()));
dictsAreSame = false;
}
}
continue;
}
}
// A number tree can be stored in multiple, semantically equivalent ways.
// Flatten to a single array, in order to get a canonical representation.
if (key.equals(PdfName.ParentTree) || key.equals(PdfName.PageLabels)) {
if (currentPath != null) {
currentPath.pushDictItemToPath(key);
}
PdfDictionary outNumTree = outDict.getAsDictionary(key);
PdfDictionary cmpNumTree = cmpDict.getAsDictionary(key);
LinkedList outItems = new LinkedList();
LinkedList cmpItems = new LinkedList();
PdfNumber outLeftover = flattenNumTree(outNumTree, null, outItems);
PdfNumber cmpLeftover = flattenNumTree(cmpNumTree, null, cmpItems);
if (outLeftover != null) {
LoggerFactory.getLogger(CompareTool.class).warn(IoLogMessageConstant.NUM_TREE_SHALL_NOT_END_WITH_KEY);
if (cmpLeftover == null) {
if (compareResult != null && currentPath != null) {
compareResult.addError(currentPath, "Number tree unexpectedly ends with a key");
}
dictsAreSame = false;
}
}
if (cmpLeftover != null) {
LoggerFactory.getLogger(CompareTool.class).warn(IoLogMessageConstant.NUM_TREE_SHALL_NOT_END_WITH_KEY);
if (outLeftover == null) {
if (compareResult != null && currentPath != null) {
compareResult.addError(currentPath, "Number tree was expected to end with a key (although it is invalid according to the specification), but ended with a value");
}
dictsAreSame = false;
}
}
if (outLeftover != null && cmpLeftover != null && !compareNumbers(outLeftover, cmpLeftover)) {
if (compareResult != null && currentPath != null) {
compareResult.addError(currentPath, "Number tree was expected to end with a different key (although it is invalid according to the specification)");
}
dictsAreSame = false;
}
PdfArray outArray = new PdfArray(outItems, outItems.size());
PdfArray cmpArray = new PdfArray(cmpItems, cmpItems.size());
if (!compareArraysExtended(outArray, cmpArray, currentPath, compareResult)) {
if (compareResult != null && currentPath != null) {
compareResult.addError(currentPath, "Number trees were flattened, compared and found to be different.");
}
dictsAreSame = false;
}
if (currentPath != null) {
currentPath.pop();
}
continue;
}
if (currentPath != null) {
currentPath.pushDictItemToPath(key);
}
dictsAreSame = compareObjects(outDict.get(key, false), cmpDict.get(key, false), currentPath, compareResult) && dictsAreSame;
if (currentPath != null) {
currentPath.pop();
}
}
return dictsAreSame;
}
private PdfNumber flattenNumTree(PdfDictionary dictionary, PdfNumber leftOver, LinkedList items /*Map items*/) {
PdfArray nums = dictionary.getAsArray(PdfName.Nums);
if (nums != null) {
for (int k = 0; k < nums.size(); k++) {
PdfNumber number;
if (leftOver == null)
number = nums.getAsNumber(k++);
else {
number = leftOver;
leftOver = null;
}
if (k < nums.size()) {
items.addLast(number);
items.addLast(nums.get(k, false));
} else {
return number;
}
}
} else if ((nums = dictionary.getAsArray(PdfName.Kids)) != null) {
for (int k = 0; k < nums.size(); k++) {
PdfDictionary kid = nums.getAsDictionary(k);
leftOver = flattenNumTree(kid, leftOver, items);
}
}
return null;
}
protected boolean compareObjects(PdfObject outObj, PdfObject cmpObj, ObjectPath currentPath, CompareResult compareResult) {
PdfObject outDirectObj = null;
PdfObject cmpDirectObj = null;
if (outObj != null)
outDirectObj = outObj.isIndirectReference() ? ((PdfIndirectReference) outObj).getRefersTo(false) : outObj;
if (cmpObj != null)
cmpDirectObj = cmpObj.isIndirectReference() ? ((PdfIndirectReference) cmpObj).getRefersTo(false) : cmpObj;
if (cmpDirectObj == null && outDirectObj == null)
return true;
if (outDirectObj == null) {
compareResult.addError(currentPath, "Expected object was not found.");
return false;
} else if (cmpDirectObj == null) {
compareResult.addError(currentPath, "Found object which was not expected to be found.");
return false;
} else if (cmpDirectObj.getType() != outDirectObj.getType()) {
compareResult.addError(currentPath, MessageFormatUtil.format("Types do not match. Expected: {0}. Found: {1}.", cmpDirectObj.getClass().getSimpleName(), outDirectObj.getClass().getSimpleName()));
return false;
} else if (cmpObj.isIndirectReference() && !outObj.isIndirectReference()) {
compareResult.addError(currentPath, "Expected indirect object.");
return false;
} else if (!cmpObj.isIndirectReference() && outObj.isIndirectReference()) {
compareResult.addError(currentPath, "Expected direct object.");
return false;
}
if (currentPath != null && cmpObj.isIndirectReference() && outObj.isIndirectReference()) {
if (currentPath.isComparing((PdfIndirectReference) cmpObj, (PdfIndirectReference) outObj))
return true;
currentPath = currentPath.resetDirectPath((PdfIndirectReference) cmpObj, (PdfIndirectReference) outObj);
}
if (cmpDirectObj.isDictionary() && PdfName.Page.equals(((PdfDictionary) cmpDirectObj).getAsName(PdfName.Type))
&& useCachedPagesForComparison) {
if (!outDirectObj.isDictionary() || !PdfName.Page.equals(((PdfDictionary) outDirectObj).getAsName(PdfName.Type))) {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, "Expected a page. Found not a page.");
return false;
}
PdfIndirectReference cmpRefKey = cmpObj.isIndirectReference() ? (PdfIndirectReference) cmpObj : cmpObj.getIndirectReference();
PdfIndirectReference outRefKey = outObj.isIndirectReference() ? (PdfIndirectReference) outObj : outObj.getIndirectReference();
// References to the same page
if (cmpPagesRef == null) {
cmpPagesRef = new ArrayList<>();
for (int i = 1; i <= cmpRefKey.getDocument().getNumberOfPages(); ++i) {
cmpPagesRef.add(cmpRefKey.getDocument().getPage(i).getPdfObject().getIndirectReference());
}
}
if (outPagesRef == null) {
outPagesRef = new ArrayList<>();
for (int i = 1; i <= outRefKey.getDocument().getNumberOfPages(); ++i) {
outPagesRef.add(outRefKey.getDocument().getPage(i).getPdfObject().getIndirectReference());
}
}
// If at least one of the page dictionaries is in the document's page tree, we don't proceed with deep comparison,
// because pages are compared at different level, so we compare only their index.
// However only if both page dictionaries are not in the document's page trees, we continue to comparing them as normal dictionaries.
if (cmpPagesRef.contains(cmpRefKey) || outPagesRef.contains(outRefKey)) {
if (cmpPagesRef.contains(cmpRefKey) && cmpPagesRef.indexOf(cmpRefKey) == outPagesRef.indexOf(outRefKey)) {
return true;
}
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("The dictionaries refer to different pages. Expected page number: {0}. Found: {1}",
cmpPagesRef.indexOf(cmpRefKey) + 1, outPagesRef.indexOf(outRefKey) + 1));
return false;
}
}
if (cmpDirectObj.isDictionary()) {
return compareDictionariesExtended((PdfDictionary) outDirectObj, (PdfDictionary) cmpDirectObj, currentPath, compareResult);
} else if (cmpDirectObj.isStream()) {
return compareStreamsExtended((PdfStream) outDirectObj, (PdfStream) cmpDirectObj, currentPath, compareResult);
} else if (cmpDirectObj.isArray()) {
return compareArraysExtended((PdfArray) outDirectObj, (PdfArray) cmpDirectObj, currentPath, compareResult);
} else if (cmpDirectObj.isName()) {
return compareNamesExtended((PdfName) outDirectObj, (PdfName) cmpDirectObj, currentPath, compareResult);
} else if (cmpDirectObj.isNumber()) {
return compareNumbersExtended((PdfNumber) outDirectObj, (PdfNumber) cmpDirectObj, currentPath, compareResult);
} else if (cmpDirectObj.isString()) {
return compareStringsExtended((PdfString) outDirectObj, (PdfString) cmpDirectObj, currentPath, compareResult);
} else if (cmpDirectObj.isBoolean()) {
return compareBooleansExtended((PdfBoolean) outDirectObj, (PdfBoolean) cmpDirectObj, currentPath, compareResult);
} else if (outDirectObj.isNull() && cmpDirectObj.isNull()) {
return true;
} else {
throw new UnsupportedOperationException();
}
}
private boolean compareStreamsExtended(PdfStream outStream, PdfStream cmpStream, ObjectPath currentPath, CompareResult compareResult) {
boolean toDecode = PdfName.FlateDecode.equals(outStream.get(PdfName.Filter));
byte[] outStreamBytes = outStream.getBytes(toDecode);
byte[] cmpStreamBytes = cmpStream.getBytes(toDecode);
if (Arrays.equals(outStreamBytes, cmpStreamBytes)) {
return compareDictionariesExtended(outStream, cmpStream, currentPath, compareResult);
} else {
StringBuilder errorMessage = new StringBuilder();
if (cmpStreamBytes.length != outStreamBytes.length) {
errorMessage.append(MessageFormatUtil.format("PdfStream. Lengths are different. Expected: {0}. Found: {1}\n", cmpStreamBytes.length, outStreamBytes.length));
} else {
errorMessage.append("PdfStream. Bytes are different.\n");
}
int firstDifferenceOffset = findBytesDifference(outStreamBytes, cmpStreamBytes, errorMessage);
if (compareResult != null && currentPath != null) {
currentPath.pushOffsetToPath(firstDifferenceOffset);
compareResult.addError(currentPath, errorMessage.toString());
currentPath.pop();
}
return false;
}
}
/**
* @return first difference offset
*/
private int findBytesDifference(byte[] outStreamBytes, byte[] cmpStreamBytes, StringBuilder errorMessage) {
int numberOfDifferentBytes = 0;
int firstDifferenceOffset = 0;
int minLength = Math.min(cmpStreamBytes.length, outStreamBytes.length);
for (int i = 0; i < minLength; i++) {
if (cmpStreamBytes[i] != outStreamBytes[i]) {
++numberOfDifferentBytes;
if (numberOfDifferentBytes == 1) {
firstDifferenceOffset = i;
}
}
}
String bytesDifference = null;
if (numberOfDifferentBytes > 0) {
int diffBytesAreaL = 10;
int diffBytesAreaR = 10;
int lCmp = Math.max(0, firstDifferenceOffset - diffBytesAreaL);
int rCmp = Math.min(cmpStreamBytes.length, firstDifferenceOffset + diffBytesAreaR);
int lOut = Math.max(0, firstDifferenceOffset - diffBytesAreaL);
int rOut = Math.min(outStreamBytes.length, firstDifferenceOffset + diffBytesAreaR);
String cmpByte = new String(new byte[]{cmpStreamBytes[firstDifferenceOffset]}, StandardCharsets.ISO_8859_1);
String cmpByteNeighbours = new String(cmpStreamBytes, lCmp, rCmp - lCmp, StandardCharsets.ISO_8859_1).replaceAll(NEW_LINES, " ");
String outByte = new String(new byte[]{outStreamBytes[firstDifferenceOffset]}, StandardCharsets.ISO_8859_1);
String outBytesNeighbours = new String(outStreamBytes, lOut, rOut - lOut, StandardCharsets.ISO_8859_1).replaceAll(NEW_LINES, " ");
bytesDifference = MessageFormatUtil.format("First bytes difference is encountered at index {0}. Expected: {1} ({2}). Found: {3} ({4}). Total number of different bytes: {5}",
Integer.valueOf(firstDifferenceOffset).toString(), cmpByte, cmpByteNeighbours, outByte, outBytesNeighbours, numberOfDifferentBytes);
} else {
// lengths are different
firstDifferenceOffset = minLength;
bytesDifference = MessageFormatUtil.format("Bytes of the shorter array are the same as the first {0} bytes of the longer one.", minLength);
}
errorMessage.append(bytesDifference);
return firstDifferenceOffset;
}
private boolean compareArraysExtended(PdfArray outArray, PdfArray cmpArray, ObjectPath currentPath, CompareResult compareResult) {
if (outArray == null) {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, "Found null. Expected PdfArray.");
return false;
} else if (outArray.size() != cmpArray.size()) {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("PdfArrays. Lengths are different. Expected: {0}. Found: {1}.", cmpArray.size(), outArray.size()));
return false;
}
boolean arraysAreEqual = true;
for (int i = 0; i < cmpArray.size(); i++) {
if (currentPath != null)
currentPath.pushArrayItemToPath(i);
arraysAreEqual = compareObjects(outArray.get(i, false), cmpArray.get(i, false), currentPath, compareResult) && arraysAreEqual;
if (currentPath != null)
currentPath.pop();
if (!arraysAreEqual && (currentPath == null || compareResult == null || compareResult.isMessageLimitReached()))
return false;
}
return arraysAreEqual;
}
private boolean compareNamesExtended(PdfName outName, PdfName cmpName, ObjectPath currentPath, CompareResult compareResult) {
if (cmpName.equals(outName)) {
return true;
} else {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("PdfName. Expected: {0}. Found: {1}", cmpName.toString(), outName.toString()));
return false;
}
}
private boolean compareNumbersExtended(PdfNumber outNumber, PdfNumber cmpNumber, ObjectPath currentPath, CompareResult compareResult) {
if (cmpNumber.getValue() == outNumber.getValue()) {
return true;
} else {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("PdfNumber. Expected: {0}. Found: {1}", cmpNumber, outNumber));
return false;
}
}
private boolean compareStringsExtended(PdfString outString, PdfString cmpString, ObjectPath currentPath, CompareResult compareResult) {
if (Arrays.equals(convertPdfStringToBytes(cmpString), convertPdfStringToBytes(outString))) {
return true;
} else {
String cmpStr = cmpString.toUnicodeString();
String outStr = outString.toUnicodeString();
StringBuilder errorMessage = new StringBuilder();
if (cmpStr.length() != outStr.length()) {
errorMessage.append(MessageFormatUtil.format("PdfString. Lengths are different. Expected: {0}. Found: {1}\n", cmpStr.length(), outStr.length()));
} else {
errorMessage.append("PdfString. Characters are different.\n");
}
int firstDifferenceOffset = findStringDifference(outStr, cmpStr, errorMessage);
if (compareResult != null && currentPath != null) {
currentPath.pushOffsetToPath(firstDifferenceOffset);
compareResult.addError(currentPath, errorMessage.toString());
currentPath.pop();
}
return false;
}
}
private int findStringDifference(String outString, String cmpString, StringBuilder errorMessage) {
int numberOfDifferentChars = 0;
int firstDifferenceOffset = 0;
int minLength = Math.min(cmpString.length(), outString.length());
for (int i = 0; i < minLength; i++) {
if (cmpString.charAt(i) != outString.charAt(i)) {
++numberOfDifferentChars;
if (numberOfDifferentChars == 1) {
firstDifferenceOffset = i;
}
}
}
String stringDifference = null;
if (numberOfDifferentChars > 0) {
int diffBytesAreaL = 15;
int diffBytesAreaR = 15;
int lCmp = Math.max(0, firstDifferenceOffset - diffBytesAreaL);
int rCmp = Math.min(cmpString.length(), firstDifferenceOffset + diffBytesAreaR);
int lOut = Math.max(0, firstDifferenceOffset - diffBytesAreaL);
int rOut = Math.min(outString.length(), firstDifferenceOffset + diffBytesAreaR);
String cmpByte = String.valueOf(cmpString.charAt(firstDifferenceOffset));
String cmpByteNeighbours = cmpString.substring(lCmp, rCmp).replaceAll(NEW_LINES, " ");
String outByte = String.valueOf(outString.charAt(firstDifferenceOffset));
String outBytesNeighbours = outString.substring(lOut, rOut).replaceAll(NEW_LINES, " ");
stringDifference = MessageFormatUtil.format("First characters difference is encountered at index {0}.\nExpected: {1} ({2}).\nFound: {3} ({4}).\nTotal number of different characters: {5}",
Integer.valueOf(firstDifferenceOffset).toString(), cmpByte, cmpByteNeighbours, outByte, outBytesNeighbours, numberOfDifferentChars);
} else {
// lengths are different
firstDifferenceOffset = minLength;
stringDifference = MessageFormatUtil.format("All characters of the shorter string are the same as the first {0} characters of the longer one.", minLength);
}
errorMessage.append(stringDifference);
return firstDifferenceOffset;
}
private byte[] convertPdfStringToBytes(PdfString pdfString) {
byte[] bytes;
String value = pdfString.getValue();
String encoding = pdfString.getEncoding();
if (encoding != null && PdfEncodings.UNICODE_BIG.equals(encoding) && PdfEncodings.isPdfDocEncoding(value))
bytes = PdfEncodings.convertToBytes(value, PdfEncodings.PDF_DOC_ENCODING);
else
bytes = PdfEncodings.convertToBytes(value, encoding);
return bytes;
}
private boolean compareBooleansExtended(PdfBoolean outBoolean, PdfBoolean cmpBoolean, ObjectPath currentPath, CompareResult compareResult) {
if (cmpBoolean.getValue() == outBoolean.getValue()) {
return true;
} else {
if (compareResult != null && currentPath != null)
compareResult.addError(currentPath, MessageFormatUtil.format("PdfBoolean. Expected: {0}. Found: {1}.", cmpBoolean.getValue(), outBoolean.getValue()));
return false;
}
}
private List getLinkAnnotations(int pageNum, PdfDocument document) {
List linkAnnotations = new ArrayList<>();
List annotations = document.getPage(pageNum).getAnnotations();
for (PdfAnnotation annotation : annotations) {
if (PdfName.Link.equals(annotation.getSubtype())) {
linkAnnotations.add((PdfLinkAnnotation) annotation);
}
}
return linkAnnotations;
}
private boolean compareLinkAnnotations(PdfLinkAnnotation cmpLink, PdfLinkAnnotation outLink, PdfDocument cmpDocument, PdfDocument outDocument) {
// Compare link rectangles, page numbers the links refer to, and simple parameters (non-indirect, non-arrays, non-dictionaries)
PdfObject cmpDestObject = cmpLink.getDestinationObject();
PdfObject outDestObject = outLink.getDestinationObject();
if (cmpDestObject != null && outDestObject != null) {
if (cmpDestObject.getType() != outDestObject.getType())
return false;
else {
PdfArray explicitCmpDest = null;
PdfArray explicitOutDest = null;
PdfNameTree cmpNamedDestinations = cmpDocument
.getCatalog().getNameTree(PdfName.Dests);
PdfNameTree outNamedDestinations = outDocument
.getCatalog().getNameTree(PdfName.Dests);
switch (cmpDestObject.getType()) {
case PdfObject.ARRAY:
explicitCmpDest = (PdfArray) cmpDestObject;
explicitOutDest = (PdfArray) outDestObject;
break;
case PdfObject.NAME:
String cmpDestName = ((PdfName) cmpDestObject).getValue();
explicitCmpDest = (PdfArray) cmpNamedDestinations.getEntry(cmpDestName);
String outDestName = ((PdfName) outDestObject).getValue();
explicitOutDest = (PdfArray) outNamedDestinations.getEntry(outDestName);
break;
case PdfObject.STRING:
explicitCmpDest = (PdfArray) cmpNamedDestinations
.getEntry((PdfString) cmpDestObject);
explicitOutDest = (PdfArray) outNamedDestinations
.getEntry((PdfString) outDestObject);
break;
default:
break;
}
if (getExplicitDestinationPageNum(explicitCmpDest) != getExplicitDestinationPageNum(explicitOutDest))
return false;
}
}
PdfDictionary cmpDict = cmpLink.getPdfObject();
PdfDictionary outDict = outLink.getPdfObject();
if (cmpDict.size() != outDict.size())
return false;
Rectangle cmpRect = cmpDict.getAsRectangle(PdfName.Rect);
Rectangle outRect = outDict.getAsRectangle(PdfName.Rect);
if (cmpRect.getHeight() != outRect.getHeight() ||
cmpRect.getWidth() != outRect.getWidth() ||
cmpRect.getX() != outRect.getX() ||
cmpRect.getY() != outRect.getY())
return false;
for (Map.Entry cmpEntry : cmpDict.entrySet()) {
PdfObject cmpObj = cmpEntry.getValue();
if (!outDict.containsKey(cmpEntry.getKey()))
return false;
PdfObject outObj = outDict.get(cmpEntry.getKey());
if (cmpObj.getType() != outObj.getType())
return false;
switch (cmpObj.getType()) {
case PdfObject.NULL:
case PdfObject.BOOLEAN:
case PdfObject.NUMBER:
case PdfObject.STRING:
case PdfObject.NAME:
if (!cmpObj.toString().equals(outObj.toString()))
return false;
break;
}
}
return true;
}
private int getExplicitDestinationPageNum(PdfArray explicitDest) {
PdfIndirectReference pageReference = (PdfIndirectReference) explicitDest.get(0, false);
PdfDocument doc = pageReference.getDocument();
for (int i = 1; i <= doc.getNumberOfPages(); ++i) {
if (doc.getPage(i).getPdfObject().getIndirectReference().equals(pageReference))
return i;
}
throw new IllegalArgumentException("PdfLinkAnnotation comparison: Page not found.");
}
private static class PngFileFilter implements FileFilter {
private String currentOutPdfName;
public PngFileFilter (String currentOutPdfName) {
this.currentOutPdfName = currentOutPdfName;
}
public boolean accept(File pathname) {
String ap = pathname.getName();
boolean b1 = ap.endsWith(".png");
boolean b2 = ap.contains("cmp_");
return b1 && !b2 && ap.contains(currentOutPdfName);
}
}
private static class CmpPngFileFilter implements FileFilter {
private String currentCmpPdfName;
public CmpPngFileFilter (String currentCmpPdfName) {
this.currentCmpPdfName = currentCmpPdfName;
}
public boolean accept(File pathname) {
String ap = pathname.getName();
boolean b1 = ap.endsWith(".png");
boolean b2 = ap.contains("cmp_");
return b1 && b2 && ap.contains(currentCmpPdfName);
}
}
private static class DiffPngFileFilter implements FileFilter {
private String differenceImagePrefix;
public DiffPngFileFilter(String differenceImagePrefix) {
this.differenceImagePrefix = differenceImagePrefix;
}
public boolean accept(File pathname) {
String ap = pathname.getName();
boolean b1 = ap.endsWith(".png");
boolean b2 = ap.startsWith(differenceImagePrefix);
return b1 && b2;
}
}
private static class ImageNameComparator implements Comparator {
public int compare(File f1, File f2) {
String f1Name = f1.getName();
String f2Name = f2.getName();
return f1Name.compareTo(f2Name);
}
}
/**
* Class containing results of the comparison of two documents.
*/
public static class CompareResult {
// LinkedHashMap to retain order. HashMap has different order in Java6/7 and Java8
protected Map differences = new LinkedHashMap<>();
protected int messageLimit = 1;
/**
* Creates new empty instance of CompareResult with given limit of difference messages.
*
* @param messageLimit maximum number of difference messages to be handled by this CompareResult.
*/
public CompareResult(int messageLimit) {
this.messageLimit = messageLimit;
}
/**
* Verifies if documents are considered equal after comparison.
*
* @return true if documents are equal, false otherwise.
*/
public boolean isOk() {
return differences.size() == 0;
}
/**
* Returns number of differences between two documents detected during comparison.
*
* @return number of differences.
*/
public int getErrorCount() {
return differences.size();
}
/**
* Converts this CompareResult into text form.
*
* @return text report on the differences between two documents.
*/
public String getReport() {
StringBuilder sb = new StringBuilder();
boolean firstEntry = true;
for (Map.Entry entry : differences.entrySet()) {
if (!firstEntry)
sb.append("-----------------------------").append("\n");
ObjectPath diffPath = entry.getKey();
sb.append(entry.getValue()).append("\n").append(diffPath.toString()).append("\n");
firstEntry = false;
}
return sb.toString();
}
/**
* Returns map with {@link ObjectPath} as keys and difference descriptions as values.
*
* @return differences map which could be used to find in the document the objects that are different.
*/
public Map getDifferences() {
return differences;
}
/**
* Converts this CompareResult into xml form.
*
* @param stream output stream to which xml report will be written.
* @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
* which satisfies the configuration requested.
* @throws TransformerException if it is not possible to create an XML Transformer instance or
* an unrecoverable error occurs during the course of the transformation.
*/
public void writeReportToXml(OutputStream stream) throws ParserConfigurationException, TransformerException {
final Document xmlReport = XmlUtil.initNewXmlDocument();
Element root = xmlReport.createElement("report");
Element errors = xmlReport.createElement("errors");
errors.setAttribute("count", String.valueOf(differences.size()));
root.appendChild(errors);
for (Map.Entry entry : differences.entrySet()) {
Node errorNode = xmlReport.createElement("error");
Node message = xmlReport.createElement("message");
message.appendChild(xmlReport.createTextNode(entry.getValue()));
Node path = entry.getKey().toXmlNode(xmlReport);
errorNode.appendChild(message);
errorNode.appendChild(path);
errors.appendChild(errorNode);
}
xmlReport.appendChild(root);
XmlUtils.writeXmlDocToStream(xmlReport, stream);
}
protected boolean isMessageLimitReached() {
return differences.size() >= messageLimit;
}
protected void addError(ObjectPath path, String message) {
if (differences.size() < messageLimit) {
differences.put(new ObjectPath(path), message);
}
}
}
/**
* Exceptions thrown when errors occur during generation and comparison of images obtained on the basis of pdf
* files.
*/
public static class CompareToolExecutionException extends RuntimeException {
/**
* Creates a new {@link CompareToolExecutionException}.
*
* @param msg the detail message.
*/
public CompareToolExecutionException(String msg) {
super(msg);
}
}
}