// com.itextpdf.kernel.utils.CompareTool Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2022 iText Group NV
Authors: Bruno Lowagie, Paulo Soares, et al.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License version 3
as published by the Free Software Foundation with the addition of the
following permission added to Section 15 as permitted in Section 7(a):
FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
OF THIRD PARTY RIGHTS
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, see http://www.gnu.org/licenses or write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA, 02110-1301 USA, or download the license from the following URL:
http://itextpdf.com/terms-of-use/
The interactive user interfaces in modified source and object code versions
of this program must display Appropriate Legal Notices, as required under
Section 5 of the GNU Affero General Public License.
In accordance with Section 7(b) of the GNU Affero General Public License,
a covered work must retain the producer line in every PDF that is created
or manipulated using iText.
You can be released from the requirements of the license by purchasing
a commercial license. Buying such a license is mandatory as soon as you
develop commercial activities involving the iText software without
disclosing the source code of your own applications.
These activities include: offering paid services to customers as an ASP,
serving PDFs on the fly in a web application, shipping iText with a closed
source product.
For more information, please contact iText Software Corp. at this
address: sales@itextpdf.com
*/
package com.itextpdf.kernel.utils;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.commons.utils.FileUtil;
import com.itextpdf.io.util.GhostscriptHelper;
import com.itextpdf.io.util.ImageMagickHelper;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.io.util.UrlUtil;
import com.itextpdf.io.util.XmlUtil;
import com.itextpdf.commons.actions.contexts.IMetaInfo;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.DocumentProperties;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfBoolean;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.kernel.pdf.PdfIndirectReference;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfNumber;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.ReaderProperties;
import com.itextpdf.kernel.pdf.StampingProperties;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.utils.objectpathitems.ObjectPath;
import com.itextpdf.kernel.utils.objectpathitems.TrailerPath;
import com.itextpdf.kernel.xmp.PdfConst;
import com.itextpdf.kernel.xmp.XMPConst;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.XMPUtils;
import com.itextpdf.kernel.xmp.options.ParseOptions;
import com.itextpdf.kernel.xmp.options.SerializeOptions;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
/**
* This class provides means to compare two PDF files both by content and visually
* and gives the report on their differences.
*
* For visual comparison it uses external tools: Ghostscript and ImageMagick, which
* should be installed on your machine. To allow CompareTool to use them, you need
* to pass either java properties or environment variables with names "ITEXT_GS_EXEC" and
* "ITEXT_MAGICK_COMPARE_EXEC", which would contain the commands to execute the
* Ghostscript and ImageMagick tools.
*
* CompareTool class was mainly designed for the testing purposes of iText in order to
* ensure that the same code produces the same PDF document. For this reason you will
* often encounter such parameter names as "outDoc" and "cmpDoc" which stand for output
* document and document-for-comparison. The first one is viewed as the current result,
* and the second one is referred to as the normal or ideal result. OutDoc is compared to the
* ideal cmpDoc. Therefore all reports of the comparison are in the form: "Expected ...,
* but was ...". This should be interpreted in the following way: "expected" part stands
* for the content of the cmpDoc and "but was" part stands for the content of the outDoc.
*/
public class CompareTool {
// ---- Message templates and text-normalisation patterns ----
private static final String FILE_PROTOCOL = "file://";
// NOTE(review): the two templates below end in "for ." / "differs on page ." which suggests that
// angle-bracket placeholder tokens were lost from the literals; confirm against the code that formats them.
private static final String UNEXPECTED_NUMBER_OF_PAGES = "Unexpected number of pages for .";
private static final String DIFFERENT_PAGES = "File " + FILE_PROTOCOL + " differs on page .";
private static final String IGNORED_AREAS_PREFIX = "ignored_areas_";
// Matches dotted version numbers such as "7.2.1", optionally followed by "-SNAPSHOT".
private static final String VERSION_REGEXP = "(\\d+\\.)+\\d+(-SNAPSHOT)?";
// NOTE(review): empty replacement; possibly a placeholder token was lost here as well. Verify.
private static final String VERSION_REPLACEMENT = "";
// Matches a copyright sign followed by a "<digits>-<digits>" year range and the company name.
private static final String COPYRIGHT_REGEXP = "\u00a9\\d+-\\d+ iText Group NV";
private static final String COPYRIGHT_REPLACEMENT = "\u00a9 iText Group NV";
// Matches a single carriage-return or line-feed character.
private static final String NEW_LINES = "\\r|\\n";

// ---- Per-comparison state (presumably populated by init(...), which is defined elsewhere in this class) ----
private String cmpPdfName;
private String outPdfName;
private String cmpPdf;
private String cmpImage;
private String outPdf;
private String outImage;

// Lazily created by getOutReaderProperties() / getCmpReaderProperties().
private ReaderProperties outProps;
private ReaderProperties cmpProps;

// Indirect references of the page dictionaries of both documents; compareByCatalog(...) walks them pairwise.
// Typed explicitly: elements are dereferenced via getRefersTo() and handed to ObjectPath.
private List<PdfIndirectReference> outPagesRef;
private List<PdfIndirectReference> cmpPagesRef;

// ---- Options ----
// Passed to every CompareResult created by this tool.
private int compareByContentErrorsLimit = 1000;
private boolean generateCompareByContentXmlReport = false;
private boolean encryptionCompareEnabled = false;
// Cleared by disableCachedPagesComparison().
private boolean useCachedPagesForComparison = true;
private IMetaInfo metaInfo;

// Set only through the package-private constructor; presumably the Ghostscript and ImageMagick
// commands mentioned in the class description. TODO confirm against their usages.
private String gsExec;
private String compareExec;
/**
 * Creates a {@link CompareTool} with the default option values declared on the fields.
 * The {@code gsExec} and {@code compareExec} fields are left unset by this constructor.
 */
public CompareTool() {
}
/**
 * Package-private constructor that stores the two executable settings explicitly.
 *
 * @param gsExec      value stored in the {@code gsExec} field
 *                    (presumably the Ghostscript command; confirm against its usages).
 * @param compareExec value stored in the {@code compareExec} field
 *                    (presumably the ImageMagick compare command; confirm against its usages).
 */
CompareTool(String gsExec, String compareExec) {
    this.compareExec = compareExec;
    this.gsExec = gsExec;
}
/**
* Compares two PDF documents by content starting from Catalog dictionary and then recursively comparing
* corresponding objects which are referenced from it. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* The main difference between this method and the {@link #compareByContent(String, String, String, String)}
* methods is the return value. This method returns a {@link CompareResult} class instance, which could be used
* in code, whilst compareByContent methods in case of the differences simply return String value, which could
* only be printed. Also, keep in mind that this method doesn't perform visual comparison of the documents.
*
* For more explanations about what outDoc and cmpDoc are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outDocument a {@link PdfDocument} corresponding to the output file, which is to be compared with cmp-file.
* @param cmpDocument a {@link PdfDocument} corresponding to the cmp-file, which is to be compared with output file.
* @return the report on comparison of two files in the form of the custom class {@link CompareResult} instance.
* @see CompareResult
*/
public CompareResult compareByCatalog(PdfDocument outDocument, PdfDocument cmpDocument) {
    final CompareResult compareResult = new CompareResult(compareByContentErrorsLimit);

    // Object paths are always built "cmp first, out second".
    final PdfDictionary cmpCatalog = cmpDocument.getCatalog().getPdfObject();
    final PdfDictionary outCatalog = outDocument.getCatalog().getPdfObject();
    final ObjectPath catalogPath = new ObjectPath(cmpCatalog.getIndirectReference(),
            outCatalog.getIndirectReference());

    // The /Metadata entry of the catalog is excluded from the comparison.
    final Set<PdfName> ignoredCatalogEntries = new LinkedHashSet<>(Arrays.asList(PdfName.Metadata));
    compareDictionariesExtended(outCatalog, cmpCatalog, catalogPath, compareResult, ignoredCatalogEntries);

    // Method compareDictionariesExtended eventually calls compareObjects method which doesn't compare page objects.
    // At least for now compare page dictionaries explicitly here like this.
    if (cmpPagesRef == null || outPagesRef == null) {
        // No page references are available on this instance, so the catalog comparison is all we can report.
        return compareResult;
    }

    if (outPagesRef.size() != cmpPagesRef.size() && !compareResult.isMessageLimitReached()) {
        compareResult.addError(catalogPath, "Documents have different numbers of pages.");
    }

    // Compare only the pages present in both documents, and stop once the error limit is reached.
    for (int i = 0; i < Math.min(cmpPagesRef.size(), outPagesRef.size()); i++) {
        if (compareResult.isMessageLimitReached()) {
            break;
        }
        ObjectPath currentPath = new ObjectPath(cmpPagesRef.get(i), outPagesRef.get(i));
        PdfDictionary outPageDict = (PdfDictionary) outPagesRef.get(i).getRefersTo();
        PdfDictionary cmpPageDict = (PdfDictionary) cmpPagesRef.get(i).getRefersTo();
        compareDictionariesExtended(outPageDict, cmpPageDict, currentPath, compareResult);
    }
    return compareResult;
}
/**
* Disables the default logic of pages comparison.
* This option makes sense only for {@link CompareTool#compareByCatalog(PdfDocument, PdfDocument)} method.
*
* By default, pages are treated as special objects and if they are met in the process of comparison, then they are
* not checked as objects, but rather simply checked that they have same page numbers in both documents.
* This behaviour is intended for the {@link CompareTool#compareByContent}
* set of methods, because in them documents are compared on a page-by-page basis. Thus, we don't need to check if pages
* are of the same content when they are met in comparison process, we are sure that we will compare their content or
* we have already compared them.
*
* However, if you would use {@link CompareTool#compareByCatalog} with default behaviour
* of pages comparison, pages won't be checked at all, every time when reference to the page dictionary is met,
* only page numbers will be compared for both documents. You can say that in this case, comparison will be performed
* for all document's catalog entries except /Pages (However in fact, document's page tree structures will be compared,
* but pages themselves - won't).
*
* @return this {@link CompareTool} instance.
*/
public CompareTool disableCachedPagesComparison() {
    // Switch off the special "cached pages" handling; the flag is consulted elsewhere in this class.
    useCachedPagesForComparison = false;
    return this;
}
/**
* Sets the maximum errors count which will be returned as the result of the comparison.
*
* @param compareByContentMaxErrorCount the errors count.
* @return this CompareTool instance.
*/
public CompareTool setCompareByContentErrorsLimit(int compareByContentMaxErrorCount) {
    // The limit is handed to every CompareResult created afterwards; the value is stored as given.
    compareByContentErrorsLimit = compareByContentMaxErrorCount;
    return this;
}
/**
* Enables or disables the generation of the comparison report in the form of an xml document.
*
* IMPORTANT NOTE: this flag affects only the comparison performed by compareByContent methods!
*
* @param generateCompareByContentXmlReport true to enable xml report generation, false - to disable.
* @return this CompareTool instance.
*/
public CompareTool setGenerateCompareByContentXmlReport(boolean generateCompareByContentXmlReport) {
// Only the flag is stored here; the report itself is produced elsewhere in this class.
this.generateCompareByContentXmlReport = generateCompareByContentXmlReport;
// Returned to allow call chaining.
return this;
}
/**
* Sets {@link IMetaInfo} info that will be used for both read and written documents creation.
*
* @param metaInfo meta info to set
*/
public void setEventCountingMetaInfo(IMetaInfo metaInfo) {
// Stored as given (may be null); unlike the other configuration methods this one returns nothing.
this.metaInfo = metaInfo;
}
/**
* Enables the comparison of the encryption properties of the documents. Encryption properties comparison
* results are returned along with all other comparison results.
*
* IMPORTANT NOTE: this flag affects only the comparison performed by compareByContent methods!
* {@link #compareByCatalog(PdfDocument, PdfDocument)} doesn't compare encryption properties
* because encryption properties aren't part of the document's Catalog.
*
* @return this CompareTool instance.
*/
public CompareTool enableEncryptionCompare() {
    // One-way switch: this class shows no counterpart that turns the flag back off.
    encryptionCompareEnabled = true;
    return this;
}
/**
* Gets {@link ReaderProperties} to be passed later to the {@link PdfReader} of the output document.
*
* Documents for comparison are opened in reader mode. This method is intended to alter {@link ReaderProperties}
* which are used to open the output document. This is particularly useful for comparison of encrypted documents.
*
* For more explanations about what outDoc and cmpDoc are see last paragraph of the {@link CompareTool}
* class description.
*
* @return {@link ReaderProperties} instance to be passed later to the {@link PdfReader} of the output document.
*/
public ReaderProperties getOutReaderProperties() {
    // Lazily create the properties on first access so callers always get a non-null, mutable instance.
    ReaderProperties props = outProps;
    if (props == null) {
        props = new ReaderProperties();
        outProps = props;
    }
    return props;
}
/**
* Gets {@link ReaderProperties} to be passed later to the {@link PdfReader} of the cmp document.
*
* Documents for comparison are opened in reader mode. This method is intended to alter {@link ReaderProperties}
* which are used to open the cmp document. This is particularly useful for comparison of encrypted documents.
*
* For more explanations about what outDoc and cmpDoc are see last paragraph of the {@link CompareTool}
* class description.
*
* @return {@link ReaderProperties} instance to be passed later to the {@link PdfReader} of the cmp document.
*/
public ReaderProperties getCmpReaderProperties() {
    // Lazily create the properties on first access so callers always get a non-null, mutable instance.
    ReaderProperties props = cmpProps;
    if (props == null) {
        props = new ReaderProperties();
        cmpProps = props;
    }
    return props;
}
/**
* Compares two documents visually. For the comparison two external tools are used: Ghostscript and ImageMagick.
* For more info about needed configuration for visual comparison process see {@link CompareTool} class description.
*
* During comparison for every page of the two documents an image file will be created in the folder specified by
* outPath parameter. Then those page images will be compared and if there are any differences for some pages,
* another image file will be created with marked differences on it.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked differences if there are any.
* @return string containing list of the pages that are visually different, or null if there are no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and
* an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
*/
public String compareVisually(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix)
        throws InterruptedException, IOException {
    // Delegate to the five-argument overload with no ignored areas.
    return compareVisually(outPdf, cmpPdf, outPath, differenceImagePrefix, null);
}
/**
* Compares two documents visually. For the comparison two external tools are used: Ghostscript and ImageMagick.
* For more info about needed configuration for visual comparison process see {@link CompareTool} class description.
*
* During comparison for every page of two documents an image file will be created in the folder specified by
* outPath parameter. Then those page images will be compared and if there are any differences for some pages,
* another image file will be created with marked differences on it.
*
* It is possible to ignore certain areas of the document pages during visual comparison. This is useful for example
* in case if documents should be the same except certain page area with date on it. In this case, in the folder
* specified by the outPath, new pdf documents will be created with the black rectangles at the specified ignored
* areas, and visual comparison will be performed on these new documents.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked differences if there are any.
* @param ignoredAreas a map with one-based page numbers as keys and lists of ignored rectangles as values.
* @return string containing list of the pages that are visually different, or null if there are no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and
* an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
*/
public String compareVisually(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
init(outPdf, cmpPdf);
System.out.println("Out pdf: " + UrlUtil.getNormalizedFileUriString(outPdf));
System.out.println("Cmp pdf: " + UrlUtil.getNormalizedFileUriString(cmpPdf)+ "\n");
return compareVisually(outPath, differenceImagePrefix, ignoredAreas);
}
/**
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
* For this overload, differenceImagePrefix value is generated using diff_%outPdfFileName%_ format.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath)
        throws InterruptedException, IOException {
    // No custom difference-image prefix, no ignored areas and no passwords.
    return compareByContent(outPdf, cmpPdf, outPath, null, null, null, null);
}
/**
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix)
        throws InterruptedException, IOException {
    // No ignored areas and no passwords.
    return compareByContent(outPdf, cmpPdf, outPath, differenceImagePrefix, null, null, null);
}
/**
* This method overload is used to compare two encrypted PDF documents. Document passwords are passed with
* outPass and cmpPass parameters.
*
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
* For more info see {@link #compareVisually(String, String, String, String)}.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @param outPass password for the encrypted document specified by the outPdf absolute path.
* @param cmpPass password for the encrypted document specified by the cmpPdf absolute path.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix,
        byte[] outPass, byte[] cmpPass) throws InterruptedException, IOException {
    // Passwords are forwarded as given; no ignored areas are used for this overload.
    return compareByContent(outPdf, cmpPdf, outPath, differenceImagePrefix, null, outPass, cmpPass);
}
/**
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @param ignoredAreas a map with one-based page numbers as keys and lists of ignored rectangles as values.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
return compareByContent(outPdf, cmpPdf, outPath, differenceImagePrefix, ignoredAreas, null, null);
}
/**
* This method overload is used to compare two encrypted PDF documents. Document passwords are passed with
* outPass and cmpPass parameters.
*
* Compares two PDF documents by content starting from page dictionaries and then recursively comparing
* corresponding objects which are referenced from them. You can roughly imagine it as depth-first traversal
* of the two trees that represent pdf objects structure of the documents.
*
* When comparison by content is finished, if any differences were found, visual comparison is automatically started.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outPdf the absolute path to the output file, which is to be compared to cmp-file.
* @param cmpPdf the absolute path to the cmp-file, which is to be compared to output file.
* @param outPath the absolute path to the folder, which will be used to store image files for visual comparison.
* @param differenceImagePrefix file name prefix for image files with marked visual differences if there are any;
* if it's set to null the prefix defaults to diff_%outPdfFileName%_ format.
* @param ignoredAreas a map with one-based page numbers as keys and lists of ignored rectangles as values.
* @param outPass password for the encrypted document specified by the outPdf absolute path.
* @param cmpPass password for the encrypted document specified by the cmpPdf absolute path.
* @return string containing text report on the encountered content differences and also list of the pages that are
* visually different, or null if there are no content and therefore no visual differences.
* @throws InterruptedException if the current thread is interrupted by another thread while it is waiting
* for Ghostscript or ImageMagick processes, then the wait is ended and an {@link InterruptedException} is thrown.
* @throws IOException is thrown if any of the input files are missing or any of the auxiliary files
* that are created during comparison process weren't possible to be created.
* @see #compareVisually(String, String, String, String)
*/
public String compareByContent(String outPdf, String cmpPdf, String outPath, String differenceImagePrefix, Map> ignoredAreas, byte[] outPass, byte[] cmpPass) throws InterruptedException, IOException {
init(outPdf, cmpPdf);
System.out.println("Out pdf: " + UrlUtil.getNormalizedFileUriString(outPdf));
System.out.println("Cmp pdf: " + UrlUtil.getNormalizedFileUriString(cmpPdf)+ "\n");
setPassword(outPass, cmpPass);
return compareByContent(outPath, differenceImagePrefix, ignoredAreas);
}
/**
* Simple method that compares two given PdfDictionaries by content. This is "deep" comparing, which means that all
* nested objects are also compared by content.
*
* @param outDict dictionary to compare.
* @param cmpDict dictionary to compare.
* @return true if dictionaries are equal by content, otherwise false.
*/
public boolean compareDictionaries(PdfDictionary outDict, PdfDictionary cmpDict) {
    // Neither an object path nor a result collector is supplied: only the boolean verdict is of interest.
    final boolean equalByContent = compareDictionariesExtended(outDict, cmpDict, null, null);
    return equalByContent;
}
/**
* Recursively compares structures of two corresponding dictionaries from out and cmp PDF documents. You can roughly
* imagine it as depth-first traversal of the two trees that represent pdf objects structure of the documents.
*
* Both out and cmp {@link PdfDictionary} shall have indirect references.
*
* By default page dictionaries are excluded from the comparison when met and are instead compared in a special manner,
* simply comparing their page numbers. This behavior can be disabled by calling {@link #disableCachedPagesComparison()}.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outDict an indirect {@link PdfDictionary} from the output file, which is to be compared to cmp-file dictionary.
* @param cmpDict an indirect {@link PdfDictionary} from the cmp-file, which is to be compared to output file dictionary.
* @return {@link CompareResult} instance containing differences between the two dictionaries,
* or {@code null} if dictionaries are equal.
*/
public CompareResult compareDictionariesStructure(PdfDictionary outDict, PdfDictionary cmpDict) {
    // Same comparison as the three-argument overload, without any excluded keys.
    final CompareResult differences = compareDictionariesStructure(outDict, cmpDict, null);
    return differences;
}
/**
* Recursively compares structures of two corresponding dictionaries from out and cmp PDF documents. You can roughly
* imagine it as depth-first traversal of the two trees that represent pdf objects structure of the documents.
*
* Both out and cmp {@link PdfDictionary} shall have indirect references.
*
* By default page dictionaries are excluded from the comparison when met and are instead compared in a special manner,
* simply comparing their page numbers. This behavior can be disabled by calling {@link #disableCachedPagesComparison()}.
*
* For more explanations about what outPdf and cmpPdf are see last paragraph of the {@link CompareTool}
* class description.
*
* @param outDict an indirect {@link PdfDictionary} from the output file, which is to be compared to cmp-file dictionary.
* @param cmpDict an indirect {@link PdfDictionary} from the cmp-file, which is to be compared to output file dictionary.
* @param excludedKeys a {@link Set} of names that designate entries from {@code outDict} and {@code cmpDict} dictionaries
* which are to be skipped during comparison.
* @return {@link CompareResult} instance containing differences between the two dictionaries,
* or {@code null} if dictionaries are equal.
*/
public CompareResult compareDictionariesStructure(PdfDictionary outDict, PdfDictionary cmpDict, Set excludedKeys) {
if (outDict.getIndirectReference() == null || cmpDict.getIndirectReference() == null) {
throw new IllegalArgumentException("The 'outDict' and 'cmpDict' objects shall have indirect references.");
}
CompareResult compareResult = new CompareResult(compareByContentErrorsLimit);
final ObjectPath currentPath = new ObjectPath(cmpDict.getIndirectReference(), outDict.getIndirectReference());
if (!compareDictionariesExtended(outDict, cmpDict, currentPath, compareResult, excludedKeys)) {
assert !compareResult.isOk();
System.out.println(compareResult.getReport());
return compareResult;
}
assert compareResult.isOk();
return null;
}
/**
 * Compares the structure of two corresponding streams of the out and cmp PDF documents. Conceptually this
 * is a depth-first traversal of the two object trees.
 *
 * See the last paragraph of the {@link CompareTool} class description for the meaning of outPdf and cmpPdf.
 *
 * When differences are found, the textual report is also printed to {@code System.out}.
 *
 * @param outStream a {@link PdfStream} from the output file.
 * @param cmpStream a {@link PdfStream} from the cmp-file.
 * @return a {@link CompareResult} describing the differences, or {@code null} if the streams are equal.
 */
public CompareResult compareStreamsStructure(PdfStream outStream, PdfStream cmpStream) {
    final CompareResult differences = new CompareResult(compareByContentErrorsLimit);
    final ObjectPath rootPath =
            new ObjectPath(cmpStream.getIndirectReference(), outStream.getIndirectReference());
    final boolean streamsMatch = compareStreamsExtended(outStream, cmpStream, rootPath, differences);

    if (streamsMatch) {
        assert differences.isOk();
        return null;
    }

    assert !differences.isOk();
    System.out.println(differences.getReport());
    return differences;
}
/**
 * Deep, content-based comparison of two {@link PdfStream} objects: nested objects are compared by
 * content as well. No object path is tracked and no error report is collected.
 *
 * @param outStream stream to compare.
 * @param cmpStream stream to compare.
 * @return {@code true} if the streams are equal by content, {@code false} otherwise.
 */
public boolean compareStreams(PdfStream outStream, PdfStream cmpStream) {
    final boolean equalByContent = compareStreamsExtended(outStream, cmpStream, null, null);
    return equalByContent;
}
/**
 * Deep, content-based comparison of two {@link PdfArray} objects: nested objects are compared by
 * content as well. No object path is tracked and no error report is collected.
 *
 * @param outArray array to compare.
 * @param cmpArray array to compare.
 * @return {@code true} if the arrays are equal by content, {@code false} otherwise.
 */
public boolean compareArrays(PdfArray outArray, PdfArray cmpArray) {
    final boolean equalByContent = compareArraysExtended(outArray, cmpArray, null, null);
    return equalByContent;
}
/**
 * Compares two {@link PdfName} objects using {@code cmpName.equals(outName)}.
 *
 * @param outName name to compare.
 * @param cmpName name to compare; must not be {@code null}.
 * @return {@code true} if the names are equal, {@code false} otherwise.
 */
public boolean compareNames(PdfName outName, PdfName cmpName) {
    final boolean sameName = cmpName.equals(outName);
    return sameName;
}
/**
 * Compares the values of two {@link PdfNumber} objects with the {@code ==} operator (no tolerance).
 *
 * @param outNumber number to compare.
 * @param cmpNumber number to compare.
 * @return {@code true} if the numbers are equal, {@code false} otherwise.
 */
public boolean compareNumbers(PdfNumber outNumber, PdfNumber cmpNumber) {
    final boolean sameValue = cmpNumber.getValue() == outNumber.getValue();
    return sameValue;
}
/**
 * Compares two {@link PdfString} objects by the equality of their {@code getValue()} results.
 *
 * @param outString string to compare.
 * @param cmpString string to compare.
 * @return {@code true} if the strings are equal, {@code false} otherwise.
 */
public boolean compareStrings(PdfString outString, PdfString cmpString) {
    final boolean sameText = cmpString.getValue().equals(outString.getValue());
    return sameText;
}
/**
 * Compares the values of two {@link PdfBoolean} objects.
 *
 * @param outBoolean boolean to compare.
 * @param cmpBoolean boolean to compare.
 * @return {@code true} if the booleans are equal, {@code false} otherwise.
 */
public boolean compareBooleans(PdfBoolean outBoolean, PdfBoolean cmpBoolean) {
    final boolean sameValue = cmpBoolean.getValue() == outBoolean.getValue();
    return sameValue;
}
/**
 * Compares the XMP metadata of two PDF documents without ignoring any properties.
 *
 * @param outPdf the absolute path to the output file whose XMP is compared to the cmp-file.
 * @param cmpPdf the absolute path to the cmp-file whose XMP is compared to the output file.
 * @return a text report on the XMP differences, or {@code null} if there are none.
 */
public String compareXmp(String outPdf, String cmpPdf) {
    final String report = compareXmp(outPdf, cmpPdf, false);
    return report;
}
/**
 * Compares the XMP metadata of two PDF documents.
 *
 * Any exception raised while reading, parsing or comparing the packets is turned into the
 * {@code "XMP parsing failure!"} report instead of being propagated.
 *
 * @param outPdf                          the absolute path to the output file whose XMP is compared to
 *                                        the cmp-file.
 * @param cmpPdf                          the absolute path to the cmp-file whose XMP is compared to the
 *                                        output file.
 * @param ignoreDateAndProducerProperties {@code true} to strip the create/modify/metadata date and the
 *                                        producer properties from both packets before comparing.
 * @return a text report on the XMP differences, or {@code null} if there are none.
 */
public String compareXmp(String outPdf, String cmpPdf, boolean ignoreDateAndProducerProperties) {
    init(outPdf, cmpPdf);
    try (PdfReader cmpReader = new PdfReader(this.cmpPdf);
            PdfDocument cmpDoc = new PdfDocument(cmpReader,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo));
            PdfReader outReader = new PdfReader(this.outPdf);
            PdfDocument outDoc = new PdfDocument(outReader,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
        // Index 0 holds the cmp packet, index 1 the out packet.
        final byte[][] xmpPackets = {cmpDoc.getXmpMetadata(), outDoc.getXmpMetadata()};

        if (ignoreDateAndProducerProperties) {
            // Same normalisation for both packets: parse, drop volatile properties, re-serialize sorted.
            for (int i = 0; i < xmpPackets.length; i++) {
                final XMPMeta parsed = XMPMetaFactory.parseFromBuffer(xmpPackets[i],
                        new ParseOptions().setOmitNormalization(true));
                XMPUtils.removeProperties(parsed, XMPConst.NS_XMP, PdfConst.CreateDate, true, true);
                XMPUtils.removeProperties(parsed, XMPConst.NS_XMP, PdfConst.ModifyDate, true, true);
                XMPUtils.removeProperties(parsed, XMPConst.NS_XMP, PdfConst.MetadataDate, true, true);
                XMPUtils.removeProperties(parsed, XMPConst.NS_PDF, PdfConst.Producer, true, true);
                xmpPackets[i] = XMPMetaFactory.serializeToBuffer(parsed,
                        new SerializeOptions(SerializeOptions.SORT));
            }
        }

        final boolean samePackets = compareXmls(xmpPackets[0], xmpPackets[1]);
        if (!samePackets) {
            return "The XMP packages different!";
        }
    } catch (Exception failure) {
        return "XMP parsing failure!";
    }
    return null;
}
/**
 * Compares two XML documents held in byte arrays by wrapping each array in a
 * {@link ByteArrayInputStream} and delegating to {@code XmlUtils.compareXmls}.
 *
 * @param xml1 first xml file data to compare.
 * @param xml2 second xml file data to compare.
 * @return {@code true} if the xml structures are identical, {@code false} otherwise.
 * @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
 *                                      which satisfies the configuration requested.
 * @throws SAXException                 if any XML parse errors occur.
 * @throws IOException                  if any IO errors occur during reading XML files.
 */
public boolean compareXmls(byte[] xml1, byte[] xml2) throws ParserConfigurationException, SAXException, IOException {
    final ByteArrayInputStream firstXml = new ByteArrayInputStream(xml1);
    final ByteArrayInputStream secondXml = new ByteArrayInputStream(xml2);
    return XmlUtils.compareXmls(firstXml, secondXml);
}
/**
 * Compares two XML files given by path. The normalized file URIs of both files are printed to
 * {@code System.out} before the comparison is delegated to {@code XmlUtils.compareXmls}.
 *
 * @param outXmlFile absolute path to the out xml file to compare.
 * @param cmpXmlFile absolute path to the cmp xml file to compare.
 * @return {@code true} if the xml structures are identical, {@code false} otherwise.
 * @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
 *                                      which satisfies the configuration requested.
 * @throws SAXException                 if any XML parse errors occur.
 * @throws IOException                  if any IO errors occur during reading XML files.
 */
public boolean compareXmls(String outXmlFile, String cmpXmlFile) throws ParserConfigurationException, SAXException, IOException {
    final String outUri = UrlUtil.getNormalizedFileUriString(outXmlFile);
    System.out.println("Out xml: " + outUri);
    final String cmpUri = UrlUtil.getNormalizedFileUriString(cmpXmlFile);
    System.out.println("Cmp xml: " + cmpUri + "\n");

    try (InputStream outXml = FileUtil.getInputStreamForFile(outXmlFile);
            InputStream cmpXml = FileUtil.getInputStreamForFile(cmpXmlFile)) {
        final boolean sameStructure = XmlUtils.compareXmls(outXml, cmpXml);
        return sameStructure;
    }
}
/**
 * Compares the document info dictionaries of two, possibly encrypted, PDF documents. The passwords are
 * supplied through {@code outPass} and {@code cmpPass}.
 *
 * The converted info entries are compared position by position and the comparison stops at the first
 * mismatch. Progress ("OK" / "Fail") is printed to {@code System.out}.
 *
 * @param outPdf  the absolute path to the output file whose info is compared to the cmp-file info.
 * @param cmpPdf  the absolute path to the cmp-file whose info is compared to the output file info.
 * @param outPass password for the encrypted document specified by the outPdf absolute path.
 * @param cmpPass password for the encrypted document specified by the cmpPdf absolute path.
 * @return a text report on the first difference found, or {@code null} if the infos match.
 * @throws IOException if PDF reader cannot be created due to IO issues
 */
public String compareDocumentInfo(String outPdf, String cmpPdf, byte[] outPass, byte[] cmpPass) throws IOException {
    System.out.print("[itext] INFO Comparing document info.......");
    String failure = null;
    setPassword(outPass, cmpPass);
    try (PdfReader outReader = new PdfReader(outPdf, getOutReaderProperties());
            PdfDocument outDoc = new PdfDocument(outReader,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo));
            PdfReader cmpReader = new PdfReader(cmpPdf, getCmpReaderProperties());
            PdfDocument cmpDoc = new PdfDocument(cmpReader,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
        final String[] expected = convertInfo(cmpDoc.getDocumentInfo());
        final String[] actual = convertInfo(outDoc.getDocumentInfo());
        int idx = 0;
        while (failure == null && idx < expected.length) {
            if (!expected[idx].equals(actual[idx])) {
                failure = MessageFormatUtil.format("Document info fail. Expected: \"{0}\", actual: \"{1}\"", expected[idx], actual[idx]);
            }
            idx++;
        }
    }
    System.out.println(failure == null ? "OK" : "Fail");
    System.out.flush();
    return failure;
}
/**
 * Compares the document info dictionaries of two PDF documents, passing no passwords.
 *
 * @param outPdf the absolute path to the output file whose info is compared to the cmp-file info.
 * @param cmpPdf the absolute path to the cmp-file whose info is compared to the output file info.
 * @return a text report on the differences in the document infos.
 * @throws IOException if PDF reader cannot be created due to IO issues
 */
public String compareDocumentInfo(String outPdf, String cmpPdf) throws IOException {
    final String report = compareDocumentInfo(outPdf, cmpPdf, null, null);
    return report;
}
/**
 * Checks if two documents have identical link annotations on corresponding pages.
 *
 * Only the pages present in both documents are inspected. The comparison stops at the first page that
 * differs, so the returned message always describes the first difference found. Progress
 * ("OK" / "Fail") is printed to {@code System.out}.
 *
 * @param outPdf the absolute path to the output file, which links are to be compared to cmp-file links.
 * @param cmpPdf the absolute path to the cmp-file, which links are to be compared to output file links.
 * @return text report on the differences in documents links, or {@code null} if none were found.
 * @throws IOException if PDF reader cannot be created due to IO issues
 */
public String compareLinkAnnotations(String outPdf, String cmpPdf) throws IOException {
    System.out.print("[itext] INFO Comparing link annotations....");
    String message = null;
    try (PdfReader readerOut = new PdfReader(outPdf);
            PdfDocument outDocument = new PdfDocument(readerOut,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo));
            PdfReader readerCmp = new PdfReader(cmpPdf);
            PdfDocument cmpDocument = new PdfDocument(readerCmp,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
        // "message == null" stops the page loop as soon as any difference has been recorded; previously
        // a mismatch inside the inner loop only left that loop and later pages could overwrite the report.
        for (int i = 0; message == null
                && i < outDocument.getNumberOfPages() && i < cmpDocument.getNumberOfPages(); i++) {
            List outLinks = getLinkAnnotations(i + 1, outDocument);
            List cmpLinks = getLinkAnnotations(i + 1, cmpDocument);
            if (cmpLinks.size() != outLinks.size()) {
                message = MessageFormatUtil.format("Different number of links on page {0}.", i + 1);
                break;
            }
            for (int j = 0; j < cmpLinks.size(); j++) {
                if (!compareLinkAnnotations(cmpLinks.get(j), outLinks.get(j), cmpDocument, outDocument)) {
                    message = MessageFormatUtil.format("Different links on page {0}.\n{1}\n{2}", i + 1, cmpLinks.get(j).toString(), outLinks.get(j).toString());
                    break;
                }
            }
        }
    }
    if (message == null) {
        System.out.println("OK");
    } else {
        System.out.println("Fail");
    }
    System.out.flush();
    return message;
}
/**
 * Compares the tag structures of two PDF documents.
 *
 * Both tag structures are converted to xml and written next to the outPdf file: the output document to
 * the outPdf path with ".pdf" replaced by ".xml", the cmp document to the outPdf path with ".pdf"
 * replaced by ".cmp.xml". The two xml files are then compared. Progress ("OK" / "Fail") is printed to
 * {@code System.out}.
 *
 * @param outPdf the absolute path to the output file, which tags are to be compared to cmp-file tags.
 * @param cmpPdf the absolute path to the cmp-file, which tags are to be compared to output file tags.
 * @return text report of the differences in documents tags, or {@code null} if the structures match.
 * @throws IOException                  is thrown if any of the input files are missing or any of the
 *                                      auxiliary files that are created during comparison process
 *                                      weren't possible to be created.
 * @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
 *                                      which satisfies the configuration requested.
 * @throws SAXException                 if any XML parse errors occur.
 */
public String compareTagStructures(String outPdf, String cmpPdf) throws IOException, ParserConfigurationException, SAXException {
    System.out.print("[itext] INFO Comparing tag structures......");
    // Both auxiliary xml files live beside the output pdf, hence both paths derive from outPdf.
    final String outXmlPath = outPdf.replace(".pdf", ".xml");
    final String cmpXmlPath = outPdf.replace(".pdf", ".cmp.xml");

    try (PdfReader outReader = new PdfReader(outPdf);
            PdfDocument outDoc = new PdfDocument(outReader,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo));
            FileOutputStream outXml = new FileOutputStream(outXmlPath)) {
        new TaggedPdfReaderTool(outDoc).setRootTag("root").convertToXml(outXml);
    }
    try (PdfReader cmpReader = new PdfReader(cmpPdf);
            PdfDocument cmpDoc = new PdfDocument(cmpReader,
                    new DocumentProperties().setEventCountingMetaInfo(metaInfo));
            FileOutputStream cmpXml = new FileOutputStream(cmpXmlPath)) {
        new TaggedPdfReaderTool(cmpDoc).setRootTag("root").convertToXml(cmpXml);
    }

    final boolean sameStructure = compareXmls(outXmlPath, cmpXmlPath);
    final String message = sameStructure ? null : "The tag structures are different.";
    System.out.println(sameStructure ? "OK" : "Fail");
    System.out.flush();
    return message;
}
/**
 * Flattens a document info object into a fixed five-element array:
 * title, author, subject, keywords and the normalized producer line (in that order).
 * Entries that are absent ({@code null}) are represented by an empty string.
 *
 * @param info the document info to convert.
 * @return a five-element array without {@code null} entries.
 */
String[] convertInfo(PdfDocumentInfo info) {
    final String[] converted = new String[]{"", "", "", "", ""};

    // The first four slots are copied verbatim when present.
    final String[] plainValues = {info.getTitle(), info.getAuthor(), info.getSubject(), info.getKeywords()};
    for (int slot = 0; slot < plainValues.length; slot++) {
        if (plainValues[slot] != null) {
            converted[slot] = plainValues[slot];
        }
    }

    // The producer line is normalized before it is stored.
    final String producer = info.getProducer();
    if (producer != null) {
        converted[4] = convertProducerLine(producer);
    }
    return converted;
}
/**
 * Normalizes a producer line by applying the version replacement first and the copyright
 * replacement second.
 *
 * @param producer the raw producer line; must not be {@code null}.
 * @return the producer line with both regular-expression replacements applied.
 */
String convertProducerLine(String producer) {
    final String versionNormalized = producer.replaceAll(VERSION_REGEXP, VERSION_REPLACEMENT);
    return versionNormalized.replaceAll(COPYRIGHT_REGEXP, COPYRIGHT_REPLACEMENT);
}
/**
 * Stores the paths of the two documents and derives the file and image names from them.
 * The cmp image name always starts with "cmp_": the prefix is added unless the cmp file name
 * already carries it.
 *
 * @param outPdf path to the output document.
 * @param cmpPdf path to the cmp document.
 */
private void init(String outPdf, String cmpPdf) {
    this.outPdf = outPdf;
    this.cmpPdf = cmpPdf;

    outPdfName = new File(outPdf).getName();
    cmpPdfName = new File(cmpPdf).getName();

    outImage = outPdfName;
    cmpImage = cmpPdfName.startsWith("cmp_") ? cmpPdfName : "cmp_" + cmpPdfName;
}
/**
 * Registers the passwords used to open the out and cmp documents. A {@code null} password leaves the
 * corresponding reader properties untouched.
 *
 * @param outPass password for the output document, or {@code null}.
 * @param cmpPass password for the cmp document, or {@code null}.
 */
private void setPassword(byte[] outPass, byte[] cmpPass) {
    if (outPass != null) {
        getOutReaderProperties().setPassword(outPass);
    }
    if (cmpPass != null) {
        // The cmp reader must receive the cmp password; it was previously handed outPass.
        getCmpReaderProperties().setPassword(cmpPass);
    }
}
/**
 * Convenience overload of the visual comparison: delegates to the four-argument variant and passes
 * {@code null} as the list of pages that are already known to be equal.
 *
 * @param outPath               folder that receives the generated images.
 * @param differenceImagePrefix file name prefix for difference images; may be {@code null}.
 * @param ignoredAreas          areas to be masked before comparison; may be {@code null}.
 * @return the error message produced by the four-argument variant, or {@code null}.
 */
private String compareVisually(String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
return compareVisually(outPath, differenceImagePrefix, ignoredAreas, null);
}
/**
 * Renders both documents to images with Ghostscript and compares the images page by page.
 *
 * @param outPath               folder that receives the generated images; a trailing "/" is appended
 *                              when missing.
 * @param differenceImagePrefix file name prefix for difference images; when {@code null} it defaults to
 *                              "diff_" followed by the out file name and "_".
 * @param ignoredAreas          areas to be masked before comparison; ignored when {@code null} or empty.
 * @param equalPages            zero-based indices of pages to skip during image comparison;
 *                              may be {@code null}.
 * @return the message returned by {@code compareImagesOfPdfs}, or {@code null} if no difference was found.
 * @throws CompareToolExecutionException if the Ghostscript helper cannot be created.
 */
private String compareVisually(String outPath, String differenceImagePrefix, Map> ignoredAreas, List equalPages) throws IOException, InterruptedException {
if (!outPath.endsWith("/")) {
outPath = outPath + "/";
}
if (differenceImagePrefix == null) {
String fileBasedPrefix = "";
if (outPdfName != null) {
// should always be initialized by this moment
fileBasedPrefix = outPdfName + "_";
}
differenceImagePrefix = "diff_" + fileBasedPrefix;
}
// Create the output folder, or remove images left over from a previous run.
prepareOutputDirs(outPath, differenceImagePrefix);
System.out.println("Comparing visually..........");
if (ignoredAreas != null && !ignoredAreas.isEmpty()) {
// Writes masked copies of both documents and re-inits outPdf/cmpPdf to point at those copies,
// so the Ghostscript runs below render the masked files.
createIgnoredAreasPdfs(outPath, ignoredAreas);
}
GhostscriptHelper ghostscriptHelper = null;
try {
ghostscriptHelper = new GhostscriptHelper(gsExec);
} catch (IllegalArgumentException e) {
// Only the message is carried over to the tool-specific exception.
throw new CompareToolExecutionException(e.getMessage());
}
ghostscriptHelper.runGhostScriptImageGeneration(outPdf, outPath, outImage);
ghostscriptHelper.runGhostScriptImageGeneration(cmpPdf, outPath, cmpImage);
return compareImagesOfPdfs(outPath, differenceImagePrefix, equalPages);
}
/**
 * Compares the page images previously rendered for the out and cmp documents byte by byte and, for
 * pages that differ, asks ImageMagick (when available) to produce a difference image.
 *
 * @param outPath               folder containing the rendered images; also receives difference images.
 * @param differenceImagePrefix file name prefix for difference images.
 * @param equalPages            zero-based indices of pages to skip; may be {@code null}.
 * @return an error message if pages differ or the numbers of rendered pages differ, otherwise {@code null}.
 * @throws CompareToolExecutionException if no images are available for one of the documents.
 * @throws IOException                   if an image file cannot be read.
 * @throws InterruptedException          declared for the ImageMagick helper invocation.
 */
private String compareImagesOfPdfs(String outPath, String differenceImagePrefix, List equalPages) throws IOException, InterruptedException {
    File[] imageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new PngFileFilter(outPdfName));
    File[] cmpImageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new CmpPngFileFilter(cmpPdfName));
    boolean bUnexpectedNumberOfPages = imageFiles.length != cmpImageFiles.length;
    int cnt = Math.min(imageFiles.length, cmpImageFiles.length);
    if (cnt < 1) {
        throw new CompareToolExecutionException(
                "No files for comparing. The result or sample pdf file is not processed by GhostScript.");
    }
    Arrays.sort(imageFiles, new ImageNameComparator());
    Arrays.sort(cmpImageFiles, new ImageNameComparator());

    // ImageMagick is optional: without it pages are still compared, only difference images are skipped.
    boolean compareExecIsOk;
    String imageMagickInitError = null;
    ImageMagickHelper imageMagickHelper = null;
    try {
        imageMagickHelper = new ImageMagickHelper(compareExec);
        compareExecIsOk = true;
    } catch (IllegalArgumentException e) {
        compareExecIsOk = false;
        imageMagickInitError = e.getMessage();
        LoggerFactory.getLogger(CompareTool.class).warn(e.getMessage());
    }

    List diffPages = new ArrayList<>();
    String differentPagesFail = null;
    for (int i = 0; i < cnt; i++) {
        if (equalPages != null && equalPages.contains(i)) {
            continue;
        }
        // Announce the page once (the line used to be printed twice).
        System.out.println("Comparing page " + Integer.toString(i + 1) + ": " + UrlUtil.getNormalizedFileUriString(imageFiles[i].getName()) + " ...");
        boolean cmpResult;
        // try-with-resources closes both streams even when opening the second one or the comparison fails.
        try (FileInputStream is1 = new FileInputStream(imageFiles[i].getAbsolutePath());
                FileInputStream is2 = new FileInputStream(cmpImageFiles[i].getAbsolutePath())) {
            cmpResult = compareStreams(is1, is2);
        }
        if (!cmpResult) {
            differentPagesFail = "Page is different!";
            diffPages.add(i + 1);
            if (compareExecIsOk) {
                String diffName = outPath + differenceImagePrefix + Integer.toString(i + 1) + ".png";
                if (!imageMagickHelper.runImageMagickImageCompare(imageFiles[i].getAbsolutePath(),
                        cmpImageFiles[i].getAbsolutePath(), diffName)) {
                    File diffFile = new File(diffName);
                    differentPagesFail += "\nPlease, examine " + FILE_PROTOCOL
                            + UrlUtil.toNormalizedURI(diffFile).getPath() + " for more details.";
                }
            }
            System.out.println(differentPagesFail);
        } else {
            System.out.println(" done.");
        }
    }
    if (differentPagesFail != null) {
        // NOTE(review): the empty placeholder literals below look like extraction damage of the
        // original placeholder tokens - confirm against DIFFERENT_PAGES before relying on the output.
        String errorMessage = DIFFERENT_PAGES.replace("", UrlUtil.toNormalizedURI(outPdf).getPath()).replace("", listDiffPagesAsString(diffPages));
        if (!compareExecIsOk) {
            errorMessage += "\n" + imageMagickInitError;
        }
        return errorMessage;
    } else {
        if (bUnexpectedNumberOfPages) {
            // NOTE(review): same remark about the empty placeholder literal as above.
            return UNEXPECTED_NUMBER_OF_PAGES.replace("", outPdf);
        }
    }
    return null;
}
/**
 * Renders the given page numbers as a bracketed, comma-separated list, e.g. {@code [1, 3, 10]}.
 * An empty list yields {@code []}.
 *
 * @param diffPages the page numbers to render.
 * @return the textual list.
 */
private String listDiffPagesAsString(List diffPages) {
    final StringBuilder text = new StringBuilder("[");
    String separator = "";
    for (Object page : diffPages) {
        text.append(separator).append(page);
        separator = ", ";
    }
    return text.append("]").toString();
}
/**
 * Writes copies of the out and cmp documents into {@code outPath} (file names prefixed with
 * {@code IGNORED_AREAS_PREFIX}) in which every ignored rectangle is painted over with a filled
 * rectangle on the same page of both documents, and then re-initializes this tool so that
 * {@code outPdf}/{@code cmpPdf} refer to those copies.
 *
 * @param outPath      folder (expected to end with a path separator, since names are concatenated
 *                     directly) that receives the masked copies.
 * @param ignoredAreas mapping whose keys are used as page numbers and whose values are the rectangles
 *                     to mask on that page; entries with a {@code null} or empty list are skipped.
 * @throws IOException if one of the documents cannot be read or written.
 */
private void createIgnoredAreasPdfs(String outPath, Map> ignoredAreas) throws IOException {
StampingProperties properties = new StampingProperties();
properties.setEventCountingMetaInfo(metaInfo);
try (PdfWriter outWriter = new PdfWriter(outPath + IGNORED_AREAS_PREFIX + outPdfName);
PdfReader readerOut = new PdfReader(outPdf);
PdfDocument pdfOutDoc = new PdfDocument(readerOut, outWriter, properties);
PdfWriter cmpWriter = new PdfWriter(outPath + IGNORED_AREAS_PREFIX + cmpPdfName);
PdfReader readerCmp = new PdfReader(cmpPdf);
PdfDocument pdfCmpDoc = new PdfDocument(readerCmp, cmpWriter, properties)) {
for (Map.Entry> entry : ignoredAreas.entrySet()) {
int pageNumber = entry.getKey();
List rectangles = entry.getValue();
if (rectangles != null && !rectangles.isEmpty()) {
PdfCanvas outCanvas = new PdfCanvas(pdfOutDoc.getPage(pageNumber));
PdfCanvas cmpCanvas = new PdfCanvas(pdfCmpDoc.getPage(pageNumber));
// Bracket the drawing with save/restore so the graphics state of the page is left unchanged.
outCanvas.saveState();
cmpCanvas.saveState();
for (Rectangle rect : rectangles) {
// The same rectangle is filled in both documents so the masked region renders identically.
outCanvas.rectangle(rect).fill();
cmpCanvas.rectangle(rect).fill();
}
outCanvas.restoreState();
cmpCanvas.restoreState();
}
}
}
// From here on the tool works with the masked copies instead of the original files.
init(outPath + IGNORED_AREAS_PREFIX + outPdfName, outPath + IGNORED_AREAS_PREFIX + cmpPdfName);
}
/**
 * Makes sure the output folder exists. When it already exists, images left over from a previous run
 * are deleted: the rendered pages of the out document, the rendered pages of the cmp document and the
 * difference images carrying the given prefix.
 *
 * @param outPath               the output folder.
 * @param differenceImagePrefix file name prefix identifying difference images.
 */
private void prepareOutputDirs(String outPath, String differenceImagePrefix) {
    if (!FileUtil.directoryExists(outPath)) {
        FileUtil.createDirectories(outPath);
        return;
    }

    // Out-document images are selected by the out file name, mirroring how compareImagesOfPdfs lists
    // them; the cmp file name was used here before, which does not select the out images.
    File[] imageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new PngFileFilter(outPdfName));
    for (File file : imageFiles) {
        file.delete();
    }
    File[] cmpImageFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new CmpPngFileFilter(cmpPdfName));
    for (File file : cmpImageFiles) {
        file.delete();
    }
    File[] diffFiles = FileUtil.listFilesInDirectoryByFilter(outPath, new DiffPngFileFilter(differenceImagePrefix));
    for (File file : diffFiles) {
        file.delete();
    }
}
/**
 * Prints the parent folders of the out and cmp files to {@code System.out} as normalized URI paths
 * prefixed with {@code FILE_PROTOCOL}.
 */
private void printOutCmpDirectories() {
    final File outFolder = new File(outPdf).getParentFile();
    System.out.println("Out file folder: " + FILE_PROTOCOL + UrlUtil.toNormalizedURI(outFolder).getPath());

    final File cmpFolder = new File(cmpPdf).getParentFile();
    System.out.println("Cmp file folder: " + FILE_PROTOCOL + UrlUtil.toNormalizedURI(cmpFolder).getPath());
}
/**
 * Compares the two documents by content: first every pair of corresponding page dictionaries, then the
 * catalogs (without the Pages and Metadata entries) and, when enabled, the encryption dictionaries.
 * If anything differs, the documents are additionally compared visually.
 *
 * @param outPath               folder for the optional xml report and for images of the visual comparison.
 * @param differenceImagePrefix file name prefix for difference images; may be {@code null}.
 * @param ignoredAreas          areas to be masked during the visual comparison; may be {@code null}.
 * @return {@code null} if the documents are equal by content, otherwise the message produced by
 *         {@code compareVisuallyAndCombineReports}.
 */
private String compareByContent(String outPath, String differenceImagePrefix, Map> ignoredAreas) throws InterruptedException, IOException {
printOutCmpDirectories();
System.out.print("Comparing by content..........");
try (PdfReader readerOut = new PdfReader(outPdf, getOutReaderProperties());
PdfDocument outDocument = new PdfDocument(readerOut,
new DocumentProperties().setEventCountingMetaInfo(metaInfo));
PdfReader readerCmp = new PdfReader(cmpPdf, getCmpReaderProperties());
PdfDocument cmpDocument = new PdfDocument(readerCmp,
new DocumentProperties().setEventCountingMetaInfo(metaInfo))) {
// Collect the page dictionaries and their indirect references of both documents.
List outPages = new ArrayList<>();
outPagesRef = new ArrayList<>();
loadPagesFromReader(outDocument, outPages, outPagesRef);
List cmpPages = new ArrayList<>();
cmpPagesRef = new ArrayList<>();
loadPagesFromReader(cmpDocument, cmpPages, cmpPagesRef);
// A differing page count is reported immediately; only the visual comparison still runs.
if (outPages.size() != cmpPages.size())
return compareVisuallyAndCombineReports("Documents have different numbers of pages.", outPath, differenceImagePrefix, ignoredAreas, null);
CompareResult compareResult = new CompareResult(compareByContentErrorsLimit);
// Zero-based indices of pages whose dictionaries compared equal; these are skipped visually later.
List equalPages = new ArrayList<>(cmpPages.size());
for (int i = 0; i < cmpPages.size(); i++) {
ObjectPath currentPath = new ObjectPath(cmpPagesRef.get(i), outPagesRef.get(i));
if (compareDictionariesExtended(outPages.get(i), cmpPages.get(i), currentPath, compareResult))
equalPages.add(i);
}
ObjectPath catalogPath = new ObjectPath(cmpDocument.getCatalog().getPdfObject().getIndirectReference(),
outDocument.getCatalog().getPdfObject().getIndirectReference());
// Pages were compared above and Metadata is excluded from the catalog comparison.
Set ignoredCatalogEntries = new LinkedHashSet<>(Arrays.asList(PdfName.Pages, PdfName.Metadata));
compareDictionariesExtended(outDocument.getCatalog().getPdfObject(), cmpDocument.getCatalog().getPdfObject(),
catalogPath, compareResult, ignoredCatalogEntries);
if (encryptionCompareEnabled) {
compareDocumentsEncryption(outDocument, cmpDocument, compareResult);
}
if (generateCompareByContentXmlReport) {
// This local is derived from the current outPdf path and shadows the outPdfName used elsewhere.
String outPdfName = new File(outPdf).getName();
// Report name: the out file name with its last three characters replaced by "report.xml".
FileOutputStream xml = new FileOutputStream(outPath + "/" + outPdfName.substring(0, outPdfName.length() - 3) + "report.xml");
try {
compareResult.writeReportToXml(xml);
} catch (Exception e) {
throw new RuntimeException(e.getMessage(), e);
} finally {
xml.close();
}
}
if (equalPages.size() == cmpPages.size() && compareResult.isOk()) {
System.out.println("OK");
System.out.flush();
return null;
} else {
return compareVisuallyAndCombineReports(compareResult.getReport(), outPath, differenceImagePrefix, ignoredAreas, equalPages);
}
}
}
/**
 * Called after the by-content comparison has failed: prints "Fail" and the by-content report to
 * {@code System.out}, then runs the visual comparison.
 *
 * @param compareByFailContentReason the by-content report or failure reason to print.
 * @param outPath                    folder for images of the visual comparison.
 * @param differenceImagePrefix      file name prefix for difference images; may be {@code null}.
 * @param ignoredAreas               areas to be masked during the visual comparison; may be {@code null}.
 * @param equalPages                 zero-based indices of pages to skip visually; may be {@code null}.
 * @return the message of the visual comparison, or "Compare by content fails. No visual differences"
 *         when the visual comparison found nothing.
 */
private String compareVisuallyAndCombineReports(String compareByFailContentReason, String outPath, String differenceImagePrefix,
Map> ignoredAreas,
List equalPages) throws IOException, InterruptedException {
System.out.println("Fail");
System.out.flush();
String compareByContentReport = "Compare by content report:\n" + compareByFailContentReason;
System.out.println(compareByContentReport);
System.out.flush();
String message = compareVisually(outPath, differenceImagePrefix, ignoredAreas, equalPages);
// The content comparison failed, so a non-null message is returned even without visual differences.
if (message == null || message.length() == 0)
return "Compare by content fails. No visual differences";
return message;
}
/**
 * Appends the dictionary of every page of the document to {@code pages} and the indirect reference of
 * that same dictionary to {@code pagesRef}, in page order.
 *
 * @param doc      the document whose pages are read.
 * @param pages    receives the page dictionaries.
 * @param pagesRef receives the indirect references of the page dictionaries.
 */
private void loadPagesFromReader(PdfDocument doc, List pages, List pagesRef) {
    final int numOfPages = doc.getNumberOfPages();
    for (int pageNum = 1; pageNum <= numOfPages; ++pageNum) {
        // Use the dictionary just obtained rather than re-reading it via pages.get(index): the index
        // lookup was only correct when 'pages' happened to be empty on entry.
        final PdfDictionary pageDict = doc.getPage(pageNum).getPdfObject();
        pages.add(pageDict);
        pagesRef.add(pageDict.getIndirectReference());
    }
}
/**
 * Compares the Encrypt dictionaries found in the trailers of the two documents and records every
 * difference in {@code compareResult}.
 *
 * The entries O, U, OE, UE, Perms, CF and Recipients are skipped in the top-level comparison. The crypt
 * filter dictionaries under CF are compared one by one afterwards, skipping their Recipients entry.
 *
 * @param outDocument   the output document.
 * @param cmpDocument   the cmp document.
 * @param compareResult collector for the differences found.
 */
private void compareDocumentsEncryption(PdfDocument outDocument, PdfDocument cmpDocument, CompareResult compareResult) {
    PdfDictionary outEncrypt = outDocument.getTrailer().getAsDictionary(PdfName.Encrypt);
    PdfDictionary cmpEncrypt = cmpDocument.getTrailer().getAsDictionary(PdfName.Encrypt);
    if (outEncrypt == null && cmpEncrypt == null) {
        return;
    }
    TrailerPath trailerPath = new TrailerPath(cmpDocument, outDocument);
    if (outEncrypt == null) {
        compareResult.addError(trailerPath, "Expected encrypted document.");
        return;
    }
    if (cmpEncrypt == null) {
        compareResult.addError(trailerPath, "Expected not encrypted document.");
        return;
    }

    Set ignoredEncryptEntries = new LinkedHashSet<>(Arrays.asList(PdfName.O, PdfName.U, PdfName.OE, PdfName.UE, PdfName.Perms, PdfName.CF, PdfName.Recipients));
    // Argument order is (cmp, out), as at every other ObjectPath construction in this class;
    // the references were previously passed the other way round here.
    ObjectPath objectPath = new ObjectPath(cmpEncrypt.getIndirectReference(), outEncrypt.getIndirectReference());
    compareDictionariesExtended(outEncrypt, cmpEncrypt, objectPath, compareResult, ignoredEncryptEntries);

    PdfDictionary outCfDict = outEncrypt.getAsDictionary(PdfName.CF);
    PdfDictionary cmpCfDict = cmpEncrypt.getAsDictionary(PdfName.CF);
    if (cmpCfDict == null && outCfDict == null) {
        return;
    }
    if (cmpCfDict == null || outCfDict == null) {
        compareResult.addError(objectPath, "One of the dictionaries is null, the other is not.");
        return;
    }

    Set mergedKeys = new TreeSet<>(outCfDict.keySet());
    mergedKeys.addAll(cmpCfDict.keySet());
    // The exclusion set is the same for every crypt filter, so it is built once.
    LinkedHashSet excludedKeys = new LinkedHashSet<>(Arrays.asList(PdfName.Recipients));
    for (PdfName key : mergedKeys) {
        objectPath.pushDictItemToPath(key);
        compareDictionariesExtended(outCfDict.getAsDictionary(key), cmpCfDict.getAsDictionary(key), objectPath, compareResult, excludedKeys);
        objectPath.pop();
    }
}
/**
 * Compares the remaining content of two input streams byte by byte.
 *
 * Each stream is read in chunks of up to 64 KiB. A chunk is always filled completely unless the end of
 * the stream is reached, so streams that deliver their data in smaller pieces (short reads) are
 * compared correctly. Neither stream is closed by this method.
 *
 * @param is1 first stream.
 * @param is2 second stream.
 * @return {@code true} if both streams yield exactly the same bytes, {@code false} otherwise.
 * @throws IOException if reading from either stream fails.
 */
private boolean compareStreams(InputStream is1, InputStream is2) throws IOException {
    byte[] buffer1 = new byte[64 * 1024];
    byte[] buffer2 = new byte[64 * 1024];
    for (; ; ) {
        int len1 = fillComparisonBuffer(is1, buffer1);
        int len2 = fillComparisonBuffer(is2, buffer2);
        if (len1 != len2) {
            return false;
        }
        // Only the bytes actually read in this round are meaningful.
        for (int i = 0; i < len1; i++) {
            if (buffer1[i] != buffer2[i]) {
                return false;
            }
        }
        // A chunk that is not full means both streams have reached their end.
        if (len1 < buffer1.length) {
            return true;
        }
    }
}

/**
 * Reads from the stream until the buffer is full or the end of the stream is reached.
 *
 * @param in     the stream to read from.
 * @param buffer the buffer to fill from index 0.
 * @return the number of bytes stored in the buffer; less than {@code buffer.length} only at end of stream.
 * @throws IOException if reading fails.
 */
private static int fillComparisonBuffer(InputStream in, byte[] buffer) throws IOException {
    int total = 0;
    while (total < buffer.length) {
        int read = in.read(buffer, total, buffer.length - total);
        if (read < 0) {
            break;
        }
        total += read;
    }
    return total;
}
/**
 * Compares two dictionaries without excluding any keys; delegates to the five-argument overload with a
 * {@code null} exclusion set.
 */
private boolean compareDictionariesExtended(PdfDictionary outDict, PdfDictionary cmpDict, ObjectPath currentPath, CompareResult compareResult) {
    final boolean dictionariesMatch = compareDictionariesExtended(outDict, cmpDict, currentPath, compareResult, null);
    return dictionariesMatch;
}
/**
 * Compares two dictionaries entry by entry over the union of their keys and records differences.
 *
 * Special handling:
 * - keys contained in {@code excludedKeys} are skipped, as are Parent, P and ModDate;
 * - Filter and Length are skipped when both dictionaries are streams;
 * - for BaseFont/FontName, when the expected name contains a '+' after its first character, only the
 *   parts of the two names starting at the '+' are compared;
 * - ParentTree and PageLabels number trees are flattened to arrays and compared in that form; if the
 *   entry is not a dictionary on both sides it is compared like any other entry.
 *
 * Errors are only recorded when both {@code compareResult} and {@code currentPath} are non-null.
 * Without them the method stops at the first difference; with them it continues until the message
 * limit of {@code compareResult} is reached.
 *
 * @param outDict       dictionary from the output document; may be {@code null}.
 * @param cmpDict       dictionary from the cmp document; may be {@code null}.
 * @param currentPath   path of the dictionaries within the documents, or {@code null}.
 * @param compareResult collector for differences, or {@code null}.
 * @param excludedKeys  keys to skip, or {@code null}.
 * @return {@code true} if the dictionaries are considered equal (including the case where both are
 *         {@code null}), {@code false} otherwise.
 */
private boolean compareDictionariesExtended(PdfDictionary outDict, PdfDictionary cmpDict, ObjectPath currentPath, CompareResult compareResult, Set excludedKeys) {
    // Two absent dictionaries are equal; this used to end in a NullPointerException below.
    if (outDict == null && cmpDict == null) {
        return true;
    }
    if (outDict == null || cmpDict == null) {
        if (compareResult != null && currentPath != null) {
            compareResult.addError(currentPath, "One of the dictionaries is null, the other is not.");
        }
        return false;
    }
    boolean dictsAreSame = true;
    // Iterate through the union of the keys of the cmp and out dictionaries
    Set mergedKeys = new TreeSet<>(cmpDict.keySet());
    mergedKeys.addAll(outDict.keySet());
    for (PdfName key : mergedKeys) {
        // Stop early once a difference is known and nothing more can (or needs to) be reported.
        if (!dictsAreSame && (currentPath == null || compareResult == null || compareResult.isMessageLimitReached())) {
            return false;
        }
        if (excludedKeys != null && excludedKeys.contains(key)) {
            continue;
        }
        if (key.equals(PdfName.Parent) || key.equals(PdfName.P) || key.equals(PdfName.ModDate)) {
            continue;
        }
        if (outDict.isStream() && cmpDict.isStream() && (key.equals(PdfName.Filter) || key.equals(PdfName.Length))) {
            continue;
        }
        if (key.equals(PdfName.BaseFont) || key.equals(PdfName.FontName)) {
            PdfObject cmpObj = cmpDict.get(key);
            if (cmpObj != null && cmpObj.isName() && cmpObj.toString().indexOf('+') > 0) {
                PdfObject outObj = outDict.get(key);
                // A missing out entry is a difference, not a crash: guard against null before use.
                boolean outHasPrefixedName = outObj != null && outObj.isName() && outObj.toString().indexOf('+') != -1;
                boolean namesMatch = false;
                if (outHasPrefixedName) {
                    String cmpName = cmpObj.toString().substring(cmpObj.toString().indexOf('+'));
                    String outName = outObj.toString().substring(outObj.toString().indexOf('+'));
                    namesMatch = cmpName.equals(outName);
                }
                if (!namesMatch) {
                    if (compareResult != null && currentPath != null) {
                        compareResult.addError(currentPath, MessageFormatUtil.format("PdfDictionary {0} entry: Expected: {1}. Found: {2}", key.toString(), cmpObj.toString(), String.valueOf(outObj)));
                    }
                    dictsAreSame = false;
                }
                continue;
            }
        }
        // A number tree can be stored in multiple, semantically equivalent ways.
        // Flatten to a single array, in order to get a canonical representation.
        if (key.equals(PdfName.ParentTree) || key.equals(PdfName.PageLabels)) {
            PdfDictionary outNumTree = outDict.getAsDictionary(key);
            PdfDictionary cmpNumTree = cmpDict.getAsDictionary(key);
            // Flatten only when both sides really hold a dictionary. Otherwise fall through to the
            // generic object comparison below, which reports missing or mistyped entries.
            if (outNumTree != null && cmpNumTree != null) {
                if (currentPath != null) {
                    currentPath.pushDictItemToPath(key);
                }
                LinkedList outItems = new LinkedList();
                LinkedList cmpItems = new LinkedList();
                PdfNumber outLeftover = flattenNumTree(outNumTree, null, outItems);
                PdfNumber cmpLeftover = flattenNumTree(cmpNumTree, null, cmpItems);
                if (outLeftover != null) {
                    LoggerFactory.getLogger(CompareTool.class).warn(IoLogMessageConstant.NUM_TREE_SHALL_NOT_END_WITH_KEY);
                    if (cmpLeftover == null) {
                        if (compareResult != null && currentPath != null) {
                            compareResult.addError(currentPath, "Number tree unexpectedly ends with a key");
                        }
                        dictsAreSame = false;
                    }
                }
                if (cmpLeftover != null) {
                    LoggerFactory.getLogger(CompareTool.class).warn(IoLogMessageConstant.NUM_TREE_SHALL_NOT_END_WITH_KEY);
                    if (outLeftover == null) {
                        if (compareResult != null && currentPath != null) {
                            compareResult.addError(currentPath, "Number tree was expected to end with a key (although it is invalid according to the specification), but ended with a value");
                        }
                        dictsAreSame = false;
                    }
                }
                if (outLeftover != null && cmpLeftover != null && !compareNumbers(outLeftover, cmpLeftover)) {
                    if (compareResult != null && currentPath != null) {
                        compareResult.addError(currentPath, "Number tree was expected to end with a different key (although it is invalid according to the specification)");
                    }
                    dictsAreSame = false;
                }
                PdfArray outArray = new PdfArray(outItems, outItems.size());
                PdfArray cmpArray = new PdfArray(cmpItems, cmpItems.size());
                if (!compareArraysExtended(outArray, cmpArray, currentPath, compareResult)) {
                    if (compareResult != null && currentPath != null) {
                        compareResult.addError(currentPath, "Number trees were flattened, compared and found to be different.");
                    }
                    dictsAreSame = false;
                }
                if (currentPath != null) {
                    currentPath.pop();
                }
                continue;
            }
        }
        if (currentPath != null) {
            currentPath.pushDictItemToPath(key);
        }
        // compareObjects is evaluated first so that every entry is visited even after a difference.
        dictsAreSame = compareObjects(outDict.get(key, false), cmpDict.get(key, false), currentPath, compareResult) && dictsAreSame;
        if (currentPath != null) {
            currentPath.pop();
        }
    }
    return dictsAreSame;
}
/**
 * Flattens a number tree node into a sequence of alternating keys and values.
 *
 * <p>
 * If the node has a {@code Nums} array, its entries are appended to {@code items} pairwise
 * (key first, then the value as stored, without resolving indirect references). Otherwise, if
 * the node has a {@code Kids} array, every kid is flattened recursively in order, with the
 * dangling key of one kid handed over to the next one.
 *
 * @param dictionary number tree node to flatten
 * @param leftOver   dangling key carried over from the previously processed node, or {@code null};
 *                   when present it is used as the first key instead of reading one from {@code Nums}
 * @param items      list receiving the flattened keys and values
 * @return the key which was left without a value at the end of a {@code Nums} array, or {@code null}.
 * Note that a node processed through {@code Kids} always yields {@code null}: the leftover
 * of its last kid is not propagated to the caller.
 */
private PdfNumber flattenNumTree(PdfDictionary dictionary, PdfNumber leftOver, LinkedList items) {
    PdfArray entries = dictionary.getAsArray(PdfName.Nums);
    if (entries == null) {
        PdfArray kids = dictionary.getAsArray(PdfName.Kids);
        if (kids != null) {
            PdfNumber carried = leftOver;
            for (int kidIndex = 0; kidIndex < kids.size(); kidIndex++) {
                carried = flattenNumTree(kids.getAsDictionary(kidIndex), carried, items);
            }
        }
        return null;
    }

    PdfNumber pendingKey = leftOver;
    int index = 0;
    while (index < entries.size()) {
        PdfNumber key;
        if (pendingKey != null) {
            // The key comes from the previous node, so the current position already holds its value.
            key = pendingKey;
            pendingKey = null;
        } else {
            key = entries.getAsNumber(index);
            index++;
        }
        if (index >= entries.size()) {
            // The array ended right after a key: report it to the caller as a leftover.
            return key;
        }
        items.addLast(key);
        items.addLast(entries.get(index, false));
        index++;
    }
    return null;
}
/**
 * Compares two PDF objects, dispatching to the type-specific comparison once the basic
 * preconditions (presence, type, direct/indirect kind) have been checked.
 *
 * <p>
 * Indirect references are resolved before comparison. When both objects are indirect and a path
 * is being tracked, a pair of references that is already being compared is treated as equal,
 * otherwise the direct part of the path is reset for the new pair.
 *
 * <p>
 * When {@code useCachedPagesForComparison} is set and the expected object is a page dictionary,
 * page dictionaries that belong to a document's page tree are compared by their page index only;
 * the page reference lists ({@code cmpPagesRef}, {@code outPagesRef}) are built lazily on first use.
 *
 * <p>
 * Differences are recorded only when both {@code currentPath} and {@code compareResult} are
 * non-null; with either of them {@code null} the method just returns the comparison outcome.
 *
 * @param outObj        object from the output document, may be {@code null}
 * @param cmpObj        object from the reference (cmp) document, may be {@code null}
 * @param currentPath   path to the objects being compared, or {@code null} if paths are not tracked
 * @param compareResult collector of difference messages, or {@code null} if no report is needed
 * @return {@code true} if the objects are considered equal, {@code false} otherwise
 * @throws UnsupportedOperationException if the expected object is of a type which is not handled here
 */
protected boolean compareObjects(PdfObject outObj, PdfObject cmpObj, ObjectPath currentPath, CompareResult compareResult) {
    PdfObject outDirectObj = null;
    PdfObject cmpDirectObj = null;
    if (outObj != null) {
        outDirectObj = outObj.isIndirectReference() ? ((PdfIndirectReference) outObj).getRefersTo(false) : outObj;
    }
    if (cmpObj != null) {
        cmpDirectObj = cmpObj.isIndirectReference() ? ((PdfIndirectReference) cmpObj).getRefersTo(false) : cmpObj;
    }

    if (cmpDirectObj == null && outDirectObj == null) {
        return true;
    }

    // Reporting is optional everywhere in this class: guard every addError call so that callers
    // passing null for the path or the result get a plain boolean answer instead of an NPE.
    if (outDirectObj == null) {
        if (compareResult != null && currentPath != null) {
            compareResult.addError(currentPath, "Expected object was not found.");
        }
        return false;
    } else if (cmpDirectObj == null) {
        if (compareResult != null && currentPath != null) {
            compareResult.addError(currentPath, "Found object which was not expected to be found.");
        }
        return false;
    } else if (cmpDirectObj.getType() != outDirectObj.getType()) {
        if (compareResult != null && currentPath != null) {
            compareResult.addError(currentPath, MessageFormatUtil.format("Types do not match. Expected: {0}. Found: {1}.", cmpDirectObj.getClass().getSimpleName(), outDirectObj.getClass().getSimpleName()));
        }
        return false;
    } else if (cmpObj.isIndirectReference() && !outObj.isIndirectReference()) {
        if (compareResult != null && currentPath != null) {
            compareResult.addError(currentPath, "Expected indirect object.");
        }
        return false;
    } else if (!cmpObj.isIndirectReference() && outObj.isIndirectReference()) {
        if (compareResult != null && currentPath != null) {
            compareResult.addError(currentPath, "Expected direct object.");
        }
        return false;
    }

    if (currentPath != null && cmpObj.isIndirectReference() && outObj.isIndirectReference()) {
        // This pair of references is already under comparison further up the path: stop the recursion.
        if (currentPath.isComparing((PdfIndirectReference) cmpObj, (PdfIndirectReference) outObj)) {
            return true;
        }
        currentPath = currentPath.resetDirectPath((PdfIndirectReference) cmpObj, (PdfIndirectReference) outObj);
    }

    if (cmpDirectObj.isDictionary() && PdfName.Page.equals(((PdfDictionary) cmpDirectObj).getAsName(PdfName.Type))
            && useCachedPagesForComparison) {
        if (!outDirectObj.isDictionary() || !PdfName.Page.equals(((PdfDictionary) outDirectObj).getAsName(PdfName.Type))) {
            if (compareResult != null && currentPath != null) {
                compareResult.addError(currentPath, "Expected a page. Found not a page.");
            }
            return false;
        }
        PdfIndirectReference cmpRefKey = cmpObj.isIndirectReference() ? (PdfIndirectReference) cmpObj : cmpObj.getIndirectReference();
        PdfIndirectReference outRefKey = outObj.isIndirectReference() ? (PdfIndirectReference) outObj : outObj.getIndirectReference();
        // References to the same page
        if (cmpPagesRef == null) {
            cmpPagesRef = new ArrayList<>();
            for (int i = 1; i <= cmpRefKey.getDocument().getNumberOfPages(); ++i) {
                cmpPagesRef.add(cmpRefKey.getDocument().getPage(i).getPdfObject().getIndirectReference());
            }
        }
        if (outPagesRef == null) {
            outPagesRef = new ArrayList<>();
            for (int i = 1; i <= outRefKey.getDocument().getNumberOfPages(); ++i) {
                outPagesRef.add(outRefKey.getDocument().getPage(i).getPdfObject().getIndirectReference());
            }
        }
        // If at least one of the page dictionaries is in the document's page tree, we don't proceed with deep comparison,
        // because pages are compared at different level, so we compare only their index.
        // However only if both page dictionaries are not in the document's page trees, we continue to comparing them as normal dictionaries.
        if (cmpPagesRef.contains(cmpRefKey) || outPagesRef.contains(outRefKey)) {
            if (cmpPagesRef.contains(cmpRefKey) && cmpPagesRef.indexOf(cmpRefKey) == outPagesRef.indexOf(outRefKey)) {
                return true;
            }
            if (compareResult != null && currentPath != null) {
                compareResult.addError(currentPath, MessageFormatUtil.format("The dictionaries refer to different pages. Expected page number: {0}. Found: {1}",
                        cmpPagesRef.indexOf(cmpRefKey) + 1, outPagesRef.indexOf(outRefKey) + 1));
            }
            return false;
        }
    }

    // Types are known to match at this point, so the casts of the output object are safe.
    if (cmpDirectObj.isDictionary()) {
        return compareDictionariesExtended((PdfDictionary) outDirectObj, (PdfDictionary) cmpDirectObj, currentPath, compareResult);
    } else if (cmpDirectObj.isStream()) {
        return compareStreamsExtended((PdfStream) outDirectObj, (PdfStream) cmpDirectObj, currentPath, compareResult);
    } else if (cmpDirectObj.isArray()) {
        return compareArraysExtended((PdfArray) outDirectObj, (PdfArray) cmpDirectObj, currentPath, compareResult);
    } else if (cmpDirectObj.isName()) {
        return compareNamesExtended((PdfName) outDirectObj, (PdfName) cmpDirectObj, currentPath, compareResult);
    } else if (cmpDirectObj.isNumber()) {
        return compareNumbersExtended((PdfNumber) outDirectObj, (PdfNumber) cmpDirectObj, currentPath, compareResult);
    } else if (cmpDirectObj.isString()) {
        return compareStringsExtended((PdfString) outDirectObj, (PdfString) cmpDirectObj, currentPath, compareResult);
    } else if (cmpDirectObj.isBoolean()) {
        return compareBooleansExtended((PdfBoolean) outDirectObj, (PdfBoolean) cmpDirectObj, currentPath, compareResult);
    } else if (outDirectObj.isNull() && cmpDirectObj.isNull()) {
        return true;
    } else {
        throw new UnsupportedOperationException();
    }
}
/**
 * Compares two streams: first their bytes, then (only if the bytes match) their dictionaries.
 *
 * <p>
 * The bytes of both streams are requested decoded when the output stream's {@code Filter} entry
 * equals {@code FlateDecode}, and as is otherwise. On a byte mismatch an error describing the
 * first difference is recorded, provided both {@code currentPath} and {@code compareResult} are given.
 *
 * @param outStream     stream from the output document
 * @param cmpStream     stream from the reference (cmp) document
 * @param currentPath   path to the streams being compared, or {@code null}
 * @param compareResult collector of difference messages, or {@code null}
 * @return {@code true} if both the bytes and the dictionaries are equal
 */
private boolean compareStreamsExtended(PdfStream outStream, PdfStream cmpStream, ObjectPath currentPath, CompareResult compareResult) {
    final boolean decode = PdfName.FlateDecode.equals(outStream.get(PdfName.Filter));
    final byte[] outBytes = outStream.getBytes(decode);
    final byte[] cmpBytes = cmpStream.getBytes(decode);

    if (!Arrays.equals(outBytes, cmpBytes)) {
        StringBuilder report = new StringBuilder();
        if (cmpBytes.length == outBytes.length) {
            report.append("PdfStream. Bytes are different.\n");
        } else {
            report.append(MessageFormatUtil.format("PdfStream. Lengths are different. Expected: {0}. Found: {1}\n", cmpBytes.length, outBytes.length));
        }
        int differenceOffset = findBytesDifference(outBytes, cmpBytes, report);
        if (compareResult != null && currentPath != null) {
            // The offset is part of the path only for the duration of the error registration.
            currentPath.pushOffsetToPath(differenceOffset);
            compareResult.addError(currentPath, report.toString());
            currentPath.pop();
        }
        return false;
    }

    return compareDictionariesExtended(outStream, cmpStream, currentPath, compareResult);
}
/**
 * Describes how two byte arrays differ and appends that description to {@code errorMessage}.
 *
 * <p>
 * Only the common prefix (up to the length of the shorter array) is inspected byte by byte.
 * If a mismatch is found there, the message contains its index, the differing bytes, up to
 * 10 bytes of context on each side and the total number of mismatches. Otherwise the arrays
 * are reported as differing only in length.
 *
 * @param outStreamBytes bytes found in the output document
 * @param cmpStreamBytes bytes expected according to the reference (cmp) document
 * @param errorMessage   builder the description is appended to
 * @return first difference offset; the length of the shorter array if the common prefix is identical
 */
private int findBytesDifference(byte[] outStreamBytes, byte[] cmpStreamBytes, StringBuilder errorMessage) {
    final int commonLength = Math.min(cmpStreamBytes.length, outStreamBytes.length);
    int mismatchCount = 0;
    int firstMismatch = 0;
    for (int pos = 0; pos < commonLength; pos++) {
        if (cmpStreamBytes[pos] == outStreamBytes[pos]) {
            continue;
        }
        if (mismatchCount == 0) {
            firstMismatch = pos;
        }
        mismatchCount++;
    }

    if (mismatchCount == 0) {
        // lengths are different
        errorMessage.append(MessageFormatUtil.format("Bytes of the shorter array are the same as the first {0} bytes of the longer one.", commonLength));
        return commonLength;
    }

    final int contextRadius = 10;
    final int windowStart = Math.max(0, firstMismatch - contextRadius);
    final int cmpWindowEnd = Math.min(cmpStreamBytes.length, firstMismatch + contextRadius);
    final int outWindowEnd = Math.min(outStreamBytes.length, firstMismatch + contextRadius);

    String expectedByte = new String(cmpStreamBytes, firstMismatch, 1, StandardCharsets.ISO_8859_1);
    String expectedContext = new String(cmpStreamBytes, windowStart, cmpWindowEnd - windowStart, StandardCharsets.ISO_8859_1)
            .replaceAll(NEW_LINES, " ");
    String foundByte = new String(outStreamBytes, firstMismatch, 1, StandardCharsets.ISO_8859_1);
    String foundContext = new String(outStreamBytes, windowStart, outWindowEnd - windowStart, StandardCharsets.ISO_8859_1)
            .replaceAll(NEW_LINES, " ");

    // The index is passed as a string, exactly as before, so the formatter does not treat it as a number.
    errorMessage.append(MessageFormatUtil.format("First bytes difference is encountered at index {0}. Expected: {1} ({2}). Found: {3} ({4}). Total number of different bytes: {5}",
            String.valueOf(firstMismatch), expectedByte, expectedContext, foundByte, foundContext, mismatchCount));
    return firstMismatch;
}
/**
 * Compares two arrays element by element.
 *
 * <p>
 * Arrays of different sizes (or a missing output array) are reported as a single difference.
 * Otherwise elements are compared in order; the loop stops at the first mismatch when no report
 * is being collected or when the message limit of {@code compareResult} has been reached.
 *
 * @param outArray      array from the output document, may be {@code null}
 * @param cmpArray      array from the reference (cmp) document
 * @param currentPath   path to the arrays being compared, or {@code null}
 * @param compareResult collector of difference messages, or {@code null}
 * @return {@code true} if the arrays are equal, {@code false} otherwise
 */
private boolean compareArraysExtended(PdfArray outArray, PdfArray cmpArray, ObjectPath currentPath, CompareResult compareResult) {
    final boolean reportingEnabled = compareResult != null && currentPath != null;

    if (outArray == null) {
        if (reportingEnabled) {
            compareResult.addError(currentPath, "Found null. Expected PdfArray.");
        }
        return false;
    }
    if (outArray.size() != cmpArray.size()) {
        if (reportingEnabled) {
            compareResult.addError(currentPath, MessageFormatUtil.format("PdfArrays. Lengths are different. Expected: {0}. Found: {1}.", cmpArray.size(), outArray.size()));
        }
        return false;
    }

    boolean allItemsEqual = true;
    for (int index = 0; index < cmpArray.size(); index++) {
        if (currentPath != null) {
            currentPath.pushArrayItemToPath(index);
        }
        boolean itemEqual = compareObjects(outArray.get(index, false), cmpArray.get(index, false), currentPath, compareResult);
        allItemsEqual = itemEqual && allItemsEqual;
        if (currentPath != null) {
            currentPath.pop();
        }
        // Keep going after a mismatch only while there is a report that can still accept messages.
        if (!allItemsEqual && (!reportingEnabled || compareResult.isMessageLimitReached())) {
            return false;
        }
    }
    return allItemsEqual;
}
/**
 * Compares two names and records a difference if they are not equal.
 *
 * @param outName       name from the output document
 * @param cmpName       name from the reference (cmp) document
 * @param currentPath   path to the names being compared, or {@code null}
 * @param compareResult collector of difference messages, or {@code null}
 * @return {@code true} if the names are equal, {@code false} otherwise
 */
private boolean compareNamesExtended(PdfName outName, PdfName cmpName, ObjectPath currentPath, CompareResult compareResult) {
    if (!cmpName.equals(outName)) {
        if (compareResult != null && currentPath != null) {
            String description = MessageFormatUtil.format("PdfName. Expected: {0}. Found: {1}", cmpName.toString(), outName.toString());
            compareResult.addError(currentPath, description);
        }
        return false;
    }
    return true;
}
/**
 * Compares the values of two numbers for exact equality and records a difference if they differ.
 *
 * @param outNumber     number from the output document
 * @param cmpNumber     number from the reference (cmp) document
 * @param currentPath   path to the numbers being compared, or {@code null}
 * @param compareResult collector of difference messages, or {@code null}
 * @return {@code true} if the values are equal, {@code false} otherwise
 */
private boolean compareNumbersExtended(PdfNumber outNumber, PdfNumber cmpNumber, ObjectPath currentPath, CompareResult compareResult) {
    if (cmpNumber.getValue() != outNumber.getValue()) {
        if (compareResult != null && currentPath != null) {
            String description = MessageFormatUtil.format("PdfNumber. Expected: {0}. Found: {1}", cmpNumber, outNumber);
            compareResult.addError(currentPath, description);
        }
        return false;
    }
    return true;
}
/**
 * Compares two PDF strings by their byte representation.
 *
 * <p>
 * When the bytes differ, the difference is described in terms of the strings' Unicode
 * representations and recorded, provided both {@code currentPath} and {@code compareResult} are given.
 *
 * @param outString     string from the output document
 * @param cmpString     string from the reference (cmp) document
 * @param currentPath   path to the strings being compared, or {@code null}
 * @param compareResult collector of difference messages, or {@code null}
 * @return {@code true} if the strings are equal, {@code false} otherwise
 */
private boolean compareStringsExtended(PdfString outString, PdfString cmpString, ObjectPath currentPath, CompareResult compareResult) {
    if (!Arrays.equals(convertPdfStringToBytes(cmpString), convertPdfStringToBytes(outString))) {
        final String expectedText = cmpString.toUnicodeString();
        final String foundText = outString.toUnicodeString();

        StringBuilder report = new StringBuilder();
        if (expectedText.length() == foundText.length()) {
            report.append("PdfString. Characters are different.\n");
        } else {
            report.append(MessageFormatUtil.format("PdfString. Lengths are different. Expected: {0}. Found: {1}\n", expectedText.length(), foundText.length()));
        }
        int differenceOffset = findStringDifference(foundText, expectedText, report);
        if (compareResult != null && currentPath != null) {
            // The offset is part of the path only for the duration of the error registration.
            currentPath.pushOffsetToPath(differenceOffset);
            compareResult.addError(currentPath, report.toString());
            currentPath.pop();
        }
        return false;
    }
    return true;
}
/**
 * Describes how two strings differ and appends that description to {@code errorMessage}.
 *
 * <p>
 * Only the common prefix (up to the length of the shorter string) is inspected character by
 * character. If a mismatch is found there, the message contains its index, the differing
 * characters, up to 15 characters of context on each side and the total number of mismatches.
 * Otherwise the strings are reported as differing only in length.
 *
 * @param outString    string found in the output document
 * @param cmpString    string expected according to the reference (cmp) document
 * @param errorMessage builder the description is appended to
 * @return offset of the first difference; the length of the shorter string if the common prefix is identical
 */
private int findStringDifference(String outString, String cmpString, StringBuilder errorMessage) {
    final int commonLength = Math.min(cmpString.length(), outString.length());
    int mismatchCount = 0;
    int firstMismatch = 0;
    for (int pos = 0; pos < commonLength; pos++) {
        if (cmpString.charAt(pos) == outString.charAt(pos)) {
            continue;
        }
        if (mismatchCount == 0) {
            firstMismatch = pos;
        }
        mismatchCount++;
    }

    if (mismatchCount == 0) {
        // lengths are different
        errorMessage.append(MessageFormatUtil.format("All characters of the shorter string are the same as the first {0} characters of the longer one.", commonLength));
        return commonLength;
    }

    final int contextRadius = 15;
    final int windowStart = Math.max(0, firstMismatch - contextRadius);
    final int cmpWindowEnd = Math.min(cmpString.length(), firstMismatch + contextRadius);
    final int outWindowEnd = Math.min(outString.length(), firstMismatch + contextRadius);

    String expectedChar = String.valueOf(cmpString.charAt(firstMismatch));
    String expectedContext = cmpString.substring(windowStart, cmpWindowEnd).replaceAll(NEW_LINES, " ");
    String foundChar = String.valueOf(outString.charAt(firstMismatch));
    String foundContext = outString.substring(windowStart, outWindowEnd).replaceAll(NEW_LINES, " ");

    // The index is passed as a string, exactly as before, so the formatter does not treat it as a number.
    errorMessage.append(MessageFormatUtil.format("First characters difference is encountered at index {0}.\nExpected: {1} ({2}).\nFound: {3} ({4}).\nTotal number of different characters: {5}",
            String.valueOf(firstMismatch), expectedChar, expectedContext, foundChar, foundContext, mismatchCount));
    return firstMismatch;
}
/**
 * Converts the value of a PDF string to bytes.
 *
 * <p>
 * The string's own encoding is used, except that a string marked as {@code UNICODE_BIG} whose
 * value fits PdfDocEncoding is converted with {@code PDF_DOC_ENCODING} instead.
 *
 * @param pdfString string to convert
 * @return bytes of the string value in the chosen encoding
 */
private byte[] convertPdfStringToBytes(PdfString pdfString) {
    final String value = pdfString.getValue();
    final String encoding = pdfString.getEncoding();
    // equals() on the constant is null-safe, so a missing encoding simply falls through as is.
    final boolean fitsPdfDocEncoding = PdfEncodings.UNICODE_BIG.equals(encoding) && PdfEncodings.isPdfDocEncoding(value);
    if (fitsPdfDocEncoding) {
        return PdfEncodings.convertToBytes(value, PdfEncodings.PDF_DOC_ENCODING);
    }
    return PdfEncodings.convertToBytes(value, encoding);
}
/**
 * Compares two booleans and records a difference if their values are not the same.
 *
 * @param outBoolean    boolean from the output document
 * @param cmpBoolean    boolean from the reference (cmp) document
 * @param currentPath   path to the booleans being compared, or {@code null}
 * @param compareResult collector of difference messages, or {@code null}
 * @return {@code true} if the values are equal, {@code false} otherwise
 */
private boolean compareBooleansExtended(PdfBoolean outBoolean, PdfBoolean cmpBoolean, ObjectPath currentPath, CompareResult compareResult) {
    if (cmpBoolean.getValue() != outBoolean.getValue()) {
        if (compareResult != null && currentPath != null) {
            String description = MessageFormatUtil.format("PdfBoolean. Expected: {0}. Found: {1}.", cmpBoolean.getValue(), outBoolean.getValue());
            compareResult.addError(currentPath, description);
        }
        return false;
    }
    return true;
}
/**
 * Collects the link annotations of a page, in the order in which the page returns its annotations.
 *
 * @param pageNum  number of the page, as accepted by {@code PdfDocument.getPage(int)}
 * @param document document the page belongs to
 * @return annotations of the page whose subtype is {@code Link}; empty list if there are none
 */
private List<PdfLinkAnnotation> getLinkAnnotations(int pageNum, PdfDocument document) {
    List<PdfLinkAnnotation> linkAnnotations = new ArrayList<>();
    // The element type must be declared: iterating a raw List as PdfAnnotation does not compile.
    List<PdfAnnotation> annotations = document.getPage(pageNum).getAnnotations();
    for (PdfAnnotation annotation : annotations) {
        if (PdfName.Link.equals(annotation.getSubtype())) {
            linkAnnotations.add((PdfLinkAnnotation) annotation);
        }
    }
    return linkAnnotations;
}
/**
 * Performs a shallow comparison of two link annotations.
 *
 * <p>
 * The following is compared: the page numbers the destinations point to (when both links have
 * a destination object), the number of entries in the annotation dictionaries, the {@code Rect}
 * entries, and every dictionary entry of a simple type (null, boolean, number, string, name),
 * by its string representation. Entries of other types are only checked for presence and
 * for having the same type.
 *
 * @param cmpLink     link annotation from the reference (cmp) document
 * @param outLink     link annotation from the output document
 * @param cmpDocument reference (cmp) document, used to resolve named destinations
 * @param outDocument output document, used to resolve named destinations
 * @return {@code true} if no difference was detected, {@code false} otherwise
 */
private boolean compareLinkAnnotations(PdfLinkAnnotation cmpLink, PdfLinkAnnotation outLink, PdfDocument cmpDocument, PdfDocument outDocument) {
    // Compare link rectangles, page numbers the links refer to, and simple parameters (non-indirect, non-arrays, non-dictionaries)
    PdfObject cmpDestObject = cmpLink.getDestinationObject();
    PdfObject outDestObject = outLink.getDestinationObject();

    if (cmpDestObject != null && outDestObject != null) {
        if (cmpDestObject.getType() != outDestObject.getType()) {
            return false;
        }
        PdfArray explicitCmpDest = null;
        PdfArray explicitOutDest = null;
        // Only lookups are performed on these maps, so wildcard types are enough and avoid raw types.
        Map<?, ?> cmpNamedDestinations = cmpDocument.getCatalog().getNameTree(PdfName.Dests).getNames();
        Map<?, ?> outNamedDestinations = outDocument.getCatalog().getNameTree(PdfName.Dests).getNames();
        switch (cmpDestObject.getType()) {
            case PdfObject.ARRAY:
                explicitCmpDest = (PdfArray) cmpDestObject;
                explicitOutDest = (PdfArray) outDestObject;
                break;
            case PdfObject.NAME:
                explicitCmpDest = (PdfArray) cmpNamedDestinations.get(((PdfName) cmpDestObject).getValue());
                explicitOutDest = (PdfArray) outNamedDestinations.get(((PdfName) outDestObject).getValue());
                break;
            case PdfObject.STRING:
                explicitCmpDest = (PdfArray) cmpNamedDestinations.get(((PdfString) cmpDestObject).toUnicodeString());
                explicitOutDest = (PdfArray) outNamedDestinations.get(((PdfString) outDestObject).toUnicodeString());
                break;
            default:
                break;
        }
        // NOTE(review): an explicit destination stays null for other destination types or for a
        // named destination missing from the name tree, and is dereferenced below - confirm that
        // such input cannot reach this method.
        if (getExplicitDestinationPageNum(explicitCmpDest) != getExplicitDestinationPageNum(explicitOutDest)) {
            return false;
        }
    }

    PdfDictionary cmpDict = cmpLink.getPdfObject();
    PdfDictionary outDict = outLink.getPdfObject();
    if (cmpDict.size() != outDict.size()) {
        return false;
    }

    Rectangle cmpRect = cmpDict.getAsRectangle(PdfName.Rect);
    Rectangle outRect = outDict.getAsRectangle(PdfName.Rect);
    if (cmpRect.getHeight() != outRect.getHeight() ||
            cmpRect.getWidth() != outRect.getWidth() ||
            cmpRect.getX() != outRect.getX() ||
            cmpRect.getY() != outRect.getY()) {
        return false;
    }

    // The entry type must be declared: with a raw Map.Entry the key and value are plain Objects
    // and can be neither assigned to PdfObject nor passed to containsKey(PdfName).
    for (Map.Entry<PdfName, PdfObject> cmpEntry : cmpDict.entrySet()) {
        PdfObject cmpObj = cmpEntry.getValue();
        if (!outDict.containsKey(cmpEntry.getKey())) {
            return false;
        }
        PdfObject outObj = outDict.get(cmpEntry.getKey());
        if (cmpObj.getType() != outObj.getType()) {
            return false;
        }
        switch (cmpObj.getType()) {
            case PdfObject.NULL:
            case PdfObject.BOOLEAN:
            case PdfObject.NUMBER:
            case PdfObject.STRING:
            case PdfObject.NAME:
                if (!cmpObj.toString().equals(outObj.toString())) {
                    return false;
                }
                break;
        }
    }
    return true;
}
/**
 * Finds the number of the page an explicit destination points to.
 *
 * <p>
 * The first element of the destination array is taken as is (without resolving it) and is
 * expected to be an indirect reference; it is matched against the references of the pages
 * of the document which owns that reference.
 *
 * @param explicitDest explicit destination array
 * @return 1-based number of the referenced page
 * @throws IllegalArgumentException if none of the document pages matches the reference
 */
private int getExplicitDestinationPageNum(PdfArray explicitDest) {
    final PdfIndirectReference targetPage = (PdfIndirectReference) explicitDest.get(0, false);
    final PdfDocument owner = targetPage.getDocument();

    int pageNum = 1;
    while (pageNum <= owner.getNumberOfPages()) {
        PdfIndirectReference candidate = owner.getPage(pageNum).getPdfObject().getIndirectReference();
        if (candidate.equals(targetPage)) {
            return pageNum;
        }
        ++pageNum;
    }
    throw new IllegalArgumentException("PdfLinkAnnotation comparison: Page not found.");
}
/**
 * Accepts PNG files whose name contains the given output PDF name and does not contain
 * the {@code cmp_} marker.
 */
private static class PngFileFilter implements FileFilter {
    private String currentOutPdfName;

    public PngFileFilter (String currentOutPdfName) {
        this.currentOutPdfName = currentOutPdfName;
    }

    public boolean accept(File pathname) {
        final String fileName = pathname.getName();
        if (!fileName.endsWith(".png") || fileName.contains("cmp_")) {
            return false;
        }
        return fileName.contains(currentOutPdfName);
    }
}
/**
 * Accepts PNG files whose name contains both the {@code cmp_} marker and the given
 * reference (cmp) PDF name.
 */
private static class CmpPngFileFilter implements FileFilter {
    private String currentCmpPdfName;

    public CmpPngFileFilter (String currentCmpPdfName) {
        this.currentCmpPdfName = currentCmpPdfName;
    }

    public boolean accept(File pathname) {
        final String fileName = pathname.getName();
        if (!fileName.endsWith(".png") || !fileName.contains("cmp_")) {
            return false;
        }
        return fileName.contains(currentCmpPdfName);
    }
}
/**
 * Accepts PNG files whose name starts with the given difference image prefix.
 */
private static class DiffPngFileFilter implements FileFilter {
    private String differenceImagePrefix;

    public DiffPngFileFilter(String differenceImagePrefix) {
        this.differenceImagePrefix = differenceImagePrefix;
    }

    public boolean accept(File pathname) {
        final String fileName = pathname.getName();
        // The prefix check is evaluated unconditionally, like in the two-flag form it replaces.
        final boolean hasPrefix = fileName.startsWith(differenceImagePrefix);
        return hasPrefix && fileName.endsWith(".png");
    }
}
/**
 * Orders files by their names (the last path segment), using {@link String#compareTo(String)}.
 * Parent directories do not take part in the comparison.
 */
private static class ImageNameComparator implements Comparator<File> {
    // The type argument is required: with a raw Comparator this method would not implement
    // compare(Object, Object) and the class would not compile.
    @Override
    public int compare(File f1, File f2) {
        String f1Name = f1.getName();
        String f2Name = f2.getName();
        return f1Name.compareTo(f2Name);
    }
}
/**
 * Class containing results of the comparison of two documents.
 *
 * <p>
 * Differences are stored as a map from the path of the differing objects to a textual
 * description, in the order in which they were registered. No more than {@code messageLimit}
 * differences are stored; further ones are silently dropped.
 */
public static class CompareResult {
    // LinkedHashMap to retain order. HashMap has different order in Java6/7 and Java8
    // The type arguments are required by the typed loops in getReport() and writeReportToXml().
    protected Map<ObjectPath, String> differences = new LinkedHashMap<>();
    protected int messageLimit = 1;

    /**
     * Creates new empty instance of CompareResult with given limit of difference messages.
     *
     * @param messageLimit maximum number of difference messages to be handled by this CompareResult.
     */
    public CompareResult(int messageLimit) {
        this.messageLimit = messageLimit;
    }

    /**
     * Verifies if documents are considered equal after comparison.
     *
     * @return true if documents are equal, false otherwise.
     */
    public boolean isOk() {
        return differences.size() == 0;
    }

    /**
     * Returns number of differences between two documents detected during comparison.
     *
     * @return number of differences.
     */
    public int getErrorCount() {
        return differences.size();
    }

    /**
     * Converts this CompareResult into text form.
     *
     * @return text report on the differences between two documents.
     */
    public String getReport() {
        StringBuilder sb = new StringBuilder();
        boolean firstEntry = true;
        for (Map.Entry<ObjectPath, String> entry : differences.entrySet()) {
            // Entries are separated by a dashed line; there is no separator before the first one.
            if (!firstEntry) {
                sb.append("-----------------------------").append("\n");
            }
            ObjectPath diffPath = entry.getKey();
            sb.append(entry.getValue()).append("\n").append(diffPath.toString()).append("\n");
            firstEntry = false;
        }
        return sb.toString();
    }

    /**
     * Returns map with {@link ObjectPath} as keys and difference descriptions as values.
     *
     * @return differences map which could be used to find in the document the objects that are different.
     */
    public Map<ObjectPath, String> getDifferences() {
        return differences;
    }

    /**
     * Converts this CompareResult into xml form.
     *
     * @param stream output stream to which xml report will be written.
     * @throws ParserConfigurationException if a XML DocumentBuilder cannot be created
     *                                      which satisfies the configuration requested.
     * @throws TransformerException         if it is not possible to create an XML Transformer instance or
     *                                      an unrecoverable error occurs during the course of the transformation.
     */
    public void writeReportToXml(OutputStream stream) throws ParserConfigurationException, TransformerException {
        final Document xmlReport = XmlUtil.initNewXmlDocument();
        Element root = xmlReport.createElement("report");
        Element errors = xmlReport.createElement("errors");
        errors.setAttribute("count", String.valueOf(differences.size()));
        root.appendChild(errors);
        for (Map.Entry<ObjectPath, String> entry : differences.entrySet()) {
            Node errorNode = xmlReport.createElement("error");
            Node message = xmlReport.createElement("message");
            message.appendChild(xmlReport.createTextNode(entry.getValue()));
            Node path = entry.getKey().toXmlNode(xmlReport);
            errorNode.appendChild(message);
            errorNode.appendChild(path);
            errors.appendChild(errorNode);
        }
        xmlReport.appendChild(root);
        XmlUtils.writeXmlDocToStream(xmlReport, stream);
    }

    /**
     * Checks whether the number of stored differences has reached the message limit.
     *
     * @return true if no further differences will be stored, false otherwise.
     */
    protected boolean isMessageLimitReached() {
        return differences.size() >= messageLimit;
    }

    /**
     * Stores a difference unless the message limit has already been reached.
     * The path is copied, so later changes to {@code path} do not affect the stored key.
     *
     * @param path    path to the objects which differ.
     * @param message description of the difference.
     */
    protected void addError(ObjectPath path, String message) {
        if (differences.size() < messageLimit) {
            differences.put(new ObjectPath(path), message);
        }
    }
}
/**
 * Unchecked exception signalling a failure while images are generated from pdf files
 * or while those images are compared.
 */
public static class CompareToolExecutionException extends RuntimeException {

    /**
     * Constructs a {@link CompareToolExecutionException} carrying the given detail message.
     *
     * @param msg text describing what went wrong.
     */
    public CompareToolExecutionException(String msg) {
        super(msg);
    }
}
}