All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.redsix.pdfcompare.Exclusions Maven / Gradle / Ivy

Go to download

A simple Java library to compare two PDF files. Files are rendered and compared pixel by pixel.

There is a newer version: 1.2.3
Show newest version
package de.redsix.pdfcompare;

import com.typesafe.config.*;
import de.redsix.pdfcompare.env.Environment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * Exclusions collect rectangular areas of the document, that shall be ignored during comparison.
 * Each area is specified through a {@link PageArea} object.
 * 

* Exclusions can be read from a file in JSON format (or actually a superset called HOCON) which has the following form: *

 * exclusions: [
 *     {
 *         page: 2
 *         x1: 300 // entries without a unit are in pixels, when Pdf is rendered at 300DPI
 *         y1: 1000
 *         x2: 550
 *         y2: 1300
 *     },
 *     {
 *         // page is optional. When not given, the exclusion applies to all pages.
 *         x1: 130.5mm // entries can also be given in units of cm, mm or pt (DTP-Point defined as 1/72 Inches)
 *         y1: 3.3cm
 *         x2: 190mm
 *         y2: 3.7cm
 *     },
 *     {
 *         page: 7
 *         // coordinates are optional. When not given, the whole page is excluded.
 *     }
 * ]
*/ public class Exclusions { private static final Logger LOG = LoggerFactory.getLogger(Exclusions.class); private final Environment environment; private final float CM_TO_PIXEL; private final float MM_TO_PIXEL; private final float PT_TO_PIXEL; private static final Pattern NUMBER = Pattern.compile("([0-9.]+)(cm|mm|pt)"); private static final ConfigParseOptions configParseOptions = ConfigParseOptions.defaults().setSyntax(ConfigSyntax.CONF).setAllowMissing(true); private final Map exclusionsPerPage = new HashMap<>(); private final PageExclusions exclusionsForAllPages = new PageExclusions(); public Exclusions(Environment environment) { this.environment = environment; int dpi = environment.getDPI(); CM_TO_PIXEL = 1f / 2.54f * dpi; MM_TO_PIXEL = CM_TO_PIXEL / 10f; PT_TO_PIXEL = ((float) dpi) / 72f; } public Exclusions add(final PageArea exclusion) { Objects.requireNonNull(exclusion); if (exclusion.hasPage()) { exclusionsPerPage.computeIfAbsent(exclusion.page, k -> new PageExclusions(exclusionsForAllPages)).add(exclusion); } else { exclusionsForAllPages.add(exclusion); } return this; } public Exclusions remove(final PageArea exclusion) { Objects.requireNonNull(exclusion); if (exclusion.hasPage()) { exclusionsPerPage.computeIfAbsent(exclusion.page, k -> new PageExclusions(exclusionsForAllPages)).remove(exclusion); } else { exclusionsForAllPages.remove(exclusion); } return this; } public PageExclusions forPage(final int page) { return exclusionsPerPage.getOrDefault(page, exclusionsForAllPages); } public void readExclusions(final String filename) { Objects.requireNonNull(filename, "filename must not be null"); readExclusions(new File(filename)); } public void readExclusions(final Path path) { Objects.requireNonNull(path, "path must not be null"); readExclusions(path.toFile()); } public void readExclusions(final File file) { Objects.requireNonNull(file, "file must not be null"); if (file.exists()) { final Config exclusionConfig = ConfigFactory.parseFile(file, configParseOptions); readFromConfig(exclusionConfig); } else { if (environment.failOnMissingIgnoreFile()) { throw new IgnoreFileMissing(file); } LOG.info("Ignore-file at '{}' not found. Continuing without ignores.", file); } } public void readExclusions(InputStream inputStream) { Objects.requireNonNull(inputStream, "inputStream must not be null"); try (final InputStreamReader inputStreamReader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) { readExclusions(inputStreamReader); } catch (IOException e) { LOG.warn("Could not read ignores from InputStream. Continuing without ignores.", e); } } public void readExclusions(Reader reader) { Objects.requireNonNull(reader, "reader must not be null"); final Config exclusionConfig = ConfigFactory.parseReader(reader, configParseOptions); readFromConfig(exclusionConfig); } private void readFromConfig(final Config exclusionConfig) { final List exclusions = exclusionConfig.getObjectList("exclusions"); exclusions.stream().map(co -> { final Config c = co.toConfig(); if (!c.hasPath("x1") && !c.hasPath("y1") && !c.hasPath("x2") && !c.hasPath("y2")) { return new PageArea(c.getInt("page")); } if (c.hasPath("page")) { return new PageArea(c.getInt("page"), toPix(c, "x1"), toPix(c, "y1"), toPix(c, "x2"), toPix(c, "y2")); } return new PageArea(toPix(c, "x1"), toPix(c, "y1"), toPix(c, "x2"), toPix(c, "y2")); }).forEach(this::add); } private int toPix(final Config c, final String key) { try { return c.getInt(key); } catch (ConfigException.WrongType e) { final String valueStr = c.getString(key); final Matcher matcher = NUMBER.matcher(valueStr); if (matcher.matches()) { float factor = 0; if ("mm".equals(matcher.group(2))) { factor = MM_TO_PIXEL; } else if ("cm".equals(matcher.group(2))) { factor = CM_TO_PIXEL; } else if ("pt".equals(matcher.group(2))) { factor = PT_TO_PIXEL; } return Math.round(factor * Float.parseFloat(matcher.group(1))); } else { throw new RuntimeException("Exclusion can't be read. String not parseable to a number: " + valueStr); } } } public void forEach(final Consumer exclusionConsumer) { getPageAreaStream().forEach(exclusionConsumer); } public String asJson() { return PageArea.asJsonWithExclusion(getPageAreaStream()); } private Stream getPageAreaStream() { Stream allPages = exclusionsForAllPages.getExclusions().stream(); Stream pageAreas = exclusionsPerPage.entrySet().stream() .sorted(Map.Entry.comparingByKey()) .flatMap(e -> e.getValue().getExclusions().stream()); return Stream.concat(allPages, pageAreas); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy