All downloads are free. The search and download functionality uses the official Maven repository.

cn.afterturn.easypoi.pdf.watermark.WatermarkRemover Maven / Gradle / Ivy

package cn.afterturn.easypoi.pdf.watermark;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Blanks out watermark text in a contiguous range of pages of a PDF document.
 *
 * <p>For each page in the range, the page content stream is parsed into tokens. Whenever a
 * {@code Tj} operator is found whose string operand is either contained in the supplied
 * watermark list or recognised by {@link IWatermarkProcessor#isWatermarkWord}, the operand is
 * replaced with an empty string. The (possibly modified) token list of every successfully
 * processed page is collected as a {@code RemoveResult} and exposed through
 * {@link #getPageTokens()}.
 *
 * <p>Only the {@code Tj} operator is inspected; other operators are left untouched.
 */
public class WatermarkRemover {
    static final Logger logger = LoggerFactory.getLogger(WatermarkRemover.class);

    IWatermarkProcessor remover;
    List<RemoveResult> pageTokens = new ArrayList<>();
    List<?> watermarks = null;
    int pageStartIndex;
    int pageLength;

    /**
     * @param remover        supplies the document and the {@code isWatermarkWord} check
     * @param pageStartIndex index of the first page to process
     * @param pageLength     number of pages to process, starting at {@code pageStartIndex}
     * @param watermarks     exact strings to treat as watermarks; may be {@code null}, in which
     *                       case only {@code remover.isWatermarkWord} decides
     */
    public WatermarkRemover(IWatermarkProcessor remover, int pageStartIndex, int pageLength, List<?> watermarks) {
        this.remover = remover;
        this.pageStartIndex = pageStartIndex;
        this.pageLength = pageLength;
        this.watermarks = watermarks;
    }

    /**
     * Processes every page in {@code [pageStartIndex, pageStartIndex + pageLength)} that exists
     * in the document. A failure on one page is logged and does not stop the remaining pages.
     */
    public void removeWatermark() {
        if (pageLength <= 0) {
            // Empty range: nothing to do, and the document is never touched.
            return;
        }

        int pageCount = remover.getDocument().getNumberOfPages();
        // Compute the exclusive end in long so that start + length cannot overflow int
        // (e.g. a "rest of the document" length of Integer.MAX_VALUE), then clamp it to the
        // number of pages actually present.
        long requestedEnd = (long) pageStartIndex + pageLength;
        int end = (int) Math.max(0L, Math.min(requestedEnd, (long) pageCount));

        for (int i = pageStartIndex; i < end; i++) {
            try {
                processPage(i, remover.getDocument().getPage(i));
            } catch (Exception e) {
                // Best effort: keep going with the next page, but record which page failed.
                logger.error("【解析PDF页面失败】pageIndex={}", i, e);
            }
        }
    }

    /**
     * Parses the content stream of {@code page}, empties the string operand of every {@code Tj}
     * operator whose text is a watermark, and records the resulting tokens.
     *
     * @param index index of the page within the document, stored in the result
     * @param page  page whose content stream is parsed
     * @throws Exception if parsing the page or inspecting a token fails
     */
    public void processPage(int index, PDPage page) throws Exception {
        PDFStreamParser parser = new PDFStreamParser(page);
        parser.parse();
        List<Object> tokens = parser.getTokens();

        if (tokens != null) {
            // Start at 1: an operator at position 0 has no preceding operand to inspect.
            for (int j = 1; j < tokens.size(); j++) {
                Object token = tokens.get(j);
                if (!(token instanceof Operator) || !"Tj".equals(((Operator) token).getName())) {
                    continue;
                }

                // Skip a Tj whose preceding token is not a string instead of failing the
                // whole page with a ClassCastException.
                Object operand = tokens.get(j - 1);
                if (!(operand instanceof COSString)) {
                    continue;
                }

                COSString text = (COSString) operand;
                // Check whether the shown text is a watermark.
                if (isWatermark(text.getString())) {
                    // An empty value needs no charset lookup.
                    text.setValue(new byte[0]);
                }
            }
        }

        pageTokens.add(new RemoveResult(page, index, tokens));
    }

    /**
     * Returns whether {@code text} is listed in {@code watermarks} or, failing that, is
     * recognised by the processor.
     */
    private boolean isWatermark(String text) {
        return (watermarks != null && watermarks.contains(text)) || remover.isWatermarkWord(text);
    }

    /**
     * @return the results collected so far, one per successfully processed page, in processing
     *         order; this is the live internal list, not a copy
     */
    public List<RemoveResult> getPageTokens() {
        return pageTokens;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy