pro.verron.officestamper.core.CommentUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of engine Show documentation
Office-stamper is a Java template engine for docx documents, forked from org.wickedsource.docx-stamper
There is a newer version: 2.6.0
package pro.verron.officestamper.core;

import org.docx4j.TextUtils;
import org.docx4j.XmlUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.CommentsPart;
import org.docx4j.wml.*;
import org.jvnet.jaxb2_commons.ppp.Child;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pro.verron.officestamper.api.Comment;
import pro.verron.officestamper.api.DocxPart;
import pro.verron.officestamper.api.OfficeStamperException;
import pro.verron.officestamper.api.Placeholder;

import java.math.BigInteger;
import java.util.*;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.toSet;
import static org.docx4j.XmlUtils.unwrap;

/**
 * Utility class for working with comments in a DOCX document.
 *
 * @author Joseph Verron
 * @author Tom Hombergs
 * @version ${version}
 * @since 1.0.0
 */
public class CommentUtil {
    private static final Logger logger = LoggerFactory.getLogger(CommentUtil.class);

    /**
     * Part name ({@code /word/comments.xml}) used to look up the comments part of a document.
     */
    public static final PartName WORD_COMMENTS_PART_NAME;

    static {
        // The PartName constructor declares a checked InvalidFormatException, so the constant
        // is built in a static block and the checked exception is rethrown unchecked.
        PartName commentsPartName;
        try {
            commentsPartName = new PartName("/word/comments.xml");
        } catch (InvalidFormatException invalidName) {
            throw new OfficeStamperException(invalidName);
        }
        WORD_COMMENTS_PART_NAME = commentsPartName;
    }

    /**
     * Not meant to be called: this class only exposes static helpers.
     *
     * @throws OfficeStamperException always.
     */
    private CommentUtil() {
        var reason = "Utility class shouldn't be instantiated";
        throw new OfficeStamperException(reason);
    }

    /**
     * Returns the comment the given run is commented with.
     * <p>
     * The lookup is performed among the siblings of the run, i.e. the content of the run's
     * parent. A run that has no parent, or whose parent is not a {@link ContentAccessor},
     * has no siblings to scan and is therefore reported as not commented.
     *
     * @param run      the run whose comment to retrieve.
     * @param document the document that contains the run.
     *
     * @return Optional of the comment, if found, Optional.empty() otherwise.
     *
     * @throws OfficeStamperException if the comments of the document cannot be accessed.
     */
    public static Optional<Comments.Comment> getCommentAround(R run, WordprocessingMLPackage document) {
        // The pattern test covers both a detached run (null parent) and a parent that does
        // not expose a content list, instead of failing with a ClassCastException.
        if (!(run.getParent() instanceof ContentAccessor parent)) return Optional.empty();

        try {
            return getComment(run, document, parent);
        } catch (Docx4JException e) {
            throw new OfficeStamperException("error accessing the comments of the document!", e);
        }
    }

    /**
     * Scans the content of {@code parent} for the sequence "comment range start, {@code run},
     * comment range end" and resolves the comment identified by that range start.
     * <p>
     * Any other element met while scanning resets the search, so only a range that directly
     * wraps the run is recognized.
     *
     * @param run      the run that must be located between the range markers.
     * @param document the document used to resolve the comment id.
     * @param parent   the container whose content is scanned.
     *
     * @return the matching comment, or an empty Optional if the run is not wrapped by a
     * comment range or the comment cannot be resolved.
     *
     * @throws Docx4JException if the comments of the document cannot be read.
     */
    private static Optional<Comments.Comment> getComment(
            R run,
            WordprocessingMLPackage document,
            ContentAccessor parent
    )
            throws Docx4JException {
        CommentRangeStart possibleComment = null;
        boolean foundChild = false;
        for (Object contentElement : parent.getContent()) {
            Object element = unwrap(contentElement);
            // so first we look for the start of the comment
            if (element instanceof CommentRangeStart crs) {
                possibleComment = crs;
                // a range that starts here cannot contain a run seen earlier, so a previous
                // match must not be carried over to this new candidate
                foundChild = false;
            }
            // then we check if the child we are looking for is ours
            else if (possibleComment != null && run.equals(contentElement)) {
                foundChild = true;
            }
            // and then, if we have an end of a comment, we are good!
            else if (possibleComment != null && foundChild && element instanceof CommentRangeEnd) {
                try {
                    return findComment(document, possibleComment.getId());
                } catch (InvalidFormatException e) {
                    // best effort: report the problem and keep scanning the remaining siblings
                    logger.warn("Error while searching comment. Skipping run {}.", run, e);
                }
            }
            // else restart
            else {
                possibleComment = null;
                foundChild = false;
            }
        }
        return Optional.empty();
    }

    /**
     * Finds a comment with the given ID in the specified WordprocessingMLPackage document.
     * <p>
     * The comment is looked up in the part registered under {@link #WORD_COMMENTS_PART_NAME}.
     * When the document has no such part, or when {@code id} is {@code null}, no comment can
     * match and an empty Optional is returned.
     *
     * @param document the WordprocessingMLPackage document to search for the comment
     * @param id       the ID of the comment to find
     *
     * @return an Optional containing the Comment if found, or an empty Optional if not found
     *
     * @throws Docx4JException if an error occurs while searching for the comment
     */
    public static Optional<Comments.Comment> findComment(WordprocessingMLPackage document, BigInteger id)
            throws Docx4JException {
        if (id == null) return Optional.empty();

        var parts = document.getParts();
        // Guards against packages where no comments part is registered under that name.
        if (!(parts.get(WORD_COMMENTS_PART_NAME) instanceof CommentsPart wordComments))
            return Optional.empty();

        var comments = wordComments.getContents();
        return comments.getComment()
                       .stream()
                       // id is known to be non-null here, so a comment without id is skipped
                       .filter(comment -> id.equals(comment.getId()))
                       .findFirst();
    }

    /**
     * Returns the first comment found for the given docx object. Note that an object is
     * only considered commented if the comment STARTS within the object, i.e. a comment
     * range start is one of its direct content elements. Comments spanning several objects
     * are not supported by this method.
     * <p>
     * Range starts whose comment cannot be found in the comments part are skipped, and the
     * next range start of the object is tried.
     *
     * @param object   the object whose comment to load.
     * @param document the document in which the object is embedded (needed to load the
     *                 comment from the comments.xml part).
     *
     * @return an Optional containing the first comment that starts within the object, or an
     * empty Optional if the object is not commented or the document has no comments part.
     *
     * @throws OfficeStamperException if the comments of the document cannot be accessed.
     */
    public static Optional<Comments.Comment> getCommentFor(ContentAccessor object, WordprocessingMLPackage document) {
        for (Object contentObject : object.getContent()) {
            // unwrap so that a range start wrapped in a JAXBElement is recognized as well
            if (!(unwrap(contentObject) instanceof CommentRangeStart crs)) continue;
            BigInteger id = crs.getId();
            if (id == null) continue;

            // Without a comments part there is nothing any range start could refer to.
            var part = document.getParts()
                               .get(WORD_COMMENTS_PART_NAME);
            if (!(part instanceof CommentsPart commentsPart)) return Optional.empty();

            Comments comments;
            try {
                comments = commentsPart.getContents();
            } catch (Docx4JException e) {
                throw new OfficeStamperException("error accessing the comments of the document!", e);
            }

            for (Comments.Comment comment : comments.getComment()) {
                if (id.equals(comment.getId())) {
                    return Optional.of(comment);
                }
            }
        }
        return Optional.empty();
    }

    /**
     * Builds a placeholder from the text of the specified comment.
     * <p>
     * Only the paragraphs ({@link P}) found directly in the comment content contribute; their
     * string forms are concatenated in document order without any separator.
     *
     * @param comment a {@link Comments.Comment} object
     *
     * @return a raw {@link Placeholder} holding the concatenated paragraph text
     */
    public static Placeholder getCommentString(Comments.Comment comment) {
        String text = comment.getContent()
                             .stream()
                             .filter(P.class::isInstance)
                             .map(P.class::cast)
                             .map(paragraph -> new StandardParagraph(paragraph).asString())
                             .collect(Collectors.joining());
        return Placeholders.raw(text);
    }

    /**
     * Removes the markers of the specified comment from the document tree.
     * <p>
     * The comment range end, the comment range start and the comment reference are each
     * removed from the content of their own parent, in that order. A marker that the comment
     * does not have ({@code null}) is ignored.
     *
     * @param comment a {@link Comment} object
     */
    public static void deleteComment(Comment comment) {
        var rangeEnd = comment.getCommentRangeEnd();
        if (rangeEnd != null) {
            var siblings = ((ContentAccessor) rangeEnd.getParent()).getContent();
            siblings.remove(rangeEnd);
        }

        var rangeStart = comment.getCommentRangeStart();
        if (rangeStart != null) {
            var siblings = ((ContentAccessor) rangeStart.getParent()).getContent();
            siblings.remove(rangeStart);
        }

        var commentReference = comment.getCommentReference();
        if (commentReference != null) {
            var siblings = ((ContentAccessor) commentReference.getParent()).getContent();
            siblings.remove(commentReference);
        }
    }

    /**
     * Extracts the comments from the given document.
     * <p>
     * Two maps are filled from the document's comment ranges; the second one is then
     * completed from the comments part, and the first one is returned after malformed
     * comments have been cleaned out of it.
     *
     * @param document the document part to extract the comments from.
     *
     * @return a map of the comments, with the key being the comment id.
     */
    public static Map getComments(DocxPart document) {
        Map topLevelComments = new HashMap<>();
        Map everyComment = new HashMap<>();
        collectCommentRanges(topLevelComments, everyComment, document);
        var commentsPart = document.getCommentsPart();
        collectComments(everyComment, commentsPart);
        return cleanMalformedComments(topLevelComments);
    }

    /**
     * Returns the string value of the specified comment object.
     *
     * @param items     a {@link List} object
     * @param commentId a {@link BigInteger} object
     */
    public static void deleteCommentFromElements(List