All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jcmp.DocCompare Maven / Gradle / Ivy

There is a newer version: 2.56.1
Show newest version
package jcmp;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

import jiff.AbstractFieldFilter;
import jiff.JsonDiff;

/**
 * Compares two documents json documents, and builds an list of all changes
 *
 * Containers (objects and arrays) are compared recursively. The comparison
 * algorithm works like this:
 *
 * Objects: A field-by-field comparison is done. If a field exists in the first
 * document but not in the second, that field is removed. If a field exists in
 * the second document but not the first, that field is added. If a field exists
 * in both documents with different values, that field is modified.
 *
 * Arrays: There are two possible algorithms to compare arrays. If array
 * elements contain a unique identifier (which is defined by the caller), then
 * array elements of the first and the second document are matched using the
 * unique identifiers of array elements. Then each matching array element is
 * compared to generate the detailed difference. If array elements don't have
 * unique identifiers, then each element of the first array is compared to each
 * element of the second array, and the elements with minimal number of changes
 * are associated. Elements that are too different from each other are not
 * associated.
 *
 * Differences:
 *
 * An Addition denotes a new field or array element. Addition.field1 is null,
 * meaning the field does not exist in document1, and Addition.field2 denotes
 * the new field, or array element.
 *
 * A Removal denotes a removed field or array element. Removal.field1 denotes
 * the element in document1, and Removal.field2 is null.
 *
 * A Modification denotes a content modification of a field, or array element.
 * Both field1 and field2 are non-null, and set to the name of the modified
 * field.
 *
 * A Move denotes an array element move. field1 denotes the old index of the
 * array element, and field2 denotes the new index.
 *
 * If new elements are added to an array, or existing elements are removed, the
 * addition and removal appear as diff, and any node that shifted during the
 * operation appears within a Move.
 */
public abstract class DocCompare {

    /**
     * Thrown if there is an array whose elements contain identities, but for at
     * least one element identity cannot be retrieved
     */
    public static final class InvalidArrayIdentity extends Exception {
        public InvalidArrayIdentity(List p) {
            this(JsonDiff.toString(p));
        }

        public InvalidArrayIdentity(String p) {
            super(p);
        }
    }

    /**
     * Thrown if there is an array whose elements contain identities, but they
     * are not unique
     */
    public static final class DuplicateArrayIdentity extends Exception {
        public DuplicateArrayIdentity(List p) {
            this(JsonDiff.toString(p));
        }

        public DuplicateArrayIdentity(String p) {
            super(p);
        }
    }

    /**
     * The array element identity extractor interface.
     */
    public interface IdentityExtractor {

        /**
         * It should return an identity object from the given array element. The
         * returned object should implement equals and hashCode methods.
         */
        Object getIdentity(T element);
    }

    /**
     * Contains the edit script, and number of changed and unchanged fields. The
     * number of changed fields does not include array element moves, it only
     * includes additions, removals, and modifications.
     */
    public static class Difference {
        private final List> delta;
        private int numUnchangedFields;
        private int numChangedFields;

        /**
         * Default ctor, sets numUnchangedFields to zero, initializes an empty
         * list
         */
        public Difference() {
            delta = new ArrayList<>();
            numUnchangedFields = 0;
        }

        public Difference(List> delta) {
            this.delta = delta;
            for (Delta d : this.delta) {
                if (d instanceof Addition
                        || d instanceof Removal
                        || d instanceof Modification) {
                    numChangedFields++;
                }
            }
        }

        /**
         * Constructs a difference with one modification, no unchanged fields
         */
        public Difference(Delta d) {
            this(new ArrayList>(1));
            add(d);

        }

        /**
         * Constructs a Difference denoting no difference
         */
        public Difference(int numFields) {
            delta = new ArrayList<>();
            numUnchangedFields = numFields;
        }

        /**
         * Returns the number of unmodified fields
         */
        public int getNumUnchangedFields() {
            return numUnchangedFields;
        }

        /**
         * Returns the number of modified fields, excluding array element moves
         */
        public int getNumChangedFields() {
            return numChangedFields;
        }

        /**
         * Returns the list of changes
         */
        public List> getDelta() {
            return delta;
        }

        public void add(Difference diff) {
            delta.addAll(diff.delta);
            numUnchangedFields += diff.numUnchangedFields;
            numChangedFields += diff.numChangedFields;
        }

        public void add(Delta d) {
            delta.add(d);
            if (d instanceof Addition
                    || d instanceof Removal
                    || d instanceof Modification) {
                numChangedFields++;
            }
        }

        /**
         * A numeric value between 0 and 1 denoting how much an object is
         * changed. 0 means no change, 1 means everything is changed.
         */
        public double getChangeAmount() {
            double d = numChangedFields + numUnchangedFields;
            return d == 0 ? 0 : numChangedFields / d;
        }

        /**
         * Returns true if there are not changes
         */
        public boolean same() {
            return delta.isEmpty();
        }

        @Override
        public String toString() {
            StringBuilder bld = new StringBuilder();
            for (Delta x : delta) {
                bld.append(x.toString()).append('\n');
            }
            return bld.toString();
        }
    }

    /**
     * Base class for a delta
     */
    public static abstract class Delta {
        protected final String field1;
        protected final String field2;

        public Delta(String field1, String field2) {
            this.field1 = field1;
            this.field2 = field2;
        }

        public Delta(List field1, List field2) {
            this(field1 == null ? null : JsonDiff.toString(field1), field2 == null ? null : JsonDiff.toString(field2));
        }

        public String getField1() {
            return field1;
        }

        public String getField2() {
            return field2;
        }

        /**
         * Return the non-null field, or field1 if both are non-null
         */
        public String getField() {
            return field1 == null ? field2 : field1;
        }
    }

    /**
     * Denotes an addition of a field or array element that isn't present in
     * doc1
     */
    public static class Addition extends Delta {
        private final T addedNode;

        public Addition(String field2, T addedNode) {
            super(null, field2);
            this.addedNode = addedNode;
        }

        public Addition(List field2, T addedNode) {
            super(null, field2);
            this.addedNode = addedNode;
        }

        public T getAddedNode() {
            return addedNode;
        }

        @Override
        public String toString() {
            return "+ " + field2 + ":" + addedNode;
        }
    }

    /**
     * Denotes a removal of a field or array element that is present in doc1 but
     * not in doc2
     */
    public static class Removal extends Delta {
        private final T removedNode;

        public Removal(String field1, T removedNode) {
            super(field1, null);
            this.removedNode = removedNode;
        }

        public Removal(List field1, T removedNode) {
            super(field1, null);
            this.removedNode = removedNode;
        }

        public T getRemovedNode() {
            return removedNode;
        }

        @Override
        public String toString() {
            return "- " + field1 + ":" + removedNode;
        }
    }

    /**
     * Denotes an array element move from one element index to another
     */
    public static class Move extends Delta {
        private final T movedNode;

        public Move(String field1, String field2, T movedNode) {
            super(field1, field2);
            this.movedNode = movedNode;
        }

        public Move(List field1, List field2, T movedNode) {
            super(field1, field2);
            this.movedNode = movedNode;
        }

        public T getMovedNode() {
            return movedNode;
        }

        @Override
        public String toString() {
            return "* " + field1 + "->" + field2 + ":" + movedNode;
        }
    }

    /**
     * Denotes a field modification
     */
    public static class Modification extends Delta {
        private final T node1;
        private final T node2;

        public Modification(String field1, T node1, String field2, T node2) {
            super(field1, field2);
            this.node1 = node1;
            this.node2 = node2;
        }

        public Modification(List field1, T node1, List field2, T node2) {
            super(field1, field2);
            this.node1 = node1;
            this.node2 = node2;
        }

        public T getUnmodifiedNode() {
            return node1;
        }

        public T getModifiedNode() {
            return node2;
        }

        @Override
        public String toString() {
            return "* " + field1 + "->" + field2 + ":" + node1 + " -> " + node2;
        }
    }

    /**
     * Contains a list of fields contained in array elements that uniquely
     * identify array elements
     */
    public static class ArrayIdentityFields {
        private String[] fields;

        public ArrayIdentityFields(String... fields) {
            this.fields = fields;
        }

        public String[] getFields() {
            return fields;
        }
    }

    /**
     * Default array identity object. Contains the identity values, computes
     * hashcode from them
     */
    public static class DefaultIdentity {
        private final T[] nodes;
        private Integer hcode;

        public DefaultIdentity(T[] nodes) {
            this.nodes = nodes;
        }

        @Override
        public int hashCode() {
            if (hcode == null) {
                int code = 0;
                for (int i = 0; i < nodes.length; i++) {
                    if (nodes[i] != null) {
                        code += nodes[i].hashCode();
                    }
                }
                hcode = code;
            }
            return hcode;
        }

        @Override
        public boolean equals(Object x) {
            try {
                DefaultIdentity d = (DefaultIdentity) x;
                for (int i = 0; i < nodes.length; i++) {
                    if (!d.nodes[i].equals(nodes[i])) {
                        return false;
                    }
                }
            } catch (Exception e) {
                return false;
            }

            return true;
        }
    }

    private final Map arrayIdentities = new HashMap<>();

    protected abstract boolean isValue(BaseType value);

    protected abstract boolean isArray(BaseType value);

    protected abstract boolean isObject(BaseType value);

    protected abstract boolean isNull(BaseType value);

    protected abstract ValueType asValue(BaseType value);

    protected abstract ArrayType asArray(BaseType value);

    protected abstract ObjectType asObject(BaseType value);

    protected abstract boolean equals(ValueType v1, ValueType v2);

    protected abstract Iterator> getFields(ObjectType o);

    protected abstract boolean hasField(ObjectType value, String field);

    protected abstract BaseType getField(ObjectType value, String field);

    protected abstract IdentityExtractor getArrayIdentityExtractorImpl(ArrayIdentityFields fields);

    protected abstract BaseType getElement(ArrayType value, int index);

    protected abstract int size(ArrayType value);

    /**
     * Adds a group of fields that can uniquely identify array elements for
     * object arrays
     *
     * @param array The name of the array field
     * @param identities The fields of the array element that can identiy an
     * element
     *
     * In the following document:      
     * {
     * ...
     *   "aField": [
     *      { "_id":1,"field":...},
     *      { "_id":2,"field":...}
     *   ]
     * }
     * 
     * the call looks like
     * 
     *    jsonCompare.addArrayIdentity(new Path("aField"),new Path("_id"));
     * 
If there are more than one fields that uniquely identify an * eleent, list those in the argument list. */ public void addArrayIdentity(String array, String... identities) { arrayIdentities.put(array, new ArrayIdentityFields(identities)); } /** * Compares two documents and returns the difference */ public Difference compareNodes(BaseType node1, BaseType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { return compareNodes(new ArrayList(), node1, new ArrayList(), node2); } public Difference compareNodes(List field1, BaseType node1, List field2, BaseType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { if (isValue(node1) && isValue(node2)) { if (!equals(asValue(node1), asValue(node2))) { return new Difference(new Modification(field1, node1, field2, node2)); } } else if (isArray(node1) && isArray(node2)) { return compareArrays(field1, asArray(node1), field2, asArray(node2)); } else if (isObject(node1) && isObject(node2)) { return compareObjects(field1, asObject(node1), field2, asObject(node2)); } else if (!(isNull(node1) && isNull(node2))) { return new Difference<>(new Modification(field1, node1, field2, node2)); } return new Difference<>(1); } /** * Compares two object nodes recursively and returns the differences */ public Difference compareObjects(List field1, ObjectType node1, List field2, ObjectType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { Difference ret = new Difference<>(); // Field by field comparison of obj1 to obj2. 
for (Iterator> fields = getFields(node1); fields.hasNext();) { Map.Entry field = fields.next(); String fieldName = field.getKey(); field1.add(fieldName); BaseType value1 = field.getValue(); if (hasField(node2, fieldName)) { // If both obj1 and obj2 have the same field, compare recursively field2.add(fieldName); BaseType value2 = getField(node2, fieldName); ret.add(compareNodes(field1, value1, field2, value2)); pop(field2); } else { // obj1.field1 exists, obj2.field1 does not, so it is removed ret.add(new Removal(field1, value1)); } pop(field1); } // Now compare any new nodes added to obj2 for (Iterator> fields = getFields(node2); fields.hasNext();) { Map.Entry field = fields.next(); String fieldName = field.getKey(); if (!hasField(node1, fieldName)) { field2.add(fieldName); ret.add(new Addition(field2, field.getValue())); pop(field2); } } return ret; } public IdentityExtractor getArrayIdentityExtractor(String arrayField) { return getArrayIdentityExtractor(AbstractFieldFilter.parse(arrayField)); } public IdentityExtractor getArrayIdentityExtractor(List arrayField) { List p = new ArrayList<>(); int n = arrayField.size(); for (int i = 0; i < n; i++) { String s = arrayField.get(i); if ("*".equals(s) || isIndex(s)) { p.add("*"); } else { p.add(s); } } ArrayIdentityFields fields = arrayIdentities.get(JsonDiff.toString(p)); if (fields != null) { return getArrayIdentityExtractorImpl(fields); } else { return null; } } private boolean isIndex(String s) { try { Integer.valueOf(s); return true; } catch (Exception e) { return false; } } public Difference compareArrays(List field1, ArrayType node1, List field2, ArrayType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { IdentityExtractor ext = getArrayIdentityExtractor(field1); if (ext == null) { return compareArraysNoId(field1, node1, field2, node2); } else { return compareArraysWithId(field1, node1, field2, node2, ext); } } /** * Computes difference between arrays whose elements can be identitied by a * unique 
identifier */ public Difference compareArraysWithId(List field1, ArrayType node1, List field2, ArrayType node2, IdentityExtractor idex) throws InvalidArrayIdentity, DuplicateArrayIdentity { Difference ret = new Difference<>(); // Build a map of identity -> index for both arrays final Map identities1 = getIdentityMap(field1, node1, idex); final Map identities2 = getIdentityMap(field2, node2, idex); // Iterate all elements of array 1 for (Map.Entry entry1 : identities1.entrySet()) { // Append index to the field name field1.add(Integer.toString(entry1.getValue())); // If array2 doesn't have an element with the same ID, this is a deletion Integer index2 = identities2.get(entry1.getKey()); if (index2 == null) { ret.add(new Removal(field1, getElement(node1, entry1.getValue()))); } else { field2.add(Integer.toString(index2)); // array2 has the same element // If it is at a different index, this is a move if (index2 != entry1.getValue()) { ret.add(new Move(field1, field2, getElement(node1, entry1.getValue()))); } // Recursively compare contents to get detailed diff ret.add(compareNodes(field1, getElement(node1, entry1.getValue()), field2, getElement(node2, index2))); pop(field2); } pop(field1); } // Now check elements of array 2 that are not in array 1 for (Map.Entry entry2 : identities2.entrySet()) { if (!identities1.containsKey(entry2.getKey())) { // entry2 is not in array 1: addition field2.add(Integer.toString(entry2.getValue())); ret.add(new Addition(field2, getElement(node2, entry2.getValue()))); pop(field2); } } return ret; } private static class Pair { private final int i1, i2; public int hashCode() { return i1 * 1001 + i2; } public boolean equals(Object o) { try { return ((Pair) o).i1 == i1 && ((Pair) o).i2 == i2; } catch (Exception e) { return false; } } public Pair(int i1, int i2) { this.i1 = i1; this.i2 = i2; } } /** * Computes difference between arrays by comparing every element recursively * and trying to find the closest match */ public Difference 
compareArraysNoId(List field1, ArrayType node1, List field2, ArrayType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { Difference ret = new Difference<>(); IndexAssoc assoc = new IndexAssoc(size(node1), size(node2)); HashSet comparedPairs = new HashSet<>(); // First associate exact matches // We loop through the unassociated elements of node1, and node2 // If the nodes are equal, we associate them // if they are not, we note the distance between the two, so later // we don't need to re-compare them for (assoc.start1(); assoc.hasNext1();) { int index1 = assoc.next1(); BaseType element1 = getElement(node1, index1); field1.add(Integer.toString(index1)); for (assoc.start2(); assoc.hasNext2();) { int index2 = assoc.next2(); BaseType element2 = getElement(node2, index2); field2.add(Integer.toString(index2)); comparedPairs.add(new Pair(index1, index2)); Difference diff = compareNodes(field1, element1, field2, element2); if (diff.same()) { assoc.associate(index1, index2); pop(field2); break; } else { assoc.recordDistance(index1, index2, diff); } pop(field2); } pop(field1); } // Here, we associated all exact matching nodes // All remaining nodes need to be compared to each other // First compare all node1 elements to node2 elements for (assoc.start1(); assoc.hasNext1();) { int index1 = assoc.next1(); BaseType element1 = getElement(node1, index1); field1.add(Integer.toString(index1)); for (assoc.start2(); assoc.hasNext2();) { int index2 = assoc.next2(); BaseType element2 = getElement(node2, index2); field2.add(Integer.toString(index2)); Pair p = new Pair(index1, index2); // Do we have a distance recorded for these nodes? 
if (comparedPairs.contains(p)) { // No distance: compare the nodes Difference diff = compareNodes(field1, element1, field2, element2); assoc.recordDistance(index1, index2, diff); comparedPairs.add(p); } pop(field2); } IxDiff ixdiff = assoc.getMin(index1); // If an object has changed more that 0.5 (more than half // of its fields are changed), then it is not a match if (ixdiff == null || ixdiff.change > 0.5) { // No matching node for node1 ret.add(new Removal(field1, element1)); assoc.remove1(index1); } else { // Matching node assoc.associate(index1, ixdiff.index2); ret.add(ixdiff.diff); } pop(field1); } // Anything remaining on node2 are nodes that are added for (assoc.start2(); assoc.hasNext2();) { int index = assoc.next2(); BaseType element2 = getElement(node2, index); field2.add(Integer.toString(index)); ret.add(new Addition(field2, element2)); pop(field2); } // Look at associations for moved nodes for (Map.Entry entry : assoc.assoc.entrySet()) { if (entry.getKey() != entry.getValue()) { field1.add(Integer.toString(entry.getKey())); field2.add(Integer.toString(entry.getValue())); BaseType node = getElement(node1, entry.getKey()); ret.add(new Move(field1, field2, node)); pop(field2); pop(field1); } } return ret; } /** * Keeps the distance between two array indexes. 
*/ private static class IxDiff { private Difference diff; private double change; private int index1, index2; public IxDiff(Difference diff, double change, int index1, int index2) { this.diff = diff; this.change = change; this.index1 = index1; this.index2 = index2; } } /** * Keeps associations between array indexes */ private static class IndexAssoc { private final ArrayList ix1 = new ArrayList<>(); private final ArrayList ix2 = new ArrayList<>(); private int itr1; private int itr2; private int last1, last2; private final Map assoc = new HashMap<>(); /** * Keeps the IxDiff with the minimum change between index1 and index2, * keyed on index1 */ private final Map minimums1 = new HashMap<>(); /** * Construct with two arrays of size1 and size2 */ public IndexAssoc(int size1, int size2) { for (int i = 0; i < size1; i++) { ix1.add(i); } for (int i = 0; i < size2; i++) { ix2.add(i); } } /** * Start iterating the unassociated indexes of the first array */ public void start1() { itr1 = -1; } /** * Returns true if the first array has more unassociated indexes */ public boolean hasNext1() { return (itr1 + 1) < ix1.size(); } /** * Returns the current unassociated index of the first array, moves to * the next unassociated index */ public int next1() { itr1++; return last1 = ix1.get(itr1); } public void remove1(int index) { int l1 = ix1.indexOf(index); if (l1 >= 0) { ix1.remove(l1); if (itr1 >= l1) { itr1--; } } } /** * Start iterating the unassociated indexes of the second array */ public void start2() { itr2 = -1; } /** * Returns true if the second array has more unassociated indexes */ public boolean hasNext2() { return (itr2 + 1) < ix2.size(); } /** * Returns the current unassociated index of the second array, moves to * the next unassociated index */ public int next2() { itr2++; return last2 = ix2.get(itr2); } public void remove2(int index) { int l2 = ix2.indexOf(index); if (l2 >= 0) { ix2.remove(l2); if (itr2 >= l2) { itr2--; } } } public void associate(int index1, int 
index2) { remove1(index1); remove2(index2); assoc.put(index1, index2); } /** * Records the amount of difference between the nodes last returned by * next(). Also stores the minumum amount of difference for node1 */ public void recordDistance(int index1, int index2, Difference diff) { double change = diff.getChangeAmount(); IxDiff m = minimums1.get(index1); if (m == null || m.change > change) { minimums1.put(index1, m = new IxDiff(diff, change, index1, index2)); } } public IxDiff getMin(int index1) { return minimums1.get(index1); } } private Map getIdentityMap(List field, ArrayType array, IdentityExtractor idex) throws InvalidArrayIdentity, DuplicateArrayIdentity { final int size = size(array); final Map identities = new HashMap<>(size); // Fill up identities into identity maps for (int i = 0; i < size; i++) { Object id = idex.getIdentity(getElement(array, i)); if (id == null) { throw new InvalidArrayIdentity(JsonDiff.toString(field) + "." + i); } if (identities.put(id, i) != null) { throw new DuplicateArrayIdentity(JsonDiff.toString(field) + "." + i); } } return identities; } private static void pop(List l) { l.remove(l.size() - 1); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy